// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * Common eBPF ELF object loading operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
 * Copyright (C) 2015 Huawei Inc.
 * Copyright (C) 2017 Nicira, Inc.
 * Copyright (C) 2019 Isovalent, Inc.
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
#include <linux/bpf_perf_event.h>
#include <linux/ring_buffer.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>

#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"
#include "bpf_gen_internal.h"
#include "zip.h"

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC 0xcafe4a11
#endif

#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"

#define BPF_INSN_SZ (sizeof(struct bpf_insn))

/* vsprintf() in __base_pr() uses nonliteral format string. It may break
 * compilation if user enables corresponding warning. Disable it explicitly.
 */
#pragma GCC diagnostic ignored "-Wformat-nonliteral"

#define __printf(a, b) __attribute__((format(printf, a, b)))

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
static int map_set_def_max_entries(struct bpf_map *map);

static const char * const attach_type_name[] = {
	[BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress",
	[BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress",
	[BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create",
	[BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release",
	[BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops",
	[BPF_CGROUP_DEVICE] = "cgroup_device",
	[BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind",
	[BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind",
	[BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect",
	[BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect",
	[BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect",
	[BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind",
	[BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind",
	[BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername",
	[BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername",
	[BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername",
	[BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname",
	[BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname",
	[BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname",
	[BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg",
	[BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg",
	[BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg",
	[BPF_CGROUP_SYSCTL] = "cgroup_sysctl",
	[BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg",
	[BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg",
	[BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg",
	[BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt",
	[BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt",
	[BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
	[BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
	[BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
	[BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
	[BPF_LIRC_MODE2] = "lirc_mode2",
	[BPF_FLOW_DISSECTOR] = "flow_dissector",
	[BPF_TRACE_RAW_TP] = "trace_raw_tp",
	[BPF_TRACE_FENTRY] = "trace_fentry",
	[BPF_TRACE_FEXIT] = "trace_fexit",
	[BPF_MODIFY_RETURN] = "modify_return",
	[BPF_LSM_MAC] = "lsm_mac",
	[BPF_LSM_CGROUP] = "lsm_cgroup",
	[BPF_SK_LOOKUP] = "sk_lookup",
	[BPF_TRACE_ITER] = "trace_iter",
	[BPF_XDP_DEVMAP] = "xdp_devmap",
	[BPF_XDP_CPUMAP] = "xdp_cpumap",
	[BPF_XDP] = "xdp",
	[BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select",
	[BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate",
	[BPF_PERF_EVENT] = "perf_event",
	[BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
	[BPF_STRUCT_OPS] = "struct_ops",
	[BPF_NETFILTER] = "netfilter",
	[BPF_TCX_INGRESS] = "tcx_ingress",
	[BPF_TCX_EGRESS] = "tcx_egress",
	[BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi",
	[BPF_NETKIT_PRIMARY] = "netkit_primary",
	[BPF_NETKIT_PEER] = "netkit_peer",
	[BPF_TRACE_KPROBE_SESSION] = "trace_kprobe_session",
};

static const char * const link_type_name[] = {
	[BPF_LINK_TYPE_UNSPEC] = "unspec",
	[BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
	[BPF_LINK_TYPE_TRACING] = "tracing",
	[BPF_LINK_TYPE_CGROUP] = "cgroup",
	[BPF_LINK_TYPE_ITER] = "iter",
	[BPF_LINK_TYPE_NETNS] = "netns",
	[BPF_LINK_TYPE_XDP] = "xdp",
	[BPF_LINK_TYPE_PERF_EVENT] = "perf_event",
	[BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
	[BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops",
	[BPF_LINK_TYPE_NETFILTER] = "netfilter",
	[BPF_LINK_TYPE_TCX] = "tcx",
	[BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi",
	[BPF_LINK_TYPE_NETKIT] = "netkit",
	[BPF_LINK_TYPE_SOCKMAP] = "sockmap",
};

static const char * const map_type_name[] = {
	[BPF_MAP_TYPE_UNSPEC] = "unspec",
	[BPF_MAP_TYPE_HASH] = "hash",
	[BPF_MAP_TYPE_ARRAY] = "array",
	[BPF_MAP_TYPE_PROG_ARRAY] = "prog_array",
	[BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array",
	[BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash",
	[BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array",
	[BPF_MAP_TYPE_STACK_TRACE] = "stack_trace",
	[BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array",
	[BPF_MAP_TYPE_LRU_HASH] = "lru_hash",
	[BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash",
	[BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie",
	[BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
	[BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
	[BPF_MAP_TYPE_DEVMAP] = "devmap",
	[BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash",
	[BPF_MAP_TYPE_SOCKMAP] = "sockmap",
	[BPF_MAP_TYPE_CPUMAP] = "cpumap",
	[BPF_MAP_TYPE_XSKMAP] = "xskmap",
	[BPF_MAP_TYPE_SOCKHASH] = "sockhash",
	[BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
	[BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
	[BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage",
	[BPF_MAP_TYPE_QUEUE] = "queue",
	[BPF_MAP_TYPE_STACK] = "stack",
	[BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
	[BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
	[BPF_MAP_TYPE_RINGBUF] = "ringbuf",
	[BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
	[BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
	[BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
	[BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
	[BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage",
	[BPF_MAP_TYPE_ARENA] = "arena",
};

static const char * const prog_type_name[] = {
	[BPF_PROG_TYPE_UNSPEC] = "unspec",
	[BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
	[BPF_PROG_TYPE_KPROBE] = "kprobe",
	[BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
	[BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
	[BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
	[BPF_PROG_TYPE_XDP] = "xdp",
	[BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
	[BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
	[BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
	[BPF_PROG_TYPE_LWT_IN] = "lwt_in",
	[BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
	[BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
	[BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
	[BPF_PROG_TYPE_SK_SKB] = "sk_skb",
	[BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
	[BPF_PROG_TYPE_SK_MSG] = "sk_msg",
	[BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
	[BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
	[BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
	[BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
	[BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
	[BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
	[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
	[BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
	[BPF_PROG_TYPE_TRACING] = "tracing",
	[BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
	[BPF_PROG_TYPE_EXT] = "ext",
	[BPF_PROG_TYPE_LSM] = "lsm",
	[BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
"syscall", 226 [BPF_PROG_TYPE_NETFILTER] = "netfilter", 227 }; 228 229 static int __base_pr(enum libbpf_print_level level, const char *format, 230 va_list args) 231 { 232 const char *env_var = "LIBBPF_LOG_LEVEL"; 233 static enum libbpf_print_level min_level = LIBBPF_INFO; 234 static bool initialized; 235 236 if (!initialized) { 237 char *verbosity; 238 239 initialized = true; 240 verbosity = getenv(env_var); 241 if (verbosity) { 242 if (strcasecmp(verbosity, "warn") == 0) 243 min_level = LIBBPF_WARN; 244 else if (strcasecmp(verbosity, "debug") == 0) 245 min_level = LIBBPF_DEBUG; 246 else if (strcasecmp(verbosity, "info") == 0) 247 min_level = LIBBPF_INFO; 248 else 249 fprintf(stderr, "libbpf: unrecognized '%s' envvar value: '%s', should be one of 'warn', 'debug', or 'info'.\n", 250 env_var, verbosity); 251 } 252 } 253 254 /* if too verbose, skip logging */ 255 if (level > min_level) 256 return 0; 257 258 return vfprintf(stderr, format, args); 259 } 260 261 static libbpf_print_fn_t __libbpf_pr = __base_pr; 262 263 libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn) 264 { 265 libbpf_print_fn_t old_print_fn; 266 267 old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED); 268 269 return old_print_fn; 270 } 271 272 __printf(2, 3) 273 void libbpf_print(enum libbpf_print_level level, const char *format, ...) 274 { 275 va_list args; 276 int old_errno; 277 libbpf_print_fn_t print_fn; 278 279 print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED); 280 if (!print_fn) 281 return; 282 283 old_errno = errno; 284 285 va_start(args, format); 286 __libbpf_pr(level, format, args); 287 va_end(args); 288 289 errno = old_errno; 290 } 291 292 static void pr_perm_msg(int err) 293 { 294 struct rlimit limit; 295 char buf[100]; 296 297 if (err != -EPERM || geteuid() != 0) 298 return; 299 300 err = getrlimit(RLIMIT_MEMLOCK, &limit); 301 if (err) 302 return; 303 304 if (limit.rlim_cur == RLIM_INFINITY) 305 return; 306 307 if (limit.rlim_cur < 1024) 308 snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur); 309 else if (limit.rlim_cur < 1024*1024) 310 snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024); 311 else 312 snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024)); 313 314 pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n", 315 buf); 316 } 317 318 #define STRERR_BUFSIZE 128 319 320 /* Copied from tools/perf/util/util.h */ 321 #ifndef zfree 322 # define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) 323 #endif 324 325 #ifndef zclose 326 # define zclose(fd) ({ \ 327 int ___err = 0; \ 328 if ((fd) >= 0) \ 329 ___err = close((fd)); \ 330 fd = -1; \ 331 ___err; }) 332 #endif 333 334 static inline __u64 ptr_to_u64(const void *ptr) 335 { 336 return (__u64) (unsigned long) ptr; 337 } 338 339 int libbpf_set_strict_mode(enum libbpf_strict_mode mode) 340 { 341 /* as of v1.0 libbpf_set_strict_mode() is a no-op */ 342 return 0; 343 } 344 345 __u32 libbpf_major_version(void) 346 { 347 return LIBBPF_MAJOR_VERSION; 348 } 349 350 __u32 libbpf_minor_version(void) 351 { 352 return LIBBPF_MINOR_VERSION; 353 } 354 355 const char *libbpf_version_string(void) 356 { 357 #define __S(X) #X 358 #define _S(X) __S(X) 359 return "v" _S(LIBBPF_MAJOR_VERSION) "." 

enum reloc_type {
	RELO_LD64,
	RELO_CALL,
	RELO_DATA,
	RELO_EXTERN_LD64,
	RELO_EXTERN_CALL,
	RELO_SUBPROG_ADDR,
	RELO_CORE,
};

struct reloc_desc {
	enum reloc_type type;
	int insn_idx;
	union {
		const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
		struct {
			int map_idx;
			int sym_off;
			int ext_idx;
		};
	};
};

/* stored as sec_def->cookie for all libbpf-supported SEC()s */
enum sec_def_flags {
	SEC_NONE = 0,
	/* expected_attach_type is optional, if kernel doesn't support that */
	SEC_EXP_ATTACH_OPT = 1,
	/* legacy, only used by libbpf_get_type_names() and
	 * libbpf_attach_type_by_name(), not used by libbpf itself at all.
	 * This used to be associated with cgroup (and few other) BPF programs
	 * that were attachable through BPF_PROG_ATTACH command. Pretty
	 * meaningless nowadays, though.
	 */
	SEC_ATTACHABLE = 2,
	SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
	/* attachment target is specified through BTF ID in either kernel or
	 * other BPF program's BTF object
	 */
	SEC_ATTACH_BTF = 4,
	/* BPF program type allows sleeping/blocking in kernel */
	SEC_SLEEPABLE = 8,
	/* BPF program supports non-linear XDP buffer */
	SEC_XDP_FRAGS = 16,
	/* Setup proper attach type for usdt probes. */
	SEC_USDT = 32,
};

struct bpf_sec_def {
	char *sec;
	enum bpf_prog_type prog_type;
	enum bpf_attach_type expected_attach_type;
	long cookie;
	int handler_id;

	libbpf_prog_setup_fn_t prog_setup_fn;
	libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
	libbpf_prog_attach_fn_t prog_attach_fn;
};

/*
 * bpf_prog should be a better name but it has been used in
 * linux/filter.h.
 */
struct bpf_program {
	char *name;
	char *sec_name;
	size_t sec_idx;
	const struct bpf_sec_def *sec_def;
	/* this program's instruction offset (in number of instructions)
	 * within its containing ELF section
	 */
	size_t sec_insn_off;
	/* number of original instructions in ELF section belonging to this
	 * program, not taking into account subprogram instructions possibly
	 * appended later during relocation
	 */
	size_t sec_insn_cnt;
	/* Offset (in number of instructions) of the start of instructions
	 * belonging to this BPF program within its containing main BPF
	 * program. For the entry-point (main) BPF program, this is always
	 * zero. For a sub-program, this gets reset before each of the main
	 * BPF programs is processed and relocated and is used to determine
	 * whether the sub-program was already appended to the main program,
	 * and if yes, at which instruction offset.
	 */
	size_t sub_insn_off;

	/* instructions that belong to BPF program; insns[0] is located at
	 * sec_insn_off instruction within its ELF section in ELF file, so
	 * when mapping ELF file instruction index to the local instruction,
	 * one needs to subtract sec_insn_off; and vice versa.
	 */
	struct bpf_insn *insns;
	/* actual number of instructions in this BPF program's image; for
	 * entry-point BPF programs this includes the size of main program
	 * itself plus all the used sub-programs, appended at the end
	 */
	size_t insns_cnt;

	struct reloc_desc *reloc_desc;
	int nr_reloc;

	/* BPF verifier log settings */
	char *log_buf;
	size_t log_size;
	__u32 log_level;

	struct bpf_object *obj;

	int fd;
	bool autoload;
	bool autoattach;
	bool sym_global;
	bool mark_btf_static;
	enum bpf_prog_type type;
	enum bpf_attach_type expected_attach_type;
	int exception_cb_idx;

	int prog_ifindex;
	__u32 attach_btf_obj_fd;
	__u32 attach_btf_id;
	__u32 attach_prog_fd;

	void *func_info;
	__u32 func_info_rec_size;
	__u32 func_info_cnt;

	void *line_info;
	__u32 line_info_rec_size;
	__u32 line_info_cnt;
	__u32 prog_flags;
};
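
/* Illustrative example (assumed layout, not taken from this file) of how the
 * offsets above relate: if an ELF section holds main program A (8 insns at
 * section offset 0) and subprogram B (4 insns at section offset 8), then
 * A.sec_insn_off = 0, A.sec_insn_cnt = 8, B.sec_insn_off = 8, and after B is
 * appended during relocation A.insns_cnt = 12 while B.sub_insn_off = 8, i.e.
 * B starts at instruction #8 within A's final image.
 */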

struct bpf_struct_ops {
	const char *tname;
	const struct btf_type *type;
	struct bpf_program **progs;
	__u32 *kern_func_off;
	/* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
	void *data;
	/* e.g. struct bpf_struct_ops_tcp_congestion_ops in
	 * btf_vmlinux's format.
	 * struct bpf_struct_ops_tcp_congestion_ops {
	 *	[... some other kernel fields ...]
	 *	struct tcp_congestion_ops data;
	 * }
	 * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
	 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
	 * from "data".
	 */
	void *kern_vdata;
	__u32 type_id;
};

#define DATA_SEC ".data"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
#define STRUCT_OPS_LINK_SEC ".struct_ops.link"
#define ARENA_SEC ".addr_space.1"

enum libbpf_map_type {
	LIBBPF_MAP_UNSPEC,
	LIBBPF_MAP_DATA,
	LIBBPF_MAP_BSS,
	LIBBPF_MAP_RODATA,
	LIBBPF_MAP_KCONFIG,
};

struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
	unsigned int map_flags;
};

struct bpf_map {
	struct bpf_object *obj;
	char *name;
	/* real_name is defined for special internal maps (.rodata*,
	 * .data*, .bss, .kconfig) and preserves their original ELF section
	 * name. This is important to be able to find corresponding BTF
	 * DATASEC information.
	 */
	char *real_name;
	int fd;
	int sec_idx;
	size_t sec_offset;
	int map_ifindex;
	int inner_map_fd;
	struct bpf_map_def def;
	__u32 numa_node;
	__u32 btf_var_idx;
	int mod_btf_fd;
	__u32 btf_key_type_id;
	__u32 btf_value_type_id;
	__u32 btf_vmlinux_value_type_id;
	enum libbpf_map_type libbpf_type;
	void *mmaped;
	struct bpf_struct_ops *st_ops;
	struct bpf_map *inner_map;
	void **init_slots;
	int init_slots_sz;
	char *pin_path;
	bool pinned;
	bool reused;
	bool autocreate;
	bool autoattach;
	__u64 map_extra;
};

enum extern_type {
	EXT_UNKNOWN,
	EXT_KCFG,
	EXT_KSYM,
};

enum kcfg_type {
	KCFG_UNKNOWN,
	KCFG_CHAR,
	KCFG_BOOL,
	KCFG_INT,
	KCFG_TRISTATE,
	KCFG_CHAR_ARR,
};

struct extern_desc {
	enum extern_type type;
	int sym_idx;
	int btf_id;
	int sec_btf_id;
	const char *name;
	char *essent_name;
	bool is_set;
	bool is_weak;
	union {
		struct {
			enum kcfg_type type;
			int sz;
			int align;
			int data_off;
			bool is_signed;
		} kcfg;
		struct {
			unsigned long long addr;

			/* target btf_id of the corresponding kernel var. */
			int kernel_btf_obj_fd;
			int kernel_btf_id;

			/* local btf_id of the ksym extern's type. */
			__u32 type_id;
			/* BTF fd index to be patched in for insn->off, this is
			 * 0 for vmlinux BTF, index in obj->fd_array for module
			 * BTF
			 */
			__s16 btf_fd_idx;
		} ksym;
	};
};

struct module_btf {
	struct btf *btf;
	char *name;
	__u32 id;
	int fd;
	int fd_array_idx;
};

enum sec_type {
	SEC_UNUSED = 0,
	SEC_RELO,
	SEC_BSS,
	SEC_DATA,
	SEC_RODATA,
	SEC_ST_OPS,
};

struct elf_sec_desc {
	enum sec_type sec_type;
	Elf64_Shdr *shdr;
	Elf_Data *data;
};

struct elf_state {
	int fd;
	const void *obj_buf;
	size_t obj_buf_sz;
	Elf *elf;
	Elf64_Ehdr *ehdr;
	Elf_Data *symbols;
	Elf_Data *arena_data;
	size_t shstrndx; /* section index for section name strings */
	size_t strtabidx;
	struct elf_sec_desc *secs;
	size_t sec_cnt;
	int btf_maps_shndx;
	__u32 btf_maps_sec_btf_id;
	int text_shndx;
	int symbols_shndx;
	bool has_st_ops;
	int arena_data_shndx;
};

struct usdt_manager;

struct bpf_object {
	char name[BPF_OBJ_NAME_LEN];
	char license[64];
	__u32 kern_version;

	struct bpf_program *programs;
	size_t nr_programs;
	struct bpf_map *maps;
	size_t nr_maps;
	size_t maps_cap;

	char *kconfig;
	struct extern_desc *externs;
	int nr_extern;
	int kconfig_map_idx;

	bool loaded;
	bool has_subcalls;
	bool has_rodata;

	struct bpf_gen *gen_loader;

	/* Information when doing ELF related work. Only valid if efile.elf is not NULL */
	struct elf_state efile;

	struct btf *btf;
	struct btf_ext *btf_ext;

	/* Parse and load BTF vmlinux if any of the programs in the object need
	 * it at load time.
	 */
	struct btf *btf_vmlinux;
	/* Path to the custom BTF to be used for BPF CO-RE relocations as an
	 * override for vmlinux BTF.
	 */
	char *btf_custom_path;
	/* vmlinux BTF override for CO-RE relocations */
	struct btf *btf_vmlinux_override;
	/* Lazily initialized kernel module BTFs */
	struct module_btf *btf_modules;
	bool btf_modules_loaded;
	size_t btf_module_cnt;
	size_t btf_module_cap;

	/* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
	char *log_buf;
	size_t log_size;
	__u32 log_level;

	int *fd_array;
	size_t fd_array_cap;
	size_t fd_array_cnt;

	struct usdt_manager *usdt_man;

	struct bpf_map *arena_map;
	void *arena_data;
	size_t arena_data_sz;

	struct kern_feature_cache *feat_cache;
	char *token_path;
	int token_fd;

	char path[];
};

static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);

void bpf_program__unload(struct bpf_program *prog)
{
	if (!prog)
		return;

	zclose(prog->fd);

	zfree(&prog->func_info);
	zfree(&prog->line_info);
}

static void bpf_program__exit(struct bpf_program *prog)
{
	if (!prog)
		return;

	bpf_program__unload(prog);
	zfree(&prog->name);
	zfree(&prog->sec_name);
	zfree(&prog->insns);
	zfree(&prog->reloc_desc);

	prog->nr_reloc = 0;
	prog->insns_cnt = 0;
	prog->sec_idx = -1;
}

static bool insn_is_subprog_call(const struct bpf_insn *insn)
{
	return BPF_CLASS(insn->code) == BPF_JMP &&
	       BPF_OP(insn->code) == BPF_CALL &&
	       BPF_SRC(insn->code) == BPF_K &&
	       insn->src_reg == BPF_PSEUDO_CALL &&
	       insn->dst_reg == 0 &&
	       insn->off == 0;
}

static bool is_call_insn(const struct bpf_insn *insn)
{
	return insn->code == (BPF_JMP | BPF_CALL);
}

static bool insn_is_pseudo_func(struct bpf_insn *insn)
{
	return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}

static int
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
		      const char *name, size_t sec_idx, const char *sec_name,
		      size_t sec_off, void *insn_data, size_t insn_data_sz)
{
	if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
		pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
			sec_name, name, sec_off, insn_data_sz);
		return -EINVAL;
	}

	memset(prog, 0, sizeof(*prog));
	prog->obj = obj;

	prog->sec_idx = sec_idx;
	prog->sec_insn_off = sec_off / BPF_INSN_SZ;
	prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
	/* insns_cnt can later be increased by appending used subprograms */
	prog->insns_cnt = prog->sec_insn_cnt;

	prog->type = BPF_PROG_TYPE_UNSPEC;
	prog->fd = -1;
	prog->exception_cb_idx = -1;

	/* libbpf's convention for SEC("?abc...") is that it's just like
	 * SEC("abc...") but the corresponding bpf_program starts out with
	 * autoload set to false.
	 */
	if (sec_name[0] == '?') {
		prog->autoload = false;
		/* from now on forget there was ? in section name */
		sec_name++;
	} else {
		prog->autoload = true;
	}

	prog->autoattach = true;

	/* inherit object's log_level */
	prog->log_level = obj->log_level;

	prog->sec_name = strdup(sec_name);
	if (!prog->sec_name)
		goto errout;

	prog->name = strdup(name);
	if (!prog->name)
		goto errout;

	prog->insns = malloc(insn_data_sz);
	if (!prog->insns)
		goto errout;
	memcpy(prog->insns, insn_data, insn_data_sz);

	return 0;
errout:
	pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
	bpf_program__exit(prog);
	return -ENOMEM;
}
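
/* Example of the SEC("?...") convention handled above (BPF-side C, sketch
 * only; program and function names are made up):
 *
 *	SEC("?xdp")
 *	int xdp_prog(struct xdp_md *ctx) { return XDP_PASS; }
 *
 * is parsed exactly like SEC("xdp"), but the resulting bpf_program starts
 * with autoload == false; user space can opt it in before loading with
 * bpf_program__set_autoload(prog, true).
 */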

static int
bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
			 const char *sec_name, int sec_idx)
{
	Elf_Data *symbols = obj->efile.symbols;
	struct bpf_program *prog, *progs;
	void *data = sec_data->d_buf;
	size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
	int nr_progs, err, i;
	const char *name;
	Elf64_Sym *sym;

	progs = obj->programs;
	nr_progs = obj->nr_programs;
	nr_syms = symbols->d_size / sizeof(Elf64_Sym);

	for (i = 0; i < nr_syms; i++) {
		sym = elf_sym_by_idx(obj, i);

		if (sym->st_shndx != sec_idx)
			continue;
		if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
			continue;

		prog_sz = sym->st_size;
		sec_off = sym->st_value;

		name = elf_sym_str(obj, sym->st_name);
		if (!name) {
			pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
				sec_name, sec_off);
			return -LIBBPF_ERRNO__FORMAT;
		}

		if (sec_off + prog_sz > sec_sz) {
			pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
				sec_name, sec_off);
			return -LIBBPF_ERRNO__FORMAT;
		}

		if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
			pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
			return -ENOTSUP;
		}

		pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
			 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);

		progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
		if (!progs) {
			/*
			 * In this case the original obj->programs
			 * is still valid, so no special treatment is
			 * needed in bpf_close_object().
			 */
			pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
				sec_name, name);
			return -ENOMEM;
		}
		obj->programs = progs;

		prog = &progs[nr_progs];

		err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
					    sec_off, data + sec_off, prog_sz);
		if (err)
			return err;

		if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL)
			prog->sym_global = true;

		/* if function is a global/weak symbol, but has restricted
		 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
		 * as static to enable more permissive BPF verification mode
		 * with more outside context available to BPF verifier
		 */
		if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
		    || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
			prog->mark_btf_static = true;

		nr_progs++;
		obj->nr_programs = nr_progs;
	}

	return 0;
}

static const struct btf_member *
find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
{
	struct btf_member *m;
	int i;

	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
		if (btf_member_bit_offset(t, i) == bit_offset)
			return m;
	}

	return NULL;
}

static const struct btf_member *
find_member_by_name(const struct btf *btf, const struct btf_type *t,
		    const char *name)
{
	struct btf_member *m;
	int i;

	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
		if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
			return m;
	}

	return NULL;
}

static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
			    __u16 kind, struct btf **res_btf,
			    struct module_btf **res_mod_btf);

#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
				   const char *name, __u32 kind);

static int
find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
			   struct module_btf **mod_btf,
			   const struct btf_type **type, __u32 *type_id,
			   const struct btf_type **vtype, __u32 *vtype_id,
			   const struct btf_member **data_member)
{
	const struct btf_type *kern_type, *kern_vtype;
	const struct btf_member *kern_data_member;
	struct btf *btf;
	__s32 kern_vtype_id, kern_type_id;
	char tname[256];
	__u32 i;

	snprintf(tname, sizeof(tname), "%.*s",
		 (int)bpf_core_essential_name_len(tname_raw), tname_raw);

	kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT,
					&btf, mod_btf);
	if (kern_type_id < 0) {
		pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
			tname);
		return kern_type_id;
	}
	kern_type = btf__type_by_id(btf, kern_type_id);

	/* Find the corresponding "map_value" type that will be used
	 * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example,
	 * find "struct bpf_struct_ops_tcp_congestion_ops" from the
	 * btf_vmlinux.
	 */
	kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
						tname, BTF_KIND_STRUCT);
	if (kern_vtype_id < 0) {
		pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
			STRUCT_OPS_VALUE_PREFIX, tname);
		return kern_vtype_id;
	}
	kern_vtype = btf__type_by_id(btf, kern_vtype_id);

	/* Find "struct tcp_congestion_ops" from
	 * struct bpf_struct_ops_tcp_congestion_ops {
	 *	[ ... ]
	 *	struct tcp_congestion_ops data;
	 * }
	 */
	kern_data_member = btf_members(kern_vtype);
	for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
		if (kern_data_member->type == kern_type_id)
			break;
	}
	if (i == btf_vlen(kern_vtype)) {
		pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
			tname, STRUCT_OPS_VALUE_PREFIX, tname);
		return -EINVAL;
	}

	*type = kern_type;
	*type_id = kern_type_id;
	*vtype = kern_vtype;
	*vtype_id = kern_vtype_id;
	*data_member = kern_data_member;

	return 0;
}

static bool bpf_map__is_struct_ops(const struct bpf_map *map)
{
	return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
}

static bool is_valid_st_ops_program(struct bpf_object *obj,
				    const struct bpf_program *prog)
{
	int i;

	for (i = 0; i < obj->nr_programs; i++) {
		if (&obj->programs[i] == prog)
			return prog->type == BPF_PROG_TYPE_STRUCT_OPS;
	}

	return false;
}

/* For each struct_ops program P, referenced from some struct_ops map M,
 * enable P.autoload if there are Ms for which M.autocreate is true,
 * disable P.autoload if for all Ms M.autocreate is false.
 * Don't change P.autoload for programs that are not referenced from any maps.
 */
static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj)
{
	struct bpf_program *prog, *slot_prog;
	struct bpf_map *map;
	int i, j, k, vlen;

	for (i = 0; i < obj->nr_programs; ++i) {
		int should_load = false;
		int use_cnt = 0;

		prog = &obj->programs[i];
		if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
			continue;

		for (j = 0; j < obj->nr_maps; ++j) {
			map = &obj->maps[j];
			if (!bpf_map__is_struct_ops(map))
				continue;

			vlen = btf_vlen(map->st_ops->type);
			for (k = 0; k < vlen; ++k) {
				slot_prog = map->st_ops->progs[k];
				if (prog != slot_prog)
					continue;

				use_cnt++;
				if (map->autocreate)
					should_load = true;
			}
		}
		if (use_cnt)
			prog->autoload = should_load;
	}

	return 0;
}

/* Init the map's fields that depend on kern_btf */
static int bpf_map__init_kern_struct_ops(struct bpf_map *map)
{
	const struct btf_member *member, *kern_member, *kern_data_member;
	const struct btf_type *type, *kern_type, *kern_vtype;
	__u32 i, kern_type_id, kern_vtype_id, kern_data_off;
	struct bpf_object *obj = map->obj;
	const struct btf *btf = obj->btf;
	struct bpf_struct_ops *st_ops;
	const struct btf *kern_btf;
	struct module_btf *mod_btf;
	void *data, *kern_data;
	const char *tname;
	int err;

	st_ops = map->st_ops;
	type = st_ops->type;
	tname = st_ops->tname;
	err = find_struct_ops_kern_types(obj, tname, &mod_btf,
					 &kern_type, &kern_type_id,
					 &kern_vtype, &kern_vtype_id,
					 &kern_data_member);
	if (err)
		return err;

	kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux;

	pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
		 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);

	map->mod_btf_fd = mod_btf ? mod_btf->fd : -1;
	map->def.value_size = kern_vtype->size;
	map->btf_vmlinux_value_type_id = kern_vtype_id;

	st_ops->kern_vdata = calloc(1, kern_vtype->size);
	if (!st_ops->kern_vdata)
		return -ENOMEM;

	data = st_ops->data;
	kern_data_off = kern_data_member->offset / 8;
	kern_data = st_ops->kern_vdata + kern_data_off;

	member = btf_members(type);
	for (i = 0; i < btf_vlen(type); i++, member++) {
		const struct btf_type *mtype, *kern_mtype;
		__u32 mtype_id, kern_mtype_id;
		void *mdata, *kern_mdata;
		struct bpf_program *prog;
		__s64 msize, kern_msize;
		__u32 moff, kern_moff;
		__u32 kern_member_idx;
		const char *mname;

		mname = btf__name_by_offset(btf, member->name_off);
		moff = member->offset / 8;
		mdata = data + moff;
		msize = btf__resolve_size(btf, member->type);
		if (msize < 0) {
			pr_warn("struct_ops init_kern %s: failed to resolve the size of member %s\n",
				map->name, mname);
			return msize;
		}

		kern_member = find_member_by_name(kern_btf, kern_type, mname);
		if (!kern_member) {
			if (!libbpf_is_mem_zeroed(mdata, msize)) {
				pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
					map->name, mname);
				return -ENOTSUP;
			}

			if (st_ops->progs[i]) {
				/* If we had declaratively set struct_ops callback, we need to
				 * force its autoload to false, because it doesn't have
				 * a chance of succeeding from POV of the current struct_ops map.
				 * If this program is still referenced somewhere else, though,
				 * then bpf_object_adjust_struct_ops_autoload() will update its
				 * autoload accordingly.
				 */
				st_ops->progs[i]->autoload = false;
				st_ops->progs[i] = NULL;
			}

			/* Skip all-zero/NULL fields if they are not present in the kernel BTF */
			pr_info("struct_ops %s: member %s not found in kernel, skipping it as it's set to zero\n",
				map->name, mname);
			continue;
		}

		kern_member_idx = kern_member - btf_members(kern_type);
		if (btf_member_bitfield_size(type, i) ||
		    btf_member_bitfield_size(kern_type, kern_member_idx)) {
			pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
				map->name, mname);
			return -ENOTSUP;
		}

		kern_moff = kern_member->offset / 8;
		kern_mdata = kern_data + kern_moff;

		mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
		kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
						    &kern_mtype_id);
		if (BTF_INFO_KIND(mtype->info) !=
		    BTF_INFO_KIND(kern_mtype->info)) {
			pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
				map->name, mname, BTF_INFO_KIND(mtype->info),
				BTF_INFO_KIND(kern_mtype->info));
			return -ENOTSUP;
		}

		if (btf_is_ptr(mtype)) {
			prog = *(void **)mdata;
			/* just like for !kern_member case above, reset declaratively
			 * set (at compile time) program's autoload to false,
			 * if user replaced it with another program or NULL
			 */
			if (st_ops->progs[i] && st_ops->progs[i] != prog)
				st_ops->progs[i]->autoload = false;

			/* Update the value from the shadow type */
			st_ops->progs[i] = prog;
			if (!prog)
				continue;

			if (!is_valid_st_ops_program(obj, prog)) {
				pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n",
					map->name, mname);
				return -ENOTSUP;
			}

			kern_mtype = skip_mods_and_typedefs(kern_btf,
							    kern_mtype->type,
							    &kern_mtype_id);

			/* mtype->type must be a func_proto which was
			 * guaranteed in bpf_object__collect_st_ops_relos(),
			 * so only check kern_mtype for func_proto here.
			 */
			if (!btf_is_func_proto(kern_mtype)) {
				pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
					map->name, mname);
				return -ENOTSUP;
			}

			if (mod_btf)
				prog->attach_btf_obj_fd = mod_btf->fd;

			/* if we haven't yet processed this BPF program, record proper
			 * attach_btf_id and member_idx
			 */
			if (!prog->attach_btf_id) {
				prog->attach_btf_id = kern_type_id;
				prog->expected_attach_type = kern_member_idx;
			}

			/* struct_ops BPF prog can be re-used between multiple
			 * .struct_ops & .struct_ops.link as long as it's the
			 * same struct_ops struct definition and the same
			 * function pointer field
			 */
			if (prog->attach_btf_id != kern_type_id) {
				pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n",
					map->name, mname, prog->name, prog->sec_name, prog->type,
					prog->attach_btf_id, kern_type_id);
				return -EINVAL;
			}
			if (prog->expected_attach_type != kern_member_idx) {
				pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n",
					map->name, mname, prog->name, prog->sec_name, prog->type,
					prog->expected_attach_type, kern_member_idx);
				return -EINVAL;
			}

			st_ops->kern_func_off[i] = kern_data_off + kern_moff;

			pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
				 map->name, mname, prog->name, moff,
				 kern_moff);

			continue;
		}

		kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
		if (kern_msize < 0 || msize != kern_msize) {
			pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
				map->name, mname, (ssize_t)msize,
				(ssize_t)kern_msize);
			return -ENOTSUP;
		}

		pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
			 map->name, mname, (unsigned int)msize,
			 moff, kern_moff);
		memcpy(kern_mdata, mdata, msize);
	}

	return 0;
}

static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
{
	struct bpf_map *map;
	size_t i;
	int err;

	for (i = 0; i < obj->nr_maps; i++) {
		map = &obj->maps[i];

		if (!bpf_map__is_struct_ops(map))
			continue;

		if (!map->autocreate)
			continue;

		err = bpf_map__init_kern_struct_ops(map);
		if (err)
			return err;
	}

	return 0;
}

static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
				int shndx, Elf_Data *data)
{
	const struct btf_type *type, *datasec;
	const struct btf_var_secinfo *vsi;
	struct bpf_struct_ops *st_ops;
	const char *tname, *var_name;
	__s32 type_id, datasec_id;
	const struct btf *btf;
	struct bpf_map *map;
	__u32 i;

	if (shndx == -1)
		return 0;

	btf = obj->btf;
	datasec_id = btf__find_by_name_kind(btf, sec_name,
					    BTF_KIND_DATASEC);
	if (datasec_id < 0) {
		pr_warn("struct_ops init: DATASEC %s not found\n",
			sec_name);
		return -EINVAL;
	}
	datasec = btf__type_by_id(btf, datasec_id);
	vsi = btf_var_secinfos(datasec);
	for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
		type = btf__type_by_id(obj->btf, vsi->type);
		var_name = btf__name_by_offset(obj->btf, type->name_off);

		type_id = btf__resolve_type(obj->btf, vsi->type);
		if (type_id < 0) {
			pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
				vsi->type, sec_name);
			return -EINVAL;
		}

		type = btf__type_by_id(obj->btf, type_id);
		tname = btf__name_by_offset(obj->btf, type->name_off);
		if (!tname[0]) {
			pr_warn("struct_ops init: anonymous type is not supported\n");
			return -ENOTSUP;
		}
		if (!btf_is_struct(type)) {
			pr_warn("struct_ops init: %s is not a struct\n", tname);
			return -EINVAL;
		}

		map = bpf_object__add_map(obj);
		if (IS_ERR(map))
			return PTR_ERR(map);

		map->sec_idx = shndx;
		map->sec_offset = vsi->offset;
		map->name = strdup(var_name);
		if (!map->name)
			return -ENOMEM;
		map->btf_value_type_id = type_id;

		/* Follow same convention as for programs autoload:
		 * SEC("?.struct_ops") means map is not created by default.
		 */
		if (sec_name[0] == '?') {
			map->autocreate = false;
			/* from now on forget there was ? in section name */
			sec_name++;
		}

		map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
		map->def.key_size = sizeof(int);
		map->def.value_size = type->size;
		map->def.max_entries = 1;
		map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0;
		map->autoattach = true;

		map->st_ops = calloc(1, sizeof(*map->st_ops));
		if (!map->st_ops)
			return -ENOMEM;
		st_ops = map->st_ops;
		st_ops->data = malloc(type->size);
		st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
		st_ops->kern_func_off = malloc(btf_vlen(type) *
					       sizeof(*st_ops->kern_func_off));
		if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
			return -ENOMEM;

		if (vsi->offset + type->size > data->d_size) {
			pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
				var_name, sec_name);
			return -EINVAL;
		}

		memcpy(st_ops->data,
		       data->d_buf + vsi->offset,
		       type->size);
		st_ops->tname = tname;
		st_ops->type = type;
		st_ops->type_id = type_id;

		pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
			 tname, type_id, var_name, vsi->offset);
	}

	return 0;
}

static int bpf_object_init_struct_ops(struct bpf_object *obj)
{
	const char *sec_name;
	int sec_idx, err;

	for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; ++sec_idx) {
		struct elf_sec_desc *desc = &obj->efile.secs[sec_idx];

		if (desc->sec_type != SEC_ST_OPS)
			continue;

		sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
		if (!sec_name)
			return -LIBBPF_ERRNO__FORMAT;

		err = init_struct_ops_maps(obj, sec_name, sec_idx, desc->data);
		if (err)
			return err;
	}

	return 0;
}
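
/* Sketch of the BPF-side input that init_struct_ops_maps() above consumes
 * (illustrative only; "dctcp" and its callbacks are example names): a global
 * variable placed in the ".struct_ops" or ".struct_ops.link" DATASEC, e.g.
 *
 *	SEC(".struct_ops.link")
 *	struct tcp_congestion_ops dctcp = {
 *		.init = (void *)dctcp_init,
 *		.name = "bpf_dctcp",
 *	};
 *
 * becomes one BPF_MAP_TYPE_STRUCT_OPS map: the variable name is the map
 * name, the struct type is the map's BTF value type, and BPF_F_LINK is set
 * only for the ".struct_ops.link" variant, matching the map_flags logic
 * above.
 */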

static struct bpf_object *bpf_object__new(const char *path,
					  const void *obj_buf,
					  size_t obj_buf_sz,
					  const char *obj_name)
{
	struct bpf_object *obj;
	char *end;

	obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
	if (!obj) {
		pr_warn("alloc memory failed for %s\n", path);
		return ERR_PTR(-ENOMEM);
	}

	strcpy(obj->path, path);
	if (obj_name) {
		libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
	} else {
		/* Using basename() GNU version which doesn't modify arg. */
		libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
		end = strchr(obj->name, '.');
		if (end)
			*end = 0;
	}

	obj->efile.fd = -1;
	/*
	 * Caller of this function should also call
	 * bpf_object__elf_finish() after data collection to return
	 * obj_buf to the user. If not, we would have to duplicate the
	 * buffer to avoid the user freeing it before ELF processing is
	 * finished.
	 */
	obj->efile.obj_buf = obj_buf;
	obj->efile.obj_buf_sz = obj_buf_sz;
	obj->efile.btf_maps_shndx = -1;
	obj->kconfig_map_idx = -1;

	obj->kern_version = get_kernel_version();
	obj->loaded = false;

	return obj;
}

static void bpf_object__elf_finish(struct bpf_object *obj)
{
	if (!obj->efile.elf)
		return;

	elf_end(obj->efile.elf);
	obj->efile.elf = NULL;
	obj->efile.symbols = NULL;
	obj->efile.arena_data = NULL;

	zfree(&obj->efile.secs);
	obj->efile.sec_cnt = 0;
	zclose(obj->efile.fd);
	obj->efile.obj_buf = NULL;
	obj->efile.obj_buf_sz = 0;
}

static int bpf_object__elf_init(struct bpf_object *obj)
{
	Elf64_Ehdr *ehdr;
	int err = 0;
	Elf *elf;

	if (obj->efile.elf) {
		pr_warn("elf: init internal error\n");
		return -LIBBPF_ERRNO__LIBELF;
	}

	if (obj->efile.obj_buf_sz > 0) {
		/* obj_buf should have been validated by bpf_object__open_mem(). */
		elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
	} else {
		obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
		if (obj->efile.fd < 0) {
			char errmsg[STRERR_BUFSIZE], *cp;

			err = -errno;
			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
			pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
			return err;
		}

		elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
	}

	if (!elf) {
		pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__LIBELF;
		goto errout;
	}

	obj->efile.elf = elf;

	if (elf_kind(elf) != ELF_K_ELF) {
		err = -LIBBPF_ERRNO__FORMAT;
		pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
		goto errout;
	}

	if (gelf_getclass(elf) != ELFCLASS64) {
		err = -LIBBPF_ERRNO__FORMAT;
		pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
		goto errout;
	}

	obj->efile.ehdr = ehdr = elf64_getehdr(elf);
	if (!obj->efile.ehdr) {
		pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
		pr_warn("elf: failed to get section names section index for %s: %s\n",
			obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	/* ELF is corrupted/truncated, avoid calling elf_strptr. */
	if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
		pr_warn("elf: failed to get section names strings from %s: %s\n",
			obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	/* Old LLVM set e_machine to EM_NONE */
	if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
		pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	return 0;
errout:
	bpf_object__elf_finish(obj);
	return err;
}

static int bpf_object__check_endianness(struct bpf_object *obj)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
		return 0;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
		return 0;
#else
# error "Unrecognized __BYTE_ORDER__"
#endif
	pr_warn("elf: endianness mismatch in %s.\n", obj->path);
	return -LIBBPF_ERRNO__ENDIAN;
}

static int
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
{
	if (!data) {
		pr_warn("invalid license section in %s\n", obj->path);
		return -LIBBPF_ERRNO__FORMAT;
	}
	/* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't
	 * go over allowed ELF data section buffer
	 */
	libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
	pr_debug("license of %s is %s\n", obj->path, obj->license);
	return 0;
}

static int
bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
{
	__u32 kver;

	if (!data || size != sizeof(kver)) {
		pr_warn("invalid kver section in %s\n", obj->path);
		return -LIBBPF_ERRNO__FORMAT;
	}
	memcpy(&kver, data, sizeof(kver));
	obj->kern_version = kver;
	pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
	return 0;
}

static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
{
	if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
	    type == BPF_MAP_TYPE_HASH_OF_MAPS)
		return true;
	return false;
}

static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
{
	Elf_Data *data;
	Elf_Scn *scn;

	if (!name)
		return -EINVAL;

	scn = elf_sec_by_name(obj, name);
	data = elf_sec_data(obj, scn);
	if (data) {
		*size = data->d_size;
		return 0; /* found it */
	}

	return -ENOENT;
}

static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name)
{
	Elf_Data *symbols = obj->efile.symbols;
	const char *sname;
	size_t si;

	for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
		Elf64_Sym *sym = elf_sym_by_idx(obj, si);

		if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
			continue;

		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
		    ELF64_ST_BIND(sym->st_info) != STB_WEAK)
			continue;

		sname = elf_sym_str(obj, sym->st_name);
		if (!sname) {
			pr_warn("failed to get sym name string for var %s\n", name);
			return ERR_PTR(-EIO);
		}
		if (strcmp(name, sname) == 0)
			return sym;
	}

	return ERR_PTR(-ENOENT);
}

/* Some versions of Android don't provide memfd_create() in their libc
 * implementation, so avoid complications and just go straight to Linux
 * syscall.
 */
static int sys_memfd_create(const char *name, unsigned flags)
{
	return syscall(__NR_memfd_create, name, flags);
}

#ifndef MFD_CLOEXEC
#define MFD_CLOEXEC 0x0001U
#endif

static int create_placeholder_fd(void)
{
	int fd;

	fd = ensure_good_fd(sys_memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC));
	if (fd < 0)
		return -errno;
	return fd;
}

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
{
	struct bpf_map *map;
	int err;

	err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
				sizeof(*obj->maps), obj->nr_maps + 1);
	if (err)
		return ERR_PTR(err);

	map = &obj->maps[obj->nr_maps++];
	map->obj = obj;
	/* Preallocate map FD without actually creating BPF map just yet.
	 * These map FD "placeholders" will be reused later without changing
	 * FD value when map is actually created in the kernel.
	 *
	 * This is useful to be able to perform BPF program relocations
	 * without having to create BPF maps before that step. This allows us
	 * to finalize and load BTF very late in BPF object's loading phase,
	 * right before BPF maps have to be created and BPF programs have to
	 * be loaded. By having these map FD placeholders we can perform all
	 * the sanitizations, relocations, and any other adjustments before we
	 * start creating actual BPF kernel objects (BTF, maps, progs).
	 */
	map->fd = create_placeholder_fd();
	if (map->fd < 0)
		return ERR_PTR(map->fd);
	map->inner_map_fd = -1;
	map->autocreate = true;

	return map;
}

static size_t array_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
{
	const long page_sz = sysconf(_SC_PAGE_SIZE);
	size_t map_sz;

	map_sz = (size_t)roundup(value_sz, 8) * max_entries;
	map_sz = roundup(map_sz, page_sz);
	return map_sz;
}
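
/* Worked example for array_map_mmap_sz() above, assuming a 4096-byte page:
 * value_sz = 4, max_entries = 3 -> roundup(4, 8) * 3 = 24 bytes, rounded up
 * to one 4096-byte page; value_sz = 8, max_entries = 1000 -> 8000 bytes,
 * rounded up to two pages (8192 bytes) of mmap()'able space.
 */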

static size_t bpf_map_mmap_sz(const struct bpf_map *map)
{
	const long page_sz = sysconf(_SC_PAGE_SIZE);

	switch (map->def.type) {
	case BPF_MAP_TYPE_ARRAY:
		return array_map_mmap_sz(map->def.value_size, map->def.max_entries);
	case BPF_MAP_TYPE_ARENA:
		return page_sz * map->def.max_entries;
	default:
		return 0; /* not supported */
	}
}

static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
{
	void *mmaped;

	if (!map->mmaped)
		return -EINVAL;

	if (old_sz == new_sz)
		return 0;

	mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (mmaped == MAP_FAILED)
		return -errno;

	memcpy(mmaped, map->mmaped, min(old_sz, new_sz));
	munmap(map->mmaped, old_sz);
	map->mmaped = mmaped;
	return 0;
}

static char *internal_map_name(struct bpf_object *obj, const char *real_name)
{
	char map_name[BPF_OBJ_NAME_LEN], *p;
	int pfx_len, sfx_len = max((size_t)7, strlen(real_name));

	/* This is one of the more confusing parts of libbpf for various
	 * reasons, some of which are historical. The original idea for naming
	 * internal names was to include as much of BPF object name prefix as
	 * possible, so that it can be distinguished from similar internal
	 * maps of a different BPF object.
	 * As an example, let's say we have bpf_object named 'my_object_name'
	 * and internal map corresponding to '.rodata' ELF section. The final
	 * map name advertised to user and to the kernel will be
	 * 'my_objec.rodata', taking first 8 characters of object name and
	 * entire 7 characters of '.rodata'.
	 * Somewhat confusingly, if internal map ELF section name is shorter
	 * than 7 characters, e.g., '.bss', we still reserve 7 characters
	 * for the suffix, even though we only have 4 actual characters, and
	 * resulting map will be called 'my_objec.bss', not even using all 15
	 * characters allowed by the kernel. Oh well, at least the truncated
	 * object name is somewhat consistent in this case. But if the map
	 * name is '.kconfig', we'll still have entirety of '.kconfig' added
	 * (8 chars) and thus will be left with only first 7 characters of the
	 * object name ('my_obje'). Happy guessing, user, that the final map
	 * name will be "my_obje.kconfig".
	 * Now, with libbpf starting to support arbitrarily named .rodata.*
	 * and .data.* data sections, it's possible that ELF section name is
	 * longer than allowed 15 chars, so we now need to be careful to take
	 * only up to 15 first characters of ELF name, taking no BPF object
	 * name characters at all. So '.rodata.abracadabra' will result in
	 * '.rodata.abracad' kernel and user-visible name.
	 * We need to keep this convoluted logic intact for .data, .bss and
	 * .rodata maps, but for new custom .data.custom and .rodata.custom
	 * maps we use their ELF names as is, not prepending bpf_object name
	 * in front. We still need to truncate them to 15 characters for the
	 * kernel. Full name can be recovered for such maps by using DATASEC
	 * BTF type associated with such map's value type, though.
	 */
	if (sfx_len >= BPF_OBJ_NAME_LEN)
		sfx_len = BPF_OBJ_NAME_LEN - 1;

	/* if there are two or more dots in map name, it's a custom dot map */
	if (strchr(real_name + 1, '.') != NULL)
		pfx_len = 0;
	else
		pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));

	snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
		 sfx_len, real_name);

	/* sanitise map name to characters allowed by kernel */
	for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
		if (!isalnum(*p) && *p != '_' && *p != '.')
			*p = '_';

	return strdup(map_name);
}

static int
map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);

/* Internal BPF map is mmap()'able only if at least one of corresponding
 * DATASEC's VARs are to be exposed through BPF skeleton. I.e., it's a GLOBAL
 * variable and it's not marked as __hidden (which turns it into, effectively,
 * a STATIC variable).
1867 */ 1868 static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map) 1869 { 1870 const struct btf_type *t, *vt; 1871 struct btf_var_secinfo *vsi; 1872 int i, n; 1873 1874 if (!map->btf_value_type_id) 1875 return false; 1876 1877 t = btf__type_by_id(obj->btf, map->btf_value_type_id); 1878 if (!btf_is_datasec(t)) 1879 return false; 1880 1881 vsi = btf_var_secinfos(t); 1882 for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) { 1883 vt = btf__type_by_id(obj->btf, vsi->type); 1884 if (!btf_is_var(vt)) 1885 continue; 1886 1887 if (btf_var(vt)->linkage != BTF_VAR_STATIC) 1888 return true; 1889 } 1890 1891 return false; 1892 } 1893 1894 static int 1895 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, 1896 const char *real_name, int sec_idx, void *data, size_t data_sz) 1897 { 1898 struct bpf_map_def *def; 1899 struct bpf_map *map; 1900 size_t mmap_sz; 1901 int err; 1902 1903 map = bpf_object__add_map(obj); 1904 if (IS_ERR(map)) 1905 return PTR_ERR(map); 1906 1907 map->libbpf_type = type; 1908 map->sec_idx = sec_idx; 1909 map->sec_offset = 0; 1910 map->real_name = strdup(real_name); 1911 map->name = internal_map_name(obj, real_name); 1912 if (!map->real_name || !map->name) { 1913 zfree(&map->real_name); 1914 zfree(&map->name); 1915 return -ENOMEM; 1916 } 1917 1918 def = &map->def; 1919 def->type = BPF_MAP_TYPE_ARRAY; 1920 def->key_size = sizeof(int); 1921 def->value_size = data_sz; 1922 def->max_entries = 1; 1923 def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG 1924 ? BPF_F_RDONLY_PROG : 0; 1925 1926 /* failures are fine because of maps like .rodata.str1.1 */ 1927 (void) map_fill_btf_type_info(obj, map); 1928 1929 if (map_is_mmapable(obj, map)) 1930 def->map_flags |= BPF_F_MMAPABLE; 1931 1932 pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", 1933 map->name, map->sec_idx, map->sec_offset, def->map_flags); 1934 1935 mmap_sz = bpf_map_mmap_sz(map); 1936 map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, 1937 MAP_SHARED | MAP_ANONYMOUS, -1, 0); 1938 if (map->mmaped == MAP_FAILED) { 1939 err = -errno; 1940 map->mmaped = NULL; 1941 pr_warn("failed to alloc map '%s' content buffer: %d\n", 1942 map->name, err); 1943 zfree(&map->real_name); 1944 zfree(&map->name); 1945 return err; 1946 } 1947 1948 if (data) 1949 memcpy(map->mmaped, data, data_sz); 1950 1951 pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name); 1952 return 0; 1953 } 1954 1955 static int bpf_object__init_global_data_maps(struct bpf_object *obj) 1956 { 1957 struct elf_sec_desc *sec_desc; 1958 const char *sec_name; 1959 int err = 0, sec_idx; 1960 1961 /* 1962 * Populate obj->maps with libbpf internal maps. 1963 */ 1964 for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) { 1965 sec_desc = &obj->efile.secs[sec_idx]; 1966 1967 /* Skip recognized sections with size 0. 
*/ 1968 if (!sec_desc->data || sec_desc->data->d_size == 0) 1969 continue; 1970 1971 switch (sec_desc->sec_type) { 1972 case SEC_DATA: 1973 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); 1974 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, 1975 sec_name, sec_idx, 1976 sec_desc->data->d_buf, 1977 sec_desc->data->d_size); 1978 break; 1979 case SEC_RODATA: 1980 obj->has_rodata = true; 1981 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); 1982 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, 1983 sec_name, sec_idx, 1984 sec_desc->data->d_buf, 1985 sec_desc->data->d_size); 1986 break; 1987 case SEC_BSS: 1988 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); 1989 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, 1990 sec_name, sec_idx, 1991 NULL, 1992 sec_desc->data->d_size); 1993 break; 1994 default: 1995 /* skip */ 1996 break; 1997 } 1998 if (err) 1999 return err; 2000 } 2001 return 0; 2002 } 2003 2004 2005 static struct extern_desc *find_extern_by_name(const struct bpf_object *obj, 2006 const void *name) 2007 { 2008 int i; 2009 2010 for (i = 0; i < obj->nr_extern; i++) { 2011 if (strcmp(obj->externs[i].name, name) == 0) 2012 return &obj->externs[i]; 2013 } 2014 return NULL; 2015 } 2016 2017 static struct extern_desc *find_extern_by_name_with_len(const struct bpf_object *obj, 2018 const void *name, int len) 2019 { 2020 const char *ext_name; 2021 int i; 2022 2023 for (i = 0; i < obj->nr_extern; i++) { 2024 ext_name = obj->externs[i].name; 2025 if (strlen(ext_name) == len && strncmp(ext_name, name, len) == 0) 2026 return &obj->externs[i]; 2027 } 2028 return NULL; 2029 } 2030 2031 static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val, 2032 char value) 2033 { 2034 switch (ext->kcfg.type) { 2035 case KCFG_BOOL: 2036 if (value == 'm') { 2037 pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n", 2038 ext->name, value); 2039 return -EINVAL; 2040 } 2041 *(bool *)ext_val = value == 'y' ? 
true : false; 2042 break; 2043 case KCFG_TRISTATE: 2044 if (value == 'y') 2045 *(enum libbpf_tristate *)ext_val = TRI_YES; 2046 else if (value == 'm') 2047 *(enum libbpf_tristate *)ext_val = TRI_MODULE; 2048 else /* value == 'n' */ 2049 *(enum libbpf_tristate *)ext_val = TRI_NO; 2050 break; 2051 case KCFG_CHAR: 2052 *(char *)ext_val = value; 2053 break; 2054 case KCFG_UNKNOWN: 2055 case KCFG_INT: 2056 case KCFG_CHAR_ARR: 2057 default: 2058 pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n", 2059 ext->name, value); 2060 return -EINVAL; 2061 } 2062 ext->is_set = true; 2063 return 0; 2064 } 2065 2066 static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, 2067 const char *value) 2068 { 2069 size_t len; 2070 2071 if (ext->kcfg.type != KCFG_CHAR_ARR) { 2072 pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n", 2073 ext->name, value); 2074 return -EINVAL; 2075 } 2076 2077 len = strlen(value); 2078 if (value[len - 1] != '"') { 2079 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n", 2080 ext->name, value); 2081 return -EINVAL; 2082 } 2083 2084 /* strip quotes */ 2085 len -= 2; 2086 if (len >= ext->kcfg.sz) { 2087 pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n", 2088 ext->name, value, len, ext->kcfg.sz - 1); 2089 len = ext->kcfg.sz - 1; 2090 } 2091 memcpy(ext_val, value + 1, len); 2092 ext_val[len] = '\0'; 2093 ext->is_set = true; 2094 return 0; 2095 } 2096 2097 static int parse_u64(const char *value, __u64 *res) 2098 { 2099 char *value_end; 2100 int err; 2101 2102 errno = 0; 2103 *res = strtoull(value, &value_end, 0); 2104 if (errno) { 2105 err = -errno; 2106 pr_warn("failed to parse '%s' as integer: %d\n", value, err); 2107 return err; 2108 } 2109 if (*value_end) { 2110 pr_warn("failed to parse '%s' as integer completely\n", value); 2111 return -EINVAL; 2112 } 2113 return 0; 2114 } 2115 2116 static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v) 2117 { 2118 int bit_sz = ext->kcfg.sz * 8; 2119 2120 if (ext->kcfg.sz == 8) 2121 return true; 2122 2123 /* Validate that value stored in u64 fits in integer of `ext->sz` 2124 * bytes size without any loss of information. If the target integer 2125 * is signed, we rely on the following limits of integer type of 2126 * Y bits and subsequent transformation: 2127 * 2128 * -2^(Y-1) <= X <= 2^(Y-1) - 1 2129 * 0 <= X + 2^(Y-1) <= 2^Y - 1 2130 * 0 <= X + 2^(Y-1) < 2^Y 2131 * 2132 * For unsigned target integer, check that all the (64 - Y) bits are 2133 * zero. 
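* For example, with a 1-byte signed extern (Y = 8): v = -5 is stored as
* 2^64 - 5, adding 2^7 wraps around to 123 < 2^8, so it is accepted, while
* v = 200 yields 328 >= 2^8 and is rejected, since 200 does not fit in s8.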
2134 */ 2135 if (ext->kcfg.is_signed) 2136 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz); 2137 else 2138 return (v >> bit_sz) == 0; 2139 } 2140 2141 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val, 2142 __u64 value) 2143 { 2144 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR && 2145 ext->kcfg.type != KCFG_BOOL) { 2146 pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n", 2147 ext->name, (unsigned long long)value); 2148 return -EINVAL; 2149 } 2150 if (ext->kcfg.type == KCFG_BOOL && value > 1) { 2151 pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n", 2152 ext->name, (unsigned long long)value); 2153 return -EINVAL; 2154 2155 } 2156 if (!is_kcfg_value_in_range(ext, value)) { 2157 pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n", 2158 ext->name, (unsigned long long)value, ext->kcfg.sz); 2159 return -ERANGE; 2160 } 2161 switch (ext->kcfg.sz) { 2162 case 1: 2163 *(__u8 *)ext_val = value; 2164 break; 2165 case 2: 2166 *(__u16 *)ext_val = value; 2167 break; 2168 case 4: 2169 *(__u32 *)ext_val = value; 2170 break; 2171 case 8: 2172 *(__u64 *)ext_val = value; 2173 break; 2174 default: 2175 return -EINVAL; 2176 } 2177 ext->is_set = true; 2178 return 0; 2179 } 2180 2181 static int bpf_object__process_kconfig_line(struct bpf_object *obj, 2182 char *buf, void *data) 2183 { 2184 struct extern_desc *ext; 2185 char *sep, *value; 2186 int len, err = 0; 2187 void *ext_val; 2188 __u64 num; 2189 2190 if (!str_has_pfx(buf, "CONFIG_")) 2191 return 0; 2192 2193 sep = strchr(buf, '='); 2194 if (!sep) { 2195 pr_warn("failed to parse '%s': no separator\n", buf); 2196 return -EINVAL; 2197 } 2198 2199 /* Trim ending '\n' */ 2200 len = strlen(buf); 2201 if (buf[len - 1] == '\n') 2202 buf[len - 1] = '\0'; 2203 /* Split on '=' and ensure that a value is present. */ 2204 *sep = '\0'; 2205 if (!sep[1]) { 2206 *sep = '='; 2207 pr_warn("failed to parse '%s': no value\n", buf); 2208 return -EINVAL; 2209 } 2210 2211 ext = find_extern_by_name(obj, buf); 2212 if (!ext || ext->is_set) 2213 return 0; 2214 2215 ext_val = data + ext->kcfg.data_off; 2216 value = sep + 1; 2217 2218 switch (*value) { 2219 case 'y': case 'n': case 'm': 2220 err = set_kcfg_value_tri(ext, ext_val, *value); 2221 break; 2222 case '"': 2223 err = set_kcfg_value_str(ext, ext_val, value); 2224 break; 2225 default: 2226 /* assume integer */ 2227 err = parse_u64(value, &num); 2228 if (err) { 2229 pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value); 2230 return err; 2231 } 2232 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { 2233 pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value); 2234 return -EINVAL; 2235 } 2236 err = set_kcfg_value_num(ext, ext_val, num); 2237 break; 2238 } 2239 if (err) 2240 return err; 2241 pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value); 2242 return 0; 2243 } 2244 2245 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) 2246 { 2247 char buf[PATH_MAX]; 2248 struct utsname uts; 2249 int len, err = 0; 2250 gzFile file; 2251 2252 uname(&uts); 2253 len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release); 2254 if (len < 0) 2255 return -EINVAL; 2256 else if (len >= PATH_MAX) 2257 return -ENAMETOOLONG; 2258 2259 /* gzopen also accepts uncompressed files. 
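* That way the same gzgets() loop below handles both the plain-text
* /boot/config-<release> file and the gzipped /proc/config.gz fallback.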
*/ 2260 file = gzopen(buf, "re"); 2261 if (!file) 2262 file = gzopen("/proc/config.gz", "re"); 2263 2264 if (!file) { 2265 pr_warn("failed to open system Kconfig\n"); 2266 return -ENOENT; 2267 } 2268 2269 while (gzgets(file, buf, sizeof(buf))) { 2270 err = bpf_object__process_kconfig_line(obj, buf, data); 2271 if (err) { 2272 pr_warn("error parsing system Kconfig line '%s': %d\n", 2273 buf, err); 2274 goto out; 2275 } 2276 } 2277 2278 out: 2279 gzclose(file); 2280 return err; 2281 } 2282 2283 static int bpf_object__read_kconfig_mem(struct bpf_object *obj, 2284 const char *config, void *data) 2285 { 2286 char buf[PATH_MAX]; 2287 int err = 0; 2288 FILE *file; 2289 2290 file = fmemopen((void *)config, strlen(config), "r"); 2291 if (!file) { 2292 err = -errno; 2293 pr_warn("failed to open in-memory Kconfig: %d\n", err); 2294 return err; 2295 } 2296 2297 while (fgets(buf, sizeof(buf), file)) { 2298 err = bpf_object__process_kconfig_line(obj, buf, data); 2299 if (err) { 2300 pr_warn("error parsing in-memory Kconfig line '%s': %d\n", 2301 buf, err); 2302 break; 2303 } 2304 } 2305 2306 fclose(file); 2307 return err; 2308 } 2309 2310 static int bpf_object__init_kconfig_map(struct bpf_object *obj) 2311 { 2312 struct extern_desc *last_ext = NULL, *ext; 2313 size_t map_sz; 2314 int i, err; 2315 2316 for (i = 0; i < obj->nr_extern; i++) { 2317 ext = &obj->externs[i]; 2318 if (ext->type == EXT_KCFG) 2319 last_ext = ext; 2320 } 2321 2322 if (!last_ext) 2323 return 0; 2324 2325 map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz; 2326 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG, 2327 ".kconfig", obj->efile.symbols_shndx, 2328 NULL, map_sz); 2329 if (err) 2330 return err; 2331 2332 obj->kconfig_map_idx = obj->nr_maps - 1; 2333 2334 return 0; 2335 } 2336 2337 const struct btf_type * 2338 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) 2339 { 2340 const struct btf_type *t = btf__type_by_id(btf, id); 2341 2342 if (res_id) 2343 *res_id = id; 2344 2345 while (btf_is_mod(t) || btf_is_typedef(t)) { 2346 if (res_id) 2347 *res_id = t->type; 2348 t = btf__type_by_id(btf, t->type); 2349 } 2350 2351 return t; 2352 } 2353 2354 static const struct btf_type * 2355 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id) 2356 { 2357 const struct btf_type *t; 2358 2359 t = skip_mods_and_typedefs(btf, id, NULL); 2360 if (!btf_is_ptr(t)) 2361 return NULL; 2362 2363 t = skip_mods_and_typedefs(btf, t->type, res_id); 2364 2365 return btf_is_func_proto(t) ? 
t : NULL; 2366 } 2367 2368 static const char *__btf_kind_str(__u16 kind) 2369 { 2370 switch (kind) { 2371 case BTF_KIND_UNKN: return "void"; 2372 case BTF_KIND_INT: return "int"; 2373 case BTF_KIND_PTR: return "ptr"; 2374 case BTF_KIND_ARRAY: return "array"; 2375 case BTF_KIND_STRUCT: return "struct"; 2376 case BTF_KIND_UNION: return "union"; 2377 case BTF_KIND_ENUM: return "enum"; 2378 case BTF_KIND_FWD: return "fwd"; 2379 case BTF_KIND_TYPEDEF: return "typedef"; 2380 case BTF_KIND_VOLATILE: return "volatile"; 2381 case BTF_KIND_CONST: return "const"; 2382 case BTF_KIND_RESTRICT: return "restrict"; 2383 case BTF_KIND_FUNC: return "func"; 2384 case BTF_KIND_FUNC_PROTO: return "func_proto"; 2385 case BTF_KIND_VAR: return "var"; 2386 case BTF_KIND_DATASEC: return "datasec"; 2387 case BTF_KIND_FLOAT: return "float"; 2388 case BTF_KIND_DECL_TAG: return "decl_tag"; 2389 case BTF_KIND_TYPE_TAG: return "type_tag"; 2390 case BTF_KIND_ENUM64: return "enum64"; 2391 default: return "unknown"; 2392 } 2393 } 2394 2395 const char *btf_kind_str(const struct btf_type *t) 2396 { 2397 return __btf_kind_str(btf_kind(t)); 2398 } 2399 2400 /* 2401 * Fetch integer attribute of BTF map definition. Such attributes are 2402 * represented using a pointer to an array, in which dimensionality of array 2403 * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY]; 2404 * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF 2405 * type definition, while using only sizeof(void *) space in ELF data section. 2406 */ 2407 static bool get_map_field_int(const char *map_name, const struct btf *btf, 2408 const struct btf_member *m, __u32 *res) 2409 { 2410 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); 2411 const char *name = btf__name_by_offset(btf, m->name_off); 2412 const struct btf_array *arr_info; 2413 const struct btf_type *arr_t; 2414 2415 if (!btf_is_ptr(t)) { 2416 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n", 2417 map_name, name, btf_kind_str(t)); 2418 return false; 2419 } 2420 2421 arr_t = btf__type_by_id(btf, t->type); 2422 if (!arr_t) { 2423 pr_warn("map '%s': attr '%s': type [%u] not found.\n", 2424 map_name, name, t->type); 2425 return false; 2426 } 2427 if (!btf_is_array(arr_t)) { 2428 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n", 2429 map_name, name, btf_kind_str(arr_t)); 2430 return false; 2431 } 2432 arr_info = btf_array(arr_t); 2433 *res = arr_info->nelems; 2434 return true; 2435 } 2436 2437 static bool get_map_field_long(const char *map_name, const struct btf *btf, 2438 const struct btf_member *m, __u64 *res) 2439 { 2440 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); 2441 const char *name = btf__name_by_offset(btf, m->name_off); 2442 2443 if (btf_is_ptr(t)) { 2444 __u32 res32; 2445 bool ret; 2446 2447 ret = get_map_field_int(map_name, btf, m, &res32); 2448 if (ret) 2449 *res = (__u64)res32; 2450 return ret; 2451 } 2452 2453 if (!btf_is_enum(t) && !btf_is_enum64(t)) { 2454 pr_warn("map '%s': attr '%s': expected ENUM or ENUM64, got %s.\n", 2455 map_name, name, btf_kind_str(t)); 2456 return false; 2457 } 2458 2459 if (btf_vlen(t) != 1) { 2460 pr_warn("map '%s': attr '%s': invalid __ulong\n", 2461 map_name, name); 2462 return false; 2463 } 2464 2465 if (btf_is_enum(t)) { 2466 const struct btf_enum *e = btf_enum(t); 2467 2468 *res = e->val; 2469 } else { 2470 const struct btf_enum64 *e = btf_enum64(t); 2471 2472 *res = btf_enum64_value(e); 2473 } 2474 return true; 2475 } 2476 2477 static int 
pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name) 2478 { 2479 int len; 2480 2481 len = snprintf(buf, buf_sz, "%s/%s", path, name); 2482 if (len < 0) 2483 return -EINVAL; 2484 if (len >= buf_sz) 2485 return -ENAMETOOLONG; 2486 2487 return 0; 2488 } 2489 2490 static int build_map_pin_path(struct bpf_map *map, const char *path) 2491 { 2492 char buf[PATH_MAX]; 2493 int err; 2494 2495 if (!path) 2496 path = BPF_FS_DEFAULT_PATH; 2497 2498 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); 2499 if (err) 2500 return err; 2501 2502 return bpf_map__set_pin_path(map, buf); 2503 } 2504 2505 /* should match definition in bpf_helpers.h */ 2506 enum libbpf_pin_type { 2507 LIBBPF_PIN_NONE, 2508 /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ 2509 LIBBPF_PIN_BY_NAME, 2510 }; 2511 2512 int parse_btf_map_def(const char *map_name, struct btf *btf, 2513 const struct btf_type *def_t, bool strict, 2514 struct btf_map_def *map_def, struct btf_map_def *inner_def) 2515 { 2516 const struct btf_type *t; 2517 const struct btf_member *m; 2518 bool is_inner = inner_def == NULL; 2519 int vlen, i; 2520 2521 vlen = btf_vlen(def_t); 2522 m = btf_members(def_t); 2523 for (i = 0; i < vlen; i++, m++) { 2524 const char *name = btf__name_by_offset(btf, m->name_off); 2525 2526 if (!name) { 2527 pr_warn("map '%s': invalid field #%d.\n", map_name, i); 2528 return -EINVAL; 2529 } 2530 if (strcmp(name, "type") == 0) { 2531 if (!get_map_field_int(map_name, btf, m, &map_def->map_type)) 2532 return -EINVAL; 2533 map_def->parts |= MAP_DEF_MAP_TYPE; 2534 } else if (strcmp(name, "max_entries") == 0) { 2535 if (!get_map_field_int(map_name, btf, m, &map_def->max_entries)) 2536 return -EINVAL; 2537 map_def->parts |= MAP_DEF_MAX_ENTRIES; 2538 } else if (strcmp(name, "map_flags") == 0) { 2539 if (!get_map_field_int(map_name, btf, m, &map_def->map_flags)) 2540 return -EINVAL; 2541 map_def->parts |= MAP_DEF_MAP_FLAGS; 2542 } else if (strcmp(name, "numa_node") == 0) { 2543 if (!get_map_field_int(map_name, btf, m, &map_def->numa_node)) 2544 return -EINVAL; 2545 map_def->parts |= MAP_DEF_NUMA_NODE; 2546 } else if (strcmp(name, "key_size") == 0) { 2547 __u32 sz; 2548 2549 if (!get_map_field_int(map_name, btf, m, &sz)) 2550 return -EINVAL; 2551 if (map_def->key_size && map_def->key_size != sz) { 2552 pr_warn("map '%s': conflicting key size %u != %u.\n", 2553 map_name, map_def->key_size, sz); 2554 return -EINVAL; 2555 } 2556 map_def->key_size = sz; 2557 map_def->parts |= MAP_DEF_KEY_SIZE; 2558 } else if (strcmp(name, "key") == 0) { 2559 __s64 sz; 2560 2561 t = btf__type_by_id(btf, m->type); 2562 if (!t) { 2563 pr_warn("map '%s': key type [%d] not found.\n", 2564 map_name, m->type); 2565 return -EINVAL; 2566 } 2567 if (!btf_is_ptr(t)) { 2568 pr_warn("map '%s': key spec is not PTR: %s.\n", 2569 map_name, btf_kind_str(t)); 2570 return -EINVAL; 2571 } 2572 sz = btf__resolve_size(btf, t->type); 2573 if (sz < 0) { 2574 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n", 2575 map_name, t->type, (ssize_t)sz); 2576 return sz; 2577 } 2578 if (map_def->key_size && map_def->key_size != sz) { 2579 pr_warn("map '%s': conflicting key size %u != %zd.\n", 2580 map_name, map_def->key_size, (ssize_t)sz); 2581 return -EINVAL; 2582 } 2583 map_def->key_size = sz; 2584 map_def->key_type_id = t->type; 2585 map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE; 2586 } else if (strcmp(name, "value_size") == 0) { 2587 __u32 sz; 2588 2589 if (!get_map_field_int(map_name, btf, m, &sz)) 2590 return -EINVAL; 
2591 if (map_def->value_size && map_def->value_size != sz) { 2592 pr_warn("map '%s': conflicting value size %u != %u.\n", 2593 map_name, map_def->value_size, sz); 2594 return -EINVAL; 2595 } 2596 map_def->value_size = sz; 2597 map_def->parts |= MAP_DEF_VALUE_SIZE; 2598 } else if (strcmp(name, "value") == 0) { 2599 __s64 sz; 2600 2601 t = btf__type_by_id(btf, m->type); 2602 if (!t) { 2603 pr_warn("map '%s': value type [%d] not found.\n", 2604 map_name, m->type); 2605 return -EINVAL; 2606 } 2607 if (!btf_is_ptr(t)) { 2608 pr_warn("map '%s': value spec is not PTR: %s.\n", 2609 map_name, btf_kind_str(t)); 2610 return -EINVAL; 2611 } 2612 sz = btf__resolve_size(btf, t->type); 2613 if (sz < 0) { 2614 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n", 2615 map_name, t->type, (ssize_t)sz); 2616 return sz; 2617 } 2618 if (map_def->value_size && map_def->value_size != sz) { 2619 pr_warn("map '%s': conflicting value size %u != %zd.\n", 2620 map_name, map_def->value_size, (ssize_t)sz); 2621 return -EINVAL; 2622 } 2623 map_def->value_size = sz; 2624 map_def->value_type_id = t->type; 2625 map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE; 2626 } 2627 else if (strcmp(name, "values") == 0) { 2628 bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type); 2629 bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY; 2630 const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value"; 2631 char inner_map_name[128]; 2632 int err; 2633 2634 if (is_inner) { 2635 pr_warn("map '%s': multi-level inner maps not supported.\n", 2636 map_name); 2637 return -ENOTSUP; 2638 } 2639 if (i != vlen - 1) { 2640 pr_warn("map '%s': '%s' member should be last.\n", 2641 map_name, name); 2642 return -EINVAL; 2643 } 2644 if (!is_map_in_map && !is_prog_array) { 2645 pr_warn("map '%s': should be map-in-map or prog-array.\n", 2646 map_name); 2647 return -ENOTSUP; 2648 } 2649 if (map_def->value_size && map_def->value_size != 4) { 2650 pr_warn("map '%s': conflicting value size %u != 4.\n", 2651 map_name, map_def->value_size); 2652 return -EINVAL; 2653 } 2654 map_def->value_size = 4; 2655 t = btf__type_by_id(btf, m->type); 2656 if (!t) { 2657 pr_warn("map '%s': %s type [%d] not found.\n", 2658 map_name, desc, m->type); 2659 return -EINVAL; 2660 } 2661 if (!btf_is_array(t) || btf_array(t)->nelems) { 2662 pr_warn("map '%s': %s spec is not a zero-sized array.\n", 2663 map_name, desc); 2664 return -EINVAL; 2665 } 2666 t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL); 2667 if (!btf_is_ptr(t)) { 2668 pr_warn("map '%s': %s def is of unexpected kind %s.\n", 2669 map_name, desc, btf_kind_str(t)); 2670 return -EINVAL; 2671 } 2672 t = skip_mods_and_typedefs(btf, t->type, NULL); 2673 if (is_prog_array) { 2674 if (!btf_is_func_proto(t)) { 2675 pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n", 2676 map_name, btf_kind_str(t)); 2677 return -EINVAL; 2678 } 2679 continue; 2680 } 2681 if (!btf_is_struct(t)) { 2682 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", 2683 map_name, btf_kind_str(t)); 2684 return -EINVAL; 2685 } 2686 2687 snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name); 2688 err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL); 2689 if (err) 2690 return err; 2691 2692 map_def->parts |= MAP_DEF_INNER_MAP; 2693 } else if (strcmp(name, "pinning") == 0) { 2694 __u32 val; 2695 2696 if (is_inner) { 2697 pr_warn("map '%s': inner def can't be pinned.\n", map_name); 2698 return -EINVAL; 2699 } 2700 if 
(!get_map_field_int(map_name, btf, m, &val))
2701 return -EINVAL;
2702 if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
2703 pr_warn("map '%s': invalid pinning value %u.\n",
2704 map_name, val);
2705 return -EINVAL;
2706 }
2707 map_def->pinning = val;
2708 map_def->parts |= MAP_DEF_PINNING;
2709 } else if (strcmp(name, "map_extra") == 0) {
2710 __u64 map_extra;
2711 
2712 if (!get_map_field_long(map_name, btf, m, &map_extra))
2713 return -EINVAL;
2714 map_def->map_extra = map_extra;
2715 map_def->parts |= MAP_DEF_MAP_EXTRA;
2716 } else {
2717 if (strict) {
2718 pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
2719 return -ENOTSUP;
2720 }
2721 pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
2722 }
2723 }
2724 
2725 if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
2726 pr_warn("map '%s': map type isn't specified.\n", map_name);
2727 return -EINVAL;
2728 }
2729 
2730 return 0;
2731 }
2732 
2733 static size_t adjust_ringbuf_sz(size_t sz)
2734 {
2735 __u32 page_sz = sysconf(_SC_PAGE_SIZE);
2736 __u32 mul;
2737 
2738 /* if user forgot to set any size, make sure they see an error */
2739 if (sz == 0)
2740 return 0;
2741 /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
2742 * a power-of-2 multiple of kernel's page size. If user diligently
2743 * satisfied these conditions, pass the size through.
2744 */
2745 if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
2746 return sz;
2747 
2748 /* Otherwise find closest (page_sz * power_of_2) product bigger than
2749 * user-set size to satisfy both user size request and kernel
2750 * requirements and substitute correct max_entries for map creation.
2751 */
2752 for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
2753 if (mul * page_sz > sz)
2754 return mul * page_sz;
2755 }
2756 
2757 /* if it's impossible to satisfy the conditions (i.e., user size is
2758 * very close to UINT_MAX but is not a power-of-2 multiple of
2759 * page_size) then just return original size and let kernel reject it
2760 */
2761 return sz;
2762 }
2763 
2764 static bool map_is_ringbuf(const struct bpf_map *map)
2765 {
2766 return map->def.type == BPF_MAP_TYPE_RINGBUF ||
2767 map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
2768 }
2769 
2770 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
2771 {
2772 map->def.type = def->map_type;
2773 map->def.key_size = def->key_size;
2774 map->def.value_size = def->value_size;
2775 map->def.max_entries = def->max_entries;
2776 map->def.map_flags = def->map_flags;
2777 map->map_extra = def->map_extra;
2778 
2779 map->numa_node = def->numa_node;
2780 map->btf_key_type_id = def->key_type_id;
2781 map->btf_value_type_id = def->value_type_id;
2782 
2783 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
2784 if (map_is_ringbuf(map))
2785 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
2786 
2787 if (def->parts & MAP_DEF_MAP_TYPE)
2788 pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
2789 
2790 if (def->parts & MAP_DEF_KEY_TYPE)
2791 pr_debug("map '%s': found key [%u], sz = %u.\n",
2792 map->name, def->key_type_id, def->key_size);
2793 else if (def->parts & MAP_DEF_KEY_SIZE)
2794 pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
2795 
2796 if (def->parts & MAP_DEF_VALUE_TYPE)
2797 pr_debug("map '%s': found value [%u], sz = %u.\n",
2798 map->name, def->value_type_id, def->value_size);
2799 else if (def->parts & MAP_DEF_VALUE_SIZE)
2800 pr_debug("map '%s': found value_size = %u.\n", map->name,
def->value_size); 2801 2802 if (def->parts & MAP_DEF_MAX_ENTRIES) 2803 pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries); 2804 if (def->parts & MAP_DEF_MAP_FLAGS) 2805 pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags); 2806 if (def->parts & MAP_DEF_MAP_EXTRA) 2807 pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name, 2808 (unsigned long long)def->map_extra); 2809 if (def->parts & MAP_DEF_PINNING) 2810 pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning); 2811 if (def->parts & MAP_DEF_NUMA_NODE) 2812 pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node); 2813 2814 if (def->parts & MAP_DEF_INNER_MAP) 2815 pr_debug("map '%s': found inner map definition.\n", map->name); 2816 } 2817 2818 static const char *btf_var_linkage_str(__u32 linkage) 2819 { 2820 switch (linkage) { 2821 case BTF_VAR_STATIC: return "static"; 2822 case BTF_VAR_GLOBAL_ALLOCATED: return "global"; 2823 case BTF_VAR_GLOBAL_EXTERN: return "extern"; 2824 default: return "unknown"; 2825 } 2826 } 2827 2828 static int bpf_object__init_user_btf_map(struct bpf_object *obj, 2829 const struct btf_type *sec, 2830 int var_idx, int sec_idx, 2831 const Elf_Data *data, bool strict, 2832 const char *pin_root_path) 2833 { 2834 struct btf_map_def map_def = {}, inner_def = {}; 2835 const struct btf_type *var, *def; 2836 const struct btf_var_secinfo *vi; 2837 const struct btf_var *var_extra; 2838 const char *map_name; 2839 struct bpf_map *map; 2840 int err; 2841 2842 vi = btf_var_secinfos(sec) + var_idx; 2843 var = btf__type_by_id(obj->btf, vi->type); 2844 var_extra = btf_var(var); 2845 map_name = btf__name_by_offset(obj->btf, var->name_off); 2846 2847 if (map_name == NULL || map_name[0] == '\0') { 2848 pr_warn("map #%d: empty name.\n", var_idx); 2849 return -EINVAL; 2850 } 2851 if ((__u64)vi->offset + vi->size > data->d_size) { 2852 pr_warn("map '%s' BTF data is corrupted.\n", map_name); 2853 return -EINVAL; 2854 } 2855 if (!btf_is_var(var)) { 2856 pr_warn("map '%s': unexpected var kind %s.\n", 2857 map_name, btf_kind_str(var)); 2858 return -EINVAL; 2859 } 2860 if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) { 2861 pr_warn("map '%s': unsupported map linkage %s.\n", 2862 map_name, btf_var_linkage_str(var_extra->linkage)); 2863 return -EOPNOTSUPP; 2864 } 2865 2866 def = skip_mods_and_typedefs(obj->btf, var->type, NULL); 2867 if (!btf_is_struct(def)) { 2868 pr_warn("map '%s': unexpected def kind %s.\n", 2869 map_name, btf_kind_str(var)); 2870 return -EINVAL; 2871 } 2872 if (def->size > vi->size) { 2873 pr_warn("map '%s': invalid def size.\n", map_name); 2874 return -EINVAL; 2875 } 2876 2877 map = bpf_object__add_map(obj); 2878 if (IS_ERR(map)) 2879 return PTR_ERR(map); 2880 map->name = strdup(map_name); 2881 if (!map->name) { 2882 pr_warn("map '%s': failed to alloc map name.\n", map_name); 2883 return -ENOMEM; 2884 } 2885 map->libbpf_type = LIBBPF_MAP_UNSPEC; 2886 map->def.type = BPF_MAP_TYPE_UNSPEC; 2887 map->sec_idx = sec_idx; 2888 map->sec_offset = vi->offset; 2889 map->btf_var_idx = var_idx; 2890 pr_debug("map '%s': at sec_idx %d, offset %zu.\n", 2891 map_name, map->sec_idx, map->sec_offset); 2892 2893 err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def); 2894 if (err) 2895 return err; 2896 2897 fill_map_from_def(map, &map_def); 2898 2899 if (map_def.pinning == LIBBPF_PIN_BY_NAME) { 2900 err = build_map_pin_path(map, pin_root_path); 2901 if (err) { 2902 pr_warn("map '%s': couldn't build pin path.\n", map->name); 2903 
return err; 2904 } 2905 } 2906 2907 if (map_def.parts & MAP_DEF_INNER_MAP) { 2908 map->inner_map = calloc(1, sizeof(*map->inner_map)); 2909 if (!map->inner_map) 2910 return -ENOMEM; 2911 map->inner_map->fd = create_placeholder_fd(); 2912 if (map->inner_map->fd < 0) 2913 return map->inner_map->fd; 2914 map->inner_map->sec_idx = sec_idx; 2915 map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1); 2916 if (!map->inner_map->name) 2917 return -ENOMEM; 2918 sprintf(map->inner_map->name, "%s.inner", map_name); 2919 2920 fill_map_from_def(map->inner_map, &inner_def); 2921 } 2922 2923 err = map_fill_btf_type_info(obj, map); 2924 if (err) 2925 return err; 2926 2927 return 0; 2928 } 2929 2930 static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map, 2931 const char *sec_name, int sec_idx, 2932 void *data, size_t data_sz) 2933 { 2934 const long page_sz = sysconf(_SC_PAGE_SIZE); 2935 size_t mmap_sz; 2936 2937 mmap_sz = bpf_map_mmap_sz(obj->arena_map); 2938 if (roundup(data_sz, page_sz) > mmap_sz) { 2939 pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n", 2940 sec_name, mmap_sz, data_sz); 2941 return -E2BIG; 2942 } 2943 2944 obj->arena_data = malloc(data_sz); 2945 if (!obj->arena_data) 2946 return -ENOMEM; 2947 memcpy(obj->arena_data, data, data_sz); 2948 obj->arena_data_sz = data_sz; 2949 2950 /* make bpf_map__init_value() work for ARENA maps */ 2951 map->mmaped = obj->arena_data; 2952 2953 return 0; 2954 } 2955 2956 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, 2957 const char *pin_root_path) 2958 { 2959 const struct btf_type *sec = NULL; 2960 int nr_types, i, vlen, err; 2961 const struct btf_type *t; 2962 const char *name; 2963 Elf_Data *data; 2964 Elf_Scn *scn; 2965 2966 if (obj->efile.btf_maps_shndx < 0) 2967 return 0; 2968 2969 scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx); 2970 data = elf_sec_data(obj, scn); 2971 if (!scn || !data) { 2972 pr_warn("elf: failed to get %s map definitions for %s\n", 2973 MAPS_ELF_SEC, obj->path); 2974 return -EINVAL; 2975 } 2976 2977 nr_types = btf__type_cnt(obj->btf); 2978 for (i = 1; i < nr_types; i++) { 2979 t = btf__type_by_id(obj->btf, i); 2980 if (!btf_is_datasec(t)) 2981 continue; 2982 name = btf__name_by_offset(obj->btf, t->name_off); 2983 if (strcmp(name, MAPS_ELF_SEC) == 0) { 2984 sec = t; 2985 obj->efile.btf_maps_sec_btf_id = i; 2986 break; 2987 } 2988 } 2989 2990 if (!sec) { 2991 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC); 2992 return -ENOENT; 2993 } 2994 2995 vlen = btf_vlen(sec); 2996 for (i = 0; i < vlen; i++) { 2997 err = bpf_object__init_user_btf_map(obj, sec, i, 2998 obj->efile.btf_maps_shndx, 2999 data, strict, 3000 pin_root_path); 3001 if (err) 3002 return err; 3003 } 3004 3005 for (i = 0; i < obj->nr_maps; i++) { 3006 struct bpf_map *map = &obj->maps[i]; 3007 3008 if (map->def.type != BPF_MAP_TYPE_ARENA) 3009 continue; 3010 3011 if (obj->arena_map) { 3012 pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n", 3013 map->name, obj->arena_map->name); 3014 return -EINVAL; 3015 } 3016 obj->arena_map = map; 3017 3018 if (obj->efile.arena_data) { 3019 err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx, 3020 obj->efile.arena_data->d_buf, 3021 obj->efile.arena_data->d_size); 3022 if (err) 3023 return err; 3024 } 3025 } 3026 if (obj->efile.arena_data && !obj->arena_map) { 3027 pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be 
explicitly declared in SEC(\".maps\")\n", 3028 ARENA_SEC); 3029 return -ENOENT; 3030 } 3031 3032 return 0; 3033 } 3034 3035 static int bpf_object__init_maps(struct bpf_object *obj, 3036 const struct bpf_object_open_opts *opts) 3037 { 3038 const char *pin_root_path; 3039 bool strict; 3040 int err = 0; 3041 3042 strict = !OPTS_GET(opts, relaxed_maps, false); 3043 pin_root_path = OPTS_GET(opts, pin_root_path, NULL); 3044 3045 err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); 3046 err = err ?: bpf_object__init_global_data_maps(obj); 3047 err = err ?: bpf_object__init_kconfig_map(obj); 3048 err = err ?: bpf_object_init_struct_ops(obj); 3049 3050 return err; 3051 } 3052 3053 static bool section_have_execinstr(struct bpf_object *obj, int idx) 3054 { 3055 Elf64_Shdr *sh; 3056 3057 sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx)); 3058 if (!sh) 3059 return false; 3060 3061 return sh->sh_flags & SHF_EXECINSTR; 3062 } 3063 3064 static bool starts_with_qmark(const char *s) 3065 { 3066 return s && s[0] == '?'; 3067 } 3068 3069 static bool btf_needs_sanitization(struct bpf_object *obj) 3070 { 3071 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); 3072 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); 3073 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); 3074 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); 3075 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); 3076 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); 3077 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); 3078 bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); 3079 3080 return !has_func || !has_datasec || !has_func_global || !has_float || 3081 !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec; 3082 } 3083 3084 static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) 3085 { 3086 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); 3087 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); 3088 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); 3089 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); 3090 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); 3091 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); 3092 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); 3093 bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); 3094 int enum64_placeholder_id = 0; 3095 struct btf_type *t; 3096 int i, j, vlen; 3097 3098 for (i = 1; i < btf__type_cnt(btf); i++) { 3099 t = (struct btf_type *)btf__type_by_id(btf, i); 3100 3101 if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) { 3102 /* replace VAR/DECL_TAG with INT */ 3103 t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); 3104 /* 3105 * using size = 1 is the safest choice, 4 will be too 3106 * big and cause kernel BTF validation failure if 3107 * original variable took less than 4 bytes 3108 */ 3109 t->size = 1; 3110 *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8); 3111 } else if (!has_datasec && btf_is_datasec(t)) { 3112 /* replace DATASEC with STRUCT */ 3113 const struct btf_var_secinfo *v = btf_var_secinfos(t); 3114 struct btf_member *m = btf_members(t); 3115 struct btf_type *vt; 3116 char *name; 3117 3118 name = (char *)btf__name_by_offset(btf, t->name_off); 3119 while (*name) { 3120 if (*name == '.' 
|| *name == '?') 3121 *name = '_'; 3122 name++; 3123 } 3124 3125 vlen = btf_vlen(t); 3126 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen); 3127 for (j = 0; j < vlen; j++, v++, m++) { 3128 /* order of field assignments is important */ 3129 m->offset = v->offset * 8; 3130 m->type = v->type; 3131 /* preserve variable name as member name */ 3132 vt = (void *)btf__type_by_id(btf, v->type); 3133 m->name_off = vt->name_off; 3134 } 3135 } else if (!has_qmark_datasec && btf_is_datasec(t) && 3136 starts_with_qmark(btf__name_by_offset(btf, t->name_off))) { 3137 /* replace '?' prefix with '_' for DATASEC names */ 3138 char *name; 3139 3140 name = (char *)btf__name_by_offset(btf, t->name_off); 3141 if (name[0] == '?') 3142 name[0] = '_'; 3143 } else if (!has_func && btf_is_func_proto(t)) { 3144 /* replace FUNC_PROTO with ENUM */ 3145 vlen = btf_vlen(t); 3146 t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen); 3147 t->size = sizeof(__u32); /* kernel enforced */ 3148 } else if (!has_func && btf_is_func(t)) { 3149 /* replace FUNC with TYPEDEF */ 3150 t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0); 3151 } else if (!has_func_global && btf_is_func(t)) { 3152 /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */ 3153 t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0); 3154 } else if (!has_float && btf_is_float(t)) { 3155 /* replace FLOAT with an equally-sized empty STRUCT; 3156 * since C compilers do not accept e.g. "float" as a 3157 * valid struct name, make it anonymous 3158 */ 3159 t->name_off = 0; 3160 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0); 3161 } else if (!has_type_tag && btf_is_type_tag(t)) { 3162 /* replace TYPE_TAG with a CONST */ 3163 t->name_off = 0; 3164 t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0); 3165 } else if (!has_enum64 && btf_is_enum(t)) { 3166 /* clear the kflag */ 3167 t->info = btf_type_info(btf_kind(t), btf_vlen(t), false); 3168 } else if (!has_enum64 && btf_is_enum64(t)) { 3169 /* replace ENUM64 with a union */ 3170 struct btf_member *m; 3171 3172 if (enum64_placeholder_id == 0) { 3173 enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0); 3174 if (enum64_placeholder_id < 0) 3175 return enum64_placeholder_id; 3176 3177 t = (struct btf_type *)btf__type_by_id(btf, i); 3178 } 3179 3180 m = btf_members(t); 3181 vlen = btf_vlen(t); 3182 t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen); 3183 for (j = 0; j < vlen; j++, m++) { 3184 m->type = enum64_placeholder_id; 3185 m->offset = 0; 3186 } 3187 } 3188 } 3189 3190 return 0; 3191 } 3192 3193 static bool libbpf_needs_btf(const struct bpf_object *obj) 3194 { 3195 return obj->efile.btf_maps_shndx >= 0 || 3196 obj->efile.has_st_ops || 3197 obj->nr_extern > 0; 3198 } 3199 3200 static bool kernel_needs_btf(const struct bpf_object *obj) 3201 { 3202 return obj->efile.has_st_ops; 3203 } 3204 3205 static int bpf_object__init_btf(struct bpf_object *obj, 3206 Elf_Data *btf_data, 3207 Elf_Data *btf_ext_data) 3208 { 3209 int err = -ENOENT; 3210 3211 if (btf_data) { 3212 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); 3213 err = libbpf_get_error(obj->btf); 3214 if (err) { 3215 obj->btf = NULL; 3216 pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err); 3217 goto out; 3218 } 3219 /* enforce 8-byte pointers for BPF-targeted BTFs */ 3220 btf__set_pointer_size(obj->btf, 8); 3221 } 3222 if (btf_ext_data) { 3223 struct btf_ext_info *ext_segs[3]; 3224 int seg_num, sec_num; 3225 3226 if (!obj->btf) { 3227 pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", 3228 BTF_EXT_ELF_SEC, BTF_ELF_SEC); 3229 goto 
out; 3230 } 3231 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); 3232 err = libbpf_get_error(obj->btf_ext); 3233 if (err) { 3234 pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n", 3235 BTF_EXT_ELF_SEC, err); 3236 obj->btf_ext = NULL; 3237 goto out; 3238 } 3239 3240 /* setup .BTF.ext to ELF section mapping */ 3241 ext_segs[0] = &obj->btf_ext->func_info; 3242 ext_segs[1] = &obj->btf_ext->line_info; 3243 ext_segs[2] = &obj->btf_ext->core_relo_info; 3244 for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) { 3245 struct btf_ext_info *seg = ext_segs[seg_num]; 3246 const struct btf_ext_info_sec *sec; 3247 const char *sec_name; 3248 Elf_Scn *scn; 3249 3250 if (seg->sec_cnt == 0) 3251 continue; 3252 3253 seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs)); 3254 if (!seg->sec_idxs) { 3255 err = -ENOMEM; 3256 goto out; 3257 } 3258 3259 sec_num = 0; 3260 for_each_btf_ext_sec(seg, sec) { 3261 /* preventively increment index to avoid doing 3262 * this before every continue below 3263 */ 3264 sec_num++; 3265 3266 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); 3267 if (str_is_empty(sec_name)) 3268 continue; 3269 scn = elf_sec_by_name(obj, sec_name); 3270 if (!scn) 3271 continue; 3272 3273 seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn); 3274 } 3275 } 3276 } 3277 out: 3278 if (err && libbpf_needs_btf(obj)) { 3279 pr_warn("BTF is required, but is missing or corrupted.\n"); 3280 return err; 3281 } 3282 return 0; 3283 } 3284 3285 static int compare_vsi_off(const void *_a, const void *_b) 3286 { 3287 const struct btf_var_secinfo *a = _a; 3288 const struct btf_var_secinfo *b = _b; 3289 3290 return a->offset - b->offset; 3291 } 3292 3293 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, 3294 struct btf_type *t) 3295 { 3296 __u32 size = 0, i, vars = btf_vlen(t); 3297 const char *sec_name = btf__name_by_offset(btf, t->name_off); 3298 struct btf_var_secinfo *vsi; 3299 bool fixup_offsets = false; 3300 int err; 3301 3302 if (!sec_name) { 3303 pr_debug("No name found in string section for DATASEC kind.\n"); 3304 return -ENOENT; 3305 } 3306 3307 /* Extern-backing datasecs (.ksyms, .kconfig) have their size and 3308 * variable offsets set at the previous step. Further, not every 3309 * extern BTF VAR has corresponding ELF symbol preserved, so we skip 3310 * all fixups altogether for such sections and go straight to sorting 3311 * VARs within their DATASEC. 3312 */ 3313 if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0) 3314 goto sort_vars; 3315 3316 /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to 3317 * fix this up. But BPF static linker already fixes this up and fills 3318 * all the sizes and offsets during static linking. So this step has 3319 * to be optional. But the STV_HIDDEN handling is non-optional for any 3320 * non-extern DATASEC, so the variable fixup loop below handles both 3321 * functions at the same time, paying the cost of BTF VAR <-> ELF 3322 * symbol matching just once. 
3323 */ 3324 if (t->size == 0) { 3325 err = find_elf_sec_sz(obj, sec_name, &size); 3326 if (err || !size) { 3327 pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n", 3328 sec_name, size, err); 3329 return -ENOENT; 3330 } 3331 3332 t->size = size; 3333 fixup_offsets = true; 3334 } 3335 3336 for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { 3337 const struct btf_type *t_var; 3338 struct btf_var *var; 3339 const char *var_name; 3340 Elf64_Sym *sym; 3341 3342 t_var = btf__type_by_id(btf, vsi->type); 3343 if (!t_var || !btf_is_var(t_var)) { 3344 pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name); 3345 return -EINVAL; 3346 } 3347 3348 var = btf_var(t_var); 3349 if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN) 3350 continue; 3351 3352 var_name = btf__name_by_offset(btf, t_var->name_off); 3353 if (!var_name) { 3354 pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n", 3355 sec_name, i); 3356 return -ENOENT; 3357 } 3358 3359 sym = find_elf_var_sym(obj, var_name); 3360 if (IS_ERR(sym)) { 3361 pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n", 3362 sec_name, var_name); 3363 return -ENOENT; 3364 } 3365 3366 if (fixup_offsets) 3367 vsi->offset = sym->st_value; 3368 3369 /* if variable is a global/weak symbol, but has restricted 3370 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR 3371 * as static. This follows similar logic for functions (BPF 3372 * subprogs) and influences libbpf's further decisions about 3373 * whether to make global data BPF array maps as 3374 * BPF_F_MMAPABLE. 3375 */ 3376 if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN 3377 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL) 3378 var->linkage = BTF_VAR_STATIC; 3379 } 3380 3381 sort_vars: 3382 qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); 3383 return 0; 3384 } 3385 3386 static int bpf_object_fixup_btf(struct bpf_object *obj) 3387 { 3388 int i, n, err = 0; 3389 3390 if (!obj->btf) 3391 return 0; 3392 3393 n = btf__type_cnt(obj->btf); 3394 for (i = 1; i < n; i++) { 3395 struct btf_type *t = btf_type_by_id(obj->btf, i); 3396 3397 /* Loader needs to fix up some of the things compiler 3398 * couldn't get its hands on while emitting BTF. This 3399 * is section size and global variable offset. We use 3400 * the info from the ELF itself for this purpose. 
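* For example, a '.data' DATASEC emitted with size 0 gets its byte size
* from the corresponding ELF section, and each contained VAR's offset is
* taken from the matching ELF symbol's st_value (see btf_fixup_datasec()
* above).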
3401 */ 3402 if (btf_is_datasec(t)) { 3403 err = btf_fixup_datasec(obj, obj->btf, t); 3404 if (err) 3405 return err; 3406 } 3407 } 3408 3409 return 0; 3410 } 3411 3412 static bool prog_needs_vmlinux_btf(struct bpf_program *prog) 3413 { 3414 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS || 3415 prog->type == BPF_PROG_TYPE_LSM) 3416 return true; 3417 3418 /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs 3419 * also need vmlinux BTF 3420 */ 3421 if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd) 3422 return true; 3423 3424 return false; 3425 } 3426 3427 static bool map_needs_vmlinux_btf(struct bpf_map *map) 3428 { 3429 return bpf_map__is_struct_ops(map); 3430 } 3431 3432 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) 3433 { 3434 struct bpf_program *prog; 3435 struct bpf_map *map; 3436 int i; 3437 3438 /* CO-RE relocations need kernel BTF, only when btf_custom_path 3439 * is not specified 3440 */ 3441 if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path) 3442 return true; 3443 3444 /* Support for typed ksyms needs kernel BTF */ 3445 for (i = 0; i < obj->nr_extern; i++) { 3446 const struct extern_desc *ext; 3447 3448 ext = &obj->externs[i]; 3449 if (ext->type == EXT_KSYM && ext->ksym.type_id) 3450 return true; 3451 } 3452 3453 bpf_object__for_each_program(prog, obj) { 3454 if (!prog->autoload) 3455 continue; 3456 if (prog_needs_vmlinux_btf(prog)) 3457 return true; 3458 } 3459 3460 bpf_object__for_each_map(map, obj) { 3461 if (map_needs_vmlinux_btf(map)) 3462 return true; 3463 } 3464 3465 return false; 3466 } 3467 3468 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) 3469 { 3470 int err; 3471 3472 /* btf_vmlinux could be loaded earlier */ 3473 if (obj->btf_vmlinux || obj->gen_loader) 3474 return 0; 3475 3476 if (!force && !obj_needs_vmlinux_btf(obj)) 3477 return 0; 3478 3479 obj->btf_vmlinux = btf__load_vmlinux_btf(); 3480 err = libbpf_get_error(obj->btf_vmlinux); 3481 if (err) { 3482 pr_warn("Error loading vmlinux BTF: %d\n", err); 3483 obj->btf_vmlinux = NULL; 3484 return err; 3485 } 3486 return 0; 3487 } 3488 3489 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) 3490 { 3491 struct btf *kern_btf = obj->btf; 3492 bool btf_mandatory, sanitize; 3493 int i, err = 0; 3494 3495 if (!obj->btf) 3496 return 0; 3497 3498 if (!kernel_supports(obj, FEAT_BTF)) { 3499 if (kernel_needs_btf(obj)) { 3500 err = -EOPNOTSUPP; 3501 goto report; 3502 } 3503 pr_debug("Kernel doesn't support BTF, skipping uploading it.\n"); 3504 return 0; 3505 } 3506 3507 /* Even though some subprogs are global/weak, user might prefer more 3508 * permissive BPF verification process that BPF verifier performs for 3509 * static functions, taking into account more context from the caller 3510 * functions. In such case, they need to mark such subprogs with 3511 * __attribute__((visibility("hidden"))) and libbpf will adjust 3512 * corresponding FUNC BTF type to be marked as static and trigger more 3513 * involved BPF verification process. 
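* The __hidden macro in bpf_helpers.h is a shorthand for exactly this
* attribute; the loop below then rewrites the matching FUNC BTF type's
* linkage from global to static.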
3514 */ 3515 for (i = 0; i < obj->nr_programs; i++) { 3516 struct bpf_program *prog = &obj->programs[i]; 3517 struct btf_type *t; 3518 const char *name; 3519 int j, n; 3520 3521 if (!prog->mark_btf_static || !prog_is_subprog(obj, prog)) 3522 continue; 3523 3524 n = btf__type_cnt(obj->btf); 3525 for (j = 1; j < n; j++) { 3526 t = btf_type_by_id(obj->btf, j); 3527 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) 3528 continue; 3529 3530 name = btf__str_by_offset(obj->btf, t->name_off); 3531 if (strcmp(name, prog->name) != 0) 3532 continue; 3533 3534 t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0); 3535 break; 3536 } 3537 } 3538 3539 sanitize = btf_needs_sanitization(obj); 3540 if (sanitize) { 3541 const void *raw_data; 3542 __u32 sz; 3543 3544 /* clone BTF to sanitize a copy and leave the original intact */ 3545 raw_data = btf__raw_data(obj->btf, &sz); 3546 kern_btf = btf__new(raw_data, sz); 3547 err = libbpf_get_error(kern_btf); 3548 if (err) 3549 return err; 3550 3551 /* enforce 8-byte pointers for BPF-targeted BTFs */ 3552 btf__set_pointer_size(obj->btf, 8); 3553 err = bpf_object__sanitize_btf(obj, kern_btf); 3554 if (err) 3555 return err; 3556 } 3557 3558 if (obj->gen_loader) { 3559 __u32 raw_size = 0; 3560 const void *raw_data = btf__raw_data(kern_btf, &raw_size); 3561 3562 if (!raw_data) 3563 return -ENOMEM; 3564 bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size); 3565 /* Pretend to have valid FD to pass various fd >= 0 checks. 3566 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. 3567 */ 3568 btf__set_fd(kern_btf, 0); 3569 } else { 3570 /* currently BPF_BTF_LOAD only supports log_level 1 */ 3571 err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, 3572 obj->log_level ? 1 : 0, obj->token_fd); 3573 } 3574 if (sanitize) { 3575 if (!err) { 3576 /* move fd to libbpf's BTF */ 3577 btf__set_fd(obj->btf, btf__fd(kern_btf)); 3578 btf__set_fd(kern_btf, -1); 3579 } 3580 btf__free(kern_btf); 3581 } 3582 report: 3583 if (err) { 3584 btf_mandatory = kernel_needs_btf(obj); 3585 pr_warn("Error loading .BTF into kernel: %d. %s\n", err, 3586 btf_mandatory ? "BTF is mandatory, can't proceed." 
3587 : "BTF is optional, ignoring."); 3588 if (!btf_mandatory) 3589 err = 0; 3590 } 3591 return err; 3592 } 3593 3594 static const char *elf_sym_str(const struct bpf_object *obj, size_t off) 3595 { 3596 const char *name; 3597 3598 name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off); 3599 if (!name) { 3600 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n", 3601 off, obj->path, elf_errmsg(-1)); 3602 return NULL; 3603 } 3604 3605 return name; 3606 } 3607 3608 static const char *elf_sec_str(const struct bpf_object *obj, size_t off) 3609 { 3610 const char *name; 3611 3612 name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off); 3613 if (!name) { 3614 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n", 3615 off, obj->path, elf_errmsg(-1)); 3616 return NULL; 3617 } 3618 3619 return name; 3620 } 3621 3622 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx) 3623 { 3624 Elf_Scn *scn; 3625 3626 scn = elf_getscn(obj->efile.elf, idx); 3627 if (!scn) { 3628 pr_warn("elf: failed to get section(%zu) from %s: %s\n", 3629 idx, obj->path, elf_errmsg(-1)); 3630 return NULL; 3631 } 3632 return scn; 3633 } 3634 3635 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name) 3636 { 3637 Elf_Scn *scn = NULL; 3638 Elf *elf = obj->efile.elf; 3639 const char *sec_name; 3640 3641 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3642 sec_name = elf_sec_name(obj, scn); 3643 if (!sec_name) 3644 return NULL; 3645 3646 if (strcmp(sec_name, name) != 0) 3647 continue; 3648 3649 return scn; 3650 } 3651 return NULL; 3652 } 3653 3654 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn) 3655 { 3656 Elf64_Shdr *shdr; 3657 3658 if (!scn) 3659 return NULL; 3660 3661 shdr = elf64_getshdr(scn); 3662 if (!shdr) { 3663 pr_warn("elf: failed to get section(%zu) header from %s: %s\n", 3664 elf_ndxscn(scn), obj->path, elf_errmsg(-1)); 3665 return NULL; 3666 } 3667 3668 return shdr; 3669 } 3670 3671 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn) 3672 { 3673 const char *name; 3674 Elf64_Shdr *sh; 3675 3676 if (!scn) 3677 return NULL; 3678 3679 sh = elf_sec_hdr(obj, scn); 3680 if (!sh) 3681 return NULL; 3682 3683 name = elf_sec_str(obj, sh->sh_name); 3684 if (!name) { 3685 pr_warn("elf: failed to get section(%zu) name from %s: %s\n", 3686 elf_ndxscn(scn), obj->path, elf_errmsg(-1)); 3687 return NULL; 3688 } 3689 3690 return name; 3691 } 3692 3693 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn) 3694 { 3695 Elf_Data *data; 3696 3697 if (!scn) 3698 return NULL; 3699 3700 data = elf_getdata(scn, 0); 3701 if (!data) { 3702 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n", 3703 elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>", 3704 obj->path, elf_errmsg(-1)); 3705 return NULL; 3706 } 3707 3708 return data; 3709 } 3710 3711 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx) 3712 { 3713 if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym)) 3714 return NULL; 3715 3716 return (Elf64_Sym *)obj->efile.symbols->d_buf + idx; 3717 } 3718 3719 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx) 3720 { 3721 if (idx >= data->d_size / sizeof(Elf64_Rel)) 3722 return NULL; 3723 3724 return (Elf64_Rel *)data->d_buf + idx; 3725 } 3726 3727 static bool is_sec_name_dwarf(const char *name) 3728 { 3729 /* approximation, but the actual list is too long */ 3730 return str_has_pfx(name, ".debug_"); 3731 } 3732 3733 static bool 
ignore_elf_section(Elf64_Shdr *hdr, const char *name) 3734 { 3735 /* no special handling of .strtab */ 3736 if (hdr->sh_type == SHT_STRTAB) 3737 return true; 3738 3739 /* ignore .llvm_addrsig section as well */ 3740 if (hdr->sh_type == SHT_LLVM_ADDRSIG) 3741 return true; 3742 3743 /* no subprograms will lead to an empty .text section, ignore it */ 3744 if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 && 3745 strcmp(name, ".text") == 0) 3746 return true; 3747 3748 /* DWARF sections */ 3749 if (is_sec_name_dwarf(name)) 3750 return true; 3751 3752 if (str_has_pfx(name, ".rel")) { 3753 name += sizeof(".rel") - 1; 3754 /* DWARF section relocations */ 3755 if (is_sec_name_dwarf(name)) 3756 return true; 3757 3758 /* .BTF and .BTF.ext don't need relocations */ 3759 if (strcmp(name, BTF_ELF_SEC) == 0 || 3760 strcmp(name, BTF_EXT_ELF_SEC) == 0) 3761 return true; 3762 } 3763 3764 return false; 3765 } 3766 3767 static int cmp_progs(const void *_a, const void *_b) 3768 { 3769 const struct bpf_program *a = _a; 3770 const struct bpf_program *b = _b; 3771 3772 if (a->sec_idx != b->sec_idx) 3773 return a->sec_idx < b->sec_idx ? -1 : 1; 3774 3775 /* sec_insn_off can't be the same within the section */ 3776 return a->sec_insn_off < b->sec_insn_off ? -1 : 1; 3777 } 3778 3779 static int bpf_object__elf_collect(struct bpf_object *obj) 3780 { 3781 struct elf_sec_desc *sec_desc; 3782 Elf *elf = obj->efile.elf; 3783 Elf_Data *btf_ext_data = NULL; 3784 Elf_Data *btf_data = NULL; 3785 int idx = 0, err = 0; 3786 const char *name; 3787 Elf_Data *data; 3788 Elf_Scn *scn; 3789 Elf64_Shdr *sh; 3790 3791 /* ELF section indices are 0-based, but sec #0 is special "invalid" 3792 * section. Since section count retrieved by elf_getshdrnum() does 3793 * include sec #0, it is already the necessary size of an array to keep 3794 * all the sections. 
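* E.g., if the highest section index is 12, elf_getshdrnum() reports 13,
* so an array of sec_cnt entries can be indexed directly by section index.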
3795 */ 3796 if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) { 3797 pr_warn("elf: failed to get the number of sections for %s: %s\n", 3798 obj->path, elf_errmsg(-1)); 3799 return -LIBBPF_ERRNO__FORMAT; 3800 } 3801 obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs)); 3802 if (!obj->efile.secs) 3803 return -ENOMEM; 3804 3805 /* a bunch of ELF parsing functionality depends on processing symbols, 3806 * so do the first pass and find the symbol table 3807 */ 3808 scn = NULL; 3809 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3810 sh = elf_sec_hdr(obj, scn); 3811 if (!sh) 3812 return -LIBBPF_ERRNO__FORMAT; 3813 3814 if (sh->sh_type == SHT_SYMTAB) { 3815 if (obj->efile.symbols) { 3816 pr_warn("elf: multiple symbol tables in %s\n", obj->path); 3817 return -LIBBPF_ERRNO__FORMAT; 3818 } 3819 3820 data = elf_sec_data(obj, scn); 3821 if (!data) 3822 return -LIBBPF_ERRNO__FORMAT; 3823 3824 idx = elf_ndxscn(scn); 3825 3826 obj->efile.symbols = data; 3827 obj->efile.symbols_shndx = idx; 3828 obj->efile.strtabidx = sh->sh_link; 3829 } 3830 } 3831 3832 if (!obj->efile.symbols) { 3833 pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n", 3834 obj->path); 3835 return -ENOENT; 3836 } 3837 3838 scn = NULL; 3839 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3840 idx = elf_ndxscn(scn); 3841 sec_desc = &obj->efile.secs[idx]; 3842 3843 sh = elf_sec_hdr(obj, scn); 3844 if (!sh) 3845 return -LIBBPF_ERRNO__FORMAT; 3846 3847 name = elf_sec_str(obj, sh->sh_name); 3848 if (!name) 3849 return -LIBBPF_ERRNO__FORMAT; 3850 3851 if (ignore_elf_section(sh, name)) 3852 continue; 3853 3854 data = elf_sec_data(obj, scn); 3855 if (!data) 3856 return -LIBBPF_ERRNO__FORMAT; 3857 3858 pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", 3859 idx, name, (unsigned long)data->d_size, 3860 (int)sh->sh_link, (unsigned long)sh->sh_flags, 3861 (int)sh->sh_type); 3862 3863 if (strcmp(name, "license") == 0) { 3864 err = bpf_object__init_license(obj, data->d_buf, data->d_size); 3865 if (err) 3866 return err; 3867 } else if (strcmp(name, "version") == 0) { 3868 err = bpf_object__init_kversion(obj, data->d_buf, data->d_size); 3869 if (err) 3870 return err; 3871 } else if (strcmp(name, "maps") == 0) { 3872 pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n"); 3873 return -ENOTSUP; 3874 } else if (strcmp(name, MAPS_ELF_SEC) == 0) { 3875 obj->efile.btf_maps_shndx = idx; 3876 } else if (strcmp(name, BTF_ELF_SEC) == 0) { 3877 if (sh->sh_type != SHT_PROGBITS) 3878 return -LIBBPF_ERRNO__FORMAT; 3879 btf_data = data; 3880 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { 3881 if (sh->sh_type != SHT_PROGBITS) 3882 return -LIBBPF_ERRNO__FORMAT; 3883 btf_ext_data = data; 3884 } else if (sh->sh_type == SHT_SYMTAB) { 3885 /* already processed during the first pass above */ 3886 } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) { 3887 if (sh->sh_flags & SHF_EXECINSTR) { 3888 if (strcmp(name, ".text") == 0) 3889 obj->efile.text_shndx = idx; 3890 err = bpf_object__add_programs(obj, data, name, idx); 3891 if (err) 3892 return err; 3893 } else if (strcmp(name, DATA_SEC) == 0 || 3894 str_has_pfx(name, DATA_SEC ".")) { 3895 sec_desc->sec_type = SEC_DATA; 3896 sec_desc->shdr = sh; 3897 sec_desc->data = data; 3898 } else if (strcmp(name, RODATA_SEC) == 0 || 3899 str_has_pfx(name, RODATA_SEC ".")) { 3900 sec_desc->sec_type = SEC_RODATA; 3901 sec_desc->shdr = sh; 3902 sec_desc->data = data; 3903 } else if (strcmp(name, STRUCT_OPS_SEC) == 0 || 3904 
strcmp(name, STRUCT_OPS_LINK_SEC) == 0 || 3905 strcmp(name, "?" STRUCT_OPS_SEC) == 0 || 3906 strcmp(name, "?" STRUCT_OPS_LINK_SEC) == 0) { 3907 sec_desc->sec_type = SEC_ST_OPS; 3908 sec_desc->shdr = sh; 3909 sec_desc->data = data; 3910 obj->efile.has_st_ops = true; 3911 } else if (strcmp(name, ARENA_SEC) == 0) { 3912 obj->efile.arena_data = data; 3913 obj->efile.arena_data_shndx = idx; 3914 } else { 3915 pr_info("elf: skipping unrecognized data section(%d) %s\n", 3916 idx, name); 3917 } 3918 } else if (sh->sh_type == SHT_REL) { 3919 int targ_sec_idx = sh->sh_info; /* points to other section */ 3920 3921 if (sh->sh_entsize != sizeof(Elf64_Rel) || 3922 targ_sec_idx >= obj->efile.sec_cnt) 3923 return -LIBBPF_ERRNO__FORMAT; 3924 3925 /* Only do relo for section with exec instructions */ 3926 if (!section_have_execinstr(obj, targ_sec_idx) && 3927 strcmp(name, ".rel" STRUCT_OPS_SEC) && 3928 strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) && 3929 strcmp(name, ".rel?" STRUCT_OPS_SEC) && 3930 strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) && 3931 strcmp(name, ".rel" MAPS_ELF_SEC)) { 3932 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", 3933 idx, name, targ_sec_idx, 3934 elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>"); 3935 continue; 3936 } 3937 3938 sec_desc->sec_type = SEC_RELO; 3939 sec_desc->shdr = sh; 3940 sec_desc->data = data; 3941 } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 || 3942 str_has_pfx(name, BSS_SEC "."))) { 3943 sec_desc->sec_type = SEC_BSS; 3944 sec_desc->shdr = sh; 3945 sec_desc->data = data; 3946 } else { 3947 pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, 3948 (size_t)sh->sh_size); 3949 } 3950 } 3951 3952 if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) { 3953 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path); 3954 return -LIBBPF_ERRNO__FORMAT; 3955 } 3956 3957 /* sort BPF programs by section name and in-section instruction offset 3958 * for faster search 3959 */ 3960 if (obj->nr_programs) 3961 qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); 3962 3963 return bpf_object__init_btf(obj, btf_data, btf_ext_data); 3964 } 3965 3966 static bool sym_is_extern(const Elf64_Sym *sym) 3967 { 3968 int bind = ELF64_ST_BIND(sym->st_info); 3969 /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */ 3970 return sym->st_shndx == SHN_UNDEF && 3971 (bind == STB_GLOBAL || bind == STB_WEAK) && 3972 ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE; 3973 } 3974 3975 static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx) 3976 { 3977 int bind = ELF64_ST_BIND(sym->st_info); 3978 int type = ELF64_ST_TYPE(sym->st_info); 3979 3980 /* in .text section */ 3981 if (sym->st_shndx != text_shndx) 3982 return false; 3983 3984 /* local function */ 3985 if (bind == STB_LOCAL && type == STT_SECTION) 3986 return true; 3987 3988 /* global function */ 3989 return bind == STB_GLOBAL && type == STT_FUNC; 3990 } 3991 3992 static int find_extern_btf_id(const struct btf *btf, const char *ext_name) 3993 { 3994 const struct btf_type *t; 3995 const char *tname; 3996 int i, n; 3997 3998 if (!btf) 3999 return -ESRCH; 4000 4001 n = btf__type_cnt(btf); 4002 for (i = 1; i < n; i++) { 4003 t = btf__type_by_id(btf, i); 4004 4005 if (!btf_is_var(t) && !btf_is_func(t)) 4006 continue; 4007 4008 tname = btf__name_by_offset(btf, t->name_off); 4009 if (strcmp(tname, ext_name)) 4010 continue; 4011 4012 if (btf_is_var(t) && 4013 btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN) 4014 return -EINVAL; 4015 4016 
if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN) 4017 return -EINVAL; 4018 4019 return i; 4020 } 4021 4022 return -ENOENT; 4023 } 4024 4025 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) { 4026 const struct btf_var_secinfo *vs; 4027 const struct btf_type *t; 4028 int i, j, n; 4029 4030 if (!btf) 4031 return -ESRCH; 4032 4033 n = btf__type_cnt(btf); 4034 for (i = 1; i < n; i++) { 4035 t = btf__type_by_id(btf, i); 4036 4037 if (!btf_is_datasec(t)) 4038 continue; 4039 4040 vs = btf_var_secinfos(t); 4041 for (j = 0; j < btf_vlen(t); j++, vs++) { 4042 if (vs->type == ext_btf_id) 4043 return i; 4044 } 4045 } 4046 4047 return -ENOENT; 4048 } 4049 4050 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id, 4051 bool *is_signed) 4052 { 4053 const struct btf_type *t; 4054 const char *name; 4055 4056 t = skip_mods_and_typedefs(btf, id, NULL); 4057 name = btf__name_by_offset(btf, t->name_off); 4058 4059 if (is_signed) 4060 *is_signed = false; 4061 switch (btf_kind(t)) { 4062 case BTF_KIND_INT: { 4063 int enc = btf_int_encoding(t); 4064 4065 if (enc & BTF_INT_BOOL) 4066 return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN; 4067 if (is_signed) 4068 *is_signed = enc & BTF_INT_SIGNED; 4069 if (t->size == 1) 4070 return KCFG_CHAR; 4071 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1))) 4072 return KCFG_UNKNOWN; 4073 return KCFG_INT; 4074 } 4075 case BTF_KIND_ENUM: 4076 if (t->size != 4) 4077 return KCFG_UNKNOWN; 4078 if (strcmp(name, "libbpf_tristate")) 4079 return KCFG_UNKNOWN; 4080 return KCFG_TRISTATE; 4081 case BTF_KIND_ENUM64: 4082 if (strcmp(name, "libbpf_tristate")) 4083 return KCFG_UNKNOWN; 4084 return KCFG_TRISTATE; 4085 case BTF_KIND_ARRAY: 4086 if (btf_array(t)->nelems == 0) 4087 return KCFG_UNKNOWN; 4088 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR) 4089 return KCFG_UNKNOWN; 4090 return KCFG_CHAR_ARR; 4091 default: 4092 return KCFG_UNKNOWN; 4093 } 4094 } 4095 4096 static int cmp_externs(const void *_a, const void *_b) 4097 { 4098 const struct extern_desc *a = _a; 4099 const struct extern_desc *b = _b; 4100 4101 if (a->type != b->type) 4102 return a->type < b->type ? -1 : 1; 4103 4104 if (a->type == EXT_KCFG) { 4105 /* descending order by alignment requirements */ 4106 if (a->kcfg.align != b->kcfg.align) 4107 return a->kcfg.align > b->kcfg.align ? -1 : 1; 4108 /* ascending order by size, within same alignment class */ 4109 if (a->kcfg.sz != b->kcfg.sz) 4110 return a->kcfg.sz < b->kcfg.sz ? -1 : 1; 4111 } 4112 4113 /* resolve ties by name */ 4114 return strcmp(a->name, b->name); 4115 } 4116 4117 static int find_int_btf_id(const struct btf *btf) 4118 { 4119 const struct btf_type *t; 4120 int i, n; 4121 4122 n = btf__type_cnt(btf); 4123 for (i = 1; i < n; i++) { 4124 t = btf__type_by_id(btf, i); 4125 4126 if (btf_is_int(t) && btf_int_bits(t) == 32) 4127 return i; 4128 } 4129 4130 return 0; 4131 } 4132 4133 static int add_dummy_ksym_var(struct btf *btf) 4134 { 4135 int i, int_btf_id, sec_btf_id, dummy_var_btf_id; 4136 const struct btf_var_secinfo *vs; 4137 const struct btf_type *sec; 4138 4139 if (!btf) 4140 return 0; 4141 4142 sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC, 4143 BTF_KIND_DATASEC); 4144 if (sec_btf_id < 0) 4145 return 0; 4146 4147 sec = btf__type_by_id(btf, sec_btf_id); 4148 vs = btf_var_secinfos(sec); 4149 for (i = 0; i < btf_vlen(sec); i++, vs++) { 4150 const struct btf_type *vt; 4151 4152 vt = btf__type_by_id(btf, vs->type); 4153 if (btf_is_func(vt)) 4154 break; 4155 } 4156 4157 /* No func in ksyms sec. 
No need to add dummy var. */ 4158 if (i == btf_vlen(sec)) 4159 return 0; 4160 4161 int_btf_id = find_int_btf_id(btf); 4162 dummy_var_btf_id = btf__add_var(btf, 4163 "dummy_ksym", 4164 BTF_VAR_GLOBAL_ALLOCATED, 4165 int_btf_id); 4166 if (dummy_var_btf_id < 0) 4167 pr_warn("cannot create a dummy_ksym var\n"); 4168 4169 return dummy_var_btf_id; 4170 } 4171 4172 static int bpf_object__collect_externs(struct bpf_object *obj) 4173 { 4174 struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL; 4175 const struct btf_type *t; 4176 struct extern_desc *ext; 4177 int i, n, off, dummy_var_btf_id; 4178 const char *ext_name, *sec_name; 4179 size_t ext_essent_len; 4180 Elf_Scn *scn; 4181 Elf64_Shdr *sh; 4182 4183 if (!obj->efile.symbols) 4184 return 0; 4185 4186 scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx); 4187 sh = elf_sec_hdr(obj, scn); 4188 if (!sh || sh->sh_entsize != sizeof(Elf64_Sym)) 4189 return -LIBBPF_ERRNO__FORMAT; 4190 4191 dummy_var_btf_id = add_dummy_ksym_var(obj->btf); 4192 if (dummy_var_btf_id < 0) 4193 return dummy_var_btf_id; 4194 4195 n = sh->sh_size / sh->sh_entsize; 4196 pr_debug("looking for externs among %d symbols...\n", n); 4197 4198 for (i = 0; i < n; i++) { 4199 Elf64_Sym *sym = elf_sym_by_idx(obj, i); 4200 4201 if (!sym) 4202 return -LIBBPF_ERRNO__FORMAT; 4203 if (!sym_is_extern(sym)) 4204 continue; 4205 ext_name = elf_sym_str(obj, sym->st_name); 4206 if (!ext_name || !ext_name[0]) 4207 continue; 4208 4209 ext = obj->externs; 4210 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext)); 4211 if (!ext) 4212 return -ENOMEM; 4213 obj->externs = ext; 4214 ext = &ext[obj->nr_extern]; 4215 memset(ext, 0, sizeof(*ext)); 4216 obj->nr_extern++; 4217 4218 ext->btf_id = find_extern_btf_id(obj->btf, ext_name); 4219 if (ext->btf_id <= 0) { 4220 pr_warn("failed to find BTF for extern '%s': %d\n", 4221 ext_name, ext->btf_id); 4222 return ext->btf_id; 4223 } 4224 t = btf__type_by_id(obj->btf, ext->btf_id); 4225 ext->name = btf__name_by_offset(obj->btf, t->name_off); 4226 ext->sym_idx = i; 4227 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; 4228 4229 ext_essent_len = bpf_core_essential_name_len(ext->name); 4230 ext->essent_name = NULL; 4231 if (ext_essent_len != strlen(ext->name)) { 4232 ext->essent_name = strndup(ext->name, ext_essent_len); 4233 if (!ext->essent_name) 4234 return -ENOMEM; 4235 } 4236 4237 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); 4238 if (ext->sec_btf_id <= 0) { 4239 pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n", 4240 ext_name, ext->btf_id, ext->sec_btf_id); 4241 return ext->sec_btf_id; 4242 } 4243 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id); 4244 sec_name = btf__name_by_offset(obj->btf, sec->name_off); 4245 4246 if (strcmp(sec_name, KCONFIG_SEC) == 0) { 4247 if (btf_is_func(t)) { 4248 pr_warn("extern function %s is unsupported under %s section\n", 4249 ext->name, KCONFIG_SEC); 4250 return -ENOTSUP; 4251 } 4252 kcfg_sec = sec; 4253 ext->type = EXT_KCFG; 4254 ext->kcfg.sz = btf__resolve_size(obj->btf, t->type); 4255 if (ext->kcfg.sz <= 0) { 4256 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n", 4257 ext_name, ext->kcfg.sz); 4258 return ext->kcfg.sz; 4259 } 4260 ext->kcfg.align = btf__align_of(obj->btf, t->type); 4261 if (ext->kcfg.align <= 0) { 4262 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n", 4263 ext_name, ext->kcfg.align); 4264 return -EINVAL; 4265 } 4266 ext->kcfg.type = find_kcfg_type(obj->btf, t->type, 4267 &ext->kcfg.is_signed); 4268 if (ext->kcfg.type == 
KCFG_UNKNOWN) { 4269 pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name); 4270 return -ENOTSUP; 4271 } 4272 } else if (strcmp(sec_name, KSYMS_SEC) == 0) { 4273 ksym_sec = sec; 4274 ext->type = EXT_KSYM; 4275 skip_mods_and_typedefs(obj->btf, t->type, 4276 &ext->ksym.type_id); 4277 } else { 4278 pr_warn("unrecognized extern section '%s'\n", sec_name); 4279 return -ENOTSUP; 4280 } 4281 } 4282 pr_debug("collected %d externs total\n", obj->nr_extern); 4283 4284 if (!obj->nr_extern) 4285 return 0; 4286 4287 /* sort externs by type, for kcfg ones also by (align, size, name) */ 4288 qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs); 4289 4290 /* for .ksyms section, we need to turn all externs into allocated 4291 * variables in BTF to pass kernel verification; we do this by 4292 * pretending that each extern is an 8-byte variable 4293 */ 4294 if (ksym_sec) { 4295 /* find existing 4-byte integer type in BTF to use for fake 4296 * extern variables in DATASEC 4297 */ 4298 int int_btf_id = find_int_btf_id(obj->btf); 4299 /* For an extern function, the dummy_var added earlier 4300 * will be used to replace the vs->type and 4301 * its name string will be used to fill in 4302 * any missing param names. 4303 */ 4304 const struct btf_type *dummy_var; 4305 4306 dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id); 4307 for (i = 0; i < obj->nr_extern; i++) { 4308 ext = &obj->externs[i]; 4309 if (ext->type != EXT_KSYM) 4310 continue; 4311 pr_debug("extern (ksym) #%d: symbol %d, name %s\n", 4312 i, ext->sym_idx, ext->name); 4313 } 4314 4315 sec = ksym_sec; 4316 n = btf_vlen(sec); 4317 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) { 4318 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; 4319 struct btf_type *vt; 4320 4321 vt = (void *)btf__type_by_id(obj->btf, vs->type); 4322 ext_name = btf__name_by_offset(obj->btf, vt->name_off); 4323 ext = find_extern_by_name(obj, ext_name); 4324 if (!ext) { 4325 pr_warn("failed to find extern definition for BTF %s '%s'\n", 4326 btf_kind_str(vt), ext_name); 4327 return -ESRCH; 4328 } 4329 if (btf_is_func(vt)) { 4330 const struct btf_type *func_proto; 4331 struct btf_param *param; 4332 int j; 4333 4334 func_proto = btf__type_by_id(obj->btf, 4335 vt->type); 4336 param = btf_params(func_proto); 4337 /* Reuse the dummy_var string if the 4338 * func proto does not have a param name.
4339 */ 4340 for (j = 0; j < btf_vlen(func_proto); j++) 4341 if (param[j].type && !param[j].name_off) 4342 param[j].name_off = 4343 dummy_var->name_off; 4344 vs->type = dummy_var_btf_id; 4345 vt->info &= ~0xffff; 4346 vt->info |= BTF_FUNC_GLOBAL; 4347 } else { 4348 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED; 4349 vt->type = int_btf_id; 4350 } 4351 vs->offset = off; 4352 vs->size = sizeof(int); 4353 } 4354 sec->size = off; 4355 } 4356 4357 if (kcfg_sec) { 4358 sec = kcfg_sec; 4359 /* for kcfg externs calculate their offsets within a .kconfig map */ 4360 off = 0; 4361 for (i = 0; i < obj->nr_extern; i++) { 4362 ext = &obj->externs[i]; 4363 if (ext->type != EXT_KCFG) 4364 continue; 4365 4366 ext->kcfg.data_off = roundup(off, ext->kcfg.align); 4367 off = ext->kcfg.data_off + ext->kcfg.sz; 4368 pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n", 4369 i, ext->sym_idx, ext->kcfg.data_off, ext->name); 4370 } 4371 sec->size = off; 4372 n = btf_vlen(sec); 4373 for (i = 0; i < n; i++) { 4374 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; 4375 4376 t = btf__type_by_id(obj->btf, vs->type); 4377 ext_name = btf__name_by_offset(obj->btf, t->name_off); 4378 ext = find_extern_by_name(obj, ext_name); 4379 if (!ext) { 4380 pr_warn("failed to find extern definition for BTF var '%s'\n", 4381 ext_name); 4382 return -ESRCH; 4383 } 4384 btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; 4385 vs->offset = ext->kcfg.data_off; 4386 } 4387 } 4388 return 0; 4389 } 4390 4391 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog) 4392 { 4393 return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1; 4394 } 4395 4396 struct bpf_program * 4397 bpf_object__find_program_by_name(const struct bpf_object *obj, 4398 const char *name) 4399 { 4400 struct bpf_program *prog; 4401 4402 bpf_object__for_each_program(prog, obj) { 4403 if (prog_is_subprog(obj, prog)) 4404 continue; 4405 if (!strcmp(prog->name, name)) 4406 return prog; 4407 } 4408 return errno = ENOENT, NULL; 4409 } 4410 4411 static bool bpf_object__shndx_is_data(const struct bpf_object *obj, 4412 int shndx) 4413 { 4414 switch (obj->efile.secs[shndx].sec_type) { 4415 case SEC_BSS: 4416 case SEC_DATA: 4417 case SEC_RODATA: 4418 return true; 4419 default: 4420 return false; 4421 } 4422 } 4423 4424 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, 4425 int shndx) 4426 { 4427 return shndx == obj->efile.btf_maps_shndx; 4428 } 4429 4430 static enum libbpf_map_type 4431 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) 4432 { 4433 if (shndx == obj->efile.symbols_shndx) 4434 return LIBBPF_MAP_KCONFIG; 4435 4436 switch (obj->efile.secs[shndx].sec_type) { 4437 case SEC_BSS: 4438 return LIBBPF_MAP_BSS; 4439 case SEC_DATA: 4440 return LIBBPF_MAP_DATA; 4441 case SEC_RODATA: 4442 return LIBBPF_MAP_RODATA; 4443 default: 4444 return LIBBPF_MAP_UNSPEC; 4445 } 4446 } 4447 4448 static int bpf_program__record_reloc(struct bpf_program *prog, 4449 struct reloc_desc *reloc_desc, 4450 __u32 insn_idx, const char *sym_name, 4451 const Elf64_Sym *sym, const Elf64_Rel *rel) 4452 { 4453 struct bpf_insn *insn = &prog->insns[insn_idx]; 4454 size_t map_idx, nr_maps = prog->obj->nr_maps; 4455 struct bpf_object *obj = prog->obj; 4456 __u32 shdr_idx = sym->st_shndx; 4457 enum libbpf_map_type type; 4458 const char *sym_sec_name; 4459 struct bpf_map *map; 4460 4461 if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) { 4462 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n", 
4463 prog->name, sym_name, insn_idx, insn->code); 4464 return -LIBBPF_ERRNO__RELOC; 4465 } 4466 4467 if (sym_is_extern(sym)) { 4468 int sym_idx = ELF64_R_SYM(rel->r_info); 4469 int i, n = obj->nr_extern; 4470 struct extern_desc *ext; 4471 4472 for (i = 0; i < n; i++) { 4473 ext = &obj->externs[i]; 4474 if (ext->sym_idx == sym_idx) 4475 break; 4476 } 4477 if (i >= n) { 4478 pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n", 4479 prog->name, sym_name, sym_idx); 4480 return -LIBBPF_ERRNO__RELOC; 4481 } 4482 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n", 4483 prog->name, i, ext->name, ext->sym_idx, insn_idx); 4484 if (insn->code == (BPF_JMP | BPF_CALL)) 4485 reloc_desc->type = RELO_EXTERN_CALL; 4486 else 4487 reloc_desc->type = RELO_EXTERN_LD64; 4488 reloc_desc->insn_idx = insn_idx; 4489 reloc_desc->ext_idx = i; 4490 return 0; 4491 } 4492 4493 /* sub-program call relocation */ 4494 if (is_call_insn(insn)) { 4495 if (insn->src_reg != BPF_PSEUDO_CALL) { 4496 pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name); 4497 return -LIBBPF_ERRNO__RELOC; 4498 } 4499 /* text_shndx can be 0, if no default "main" program exists */ 4500 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) { 4501 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); 4502 pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n", 4503 prog->name, sym_name, sym_sec_name); 4504 return -LIBBPF_ERRNO__RELOC; 4505 } 4506 if (sym->st_value % BPF_INSN_SZ) { 4507 pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n", 4508 prog->name, sym_name, (size_t)sym->st_value); 4509 return -LIBBPF_ERRNO__RELOC; 4510 } 4511 reloc_desc->type = RELO_CALL; 4512 reloc_desc->insn_idx = insn_idx; 4513 reloc_desc->sym_off = sym->st_value; 4514 return 0; 4515 } 4516 4517 if (!shdr_idx || shdr_idx >= SHN_LORESERVE) { 4518 pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n", 4519 prog->name, sym_name, shdr_idx); 4520 return -LIBBPF_ERRNO__RELOC; 4521 } 4522 4523 /* loading subprog addresses */ 4524 if (sym_is_subprog(sym, obj->efile.text_shndx)) { 4525 /* global_func: sym->st_value = offset in the section, insn->imm = 0. 4526 * local_func: sym->st_value = 0, insn->imm = offset in the section. 
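 *
 * For example (illustrative numbers, not taken from a real object file):
 * for a subprogram that starts 80 bytes into its section, the relocation
 * carries either sym->st_value == 80 and insn->imm == 0 (global function)
 * or sym->st_value == 0 and insn->imm == 80 (static function); either way
 * the offset is expected to be a multiple of BPF_INSN_SZ, which is what
 * the check below enforces.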
4527 */ 4528 if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) { 4529 pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n", 4530 prog->name, sym_name, (size_t)sym->st_value, insn->imm); 4531 return -LIBBPF_ERRNO__RELOC; 4532 } 4533 4534 reloc_desc->type = RELO_SUBPROG_ADDR; 4535 reloc_desc->insn_idx = insn_idx; 4536 reloc_desc->sym_off = sym->st_value; 4537 return 0; 4538 } 4539 4540 type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); 4541 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); 4542 4543 /* arena data relocation */ 4544 if (shdr_idx == obj->efile.arena_data_shndx) { 4545 reloc_desc->type = RELO_DATA; 4546 reloc_desc->insn_idx = insn_idx; 4547 reloc_desc->map_idx = obj->arena_map - obj->maps; 4548 reloc_desc->sym_off = sym->st_value; 4549 return 0; 4550 } 4551 4552 /* generic map reference relocation */ 4553 if (type == LIBBPF_MAP_UNSPEC) { 4554 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { 4555 pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n", 4556 prog->name, sym_name, sym_sec_name); 4557 return -LIBBPF_ERRNO__RELOC; 4558 } 4559 for (map_idx = 0; map_idx < nr_maps; map_idx++) { 4560 map = &obj->maps[map_idx]; 4561 if (map->libbpf_type != type || 4562 map->sec_idx != sym->st_shndx || 4563 map->sec_offset != sym->st_value) 4564 continue; 4565 pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n", 4566 prog->name, map_idx, map->name, map->sec_idx, 4567 map->sec_offset, insn_idx); 4568 break; 4569 } 4570 if (map_idx >= nr_maps) { 4571 pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n", 4572 prog->name, sym_sec_name, (size_t)sym->st_value); 4573 return -LIBBPF_ERRNO__RELOC; 4574 } 4575 reloc_desc->type = RELO_LD64; 4576 reloc_desc->insn_idx = insn_idx; 4577 reloc_desc->map_idx = map_idx; 4578 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */ 4579 return 0; 4580 } 4581 4582 /* global data map relocation */ 4583 if (!bpf_object__shndx_is_data(obj, shdr_idx)) { 4584 pr_warn("prog '%s': bad data relo against section '%s'\n", 4585 prog->name, sym_sec_name); 4586 return -LIBBPF_ERRNO__RELOC; 4587 } 4588 for (map_idx = 0; map_idx < nr_maps; map_idx++) { 4589 map = &obj->maps[map_idx]; 4590 if (map->libbpf_type != type || map->sec_idx != sym->st_shndx) 4591 continue; 4592 pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n", 4593 prog->name, map_idx, map->name, map->sec_idx, 4594 map->sec_offset, insn_idx); 4595 break; 4596 } 4597 if (map_idx >= nr_maps) { 4598 pr_warn("prog '%s': data relo failed to find map for section '%s'\n", 4599 prog->name, sym_sec_name); 4600 return -LIBBPF_ERRNO__RELOC; 4601 } 4602 4603 reloc_desc->type = RELO_DATA; 4604 reloc_desc->insn_idx = insn_idx; 4605 reloc_desc->map_idx = map_idx; 4606 reloc_desc->sym_off = sym->st_value; 4607 return 0; 4608 } 4609 4610 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx) 4611 { 4612 return insn_idx >= prog->sec_insn_off && 4613 insn_idx < prog->sec_insn_off + prog->sec_insn_cnt; 4614 } 4615 4616 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj, 4617 size_t sec_idx, size_t insn_idx) 4618 { 4619 int l = 0, r = obj->nr_programs - 1, m; 4620 struct bpf_program *prog; 4621 4622 if (!obj->nr_programs) 4623 return NULL; 4624 4625 while (l < r) { 4626 m = l + (r - l + 1) / 2; 4627 prog = &obj->programs[m]; 4628 4629 if (prog->sec_idx < sec_idx || 4630 (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx)) 4631 
l = m; 4632 else 4633 r = m - 1; 4634 } 4635 /* matching program could be at index l, but it still might be the 4636 * wrong one, so we need to double check conditions for the last time 4637 */ 4638 prog = &obj->programs[l]; 4639 if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx)) 4640 return prog; 4641 return NULL; 4642 } 4643 4644 static int 4645 bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) 4646 { 4647 const char *relo_sec_name, *sec_name; 4648 size_t sec_idx = shdr->sh_info, sym_idx; 4649 struct bpf_program *prog; 4650 struct reloc_desc *relos; 4651 int err, i, nrels; 4652 const char *sym_name; 4653 __u32 insn_idx; 4654 Elf_Scn *scn; 4655 Elf_Data *scn_data; 4656 Elf64_Sym *sym; 4657 Elf64_Rel *rel; 4658 4659 if (sec_idx >= obj->efile.sec_cnt) 4660 return -EINVAL; 4661 4662 scn = elf_sec_by_idx(obj, sec_idx); 4663 scn_data = elf_sec_data(obj, scn); 4664 if (!scn_data) 4665 return -LIBBPF_ERRNO__FORMAT; 4666 4667 relo_sec_name = elf_sec_str(obj, shdr->sh_name); 4668 sec_name = elf_sec_name(obj, scn); 4669 if (!relo_sec_name || !sec_name) 4670 return -EINVAL; 4671 4672 pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n", 4673 relo_sec_name, sec_idx, sec_name); 4674 nrels = shdr->sh_size / shdr->sh_entsize; 4675 4676 for (i = 0; i < nrels; i++) { 4677 rel = elf_rel_by_idx(data, i); 4678 if (!rel) { 4679 pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i); 4680 return -LIBBPF_ERRNO__FORMAT; 4681 } 4682 4683 sym_idx = ELF64_R_SYM(rel->r_info); 4684 sym = elf_sym_by_idx(obj, sym_idx); 4685 if (!sym) { 4686 pr_warn("sec '%s': symbol #%zu not found for relo #%d\n", 4687 relo_sec_name, sym_idx, i); 4688 return -LIBBPF_ERRNO__FORMAT; 4689 } 4690 4691 if (sym->st_shndx >= obj->efile.sec_cnt) { 4692 pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n", 4693 relo_sec_name, sym_idx, (size_t)sym->st_shndx, i); 4694 return -LIBBPF_ERRNO__FORMAT; 4695 } 4696 4697 if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) { 4698 pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n", 4699 relo_sec_name, (size_t)rel->r_offset, i); 4700 return -LIBBPF_ERRNO__FORMAT; 4701 } 4702 4703 insn_idx = rel->r_offset / BPF_INSN_SZ; 4704 /* relocations against static functions are recorded as 4705 * relocations against the section that contains a function; 4706 * in such case, symbol will be STT_SECTION and sym.st_name 4707 * will point to empty string (0), so fetch section name 4708 * instead 4709 */ 4710 if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0) 4711 sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx)); 4712 else 4713 sym_name = elf_sym_str(obj, sym->st_name); 4714 sym_name = sym_name ?: "<?"; 4715 4716 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n", 4717 relo_sec_name, i, insn_idx, sym_name); 4718 4719 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); 4720 if (!prog) { 4721 pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n", 4722 relo_sec_name, i, sec_name, insn_idx); 4723 continue; 4724 } 4725 4726 relos = libbpf_reallocarray(prog->reloc_desc, 4727 prog->nr_reloc + 1, sizeof(*relos)); 4728 if (!relos) 4729 return -ENOMEM; 4730 prog->reloc_desc = relos; 4731 4732 /* adjust insn_idx to local BPF program frame of reference */ 4733 insn_idx -= prog->sec_insn_off; 4734 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc], 4735 insn_idx, sym_name, 
sym, rel); 4736 if (err) 4737 return err; 4738 4739 prog->nr_reloc++; 4740 } 4741 return 0; 4742 } 4743 4744 static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map) 4745 { 4746 int id; 4747 4748 if (!obj->btf) 4749 return -ENOENT; 4750 4751 /* if it's BTF-defined map, we don't need to search for type IDs. 4752 * For struct_ops map, it does not need btf_key_type_id and 4753 * btf_value_type_id. 4754 */ 4755 if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map)) 4756 return 0; 4757 4758 /* 4759 * LLVM annotates global data differently in BTF, that is, 4760 * only as '.data', '.bss' or '.rodata'. 4761 */ 4762 if (!bpf_map__is_internal(map)) 4763 return -ENOENT; 4764 4765 id = btf__find_by_name(obj->btf, map->real_name); 4766 if (id < 0) 4767 return id; 4768 4769 map->btf_key_type_id = 0; 4770 map->btf_value_type_id = id; 4771 return 0; 4772 } 4773 4774 static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) 4775 { 4776 char file[PATH_MAX], buff[4096]; 4777 FILE *fp; 4778 __u32 val; 4779 int err; 4780 4781 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); 4782 memset(info, 0, sizeof(*info)); 4783 4784 fp = fopen(file, "re"); 4785 if (!fp) { 4786 err = -errno; 4787 pr_warn("failed to open %s: %d. No procfs support?\n", file, 4788 err); 4789 return err; 4790 } 4791 4792 while (fgets(buff, sizeof(buff), fp)) { 4793 if (sscanf(buff, "map_type:\t%u", &val) == 1) 4794 info->type = val; 4795 else if (sscanf(buff, "key_size:\t%u", &val) == 1) 4796 info->key_size = val; 4797 else if (sscanf(buff, "value_size:\t%u", &val) == 1) 4798 info->value_size = val; 4799 else if (sscanf(buff, "max_entries:\t%u", &val) == 1) 4800 info->max_entries = val; 4801 else if (sscanf(buff, "map_flags:\t%i", &val) == 1) 4802 info->map_flags = val; 4803 } 4804 4805 fclose(fp); 4806 4807 return 0; 4808 } 4809 4810 bool bpf_map__autocreate(const struct bpf_map *map) 4811 { 4812 return map->autocreate; 4813 } 4814 4815 int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) 4816 { 4817 if (map->obj->loaded) 4818 return libbpf_err(-EBUSY); 4819 4820 map->autocreate = autocreate; 4821 return 0; 4822 } 4823 4824 int bpf_map__set_autoattach(struct bpf_map *map, bool autoattach) 4825 { 4826 if (!bpf_map__is_struct_ops(map)) 4827 return libbpf_err(-EINVAL); 4828 4829 map->autoattach = autoattach; 4830 return 0; 4831 } 4832 4833 bool bpf_map__autoattach(const struct bpf_map *map) 4834 { 4835 return map->autoattach; 4836 } 4837 4838 int bpf_map__reuse_fd(struct bpf_map *map, int fd) 4839 { 4840 struct bpf_map_info info; 4841 __u32 len = sizeof(info), name_len; 4842 int new_fd, err; 4843 char *new_name; 4844 4845 memset(&info, 0, len); 4846 err = bpf_map_get_info_by_fd(fd, &info, &len); 4847 if (err && errno == EINVAL) 4848 err = bpf_get_map_info_from_fdinfo(fd, &info); 4849 if (err) 4850 return libbpf_err(err); 4851 4852 name_len = strlen(info.name); 4853 if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0) 4854 new_name = strdup(map->name); 4855 else 4856 new_name = strdup(info.name); 4857 4858 if (!new_name) 4859 return libbpf_err(-errno); 4860 4861 /* 4862 * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set. 4863 * This is similar to what we do in ensure_good_fd(), but without 4864 * closing original FD. 
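 *
 * For illustration only (the pin path and error handling here are made
 * up), a typical caller that ends up in this function reuses a pinned
 * map roughly like this:
 *
 *   int pin_fd = bpf_obj_get("/sys/fs/bpf/some_map");
 *
 *   if (pin_fd >= 0) {
 *           err = bpf_map__reuse_fd(map, pin_fd);
 *           close(pin_fd);   // safe: the FD is duplicated below
 *   }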
4865 */ 4866 new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); 4867 if (new_fd < 0) { 4868 err = -errno; 4869 goto err_free_new_name; 4870 } 4871 4872 err = reuse_fd(map->fd, new_fd); 4873 if (err) 4874 goto err_free_new_name; 4875 4876 free(map->name); 4877 4878 map->name = new_name; 4879 map->def.type = info.type; 4880 map->def.key_size = info.key_size; 4881 map->def.value_size = info.value_size; 4882 map->def.max_entries = info.max_entries; 4883 map->def.map_flags = info.map_flags; 4884 map->btf_key_type_id = info.btf_key_type_id; 4885 map->btf_value_type_id = info.btf_value_type_id; 4886 map->reused = true; 4887 map->map_extra = info.map_extra; 4888 4889 return 0; 4890 4891 err_free_new_name: 4892 free(new_name); 4893 return libbpf_err(err); 4894 } 4895 4896 __u32 bpf_map__max_entries(const struct bpf_map *map) 4897 { 4898 return map->def.max_entries; 4899 } 4900 4901 struct bpf_map *bpf_map__inner_map(struct bpf_map *map) 4902 { 4903 if (!bpf_map_type__is_map_in_map(map->def.type)) 4904 return errno = EINVAL, NULL; 4905 4906 return map->inner_map; 4907 } 4908 4909 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) 4910 { 4911 if (map->obj->loaded) 4912 return libbpf_err(-EBUSY); 4913 4914 map->def.max_entries = max_entries; 4915 4916 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ 4917 if (map_is_ringbuf(map)) 4918 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); 4919 4920 return 0; 4921 } 4922 4923 static int bpf_object_prepare_token(struct bpf_object *obj) 4924 { 4925 const char *bpffs_path; 4926 int bpffs_fd = -1, token_fd, err; 4927 bool mandatory; 4928 enum libbpf_print_level level; 4929 4930 /* token is explicitly prevented */ 4931 if (obj->token_path && obj->token_path[0] == '\0') { 4932 pr_debug("object '%s': token is prevented, skipping...\n", obj->name); 4933 return 0; 4934 } 4935 4936 mandatory = obj->token_path != NULL; 4937 level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG; 4938 4939 bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH; 4940 bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); 4941 if (bpffs_fd < 0) { 4942 err = -errno; 4943 __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n", 4944 obj->name, err, bpffs_path, 4945 mandatory ? "" : ", skipping optional step..."); 4946 return mandatory ? err : 0; 4947 } 4948 4949 token_fd = bpf_token_create(bpffs_fd, 0); 4950 close(bpffs_fd); 4951 if (token_fd < 0) { 4952 if (!mandatory && token_fd == -ENOENT) { 4953 pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n", 4954 obj->name, bpffs_path); 4955 return 0; 4956 } 4957 __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n", 4958 obj->name, token_fd, bpffs_path, 4959 mandatory ? "" : ", skipping optional step..."); 4960 return mandatory ? token_fd : 0; 4961 } 4962 4963 obj->feat_cache = calloc(1, sizeof(*obj->feat_cache)); 4964 if (!obj->feat_cache) { 4965 close(token_fd); 4966 return -ENOMEM; 4967 } 4968 4969 obj->token_fd = token_fd; 4970 obj->feat_cache->token_fd = token_fd; 4971 4972 return 0; 4973 } 4974 4975 static int 4976 bpf_object__probe_loading(struct bpf_object *obj) 4977 { 4978 char *cp, errmsg[STRERR_BUFSIZE]; 4979 struct bpf_insn insns[] = { 4980 BPF_MOV64_IMM(BPF_REG_0, 0), 4981 BPF_EXIT_INSN(), 4982 }; 4983 int ret, insn_cnt = ARRAY_SIZE(insns); 4984 LIBBPF_OPTS(bpf_prog_load_opts, opts, 4985 .token_fd = obj->token_fd, 4986 .prog_flags = obj->token_fd ? 
BPF_F_TOKEN_FD : 0, 4987 ); 4988 4989 if (obj->gen_loader) 4990 return 0; 4991 4992 ret = bump_rlimit_memlock(); 4993 if (ret) 4994 pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); 4995 4996 /* make sure basic loading works */ 4997 ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); 4998 if (ret < 0) 4999 ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); 5000 if (ret < 0) { 5001 ret = errno; 5002 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); 5003 pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF " 5004 "program. Make sure your kernel supports BPF " 5005 "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is " 5006 "set to big enough value.\n", __func__, cp, ret); 5007 return -ret; 5008 } 5009 close(ret); 5010 5011 return 0; 5012 } 5013 5014 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) 5015 { 5016 if (obj->gen_loader) 5017 /* To generate loader program assume the latest kernel 5018 * to avoid doing extra prog_load, map_create syscalls. 5019 */ 5020 return true; 5021 5022 if (obj->token_fd) 5023 return feat_supported(obj->feat_cache, feat_id); 5024 5025 return feat_supported(NULL, feat_id); 5026 } 5027 5028 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) 5029 { 5030 struct bpf_map_info map_info; 5031 char msg[STRERR_BUFSIZE]; 5032 __u32 map_info_len = sizeof(map_info); 5033 int err; 5034 5035 memset(&map_info, 0, map_info_len); 5036 err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len); 5037 if (err && errno == EINVAL) 5038 err = bpf_get_map_info_from_fdinfo(map_fd, &map_info); 5039 if (err) { 5040 pr_warn("failed to get map info for map FD %d: %s\n", map_fd, 5041 libbpf_strerror_r(errno, msg, sizeof(msg))); 5042 return false; 5043 } 5044 5045 return (map_info.type == map->def.type && 5046 map_info.key_size == map->def.key_size && 5047 map_info.value_size == map->def.value_size && 5048 map_info.max_entries == map->def.max_entries && 5049 map_info.map_flags == map->def.map_flags && 5050 map_info.map_extra == map->map_extra); 5051 } 5052 5053 static int 5054 bpf_object__reuse_map(struct bpf_map *map) 5055 { 5056 char *cp, errmsg[STRERR_BUFSIZE]; 5057 int err, pin_fd; 5058 5059 pin_fd = bpf_obj_get(map->pin_path); 5060 if (pin_fd < 0) { 5061 err = -errno; 5062 if (err == -ENOENT) { 5063 pr_debug("found no pinned map to reuse at '%s'\n", 5064 map->pin_path); 5065 return 0; 5066 } 5067 5068 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); 5069 pr_warn("couldn't retrieve pinned map '%s': %s\n", 5070 map->pin_path, cp); 5071 return err; 5072 } 5073 5074 if (!map_is_reuse_compat(map, pin_fd)) { 5075 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n", 5076 map->pin_path); 5077 close(pin_fd); 5078 return -EINVAL; 5079 } 5080 5081 err = bpf_map__reuse_fd(map, pin_fd); 5082 close(pin_fd); 5083 if (err) 5084 return err; 5085 5086 map->pinned = true; 5087 pr_debug("reused pinned map at '%s'\n", map->pin_path); 5088 5089 return 0; 5090 } 5091 5092 static int 5093 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) 5094 { 5095 enum libbpf_map_type map_type = map->libbpf_type; 5096 char *cp, errmsg[STRERR_BUFSIZE]; 5097 int err, zero = 0; 5098 5099 if (obj->gen_loader) { 5100 bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps, 5101 map->mmaped, map->def.value_size); 5102 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) 5103 
bpf_gen__map_freeze(obj->gen_loader, map - obj->maps); 5104 return 0; 5105 } 5106 5107 err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); 5108 if (err) { 5109 err = -errno; 5110 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 5111 pr_warn("Error setting initial map(%s) contents: %s\n", 5112 map->name, cp); 5113 return err; 5114 } 5115 5116 /* Freeze .rodata and .kconfig map as read-only from syscall side. */ 5117 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) { 5118 err = bpf_map_freeze(map->fd); 5119 if (err) { 5120 err = -errno; 5121 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 5122 pr_warn("Error freezing map(%s) as read-only: %s\n", 5123 map->name, cp); 5124 return err; 5125 } 5126 } 5127 return 0; 5128 } 5129 5130 static void bpf_map__destroy(struct bpf_map *map); 5131 5132 static bool map_is_created(const struct bpf_map *map) 5133 { 5134 return map->obj->loaded || map->reused; 5135 } 5136 5137 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) 5138 { 5139 LIBBPF_OPTS(bpf_map_create_opts, create_attr); 5140 struct bpf_map_def *def = &map->def; 5141 const char *map_name = NULL; 5142 int err = 0, map_fd; 5143 5144 if (kernel_supports(obj, FEAT_PROG_NAME)) 5145 map_name = map->name; 5146 create_attr.map_ifindex = map->map_ifindex; 5147 create_attr.map_flags = def->map_flags; 5148 create_attr.numa_node = map->numa_node; 5149 create_attr.map_extra = map->map_extra; 5150 create_attr.token_fd = obj->token_fd; 5151 if (obj->token_fd) 5152 create_attr.map_flags |= BPF_F_TOKEN_FD; 5153 5154 if (bpf_map__is_struct_ops(map)) { 5155 create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; 5156 if (map->mod_btf_fd >= 0) { 5157 create_attr.value_type_btf_obj_fd = map->mod_btf_fd; 5158 create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD; 5159 } 5160 } 5161 5162 if (obj->btf && btf__fd(obj->btf) >= 0) { 5163 create_attr.btf_fd = btf__fd(obj->btf); 5164 create_attr.btf_key_type_id = map->btf_key_type_id; 5165 create_attr.btf_value_type_id = map->btf_value_type_id; 5166 } 5167 5168 if (bpf_map_type__is_map_in_map(def->type)) { 5169 if (map->inner_map) { 5170 err = map_set_def_max_entries(map->inner_map); 5171 if (err) 5172 return err; 5173 err = bpf_object__create_map(obj, map->inner_map, true); 5174 if (err) { 5175 pr_warn("map '%s': failed to create inner map: %d\n", 5176 map->name, err); 5177 return err; 5178 } 5179 map->inner_map_fd = map->inner_map->fd; 5180 } 5181 if (map->inner_map_fd >= 0) 5182 create_attr.inner_map_fd = map->inner_map_fd; 5183 } 5184 5185 switch (def->type) { 5186 case BPF_MAP_TYPE_PERF_EVENT_ARRAY: 5187 case BPF_MAP_TYPE_CGROUP_ARRAY: 5188 case BPF_MAP_TYPE_STACK_TRACE: 5189 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 5190 case BPF_MAP_TYPE_HASH_OF_MAPS: 5191 case BPF_MAP_TYPE_DEVMAP: 5192 case BPF_MAP_TYPE_DEVMAP_HASH: 5193 case BPF_MAP_TYPE_CPUMAP: 5194 case BPF_MAP_TYPE_XSKMAP: 5195 case BPF_MAP_TYPE_SOCKMAP: 5196 case BPF_MAP_TYPE_SOCKHASH: 5197 case BPF_MAP_TYPE_QUEUE: 5198 case BPF_MAP_TYPE_STACK: 5199 case BPF_MAP_TYPE_ARENA: 5200 create_attr.btf_fd = 0; 5201 create_attr.btf_key_type_id = 0; 5202 create_attr.btf_value_type_id = 0; 5203 map->btf_key_type_id = 0; 5204 map->btf_value_type_id = 0; 5205 break; 5206 case BPF_MAP_TYPE_STRUCT_OPS: 5207 create_attr.btf_value_type_id = 0; 5208 break; 5209 default: 5210 break; 5211 } 5212 5213 if (obj->gen_loader) { 5214 bpf_gen__map_create(obj->gen_loader, def->type, map_name, 5215 def->key_size, def->value_size, def->max_entries, 5216 
&create_attr, is_inner ? -1 : map - obj->maps); 5217 /* We keep pretending we have a valid FD to pass various fd >= 0 5218 * checks by just keeping original placeholder FDs in place. 5219 * See bpf_object__add_map() comment. 5220 * This placeholder fd will not be used with any syscall and 5221 * will be reset to -1 eventually. 5222 */ 5223 map_fd = map->fd; 5224 } else { 5225 map_fd = bpf_map_create(def->type, map_name, 5226 def->key_size, def->value_size, 5227 def->max_entries, &create_attr); 5228 } 5229 if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { 5230 char *cp, errmsg[STRERR_BUFSIZE]; 5231 5232 err = -errno; 5233 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 5234 pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", 5235 map->name, cp, err); 5236 create_attr.btf_fd = 0; 5237 create_attr.btf_key_type_id = 0; 5238 create_attr.btf_value_type_id = 0; 5239 map->btf_key_type_id = 0; 5240 map->btf_value_type_id = 0; 5241 map_fd = bpf_map_create(def->type, map_name, 5242 def->key_size, def->value_size, 5243 def->max_entries, &create_attr); 5244 } 5245 5246 if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) { 5247 if (obj->gen_loader) 5248 map->inner_map->fd = -1; 5249 bpf_map__destroy(map->inner_map); 5250 zfree(&map->inner_map); 5251 } 5252 5253 if (map_fd < 0) 5254 return map_fd; 5255 5256 /* obj->gen_loader case, prevent reuse_fd() from closing map_fd */ 5257 if (map->fd == map_fd) 5258 return 0; 5259 5260 /* Keep placeholder FD value but now point it to the BPF map object. 5261 * This way everything that relied on this map's FD (e.g., relocated 5262 * ldimm64 instructions) will stay valid and won't need adjustments. 5263 * map->fd stays valid but now points to what map_fd points to.
5264 */ 5265 return reuse_fd(map->fd, map_fd); 5266 } 5267 5268 static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) 5269 { 5270 const struct bpf_map *targ_map; 5271 unsigned int i; 5272 int fd, err = 0; 5273 5274 for (i = 0; i < map->init_slots_sz; i++) { 5275 if (!map->init_slots[i]) 5276 continue; 5277 5278 targ_map = map->init_slots[i]; 5279 fd = targ_map->fd; 5280 5281 if (obj->gen_loader) { 5282 bpf_gen__populate_outer_map(obj->gen_loader, 5283 map - obj->maps, i, 5284 targ_map - obj->maps); 5285 } else { 5286 err = bpf_map_update_elem(map->fd, &i, &fd, 0); 5287 } 5288 if (err) { 5289 err = -errno; 5290 pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", 5291 map->name, i, targ_map->name, fd, err); 5292 return err; 5293 } 5294 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", 5295 map->name, i, targ_map->name, fd); 5296 } 5297 5298 zfree(&map->init_slots); 5299 map->init_slots_sz = 0; 5300 5301 return 0; 5302 } 5303 5304 static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map) 5305 { 5306 const struct bpf_program *targ_prog; 5307 unsigned int i; 5308 int fd, err; 5309 5310 if (obj->gen_loader) 5311 return -ENOTSUP; 5312 5313 for (i = 0; i < map->init_slots_sz; i++) { 5314 if (!map->init_slots[i]) 5315 continue; 5316 5317 targ_prog = map->init_slots[i]; 5318 fd = bpf_program__fd(targ_prog); 5319 5320 err = bpf_map_update_elem(map->fd, &i, &fd, 0); 5321 if (err) { 5322 err = -errno; 5323 pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n", 5324 map->name, i, targ_prog->name, fd, err); 5325 return err; 5326 } 5327 pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n", 5328 map->name, i, targ_prog->name, fd); 5329 } 5330 5331 zfree(&map->init_slots); 5332 map->init_slots_sz = 0; 5333 5334 return 0; 5335 } 5336 5337 static int bpf_object_init_prog_arrays(struct bpf_object *obj) 5338 { 5339 struct bpf_map *map; 5340 int i, err; 5341 5342 for (i = 0; i < obj->nr_maps; i++) { 5343 map = &obj->maps[i]; 5344 5345 if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY) 5346 continue; 5347 5348 err = init_prog_array_slots(obj, map); 5349 if (err < 0) 5350 return err; 5351 } 5352 return 0; 5353 } 5354 5355 static int map_set_def_max_entries(struct bpf_map *map) 5356 { 5357 if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) { 5358 int nr_cpus; 5359 5360 nr_cpus = libbpf_num_possible_cpus(); 5361 if (nr_cpus < 0) { 5362 pr_warn("map '%s': failed to determine number of system CPUs: %d\n", 5363 map->name, nr_cpus); 5364 return nr_cpus; 5365 } 5366 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); 5367 map->def.max_entries = nr_cpus; 5368 } 5369 5370 return 0; 5371 } 5372 5373 static int 5374 bpf_object__create_maps(struct bpf_object *obj) 5375 { 5376 struct bpf_map *map; 5377 char *cp, errmsg[STRERR_BUFSIZE]; 5378 unsigned int i, j; 5379 int err; 5380 bool retried; 5381 5382 for (i = 0; i < obj->nr_maps; i++) { 5383 map = &obj->maps[i]; 5384 5385 /* To support old kernels, we skip creating global data maps 5386 * (.rodata, .data, .kconfig, etc); later on, during program 5387 * loading, if we detect that at least one of the to-be-loaded 5388 * programs is referencing any global data map, we'll error 5389 * out with program name and relocation index logged. 
5390 * This approach allows to accommodate Clang emitting 5391 * unnecessary .rodata.str1.1 sections for string literals, 5392 * but also it allows to have CO-RE applications that use 5393 * global variables in some of BPF programs, but not others. 5394 * If those global variable-using programs are not loaded at 5395 * runtime due to bpf_program__set_autoload(prog, false), 5396 * bpf_object loading will succeed just fine even on old 5397 * kernels. 5398 */ 5399 if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA)) 5400 map->autocreate = false; 5401 5402 if (!map->autocreate) { 5403 pr_debug("map '%s': skipped auto-creating...\n", map->name); 5404 continue; 5405 } 5406 5407 err = map_set_def_max_entries(map); 5408 if (err) 5409 goto err_out; 5410 5411 retried = false; 5412 retry: 5413 if (map->pin_path) { 5414 err = bpf_object__reuse_map(map); 5415 if (err) { 5416 pr_warn("map '%s': error reusing pinned map\n", 5417 map->name); 5418 goto err_out; 5419 } 5420 if (retried && map->fd < 0) { 5421 pr_warn("map '%s': cannot find pinned map\n", 5422 map->name); 5423 err = -ENOENT; 5424 goto err_out; 5425 } 5426 } 5427 5428 if (map->reused) { 5429 pr_debug("map '%s': skipping creation (preset fd=%d)\n", 5430 map->name, map->fd); 5431 } else { 5432 err = bpf_object__create_map(obj, map, false); 5433 if (err) 5434 goto err_out; 5435 5436 pr_debug("map '%s': created successfully, fd=%d\n", 5437 map->name, map->fd); 5438 5439 if (bpf_map__is_internal(map)) { 5440 err = bpf_object__populate_internal_map(obj, map); 5441 if (err < 0) 5442 goto err_out; 5443 } 5444 if (map->def.type == BPF_MAP_TYPE_ARENA) { 5445 map->mmaped = mmap((void *)(long)map->map_extra, 5446 bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, 5447 map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED, 5448 map->fd, 0); 5449 if (map->mmaped == MAP_FAILED) { 5450 err = -errno; 5451 map->mmaped = NULL; 5452 pr_warn("map '%s': failed to mmap arena: %d\n", 5453 map->name, err); 5454 return err; 5455 } 5456 if (obj->arena_data) { 5457 memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz); 5458 zfree(&obj->arena_data); 5459 } 5460 } 5461 if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { 5462 err = init_map_in_map_slots(obj, map); 5463 if (err < 0) 5464 goto err_out; 5465 } 5466 } 5467 5468 if (map->pin_path && !map->pinned) { 5469 err = bpf_map__pin(map, NULL); 5470 if (err) { 5471 if (!retried && err == -EEXIST) { 5472 retried = true; 5473 goto retry; 5474 } 5475 pr_warn("map '%s': failed to auto-pin at '%s': %d\n", 5476 map->name, map->pin_path, err); 5477 goto err_out; 5478 } 5479 } 5480 } 5481 5482 return 0; 5483 5484 err_out: 5485 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 5486 pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err); 5487 pr_perm_msg(err); 5488 for (j = 0; j < i; j++) 5489 zclose(obj->maps[j].fd); 5490 return err; 5491 } 5492 5493 static bool bpf_core_is_flavor_sep(const char *s) 5494 { 5495 /* check X___Y name pattern, where X and Y are not underscores */ 5496 return s[0] != '_' && /* X */ 5497 s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */ 5498 s[4] != '_'; /* Y */ 5499 } 5500 5501 /* Given 'some_struct_name___with_flavor' return the length of a name prefix 5502 * before last triple underscore. Struct name part after last triple 5503 * underscore is ignored by BPF CO-RE relocation during relocation matching. 
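 * For example (hypothetical flavor name): for 'task_struct___my_flavor'
 * the returned length is 11, i.e. just the "task_struct" part, so such a
 * flavor matches the real 'struct task_struct' during CO-RE candidate
 * selection.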
5504 */ 5505 size_t bpf_core_essential_name_len(const char *name) 5506 { 5507 size_t n = strlen(name); 5508 int i; 5509 5510 for (i = n - 5; i >= 0; i--) { 5511 if (bpf_core_is_flavor_sep(name + i)) 5512 return i + 1; 5513 } 5514 return n; 5515 } 5516 5517 void bpf_core_free_cands(struct bpf_core_cand_list *cands) 5518 { 5519 if (!cands) 5520 return; 5521 5522 free(cands->cands); 5523 free(cands); 5524 } 5525 5526 int bpf_core_add_cands(struct bpf_core_cand *local_cand, 5527 size_t local_essent_len, 5528 const struct btf *targ_btf, 5529 const char *targ_btf_name, 5530 int targ_start_id, 5531 struct bpf_core_cand_list *cands) 5532 { 5533 struct bpf_core_cand *new_cands, *cand; 5534 const struct btf_type *t, *local_t; 5535 const char *targ_name, *local_name; 5536 size_t targ_essent_len; 5537 int n, i; 5538 5539 local_t = btf__type_by_id(local_cand->btf, local_cand->id); 5540 local_name = btf__str_by_offset(local_cand->btf, local_t->name_off); 5541 5542 n = btf__type_cnt(targ_btf); 5543 for (i = targ_start_id; i < n; i++) { 5544 t = btf__type_by_id(targ_btf, i); 5545 if (!btf_kind_core_compat(t, local_t)) 5546 continue; 5547 5548 targ_name = btf__name_by_offset(targ_btf, t->name_off); 5549 if (str_is_empty(targ_name)) 5550 continue; 5551 5552 targ_essent_len = bpf_core_essential_name_len(targ_name); 5553 if (targ_essent_len != local_essent_len) 5554 continue; 5555 5556 if (strncmp(local_name, targ_name, local_essent_len) != 0) 5557 continue; 5558 5559 pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n", 5560 local_cand->id, btf_kind_str(local_t), 5561 local_name, i, btf_kind_str(t), targ_name, 5562 targ_btf_name); 5563 new_cands = libbpf_reallocarray(cands->cands, cands->len + 1, 5564 sizeof(*cands->cands)); 5565 if (!new_cands) 5566 return -ENOMEM; 5567 5568 cand = &new_cands[cands->len]; 5569 cand->btf = targ_btf; 5570 cand->id = i; 5571 5572 cands->cands = new_cands; 5573 cands->len++; 5574 } 5575 return 0; 5576 } 5577 5578 static int load_module_btfs(struct bpf_object *obj) 5579 { 5580 struct bpf_btf_info info; 5581 struct module_btf *mod_btf; 5582 struct btf *btf; 5583 char name[64]; 5584 __u32 id = 0, len; 5585 int err, fd; 5586 5587 if (obj->btf_modules_loaded) 5588 return 0; 5589 5590 if (obj->gen_loader) 5591 return 0; 5592 5593 /* don't do this again, even if we find no module BTFs */ 5594 obj->btf_modules_loaded = true; 5595 5596 /* kernel too old to support module BTFs */ 5597 if (!kernel_supports(obj, FEAT_MODULE_BTF)) 5598 return 0; 5599 5600 while (true) { 5601 err = bpf_btf_get_next_id(id, &id); 5602 if (err && errno == ENOENT) 5603 return 0; 5604 if (err && errno == EPERM) { 5605 pr_debug("skipping module BTFs loading, missing privileges\n"); 5606 return 0; 5607 } 5608 if (err) { 5609 err = -errno; 5610 pr_warn("failed to iterate BTF objects: %d\n", err); 5611 return err; 5612 } 5613 5614 fd = bpf_btf_get_fd_by_id(id); 5615 if (fd < 0) { 5616 if (errno == ENOENT) 5617 continue; /* expected race: BTF was unloaded */ 5618 err = -errno; 5619 pr_warn("failed to get BTF object #%d FD: %d\n", id, err); 5620 return err; 5621 } 5622 5623 len = sizeof(info); 5624 memset(&info, 0, sizeof(info)); 5625 info.name = ptr_to_u64(name); 5626 info.name_len = sizeof(name); 5627 5628 err = bpf_btf_get_info_by_fd(fd, &info, &len); 5629 if (err) { 5630 err = -errno; 5631 pr_warn("failed to get BTF object #%d info: %d\n", id, err); 5632 goto err_out; 5633 } 5634 5635 /* ignore non-module BTFs */ 5636 if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) { 5637 
close(fd); 5638 continue; 5639 } 5640 5641 btf = btf_get_from_fd(fd, obj->btf_vmlinux); 5642 err = libbpf_get_error(btf); 5643 if (err) { 5644 pr_warn("failed to load module [%s]'s BTF object #%d: %d\n", 5645 name, id, err); 5646 goto err_out; 5647 } 5648 5649 err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap, 5650 sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); 5651 if (err) 5652 goto err_out; 5653 5654 mod_btf = &obj->btf_modules[obj->btf_module_cnt++]; 5655 5656 mod_btf->btf = btf; 5657 mod_btf->id = id; 5658 mod_btf->fd = fd; 5659 mod_btf->name = strdup(name); 5660 if (!mod_btf->name) { 5661 err = -ENOMEM; 5662 goto err_out; 5663 } 5664 continue; 5665 5666 err_out: 5667 close(fd); 5668 return err; 5669 } 5670 5671 return 0; 5672 } 5673 5674 static struct bpf_core_cand_list * 5675 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id) 5676 { 5677 struct bpf_core_cand local_cand = {}; 5678 struct bpf_core_cand_list *cands; 5679 const struct btf *main_btf; 5680 const struct btf_type *local_t; 5681 const char *local_name; 5682 size_t local_essent_len; 5683 int err, i; 5684 5685 local_cand.btf = local_btf; 5686 local_cand.id = local_type_id; 5687 local_t = btf__type_by_id(local_btf, local_type_id); 5688 if (!local_t) 5689 return ERR_PTR(-EINVAL); 5690 5691 local_name = btf__name_by_offset(local_btf, local_t->name_off); 5692 if (str_is_empty(local_name)) 5693 return ERR_PTR(-EINVAL); 5694 local_essent_len = bpf_core_essential_name_len(local_name); 5695 5696 cands = calloc(1, sizeof(*cands)); 5697 if (!cands) 5698 return ERR_PTR(-ENOMEM); 5699 5700 /* Attempt to find target candidates in vmlinux BTF first */ 5701 main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux; 5702 err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands); 5703 if (err) 5704 goto err_out; 5705 5706 /* if vmlinux BTF has any candidate, don't go for module BTFs */ 5707 if (cands->len) 5708 return cands; 5709 5710 /* if vmlinux BTF was overridden, don't attempt to load module BTFs */ 5711 if (obj->btf_vmlinux_override) 5712 return cands; 5713 5714 /* now look through module BTFs, still trying to find candidates */ 5715 err = load_module_btfs(obj); 5716 if (err) 5717 goto err_out; 5718 5719 for (i = 0; i < obj->btf_module_cnt; i++) { 5720 err = bpf_core_add_cands(&local_cand, local_essent_len, 5721 obj->btf_modules[i].btf, 5722 obj->btf_modules[i].name, 5723 btf__type_cnt(obj->btf_vmlinux), 5724 cands); 5725 if (err) 5726 goto err_out; 5727 } 5728 5729 return cands; 5730 err_out: 5731 bpf_core_free_cands(cands); 5732 return ERR_PTR(err); 5733 } 5734 5735 /* Check local and target types for compatibility. This check is used for 5736 * type-based CO-RE relocations and follows slightly different rules than 5737 * field-based relocations. This function assumes that root types were already 5738 * checked for name match. Beyond that initial root-level name check, names 5739 * are completely ignored.
Compatibility rules are as follows: 5740 * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but 5741 * kind should match for local and target types (i.e., STRUCT is not 5742 * compatible with UNION); 5743 * - for ENUMs, the size is ignored; 5744 * - for INT, size and signedness are ignored; 5745 * - for ARRAY, dimensionality is ignored, element types are checked for 5746 * compatibility recursively; 5747 * - CONST/VOLATILE/RESTRICT modifiers are ignored; 5748 * - TYPEDEFs/PTRs are compatible if types they pointing to are compatible; 5749 * - FUNC_PROTOs are compatible if they have compatible signature: same 5750 * number of input args and compatible return and argument types. 5751 * These rules are not set in stone and probably will be adjusted as we get 5752 * more experience with using BPF CO-RE relocations. 5753 */ 5754 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, 5755 const struct btf *targ_btf, __u32 targ_id) 5756 { 5757 return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32); 5758 } 5759 5760 int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, 5761 const struct btf *targ_btf, __u32 targ_id) 5762 { 5763 return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32); 5764 } 5765 5766 static size_t bpf_core_hash_fn(const long key, void *ctx) 5767 { 5768 return key; 5769 } 5770 5771 static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx) 5772 { 5773 return k1 == k2; 5774 } 5775 5776 static int record_relo_core(struct bpf_program *prog, 5777 const struct bpf_core_relo *core_relo, int insn_idx) 5778 { 5779 struct reloc_desc *relos, *relo; 5780 5781 relos = libbpf_reallocarray(prog->reloc_desc, 5782 prog->nr_reloc + 1, sizeof(*relos)); 5783 if (!relos) 5784 return -ENOMEM; 5785 relo = &relos[prog->nr_reloc]; 5786 relo->type = RELO_CORE; 5787 relo->insn_idx = insn_idx; 5788 relo->core_relo = core_relo; 5789 prog->reloc_desc = relos; 5790 prog->nr_reloc++; 5791 return 0; 5792 } 5793 5794 static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx) 5795 { 5796 struct reloc_desc *relo; 5797 int i; 5798 5799 for (i = 0; i < prog->nr_reloc; i++) { 5800 relo = &prog->reloc_desc[i]; 5801 if (relo->type != RELO_CORE || relo->insn_idx != insn_idx) 5802 continue; 5803 5804 return relo->core_relo; 5805 } 5806 5807 return NULL; 5808 } 5809 5810 static int bpf_core_resolve_relo(struct bpf_program *prog, 5811 const struct bpf_core_relo *relo, 5812 int relo_idx, 5813 const struct btf *local_btf, 5814 struct hashmap *cand_cache, 5815 struct bpf_core_relo_res *targ_res) 5816 { 5817 struct bpf_core_spec specs_scratch[3] = {}; 5818 struct bpf_core_cand_list *cands = NULL; 5819 const char *prog_name = prog->name; 5820 const struct btf_type *local_type; 5821 const char *local_name; 5822 __u32 local_id = relo->type_id; 5823 int err; 5824 5825 local_type = btf__type_by_id(local_btf, local_id); 5826 if (!local_type) 5827 return -EINVAL; 5828 5829 local_name = btf__name_by_offset(local_btf, local_type->name_off); 5830 if (!local_name) 5831 return -EINVAL; 5832 5833 if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && 5834 !hashmap__find(cand_cache, local_id, &cands)) { 5835 cands = bpf_core_find_cands(prog->obj, local_btf, local_id); 5836 if (IS_ERR(cands)) { 5837 pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n", 5838 prog_name, relo_idx, local_id, btf_kind_str(local_type), 5839 local_name, PTR_ERR(cands)); 5840 return PTR_ERR(cands); 5841 } 
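/* Illustrative note added by the editor (not part of the original libbpf
 * source): under the compatibility rules documented above
 * bpf_core_types_are_compat(), for example:
 *   - local 'struct sk_buff' vs target 'struct sk_buff' with different or
 *     reordered members: compatible (members are not inspected here);
 *   - local 4-byte 'enum foo' vs target 1-byte 'enum foo': compatible
 *     (enum size is ignored);
 *   - local 'int' vs target 'unsigned long long': compatible (INT size and
 *     signedness are ignored);
 *   - local 'struct foo' vs target 'union foo': not compatible (the kind
 *     has to match).
 * The function is expected to return a positive value for compatible types,
 * zero for incompatible ones, and a negative error code on failure (e.g.,
 * when the recursion limit of 32 passed above is exhausted).
 */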
5842 err = hashmap__set(cand_cache, local_id, cands, NULL, NULL); 5843 if (err) { 5844 bpf_core_free_cands(cands); 5845 return err; 5846 } 5847 } 5848 5849 return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch, 5850 targ_res); 5851 } 5852 5853 static int 5854 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) 5855 { 5856 const struct btf_ext_info_sec *sec; 5857 struct bpf_core_relo_res targ_res; 5858 const struct bpf_core_relo *rec; 5859 const struct btf_ext_info *seg; 5860 struct hashmap_entry *entry; 5861 struct hashmap *cand_cache = NULL; 5862 struct bpf_program *prog; 5863 struct bpf_insn *insn; 5864 const char *sec_name; 5865 int i, err = 0, insn_idx, sec_idx, sec_num; 5866 5867 if (obj->btf_ext->core_relo_info.len == 0) 5868 return 0; 5869 5870 if (targ_btf_path) { 5871 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL); 5872 err = libbpf_get_error(obj->btf_vmlinux_override); 5873 if (err) { 5874 pr_warn("failed to parse target BTF: %d\n", err); 5875 return err; 5876 } 5877 } 5878 5879 cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL); 5880 if (IS_ERR(cand_cache)) { 5881 err = PTR_ERR(cand_cache); 5882 goto out; 5883 } 5884 5885 seg = &obj->btf_ext->core_relo_info; 5886 sec_num = 0; 5887 for_each_btf_ext_sec(seg, sec) { 5888 sec_idx = seg->sec_idxs[sec_num]; 5889 sec_num++; 5890 5891 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); 5892 if (str_is_empty(sec_name)) { 5893 err = -EINVAL; 5894 goto out; 5895 } 5896 5897 pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info); 5898 5899 for_each_btf_ext_rec(seg, sec, i, rec) { 5900 if (rec->insn_off % BPF_INSN_SZ) 5901 return -EINVAL; 5902 insn_idx = rec->insn_off / BPF_INSN_SZ; 5903 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); 5904 if (!prog) { 5905 /* When __weak subprog is "overridden" by another instance 5906 * of the subprog from a different object file, linker still 5907 * appends all the .BTF.ext info that used to belong to that 5908 * eliminated subprogram. 5909 * This is similar to what x86-64 linker does for relocations. 5910 * So just ignore such relocations just like we ignore 5911 * subprog instructions when discovering subprograms. 
5912 */ 5913 pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n", 5914 sec_name, i, insn_idx); 5915 continue; 5916 } 5917 /* no need to apply CO-RE relocation if the program is 5918 * not going to be loaded 5919 */ 5920 if (!prog->autoload) 5921 continue; 5922 5923 /* adjust insn_idx from section frame of reference to the local 5924 * program's frame of reference; (sub-)program code is not yet 5925 * relocated, so it's enough to just subtract in-section offset 5926 */ 5927 insn_idx = insn_idx - prog->sec_insn_off; 5928 if (insn_idx >= prog->insns_cnt) 5929 return -EINVAL; 5930 insn = &prog->insns[insn_idx]; 5931 5932 err = record_relo_core(prog, rec, insn_idx); 5933 if (err) { 5934 pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n", 5935 prog->name, i, err); 5936 goto out; 5937 } 5938 5939 if (prog->obj->gen_loader) 5940 continue; 5941 5942 err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res); 5943 if (err) { 5944 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", 5945 prog->name, i, err); 5946 goto out; 5947 } 5948 5949 err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res); 5950 if (err) { 5951 pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n", 5952 prog->name, i, insn_idx, err); 5953 goto out; 5954 } 5955 } 5956 } 5957 5958 out: 5959 /* obj->btf_vmlinux and module BTFs are freed after object load */ 5960 btf__free(obj->btf_vmlinux_override); 5961 obj->btf_vmlinux_override = NULL; 5962 5963 if (!IS_ERR_OR_NULL(cand_cache)) { 5964 hashmap__for_each_entry(cand_cache, entry, i) { 5965 bpf_core_free_cands(entry->pvalue); 5966 } 5967 hashmap__free(cand_cache); 5968 } 5969 return err; 5970 } 5971 5972 /* base map load ldimm64 special constant, used also for log fixup logic */ 5973 #define POISON_LDIMM64_MAP_BASE 2001000000 5974 #define POISON_LDIMM64_MAP_PFX "200100" 5975 5976 static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx, 5977 int insn_idx, struct bpf_insn *insn, 5978 int map_idx, const struct bpf_map *map) 5979 { 5980 int i; 5981 5982 pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n", 5983 prog->name, relo_idx, insn_idx, map_idx, map->name); 5984 5985 /* we turn single ldimm64 into two identical invalid calls */ 5986 for (i = 0; i < 2; i++) { 5987 insn->code = BPF_JMP | BPF_CALL; 5988 insn->dst_reg = 0; 5989 insn->src_reg = 0; 5990 insn->off = 0; 5991 /* if this instruction is reachable (not a dead code), 5992 * verifier will complain with something like: 5993 * invalid func unknown#2001000123 5994 * where lower 123 is map index into obj->maps[] array 5995 */ 5996 insn->imm = POISON_LDIMM64_MAP_BASE + map_idx; 5997 5998 insn++; 5999 } 6000 } 6001 6002 /* unresolved kfunc call special constant, used also for log fixup logic */ 6003 #define POISON_CALL_KFUNC_BASE 2002000000 6004 #define POISON_CALL_KFUNC_PFX "2002" 6005 6006 static void poison_kfunc_call(struct bpf_program *prog, int relo_idx, 6007 int insn_idx, struct bpf_insn *insn, 6008 int ext_idx, const struct extern_desc *ext) 6009 { 6010 pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n", 6011 prog->name, relo_idx, insn_idx, ext->name); 6012 6013 /* we turn kfunc call into invalid helper call with identifiable constant */ 6014 insn->code = BPF_JMP | BPF_CALL; 6015 insn->dst_reg = 0; 6016 insn->src_reg = 0; 6017 insn->off = 0; 6018 /* if this instruction is reachable (not a dead code), 6019 * verifier will complain with something like: 6020 
* invalid func unknown#2001000123 6021 * where lower 123 is extern index into obj->externs[] array 6022 */ 6023 insn->imm = POISON_CALL_KFUNC_BASE + ext_idx; 6024 } 6025 6026 /* Relocate data references within program code: 6027 * - map references; 6028 * - global variable references; 6029 * - extern references. 6030 */ 6031 static int 6032 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) 6033 { 6034 int i; 6035 6036 for (i = 0; i < prog->nr_reloc; i++) { 6037 struct reloc_desc *relo = &prog->reloc_desc[i]; 6038 struct bpf_insn *insn = &prog->insns[relo->insn_idx]; 6039 const struct bpf_map *map; 6040 struct extern_desc *ext; 6041 6042 switch (relo->type) { 6043 case RELO_LD64: 6044 map = &obj->maps[relo->map_idx]; 6045 if (obj->gen_loader) { 6046 insn[0].src_reg = BPF_PSEUDO_MAP_IDX; 6047 insn[0].imm = relo->map_idx; 6048 } else if (map->autocreate) { 6049 insn[0].src_reg = BPF_PSEUDO_MAP_FD; 6050 insn[0].imm = map->fd; 6051 } else { 6052 poison_map_ldimm64(prog, i, relo->insn_idx, insn, 6053 relo->map_idx, map); 6054 } 6055 break; 6056 case RELO_DATA: 6057 map = &obj->maps[relo->map_idx]; 6058 insn[1].imm = insn[0].imm + relo->sym_off; 6059 if (obj->gen_loader) { 6060 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; 6061 insn[0].imm = relo->map_idx; 6062 } else if (map->autocreate) { 6063 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; 6064 insn[0].imm = map->fd; 6065 } else { 6066 poison_map_ldimm64(prog, i, relo->insn_idx, insn, 6067 relo->map_idx, map); 6068 } 6069 break; 6070 case RELO_EXTERN_LD64: 6071 ext = &obj->externs[relo->ext_idx]; 6072 if (ext->type == EXT_KCFG) { 6073 if (obj->gen_loader) { 6074 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; 6075 insn[0].imm = obj->kconfig_map_idx; 6076 } else { 6077 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; 6078 insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; 6079 } 6080 insn[1].imm = ext->kcfg.data_off; 6081 } else /* EXT_KSYM */ { 6082 if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */ 6083 insn[0].src_reg = BPF_PSEUDO_BTF_ID; 6084 insn[0].imm = ext->ksym.kernel_btf_id; 6085 insn[1].imm = ext->ksym.kernel_btf_obj_fd; 6086 } else { /* typeless ksyms or unresolved typed ksyms */ 6087 insn[0].imm = (__u32)ext->ksym.addr; 6088 insn[1].imm = ext->ksym.addr >> 32; 6089 } 6090 } 6091 break; 6092 case RELO_EXTERN_CALL: 6093 ext = &obj->externs[relo->ext_idx]; 6094 insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; 6095 if (ext->is_set) { 6096 insn[0].imm = ext->ksym.kernel_btf_id; 6097 insn[0].off = ext->ksym.btf_fd_idx; 6098 } else { /* unresolved weak kfunc call */ 6099 poison_kfunc_call(prog, i, relo->insn_idx, insn, 6100 relo->ext_idx, ext); 6101 } 6102 break; 6103 case RELO_SUBPROG_ADDR: 6104 if (insn[0].src_reg != BPF_PSEUDO_FUNC) { 6105 pr_warn("prog '%s': relo #%d: bad insn\n", 6106 prog->name, i); 6107 return -EINVAL; 6108 } 6109 /* handled already */ 6110 break; 6111 case RELO_CALL: 6112 /* handled already */ 6113 break; 6114 case RELO_CORE: 6115 /* will be handled by bpf_program_record_relos() */ 6116 break; 6117 default: 6118 pr_warn("prog '%s': relo #%d: bad relo type %d\n", 6119 prog->name, i, relo->type); 6120 return -EINVAL; 6121 } 6122 } 6123 6124 return 0; 6125 } 6126 6127 static int adjust_prog_btf_ext_info(const struct bpf_object *obj, 6128 const struct bpf_program *prog, 6129 const struct btf_ext_info *ext_info, 6130 void **prog_info, __u32 *prog_rec_cnt, 6131 __u32 *prog_rec_sz) 6132 { 6133 void *copy_start = NULL, *copy_end = NULL; 6134 void *rec, *rec_end, *new_prog_info; 6135 const struct btf_ext_info_sec *sec; 
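/* Worked example added by the editor (not part of the original libbpf
 * source) of the offset adjustment performed by this helper: .BTF.ext
 * func/line info records store instruction offsets in bytes, relative to the
 * ELF section. Suppose a record has a byte offset of 104, the (sub-)program
 * starts at instruction #5 of its section (sec_insn_off == 5), and it was
 * appended to the main program at instruction #100 (sub_insn_off == 100):
 *
 *   section-relative insn index:  104 / BPF_INSN_SZ == 13
 *   off_adj == sub_insn_off - sec_insn_off == 95
 *   kernel-visible insn offset:   13 + 95 == 108
 */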
6136 size_t old_sz, new_sz; 6137 int i, sec_num, sec_idx, off_adj; 6138 6139 sec_num = 0; 6140 for_each_btf_ext_sec(ext_info, sec) { 6141 sec_idx = ext_info->sec_idxs[sec_num]; 6142 sec_num++; 6143 if (prog->sec_idx != sec_idx) 6144 continue; 6145 6146 for_each_btf_ext_rec(ext_info, sec, i, rec) { 6147 __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ; 6148 6149 if (insn_off < prog->sec_insn_off) 6150 continue; 6151 if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt) 6152 break; 6153 6154 if (!copy_start) 6155 copy_start = rec; 6156 copy_end = rec + ext_info->rec_size; 6157 } 6158 6159 if (!copy_start) 6160 return -ENOENT; 6161 6162 /* append func/line info of a given (sub-)program to the main 6163 * program func/line info 6164 */ 6165 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size; 6166 new_sz = old_sz + (copy_end - copy_start); 6167 new_prog_info = realloc(*prog_info, new_sz); 6168 if (!new_prog_info) 6169 return -ENOMEM; 6170 *prog_info = new_prog_info; 6171 *prog_rec_cnt = new_sz / ext_info->rec_size; 6172 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start); 6173 6174 /* Kernel instruction offsets are in units of 8-byte 6175 * instructions, while .BTF.ext instruction offsets generated 6176 * by Clang are in units of bytes. So convert Clang offsets 6177 * into kernel offsets and adjust offset according to program 6178 * relocated position. 6179 */ 6180 off_adj = prog->sub_insn_off - prog->sec_insn_off; 6181 rec = new_prog_info + old_sz; 6182 rec_end = new_prog_info + new_sz; 6183 for (; rec < rec_end; rec += ext_info->rec_size) { 6184 __u32 *insn_off = rec; 6185 6186 *insn_off = *insn_off / BPF_INSN_SZ + off_adj; 6187 } 6188 *prog_rec_sz = ext_info->rec_size; 6189 return 0; 6190 } 6191 6192 return -ENOENT; 6193 } 6194 6195 static int 6196 reloc_prog_func_and_line_info(const struct bpf_object *obj, 6197 struct bpf_program *main_prog, 6198 const struct bpf_program *prog) 6199 { 6200 int err; 6201 6202 /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't 6203 * support func/line info 6204 */ 6205 if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC)) 6206 return 0; 6207 6208 /* only attempt func info relocation if main program's func_info 6209 * relocation was successful 6210 */ 6211 if (main_prog != prog && !main_prog->func_info) 6212 goto line_info; 6213 6214 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info, 6215 &main_prog->func_info, 6216 &main_prog->func_info_cnt, 6217 &main_prog->func_info_rec_size); 6218 if (err) { 6219 if (err != -ENOENT) { 6220 pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n", 6221 prog->name, err); 6222 return err; 6223 } 6224 if (main_prog->func_info) { 6225 /* 6226 * Some info has already been found but has problem 6227 * in the last btf_ext reloc. Must have to error out. 6228 */ 6229 pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name); 6230 return err; 6231 } 6232 /* Have problem loading the very first info. Ignore the rest. 
*/ 6233 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n", 6234 prog->name); 6235 } 6236 6237 line_info: 6238 /* don't relocate line info if main program's relocation failed */ 6239 if (main_prog != prog && !main_prog->line_info) 6240 return 0; 6241 6242 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info, 6243 &main_prog->line_info, 6244 &main_prog->line_info_cnt, 6245 &main_prog->line_info_rec_size); 6246 if (err) { 6247 if (err != -ENOENT) { 6248 pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n", 6249 prog->name, err); 6250 return err; 6251 } 6252 if (main_prog->line_info) { 6253 /* 6254 * Some info has already been found but has problem 6255 * in the last btf_ext reloc. Must have to error out. 6256 */ 6257 pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name); 6258 return err; 6259 } 6260 /* Have problem loading the very first info. Ignore the rest. */ 6261 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n", 6262 prog->name); 6263 } 6264 return 0; 6265 } 6266 6267 static int cmp_relo_by_insn_idx(const void *key, const void *elem) 6268 { 6269 size_t insn_idx = *(const size_t *)key; 6270 const struct reloc_desc *relo = elem; 6271 6272 if (insn_idx == relo->insn_idx) 6273 return 0; 6274 return insn_idx < relo->insn_idx ? -1 : 1; 6275 } 6276 6277 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx) 6278 { 6279 if (!prog->nr_reloc) 6280 return NULL; 6281 return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc, 6282 sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx); 6283 } 6284 6285 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog) 6286 { 6287 int new_cnt = main_prog->nr_reloc + subprog->nr_reloc; 6288 struct reloc_desc *relos; 6289 int i; 6290 6291 if (main_prog == subprog) 6292 return 0; 6293 relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos)); 6294 /* if new count is zero, reallocarray can return a valid NULL result; 6295 * in this case the previous pointer will be freed, so we *have to* 6296 * reassign old pointer to the new value (even if it's NULL) 6297 */ 6298 if (!relos && new_cnt) 6299 return -ENOMEM; 6300 if (subprog->nr_reloc) 6301 memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, 6302 sizeof(*relos) * subprog->nr_reloc); 6303 6304 for (i = main_prog->nr_reloc; i < new_cnt; i++) 6305 relos[i].insn_idx += subprog->sub_insn_off; 6306 /* After insn_idx adjustment the 'relos' array is still sorted 6307 * by insn_idx and doesn't break bsearch. 
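 * (Worked example added by the editor, not part of the original source: if
 * main_prog's relos sit at insn_idx {3, 10} and subprog's relos at {1, 4}
 * with subprog->sub_insn_off == 20, the appended copies become {21, 24} and
 * the combined array {3, 10, 21, 24} remains sorted by insn_idx.)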
6308 */ 6309 main_prog->reloc_desc = relos; 6310 main_prog->nr_reloc = new_cnt; 6311 return 0; 6312 } 6313 6314 static int 6315 bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog, 6316 struct bpf_program *subprog) 6317 { 6318 struct bpf_insn *insns; 6319 size_t new_cnt; 6320 int err; 6321 6322 subprog->sub_insn_off = main_prog->insns_cnt; 6323 6324 new_cnt = main_prog->insns_cnt + subprog->insns_cnt; 6325 insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); 6326 if (!insns) { 6327 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); 6328 return -ENOMEM; 6329 } 6330 main_prog->insns = insns; 6331 main_prog->insns_cnt = new_cnt; 6332 6333 memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, 6334 subprog->insns_cnt * sizeof(*insns)); 6335 6336 pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", 6337 main_prog->name, subprog->insns_cnt, subprog->name); 6338 6339 /* The subprog insns are now appended. Append its relos too. */ 6340 err = append_subprog_relos(main_prog, subprog); 6341 if (err) 6342 return err; 6343 return 0; 6344 } 6345 6346 static int 6347 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, 6348 struct bpf_program *prog) 6349 { 6350 size_t sub_insn_idx, insn_idx; 6351 struct bpf_program *subprog; 6352 struct reloc_desc *relo; 6353 struct bpf_insn *insn; 6354 int err; 6355 6356 err = reloc_prog_func_and_line_info(obj, main_prog, prog); 6357 if (err) 6358 return err; 6359 6360 for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) { 6361 insn = &main_prog->insns[prog->sub_insn_off + insn_idx]; 6362 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn)) 6363 continue; 6364 6365 relo = find_prog_insn_relo(prog, insn_idx); 6366 if (relo && relo->type == RELO_EXTERN_CALL) 6367 /* kfunc relocations will be handled later 6368 * in bpf_object__relocate_data() 6369 */ 6370 continue; 6371 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) { 6372 pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n", 6373 prog->name, insn_idx, relo->type); 6374 return -LIBBPF_ERRNO__RELOC; 6375 } 6376 if (relo) { 6377 /* sub-program instruction index is a combination of 6378 * an offset of a symbol pointed to by relocation and 6379 * call instruction's imm field; for global functions, 6380 * call always has imm = -1, but for static functions 6381 * relocation is against STT_SECTION and insn->imm 6382 * points to a start of a static function 6383 * 6384 * for subprog addr relocation, the relo->sym_off + insn->imm is 6385 * the byte offset in the corresponding section. 6386 */ 6387 if (relo->type == RELO_CALL) 6388 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1; 6389 else 6390 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ; 6391 } else if (insn_is_pseudo_func(insn)) { 6392 /* 6393 * RELO_SUBPROG_ADDR relo is always emitted even if both 6394 * functions are in the same section, so it shouldn't reach here. 
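 * (Worked example added by the editor, not part of the original source: in
 * the RELO_CALL case above, a call to a global function whose symbol lives
 * at byte offset 160 of .text has relo->sym_off == 160 and insn->imm == -1,
 * so sub_insn_idx = 160 / 8 + (-1) + 1 = 20, i.e. instruction #20 of .text.)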
*/
6396 pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
6397 prog->name, insn_idx);
6398 return -LIBBPF_ERRNO__RELOC;
6399 } else {
6400 /* if subprogram call is to a static function within
6401 * the same ELF section, there won't be any relocation
6402 * emitted, but it also means there is no additional
6403 * offset necessary, insn->imm is relative to
6404 * instruction's original position within the section
6405 */
6406 sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6407 }
6408
6409 /* we enforce that sub-programs should be in .text section */
6410 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6411 if (!subprog) {
6412 pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6413 prog->name);
6414 return -LIBBPF_ERRNO__RELOC;
6415 }
6416
6417 /* if it's the first call instruction calling into this
6418 * subprogram (meaning this subprog hasn't been processed
6419 * yet) within the context of current main program:
6420 * - append it at the end of main program's instructions block;
6421 * - process it recursively, while current program is put on hold;
6422 * - if that subprogram calls some other not yet processed
6423 * subprogram, same thing will happen recursively until
6424 * there are no more unprocessed subprograms left to append
6425 * and relocate.
6426 */
6427 if (subprog->sub_insn_off == 0) {
6428 err = bpf_object__append_subprog_code(obj, main_prog, subprog);
6429 if (err)
6430 return err;
6431 err = bpf_object__reloc_code(obj, main_prog, subprog);
6432 if (err)
6433 return err;
6434 }
6435
6436 /* main_prog->insns memory could have been re-allocated, so
6437 * calculate pointer again
6438 */
6439 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6440 /* calculate correct instruction position within current main
6441 * prog; each main prog can have a different set of
6442 * subprograms appended (potentially in different order as
6443 * well), so position of any subprog can be different for
6444 * different main programs
6445 */
6446 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6447
6448 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6449 prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6450 }
6451
6452 return 0;
6453 }
6454
6455 /*
6456 * Relocate sub-program calls.
6457 *
6458 * Algorithm operates as follows. Each entry-point BPF program (referred to as
6459 * main prog) is processed separately. Each subprog (a non-entry function
6460 * that can be called from either entry progs or other subprogs) gets its
6461 * sub_insn_off reset to zero. This serves as an indicator that this subprogram
6462 * hasn't yet been appended and relocated within current main prog. Once it's
6463 * relocated, sub_insn_off will point at the position within current main prog
6464 * where given subprog was appended. This will further be used to relocate all
6465 * the call instructions jumping into this subprog.
6466 *
6467 * We start with main program and process all call instructions. If the call
6468 * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6469 * is zero), subprog instructions are appended at the end of main program's
6470 * instruction array. Then main program is "put on hold" while we recursively
6471 * process newly appended subprogram. If that subprogram calls into another
6472 * subprogram that hasn't been appended, new subprogram is appended again to
6473 * the *main* prog's instructions (subprog's instructions are always left
6474 * untouched, as they need to be in unmodified state for subsequent main progs
6475 * and subprog instructions are always sent only as part of a main prog) and
6476 * the process continues recursively. Once all the subprogs called from a main
6477 * prog or any of its subprogs are appended (and relocated), all their
6478 * positions within finalized instructions array are known, so it's easy to
6479 * rewrite call instructions with correct relative offsets, corresponding to
6480 * desired target subprog.
6481 *
6482 * It's important to realize that some subprogs might not be called from some
6483 * main prog and any of its called/used subprogs. Those will keep their
6484 * subprog->sub_insn_off as zero at all times and won't be appended to current
6485 * main prog and won't be relocated within the context of current main prog.
6486 * They might still be used from other main progs later.
6487 *
6488 * Visually this process can be shown as below. Suppose we have two main
6489 * programs mainA and mainB and BPF object contains three subprogs: subA,
6490 * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6491 * subC both call subB:
6492 *
6493 * +--------+ +-------+
6494 * | v v |
6495 * +--+---+ +--+-+-+ +---+--+
6496 * | subA | | subB | | subC |
6497 * +--+---+ +------+ +---+--+
6498 * ^ ^
6499 * | |
6500 * +---+-------+ +------+----+
6501 * | mainA | | mainB |
6502 * +-----------+ +-----------+
6503 *
6504 * We'll start relocating mainA, will find subA, append it and start
6505 * processing subA recursively:
6506 *
6507 * +-----------+------+
6508 * | mainA | subA |
6509 * +-----------+------+
6510 *
6511 * At this point we notice that subB is used from subA, so we append it and
6512 * relocate (there are no further subcalls from subB):
6513 *
6514 * +-----------+------+------+
6515 * | mainA | subA | subB |
6516 * +-----------+------+------+
6517 *
6518 * At this point, we relocate subA calls, then go one level up and finish with
6519 * relocating mainA calls. mainA is done.
6520 *
6521 * For mainB, the process is similar but results in a different order. We start
6522 * with mainB and skip subA and subB, as mainB never calls them (at least
6523 * directly), but we see subC is needed, so we append and start processing it:
6524 *
6525 * +-----------+------+
6526 * | mainB | subC |
6527 * +-----------+------+
6528 * Now we see subC needs subB, so we go back to it, append and relocate it:
6529 *
6530 * +-----------+------+------+
6531 * | mainB | subC | subB |
6532 * +-----------+------+------+
6533 *
6534 * At this point we unwind recursion, relocate calls in subC, then in mainB.
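 *
 * (Worked example added by the editor, not part of the original source: with
 * the layout above, suppose mainA is 10 instructions long, so subA is
 * appended at sub_insn_off == 10 and subB, say, at sub_insn_off == 15. A
 * call to subA at instruction #3 of mainA gets imm = 10 - (0 + 3) - 1 = 6,
 * because the kernel resolves a BPF-to-BPF call as "next instruction + imm",
 * i.e. 3 + 1 + 6 = 10. A call to subB at instruction #12 of the appended
 * subA copy (prog->sub_insn_off == 10, insn_idx == 2) gets
 * imm = 15 - (10 + 2) - 1 = 2, which again lands on instruction #15.)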
6535 */ 6536 static int 6537 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) 6538 { 6539 struct bpf_program *subprog; 6540 int i, err; 6541 6542 /* mark all subprogs as not relocated (yet) within the context of 6543 * current main program 6544 */ 6545 for (i = 0; i < obj->nr_programs; i++) { 6546 subprog = &obj->programs[i]; 6547 if (!prog_is_subprog(obj, subprog)) 6548 continue; 6549 6550 subprog->sub_insn_off = 0; 6551 } 6552 6553 err = bpf_object__reloc_code(obj, prog, prog); 6554 if (err) 6555 return err; 6556 6557 return 0; 6558 } 6559 6560 static void 6561 bpf_object__free_relocs(struct bpf_object *obj) 6562 { 6563 struct bpf_program *prog; 6564 int i; 6565 6566 /* free up relocation descriptors */ 6567 for (i = 0; i < obj->nr_programs; i++) { 6568 prog = &obj->programs[i]; 6569 zfree(&prog->reloc_desc); 6570 prog->nr_reloc = 0; 6571 } 6572 } 6573 6574 static int cmp_relocs(const void *_a, const void *_b) 6575 { 6576 const struct reloc_desc *a = _a; 6577 const struct reloc_desc *b = _b; 6578 6579 if (a->insn_idx != b->insn_idx) 6580 return a->insn_idx < b->insn_idx ? -1 : 1; 6581 6582 /* no two relocations should have the same insn_idx, but ... */ 6583 if (a->type != b->type) 6584 return a->type < b->type ? -1 : 1; 6585 6586 return 0; 6587 } 6588 6589 static void bpf_object__sort_relos(struct bpf_object *obj) 6590 { 6591 int i; 6592 6593 for (i = 0; i < obj->nr_programs; i++) { 6594 struct bpf_program *p = &obj->programs[i]; 6595 6596 if (!p->nr_reloc) 6597 continue; 6598 6599 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); 6600 } 6601 } 6602 6603 static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog) 6604 { 6605 const char *str = "exception_callback:"; 6606 size_t pfx_len = strlen(str); 6607 int i, j, n; 6608 6609 if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG)) 6610 return 0; 6611 6612 n = btf__type_cnt(obj->btf); 6613 for (i = 1; i < n; i++) { 6614 const char *name; 6615 struct btf_type *t; 6616 6617 t = btf_type_by_id(obj->btf, i); 6618 if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1) 6619 continue; 6620 6621 name = btf__str_by_offset(obj->btf, t->name_off); 6622 if (strncmp(name, str, pfx_len) != 0) 6623 continue; 6624 6625 t = btf_type_by_id(obj->btf, t->type); 6626 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) { 6627 pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n", 6628 prog->name); 6629 return -EINVAL; 6630 } 6631 if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0) 6632 continue; 6633 /* Multiple callbacks are specified for the same prog, 6634 * the verifier will eventually return an error for this 6635 * case, hence simply skip appending a subprog. 6636 */ 6637 if (prog->exception_cb_idx >= 0) { 6638 prog->exception_cb_idx = -1; 6639 break; 6640 } 6641 6642 name += pfx_len; 6643 if (str_is_empty(name)) { 6644 pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n", 6645 prog->name); 6646 return -EINVAL; 6647 } 6648 6649 for (j = 0; j < obj->nr_programs; j++) { 6650 struct bpf_program *subprog = &obj->programs[j]; 6651 6652 if (!prog_is_subprog(obj, subprog)) 6653 continue; 6654 if (strcmp(name, subprog->name) != 0) 6655 continue; 6656 /* Enforce non-hidden, as from verifier point of 6657 * view it expects global functions, whereas the 6658 * mark_btf_static fixes up linkage as static. 
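 * (Illustrative sketch added by the editor, not part of the original source:
 * the source-level pattern this code recognizes is a BTF decl tag of the
 * form "exception_callback:<func>" attached to the main program's function,
 * with <func> naming a global, non-hidden subprog. Roughly, with
 * hypothetical names (helper macros in BPF-side headers may wrap the
 * attribute):
 *
 *   __noinline int my_exc_cb(u64 cookie) { return 0; }
 *
 *   SEC("fentry/bpf_fentry_test1")
 *   __attribute__((btf_decl_tag("exception_callback:my_exc_cb")))
 *   int handler(void *ctx) { ... }
 * )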
6659 */ 6660 if (!subprog->sym_global || subprog->mark_btf_static) { 6661 pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n", 6662 prog->name, subprog->name); 6663 return -EINVAL; 6664 } 6665 /* Let's see if we already saw a static exception callback with the same name */ 6666 if (prog->exception_cb_idx >= 0) { 6667 pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n", 6668 prog->name, subprog->name); 6669 return -EINVAL; 6670 } 6671 prog->exception_cb_idx = j; 6672 break; 6673 } 6674 6675 if (prog->exception_cb_idx >= 0) 6676 continue; 6677 6678 pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name); 6679 return -ENOENT; 6680 } 6681 6682 return 0; 6683 } 6684 6685 static struct { 6686 enum bpf_prog_type prog_type; 6687 const char *ctx_name; 6688 } global_ctx_map[] = { 6689 { BPF_PROG_TYPE_CGROUP_DEVICE, "bpf_cgroup_dev_ctx" }, 6690 { BPF_PROG_TYPE_CGROUP_SKB, "__sk_buff" }, 6691 { BPF_PROG_TYPE_CGROUP_SOCK, "bpf_sock" }, 6692 { BPF_PROG_TYPE_CGROUP_SOCK_ADDR, "bpf_sock_addr" }, 6693 { BPF_PROG_TYPE_CGROUP_SOCKOPT, "bpf_sockopt" }, 6694 { BPF_PROG_TYPE_CGROUP_SYSCTL, "bpf_sysctl" }, 6695 { BPF_PROG_TYPE_FLOW_DISSECTOR, "__sk_buff" }, 6696 { BPF_PROG_TYPE_KPROBE, "bpf_user_pt_regs_t" }, 6697 { BPF_PROG_TYPE_LWT_IN, "__sk_buff" }, 6698 { BPF_PROG_TYPE_LWT_OUT, "__sk_buff" }, 6699 { BPF_PROG_TYPE_LWT_SEG6LOCAL, "__sk_buff" }, 6700 { BPF_PROG_TYPE_LWT_XMIT, "__sk_buff" }, 6701 { BPF_PROG_TYPE_NETFILTER, "bpf_nf_ctx" }, 6702 { BPF_PROG_TYPE_PERF_EVENT, "bpf_perf_event_data" }, 6703 { BPF_PROG_TYPE_RAW_TRACEPOINT, "bpf_raw_tracepoint_args" }, 6704 { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" }, 6705 { BPF_PROG_TYPE_SCHED_ACT, "__sk_buff" }, 6706 { BPF_PROG_TYPE_SCHED_CLS, "__sk_buff" }, 6707 { BPF_PROG_TYPE_SK_LOOKUP, "bpf_sk_lookup" }, 6708 { BPF_PROG_TYPE_SK_MSG, "sk_msg_md" }, 6709 { BPF_PROG_TYPE_SK_REUSEPORT, "sk_reuseport_md" }, 6710 { BPF_PROG_TYPE_SK_SKB, "__sk_buff" }, 6711 { BPF_PROG_TYPE_SOCK_OPS, "bpf_sock_ops" }, 6712 { BPF_PROG_TYPE_SOCKET_FILTER, "__sk_buff" }, 6713 { BPF_PROG_TYPE_XDP, "xdp_md" }, 6714 /* all other program types don't have "named" context structs */ 6715 }; 6716 6717 /* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef, 6718 * for below __builtin_types_compatible_p() checks; 6719 * with this approach we don't need any extra arch-specific #ifdef guards 6720 */ 6721 struct pt_regs; 6722 struct user_pt_regs; 6723 struct user_regs_struct; 6724 6725 static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog, 6726 const char *subprog_name, int arg_idx, 6727 int arg_type_id, const char *ctx_name) 6728 { 6729 const struct btf_type *t; 6730 const char *tname; 6731 6732 /* check if existing parameter already matches verifier expectations */ 6733 t = skip_mods_and_typedefs(btf, arg_type_id, NULL); 6734 if (!btf_is_ptr(t)) 6735 goto out_warn; 6736 6737 /* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe 6738 * and perf_event programs, so check this case early on and forget 6739 * about it for subsequent checks 6740 */ 6741 while (btf_is_mod(t)) 6742 t = btf__type_by_id(btf, t->type); 6743 if (btf_is_typedef(t) && 6744 (prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) { 6745 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>"; 6746 if (strcmp(tname, "bpf_user_pt_regs_t") == 0) 6747 return false; /* canonical type for kprobe/perf_event */ 6748 } 6749 
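/* Illustrative sketch added by the editor (not part of the original libbpf
 * source): the fixup implemented here targets global sub-programs whose
 * context argument carries the "arg:ctx" BTF decl tag (commonly spelled via
 * an __arg_ctx convenience macro in BPF-side headers). A hypothetical
 * example for an XDP program:
 *
 *   __noinline int subhandler(struct xdp_md *ctx
 *                             __attribute__((btf_decl_tag("arg:ctx"))))
 *   {
 *       return ctx->data_end - ctx->data;
 *   }
 *
 * On kernels without native __arg_ctx support, the code below rewrites the
 * argument's BTF to point at the named context struct from global_ctx_map
 * (here "xdp_md"), so the verifier treats the argument as PTR_TO_CTX.
 */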
6750 /* now we can ignore typedefs moving forward */ 6751 t = skip_mods_and_typedefs(btf, t->type, NULL); 6752 6753 /* if it's `void *`, definitely fix up BTF info */ 6754 if (btf_is_void(t)) 6755 return true; 6756 6757 /* if it's already proper canonical type, no need to fix up */ 6758 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>"; 6759 if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0) 6760 return false; 6761 6762 /* special cases */ 6763 switch (prog->type) { 6764 case BPF_PROG_TYPE_KPROBE: 6765 /* `struct pt_regs *` is expected, but we need to fix up */ 6766 if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) 6767 return true; 6768 break; 6769 case BPF_PROG_TYPE_PERF_EVENT: 6770 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) && 6771 btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) 6772 return true; 6773 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) && 6774 btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0) 6775 return true; 6776 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) && 6777 btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0) 6778 return true; 6779 break; 6780 case BPF_PROG_TYPE_RAW_TRACEPOINT: 6781 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 6782 /* allow u64* as ctx */ 6783 if (btf_is_int(t) && t->size == 8) 6784 return true; 6785 break; 6786 default: 6787 break; 6788 } 6789 6790 out_warn: 6791 pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n", 6792 prog->name, subprog_name, arg_idx, ctx_name); 6793 return false; 6794 } 6795 6796 static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog) 6797 { 6798 int fn_id, fn_proto_id, ret_type_id, orig_proto_id; 6799 int i, err, arg_cnt, fn_name_off, linkage; 6800 struct btf_type *fn_t, *fn_proto_t, *t; 6801 struct btf_param *p; 6802 6803 /* caller already validated FUNC -> FUNC_PROTO validity */ 6804 fn_t = btf_type_by_id(btf, orig_fn_id); 6805 fn_proto_t = btf_type_by_id(btf, fn_t->type); 6806 6807 /* Note that each btf__add_xxx() operation invalidates 6808 * all btf_type and string pointers, so we need to be 6809 * very careful when cloning BTF types. BTF type 6810 * pointers have to be always refetched. And to avoid 6811 * problems with invalidated string pointers, we 6812 * add empty strings initially, then just fix up 6813 * name_off offsets in place. Offsets are stable for 6814 * existing strings, so that works out. 
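 * (Illustrative sketch added by the editor, not part of the original source;
 * the resulting pattern is:
 *
 *   t = btf_type_by_id(btf, some_id);   // pointer is valid here
 *   name_off = t->name_off;             // stash plain integers, not pointers
 *   btf__add_ptr(btf, other_id);        // may grow/move internal buffers
 *   t = btf_type_by_id(btf, some_id);   // re-fetch before any further use
 * )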
6815 */ 6816 fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */ 6817 linkage = btf_func_linkage(fn_t); 6818 orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */ 6819 ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */ 6820 arg_cnt = btf_vlen(fn_proto_t); 6821 6822 /* clone FUNC_PROTO and its params */ 6823 fn_proto_id = btf__add_func_proto(btf, ret_type_id); 6824 if (fn_proto_id < 0) 6825 return -EINVAL; 6826 6827 for (i = 0; i < arg_cnt; i++) { 6828 int name_off; 6829 6830 /* copy original parameter data */ 6831 t = btf_type_by_id(btf, orig_proto_id); 6832 p = &btf_params(t)[i]; 6833 name_off = p->name_off; 6834 6835 err = btf__add_func_param(btf, "", p->type); 6836 if (err) 6837 return err; 6838 6839 fn_proto_t = btf_type_by_id(btf, fn_proto_id); 6840 p = &btf_params(fn_proto_t)[i]; 6841 p->name_off = name_off; /* use remembered str offset */ 6842 } 6843 6844 /* clone FUNC now, btf__add_func() enforces non-empty name, so use 6845 * entry program's name as a placeholder, which we replace immediately 6846 * with original name_off 6847 */ 6848 fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id); 6849 if (fn_id < 0) 6850 return -EINVAL; 6851 6852 fn_t = btf_type_by_id(btf, fn_id); 6853 fn_t->name_off = fn_name_off; /* reuse original string */ 6854 6855 return fn_id; 6856 } 6857 6858 /* Check if main program or global subprog's function prototype has `arg:ctx` 6859 * argument tags, and, if necessary, substitute correct type to match what BPF 6860 * verifier would expect, taking into account specific program type. This 6861 * allows to support __arg_ctx tag transparently on old kernels that don't yet 6862 * have a native support for it in the verifier, making user's life much 6863 * easier. 6864 */ 6865 static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog) 6866 { 6867 const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name; 6868 struct bpf_func_info_min *func_rec; 6869 struct btf_type *fn_t, *fn_proto_t; 6870 struct btf *btf = obj->btf; 6871 const struct btf_type *t; 6872 struct btf_param *p; 6873 int ptr_id = 0, struct_id, tag_id, orig_fn_id; 6874 int i, n, arg_idx, arg_cnt, err, rec_idx; 6875 int *orig_ids; 6876 6877 /* no .BTF.ext, no problem */ 6878 if (!obj->btf_ext || !prog->func_info) 6879 return 0; 6880 6881 /* don't do any fix ups if kernel natively supports __arg_ctx */ 6882 if (kernel_supports(obj, FEAT_ARG_CTX_TAG)) 6883 return 0; 6884 6885 /* some BPF program types just don't have named context structs, so 6886 * this fallback mechanism doesn't work for them 6887 */ 6888 for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) { 6889 if (global_ctx_map[i].prog_type != prog->type) 6890 continue; 6891 ctx_name = global_ctx_map[i].ctx_name; 6892 break; 6893 } 6894 if (!ctx_name) 6895 return 0; 6896 6897 /* remember original func BTF IDs to detect if we already cloned them */ 6898 orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids)); 6899 if (!orig_ids) 6900 return -ENOMEM; 6901 for (i = 0; i < prog->func_info_cnt; i++) { 6902 func_rec = prog->func_info + prog->func_info_rec_size * i; 6903 orig_ids[i] = func_rec->type_id; 6904 } 6905 6906 /* go through each DECL_TAG with "arg:ctx" and see if it points to one 6907 * of our subprogs; if yes and subprog is global and needs adjustment, 6908 * clone and adjust FUNC -> FUNC_PROTO combo 6909 */ 6910 for (i = 1, n = btf__type_cnt(btf); i < n; i++) { 6911 /* only DECL_TAG with "arg:ctx" value are interesting */ 6912 t = btf__type_by_id(btf, i); 6913 if 
(!btf_is_decl_tag(t)) 6914 continue; 6915 if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0) 6916 continue; 6917 6918 /* only global funcs need adjustment, if at all */ 6919 orig_fn_id = t->type; 6920 fn_t = btf_type_by_id(btf, orig_fn_id); 6921 if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL) 6922 continue; 6923 6924 /* sanity check FUNC -> FUNC_PROTO chain, just in case */ 6925 fn_proto_t = btf_type_by_id(btf, fn_t->type); 6926 if (!fn_proto_t || !btf_is_func_proto(fn_proto_t)) 6927 continue; 6928 6929 /* find corresponding func_info record */ 6930 func_rec = NULL; 6931 for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) { 6932 if (orig_ids[rec_idx] == t->type) { 6933 func_rec = prog->func_info + prog->func_info_rec_size * rec_idx; 6934 break; 6935 } 6936 } 6937 /* current main program doesn't call into this subprog */ 6938 if (!func_rec) 6939 continue; 6940 6941 /* some more sanity checking of DECL_TAG */ 6942 arg_cnt = btf_vlen(fn_proto_t); 6943 arg_idx = btf_decl_tag(t)->component_idx; 6944 if (arg_idx < 0 || arg_idx >= arg_cnt) 6945 continue; 6946 6947 /* check if we should fix up argument type */ 6948 p = &btf_params(fn_proto_t)[arg_idx]; 6949 fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>"; 6950 if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name)) 6951 continue; 6952 6953 /* clone fn/fn_proto, unless we already did it for another arg */ 6954 if (func_rec->type_id == orig_fn_id) { 6955 int fn_id; 6956 6957 fn_id = clone_func_btf_info(btf, orig_fn_id, prog); 6958 if (fn_id < 0) { 6959 err = fn_id; 6960 goto err_out; 6961 } 6962 6963 /* point func_info record to a cloned FUNC type */ 6964 func_rec->type_id = fn_id; 6965 } 6966 6967 /* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument; 6968 * we do it just once per main BPF program, as all global 6969 * funcs share the same program type, so need only PTR -> 6970 * STRUCT type chain 6971 */ 6972 if (ptr_id == 0) { 6973 struct_id = btf__add_struct(btf, ctx_name, 0); 6974 ptr_id = btf__add_ptr(btf, struct_id); 6975 if (ptr_id < 0 || struct_id < 0) { 6976 err = -EINVAL; 6977 goto err_out; 6978 } 6979 } 6980 6981 /* for completeness, clone DECL_TAG and point it to cloned param */ 6982 tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx); 6983 if (tag_id < 0) { 6984 err = -EINVAL; 6985 goto err_out; 6986 } 6987 6988 /* all the BTF manipulations invalidated pointers, refetch them */ 6989 fn_t = btf_type_by_id(btf, func_rec->type_id); 6990 fn_proto_t = btf_type_by_id(btf, fn_t->type); 6991 6992 /* fix up type ID pointed to by param */ 6993 p = &btf_params(fn_proto_t)[arg_idx]; 6994 p->type = ptr_id; 6995 } 6996 6997 free(orig_ids); 6998 return 0; 6999 err_out: 7000 free(orig_ids); 7001 return err; 7002 } 7003 7004 static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) 7005 { 7006 struct bpf_program *prog; 7007 size_t i, j; 7008 int err; 7009 7010 if (obj->btf_ext) { 7011 err = bpf_object__relocate_core(obj, targ_btf_path); 7012 if (err) { 7013 pr_warn("failed to perform CO-RE relocations: %d\n", 7014 err); 7015 return err; 7016 } 7017 bpf_object__sort_relos(obj); 7018 } 7019 7020 /* Before relocating calls pre-process relocations and mark 7021 * few ld_imm64 instructions that points to subprogs. 7022 * Otherwise bpf_object__reloc_code() later would have to consider 7023 * all ld_imm64 insns as relocation candidates. 
That would 7024 * reduce relocation speed, since amount of find_prog_insn_relo() 7025 * would increase and most of them will fail to find a relo. 7026 */ 7027 for (i = 0; i < obj->nr_programs; i++) { 7028 prog = &obj->programs[i]; 7029 for (j = 0; j < prog->nr_reloc; j++) { 7030 struct reloc_desc *relo = &prog->reloc_desc[j]; 7031 struct bpf_insn *insn = &prog->insns[relo->insn_idx]; 7032 7033 /* mark the insn, so it's recognized by insn_is_pseudo_func() */ 7034 if (relo->type == RELO_SUBPROG_ADDR) 7035 insn[0].src_reg = BPF_PSEUDO_FUNC; 7036 } 7037 } 7038 7039 /* relocate subprogram calls and append used subprograms to main 7040 * programs; each copy of subprogram code needs to be relocated 7041 * differently for each main program, because its code location might 7042 * have changed. 7043 * Append subprog relos to main programs to allow data relos to be 7044 * processed after text is completely relocated. 7045 */ 7046 for (i = 0; i < obj->nr_programs; i++) { 7047 prog = &obj->programs[i]; 7048 /* sub-program's sub-calls are relocated within the context of 7049 * its main program only 7050 */ 7051 if (prog_is_subprog(obj, prog)) 7052 continue; 7053 if (!prog->autoload) 7054 continue; 7055 7056 err = bpf_object__relocate_calls(obj, prog); 7057 if (err) { 7058 pr_warn("prog '%s': failed to relocate calls: %d\n", 7059 prog->name, err); 7060 return err; 7061 } 7062 7063 err = bpf_prog_assign_exc_cb(obj, prog); 7064 if (err) 7065 return err; 7066 /* Now, also append exception callback if it has not been done already. */ 7067 if (prog->exception_cb_idx >= 0) { 7068 struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx]; 7069 7070 /* Calling exception callback directly is disallowed, which the 7071 * verifier will reject later. In case it was processed already, 7072 * we can skip this step, otherwise for all other valid cases we 7073 * have to append exception callback now. 
7074 */ 7075 if (subprog->sub_insn_off == 0) { 7076 err = bpf_object__append_subprog_code(obj, prog, subprog); 7077 if (err) 7078 return err; 7079 err = bpf_object__reloc_code(obj, prog, subprog); 7080 if (err) 7081 return err; 7082 } 7083 } 7084 } 7085 for (i = 0; i < obj->nr_programs; i++) { 7086 prog = &obj->programs[i]; 7087 if (prog_is_subprog(obj, prog)) 7088 continue; 7089 if (!prog->autoload) 7090 continue; 7091 7092 /* Process data relos for main programs */ 7093 err = bpf_object__relocate_data(obj, prog); 7094 if (err) { 7095 pr_warn("prog '%s': failed to relocate data references: %d\n", 7096 prog->name, err); 7097 return err; 7098 } 7099 7100 /* Fix up .BTF.ext information, if necessary */ 7101 err = bpf_program_fixup_func_info(obj, prog); 7102 if (err) { 7103 pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %d\n", 7104 prog->name, err); 7105 return err; 7106 } 7107 } 7108 7109 return 0; 7110 } 7111 7112 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, 7113 Elf64_Shdr *shdr, Elf_Data *data); 7114 7115 static int bpf_object__collect_map_relos(struct bpf_object *obj, 7116 Elf64_Shdr *shdr, Elf_Data *data) 7117 { 7118 const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *); 7119 int i, j, nrels, new_sz; 7120 const struct btf_var_secinfo *vi = NULL; 7121 const struct btf_type *sec, *var, *def; 7122 struct bpf_map *map = NULL, *targ_map = NULL; 7123 struct bpf_program *targ_prog = NULL; 7124 bool is_prog_array, is_map_in_map; 7125 const struct btf_member *member; 7126 const char *name, *mname, *type; 7127 unsigned int moff; 7128 Elf64_Sym *sym; 7129 Elf64_Rel *rel; 7130 void *tmp; 7131 7132 if (!obj->efile.btf_maps_sec_btf_id || !obj->btf) 7133 return -EINVAL; 7134 sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id); 7135 if (!sec) 7136 return -EINVAL; 7137 7138 nrels = shdr->sh_size / shdr->sh_entsize; 7139 for (i = 0; i < nrels; i++) { 7140 rel = elf_rel_by_idx(data, i); 7141 if (!rel) { 7142 pr_warn(".maps relo #%d: failed to get ELF relo\n", i); 7143 return -LIBBPF_ERRNO__FORMAT; 7144 } 7145 7146 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); 7147 if (!sym) { 7148 pr_warn(".maps relo #%d: symbol %zx not found\n", 7149 i, (size_t)ELF64_R_SYM(rel->r_info)); 7150 return -LIBBPF_ERRNO__FORMAT; 7151 } 7152 name = elf_sym_str(obj, sym->st_name) ?: "<?>"; 7153 7154 pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n", 7155 i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value, 7156 (size_t)rel->r_offset, sym->st_name, name); 7157 7158 for (j = 0; j < obj->nr_maps; j++) { 7159 map = &obj->maps[j]; 7160 if (map->sec_idx != obj->efile.btf_maps_shndx) 7161 continue; 7162 7163 vi = btf_var_secinfos(sec) + map->btf_var_idx; 7164 if (vi->offset <= rel->r_offset && 7165 rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size) 7166 break; 7167 } 7168 if (j == obj->nr_maps) { 7169 pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n", 7170 i, name, (size_t)rel->r_offset); 7171 return -EINVAL; 7172 } 7173 7174 is_map_in_map = bpf_map_type__is_map_in_map(map->def.type); 7175 is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY; 7176 type = is_map_in_map ? 
"map" : "prog"; 7177 if (is_map_in_map) { 7178 if (sym->st_shndx != obj->efile.btf_maps_shndx) { 7179 pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", 7180 i, name); 7181 return -LIBBPF_ERRNO__RELOC; 7182 } 7183 if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS && 7184 map->def.key_size != sizeof(int)) { 7185 pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n", 7186 i, map->name, sizeof(int)); 7187 return -EINVAL; 7188 } 7189 targ_map = bpf_object__find_map_by_name(obj, name); 7190 if (!targ_map) { 7191 pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n", 7192 i, name); 7193 return -ESRCH; 7194 } 7195 } else if (is_prog_array) { 7196 targ_prog = bpf_object__find_program_by_name(obj, name); 7197 if (!targ_prog) { 7198 pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n", 7199 i, name); 7200 return -ESRCH; 7201 } 7202 if (targ_prog->sec_idx != sym->st_shndx || 7203 targ_prog->sec_insn_off * 8 != sym->st_value || 7204 prog_is_subprog(obj, targ_prog)) { 7205 pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n", 7206 i, name); 7207 return -LIBBPF_ERRNO__RELOC; 7208 } 7209 } else { 7210 return -EINVAL; 7211 } 7212 7213 var = btf__type_by_id(obj->btf, vi->type); 7214 def = skip_mods_and_typedefs(obj->btf, var->type, NULL); 7215 if (btf_vlen(def) == 0) 7216 return -EINVAL; 7217 member = btf_members(def) + btf_vlen(def) - 1; 7218 mname = btf__name_by_offset(obj->btf, member->name_off); 7219 if (strcmp(mname, "values")) 7220 return -EINVAL; 7221 7222 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8; 7223 if (rel->r_offset - vi->offset < moff) 7224 return -EINVAL; 7225 7226 moff = rel->r_offset - vi->offset - moff; 7227 /* here we use BPF pointer size, which is always 64 bit, as we 7228 * are parsing ELF that was built for BPF target 7229 */ 7230 if (moff % bpf_ptr_sz) 7231 return -EINVAL; 7232 moff /= bpf_ptr_sz; 7233 if (moff >= map->init_slots_sz) { 7234 new_sz = moff + 1; 7235 tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz); 7236 if (!tmp) 7237 return -ENOMEM; 7238 map->init_slots = tmp; 7239 memset(map->init_slots + map->init_slots_sz, 0, 7240 (new_sz - map->init_slots_sz) * host_ptr_sz); 7241 map->init_slots_sz = new_sz; 7242 } 7243 map->init_slots[moff] = is_map_in_map ? 
(void *)targ_map : (void *)targ_prog; 7244 7245 pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n", 7246 i, map->name, moff, type, name); 7247 } 7248 7249 return 0; 7250 } 7251 7252 static int bpf_object__collect_relos(struct bpf_object *obj) 7253 { 7254 int i, err; 7255 7256 for (i = 0; i < obj->efile.sec_cnt; i++) { 7257 struct elf_sec_desc *sec_desc = &obj->efile.secs[i]; 7258 Elf64_Shdr *shdr; 7259 Elf_Data *data; 7260 int idx; 7261 7262 if (sec_desc->sec_type != SEC_RELO) 7263 continue; 7264 7265 shdr = sec_desc->shdr; 7266 data = sec_desc->data; 7267 idx = shdr->sh_info; 7268 7269 if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) { 7270 pr_warn("internal error at %d\n", __LINE__); 7271 return -LIBBPF_ERRNO__INTERNAL; 7272 } 7273 7274 if (obj->efile.secs[idx].sec_type == SEC_ST_OPS) 7275 err = bpf_object__collect_st_ops_relos(obj, shdr, data); 7276 else if (idx == obj->efile.btf_maps_shndx) 7277 err = bpf_object__collect_map_relos(obj, shdr, data); 7278 else 7279 err = bpf_object__collect_prog_relos(obj, shdr, data); 7280 if (err) 7281 return err; 7282 } 7283 7284 bpf_object__sort_relos(obj); 7285 return 0; 7286 } 7287 7288 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id) 7289 { 7290 if (BPF_CLASS(insn->code) == BPF_JMP && 7291 BPF_OP(insn->code) == BPF_CALL && 7292 BPF_SRC(insn->code) == BPF_K && 7293 insn->src_reg == 0 && 7294 insn->dst_reg == 0) { 7295 *func_id = insn->imm; 7296 return true; 7297 } 7298 return false; 7299 } 7300 7301 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog) 7302 { 7303 struct bpf_insn *insn = prog->insns; 7304 enum bpf_func_id func_id; 7305 int i; 7306 7307 if (obj->gen_loader) 7308 return 0; 7309 7310 for (i = 0; i < prog->insns_cnt; i++, insn++) { 7311 if (!insn_is_helper_call(insn, &func_id)) 7312 continue; 7313 7314 /* on kernels that don't yet support 7315 * bpf_probe_read_{kernel,user}[_str] helpers, fall back 7316 * to bpf_probe_read() which works well for old kernels 7317 */ 7318 switch (func_id) { 7319 case BPF_FUNC_probe_read_kernel: 7320 case BPF_FUNC_probe_read_user: 7321 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN)) 7322 insn->imm = BPF_FUNC_probe_read; 7323 break; 7324 case BPF_FUNC_probe_read_kernel_str: 7325 case BPF_FUNC_probe_read_user_str: 7326 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN)) 7327 insn->imm = BPF_FUNC_probe_read_str; 7328 break; 7329 default: 7330 break; 7331 } 7332 } 7333 return 0; 7334 } 7335 7336 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, 7337 int *btf_obj_fd, int *btf_type_id); 7338 7339 /* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */ 7340 static int libbpf_prepare_prog_load(struct bpf_program *prog, 7341 struct bpf_prog_load_opts *opts, long cookie) 7342 { 7343 enum sec_def_flags def = cookie; 7344 7345 /* old kernels might not support specifying expected_attach_type */ 7346 if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE)) 7347 opts->expected_attach_type = 0; 7348 7349 if (def & SEC_SLEEPABLE) 7350 opts->prog_flags |= BPF_F_SLEEPABLE; 7351 7352 if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) 7353 opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; 7354 7355 /* special check for usdt to use uprobe_multi link */ 7356 if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) 7357 prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI; 7358 7359 if ((def & SEC_ATTACH_BTF) && 
!prog->attach_btf_id) { 7360 int btf_obj_fd = 0, btf_type_id = 0, err; 7361 const char *attach_name; 7362 7363 attach_name = strchr(prog->sec_name, '/'); 7364 if (!attach_name) { 7365 /* if BPF program is annotated with just SEC("fentry") 7366 * (or similar) without declaratively specifying 7367 * target, then it is expected that target will be 7368 * specified with bpf_program__set_attach_target() at 7369 * runtime before BPF object load step. If not, then 7370 * there is nothing to load into the kernel as BPF 7371 * verifier won't be able to validate BPF program 7372 * correctness anyways. 7373 */ 7374 pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n", 7375 prog->name); 7376 return -EINVAL; 7377 } 7378 attach_name++; /* skip over / */ 7379 7380 err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id); 7381 if (err) 7382 return err; 7383 7384 /* cache resolved BTF FD and BTF type ID in the prog */ 7385 prog->attach_btf_obj_fd = btf_obj_fd; 7386 prog->attach_btf_id = btf_type_id; 7387 7388 /* but by now libbpf common logic is not utilizing 7389 * prog->atach_btf_obj_fd/prog->attach_btf_id anymore because 7390 * this callback is called after opts were populated by 7391 * libbpf, so this callback has to update opts explicitly here 7392 */ 7393 opts->attach_btf_obj_fd = btf_obj_fd; 7394 opts->attach_btf_id = btf_type_id; 7395 } 7396 return 0; 7397 } 7398 7399 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz); 7400 7401 static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog, 7402 struct bpf_insn *insns, int insns_cnt, 7403 const char *license, __u32 kern_version, int *prog_fd) 7404 { 7405 LIBBPF_OPTS(bpf_prog_load_opts, load_attr); 7406 const char *prog_name = NULL; 7407 char *cp, errmsg[STRERR_BUFSIZE]; 7408 size_t log_buf_size = 0; 7409 char *log_buf = NULL, *tmp; 7410 bool own_log_buf = true; 7411 __u32 log_level = prog->log_level; 7412 int ret, err; 7413 7414 /* Be more helpful by rejecting programs that can't be validated early 7415 * with more meaningful and actionable error message. 7416 */ 7417 switch (prog->type) { 7418 case BPF_PROG_TYPE_UNSPEC: 7419 /* 7420 * The program type must be set. Most likely we couldn't find a proper 7421 * section definition at load time, and thus we didn't infer the type. 
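 * (Editorial note, not part of the original source: the type is normally
 * inferred from the ELF section name, e.g. SEC("xdp") yields
 * BPF_PROG_TYPE_XDP and SEC("kprobe/...") yields BPF_PROG_TYPE_KPROBE. For a
 * custom section name, the caller can still set it explicitly before
 * loading the object, e.g.:
 *
 *   bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);
 * )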
7422 */ 7423 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n", 7424 prog->name, prog->sec_name); 7425 return -EINVAL; 7426 case BPF_PROG_TYPE_STRUCT_OPS: 7427 if (prog->attach_btf_id == 0) { 7428 pr_warn("prog '%s': SEC(\"struct_ops\") program isn't referenced anywhere, did you forget to use it?\n", 7429 prog->name); 7430 return -EINVAL; 7431 } 7432 break; 7433 default: 7434 break; 7435 } 7436 7437 if (!insns || !insns_cnt) 7438 return -EINVAL; 7439 7440 if (kernel_supports(obj, FEAT_PROG_NAME)) 7441 prog_name = prog->name; 7442 load_attr.attach_prog_fd = prog->attach_prog_fd; 7443 load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; 7444 load_attr.attach_btf_id = prog->attach_btf_id; 7445 load_attr.kern_version = kern_version; 7446 load_attr.prog_ifindex = prog->prog_ifindex; 7447 7448 /* specify func_info/line_info only if kernel supports them */ 7449 if (obj->btf && btf__fd(obj->btf) >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { 7450 load_attr.prog_btf_fd = btf__fd(obj->btf); 7451 load_attr.func_info = prog->func_info; 7452 load_attr.func_info_rec_size = prog->func_info_rec_size; 7453 load_attr.func_info_cnt = prog->func_info_cnt; 7454 load_attr.line_info = prog->line_info; 7455 load_attr.line_info_rec_size = prog->line_info_rec_size; 7456 load_attr.line_info_cnt = prog->line_info_cnt; 7457 } 7458 load_attr.log_level = log_level; 7459 load_attr.prog_flags = prog->prog_flags; 7460 load_attr.fd_array = obj->fd_array; 7461 7462 load_attr.token_fd = obj->token_fd; 7463 if (obj->token_fd) 7464 load_attr.prog_flags |= BPF_F_TOKEN_FD; 7465 7466 /* adjust load_attr if sec_def provides custom preload callback */ 7467 if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) { 7468 err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie); 7469 if (err < 0) { 7470 pr_warn("prog '%s': failed to prepare load attributes: %d\n", 7471 prog->name, err); 7472 return err; 7473 } 7474 insns = prog->insns; 7475 insns_cnt = prog->insns_cnt; 7476 } 7477 7478 /* allow prog_prepare_load_fn to change expected_attach_type */ 7479 load_attr.expected_attach_type = prog->expected_attach_type; 7480 7481 if (obj->gen_loader) { 7482 bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name, 7483 license, insns, insns_cnt, &load_attr, 7484 prog - obj->programs); 7485 *prog_fd = -1; 7486 return 0; 7487 } 7488 7489 retry_load: 7490 /* if log_level is zero, we don't request logs initially even if 7491 * custom log_buf is specified; if the program load fails, then we'll 7492 * bump log_level to 1 and use either custom log_buf or we'll allocate 7493 * our own and retry the load to get details on what failed 7494 */ 7495 if (log_level) { 7496 if (prog->log_buf) { 7497 log_buf = prog->log_buf; 7498 log_buf_size = prog->log_size; 7499 own_log_buf = false; 7500 } else if (obj->log_buf) { 7501 log_buf = obj->log_buf; 7502 log_buf_size = obj->log_size; 7503 own_log_buf = false; 7504 } else { 7505 log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2); 7506 tmp = realloc(log_buf, log_buf_size); 7507 if (!tmp) { 7508 ret = -ENOMEM; 7509 goto out; 7510 } 7511 log_buf = tmp; 7512 log_buf[0] = '\0'; 7513 own_log_buf = true; 7514 } 7515 } 7516 7517 load_attr.log_buf = log_buf; 7518 load_attr.log_size = log_buf_size; 7519 load_attr.log_level = log_level; 7520 7521 ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr); 7522 if (ret >= 0) { 7523 if (log_level && own_log_buf) { 7524 pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD 
LOG --\n", 7525 prog->name, log_buf); 7526 } 7527 7528 if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) { 7529 struct bpf_map *map; 7530 int i; 7531 7532 for (i = 0; i < obj->nr_maps; i++) { 7533 map = &prog->obj->maps[i]; 7534 if (map->libbpf_type != LIBBPF_MAP_RODATA) 7535 continue; 7536 7537 if (bpf_prog_bind_map(ret, map->fd, NULL)) { 7538 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); 7539 pr_warn("prog '%s': failed to bind map '%s': %s\n", 7540 prog->name, map->real_name, cp); 7541 /* Don't fail hard if can't bind rodata. */ 7542 } 7543 } 7544 } 7545 7546 *prog_fd = ret; 7547 ret = 0; 7548 goto out; 7549 } 7550 7551 if (log_level == 0) { 7552 log_level = 1; 7553 goto retry_load; 7554 } 7555 /* On ENOSPC, increase log buffer size and retry, unless custom 7556 * log_buf is specified. 7557 * Be careful to not overflow u32, though. Kernel's log buf size limit 7558 * isn't part of UAPI so it can always be bumped to full 4GB. So don't 7559 * multiply by 2 unless we are sure we'll fit within 32 bits. 7560 * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2). 7561 */ 7562 if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2) 7563 goto retry_load; 7564 7565 ret = -errno; 7566 7567 /* post-process verifier log to improve error descriptions */ 7568 fixup_verifier_log(prog, log_buf, log_buf_size); 7569 7570 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); 7571 pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); 7572 pr_perm_msg(ret); 7573 7574 if (own_log_buf && log_buf && log_buf[0] != '\0') { 7575 pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", 7576 prog->name, log_buf); 7577 } 7578 7579 out: 7580 if (own_log_buf) 7581 free(log_buf); 7582 return ret; 7583 } 7584 7585 static char *find_prev_line(char *buf, char *cur) 7586 { 7587 char *p; 7588 7589 if (cur == buf) /* end of a log buf */ 7590 return NULL; 7591 7592 p = cur - 1; 7593 while (p - 1 >= buf && *(p - 1) != '\n') 7594 p--; 7595 7596 return p; 7597 } 7598 7599 static void patch_log(char *buf, size_t buf_sz, size_t log_sz, 7600 char *orig, size_t orig_sz, const char *patch) 7601 { 7602 /* size of the remaining log content to the right from the to-be-replaced part */ 7603 size_t rem_sz = (buf + log_sz) - (orig + orig_sz); 7604 size_t patch_sz = strlen(patch); 7605 7606 if (patch_sz != orig_sz) { 7607 /* If patch line(s) are longer than original piece of verifier log, 7608 * shift log contents by (patch_sz - orig_sz) bytes to the right 7609 * starting from after to-be-replaced part of the log. 7610 * 7611 * If patch line(s) are shorter than original piece of verifier log, 7612 * shift log contents by (orig_sz - patch_sz) bytes to the left 7613 * starting from after to-be-replaced part of the log 7614 * 7615 * We need to be careful about not overflowing available 7616 * buf_sz capacity. If that's the case, we'll truncate the end 7617 * of the original log, as necessary. 
7618 */ 7619 if (patch_sz > orig_sz) { 7620 if (orig + patch_sz >= buf + buf_sz) { 7621 /* patch is big enough to cover remaining space completely */ 7622 patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1; 7623 rem_sz = 0; 7624 } else if (patch_sz - orig_sz > buf_sz - log_sz) { 7625 /* patch causes part of remaining log to be truncated */ 7626 rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz); 7627 } 7628 } 7629 /* shift remaining log to the right by calculated amount */ 7630 memmove(orig + patch_sz, orig + orig_sz, rem_sz); 7631 } 7632 7633 memcpy(orig, patch, patch_sz); 7634 } 7635 7636 static void fixup_log_failed_core_relo(struct bpf_program *prog, 7637 char *buf, size_t buf_sz, size_t log_sz, 7638 char *line1, char *line2, char *line3) 7639 { 7640 /* Expected log for failed and not properly guarded CO-RE relocation: 7641 * line1 -> 123: (85) call unknown#195896080 7642 * line2 -> invalid func unknown#195896080 7643 * line3 -> <anything else or end of buffer> 7644 * 7645 * "123" is the index of the instruction that was poisoned. We extract 7646 * instruction index to find corresponding CO-RE relocation and 7647 * replace this part of the log with more relevant information about 7648 * failed CO-RE relocation. 7649 */ 7650 const struct bpf_core_relo *relo; 7651 struct bpf_core_spec spec; 7652 char patch[512], spec_buf[256]; 7653 int insn_idx, err, spec_len; 7654 7655 if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1) 7656 return; 7657 7658 relo = find_relo_core(prog, insn_idx); 7659 if (!relo) 7660 return; 7661 7662 err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec); 7663 if (err) 7664 return; 7665 7666 spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); 7667 snprintf(patch, sizeof(patch), 7668 "%d: <invalid CO-RE relocation>\n" 7669 "failed to resolve CO-RE relocation %s%s\n", 7670 insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : ""); 7671 7672 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 7673 } 7674 7675 static void fixup_log_missing_map_load(struct bpf_program *prog, 7676 char *buf, size_t buf_sz, size_t log_sz, 7677 char *line1, char *line2, char *line3) 7678 { 7679 /* Expected log for failed and not properly guarded map reference: 7680 * line1 -> 123: (85) call unknown#2001000345 7681 * line2 -> invalid func unknown#2001000345 7682 * line3 -> <anything else or end of buffer> 7683 * 7684 * "123" is the index of the instruction that was poisoned. 7685 * "345" in "2001000345" is a map index in obj->maps to fetch map name. 
7686 */ 7687 struct bpf_object *obj = prog->obj; 7688 const struct bpf_map *map; 7689 int insn_idx, map_idx; 7690 char patch[128]; 7691 7692 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2) 7693 return; 7694 7695 map_idx -= POISON_LDIMM64_MAP_BASE; 7696 if (map_idx < 0 || map_idx >= obj->nr_maps) 7697 return; 7698 map = &obj->maps[map_idx]; 7699 7700 snprintf(patch, sizeof(patch), 7701 "%d: <invalid BPF map reference>\n" 7702 "BPF map '%s' is referenced but wasn't created\n", 7703 insn_idx, map->name); 7704 7705 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 7706 } 7707 7708 static void fixup_log_missing_kfunc_call(struct bpf_program *prog, 7709 char *buf, size_t buf_sz, size_t log_sz, 7710 char *line1, char *line2, char *line3) 7711 { 7712 /* Expected log for failed and not properly guarded kfunc call: 7713 * line1 -> 123: (85) call unknown#2002000345 7714 * line2 -> invalid func unknown#2002000345 7715 * line3 -> <anything else or end of buffer> 7716 * 7717 * "123" is the index of the instruction that was poisoned. 7718 * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name. 7719 */ 7720 struct bpf_object *obj = prog->obj; 7721 const struct extern_desc *ext; 7722 int insn_idx, ext_idx; 7723 char patch[128]; 7724 7725 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2) 7726 return; 7727 7728 ext_idx -= POISON_CALL_KFUNC_BASE; 7729 if (ext_idx < 0 || ext_idx >= obj->nr_extern) 7730 return; 7731 ext = &obj->externs[ext_idx]; 7732 7733 snprintf(patch, sizeof(patch), 7734 "%d: <invalid kfunc call>\n" 7735 "kfunc '%s' is referenced but wasn't resolved\n", 7736 insn_idx, ext->name); 7737 7738 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 7739 } 7740 7741 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz) 7742 { 7743 /* look for familiar error patterns in last N lines of the log */ 7744 const size_t max_last_line_cnt = 10; 7745 char *prev_line, *cur_line, *next_line; 7746 size_t log_sz; 7747 int i; 7748 7749 if (!buf) 7750 return; 7751 7752 log_sz = strlen(buf) + 1; 7753 next_line = buf + log_sz - 1; 7754 7755 for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) { 7756 cur_line = find_prev_line(buf, next_line); 7757 if (!cur_line) 7758 return; 7759 7760 if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) { 7761 prev_line = find_prev_line(buf, cur_line); 7762 if (!prev_line) 7763 continue; 7764 7765 /* failed CO-RE relocation case */ 7766 fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz, 7767 prev_line, cur_line, next_line); 7768 return; 7769 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) { 7770 prev_line = find_prev_line(buf, cur_line); 7771 if (!prev_line) 7772 continue; 7773 7774 /* reference to uncreated BPF map */ 7775 fixup_log_missing_map_load(prog, buf, buf_sz, log_sz, 7776 prev_line, cur_line, next_line); 7777 return; 7778 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) { 7779 prev_line = find_prev_line(buf, cur_line); 7780 if (!prev_line) 7781 continue; 7782 7783 /* reference to unresolved kfunc */ 7784 fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz, 7785 prev_line, cur_line, next_line); 7786 return; 7787 } 7788 } 7789 } 7790 7791 static int bpf_program_record_relos(struct bpf_program *prog) 7792 { 7793 struct bpf_object *obj = prog->obj; 7794 int i; 7795 7796 for (i = 0; i < prog->nr_reloc; i++) { 7797 struct reloc_desc *relo = &prog->reloc_desc[i]; 7798 
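/*
 * A minimal sketch of capturing the verifier log, assuming a hypothetical
 * buffer size and program name. bpf_object_load_prog() above only requests a
 * log when a log level is set (or on the failure retry), so a caller that
 * wants the full log can provide its own buffer and level before loading.
 *
 *	static char vlog[1024 * 1024];
 *
 *	struct bpf_program *prog = bpf_object__find_program_by_name(obj, "my_prog");
 *	bpf_program__set_log_buf(prog, vlog, sizeof(vlog));
 *	bpf_program__set_log_level(prog, 1);	// must be done before bpf_object__load()
 *	if (bpf_object__load(obj))
 *		fprintf(stderr, "verifier log:\n%s\n", vlog);
 */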
struct extern_desc *ext = &obj->externs[relo->ext_idx]; 7799 int kind; 7800 7801 switch (relo->type) { 7802 case RELO_EXTERN_LD64: 7803 if (ext->type != EXT_KSYM) 7804 continue; 7805 kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ? 7806 BTF_KIND_VAR : BTF_KIND_FUNC; 7807 bpf_gen__record_extern(obj->gen_loader, ext->name, 7808 ext->is_weak, !ext->ksym.type_id, 7809 true, kind, relo->insn_idx); 7810 break; 7811 case RELO_EXTERN_CALL: 7812 bpf_gen__record_extern(obj->gen_loader, ext->name, 7813 ext->is_weak, false, false, BTF_KIND_FUNC, 7814 relo->insn_idx); 7815 break; 7816 case RELO_CORE: { 7817 struct bpf_core_relo cr = { 7818 .insn_off = relo->insn_idx * 8, 7819 .type_id = relo->core_relo->type_id, 7820 .access_str_off = relo->core_relo->access_str_off, 7821 .kind = relo->core_relo->kind, 7822 }; 7823 7824 bpf_gen__record_relo_core(obj->gen_loader, &cr); 7825 break; 7826 } 7827 default: 7828 continue; 7829 } 7830 } 7831 return 0; 7832 } 7833 7834 static int 7835 bpf_object__load_progs(struct bpf_object *obj, int log_level) 7836 { 7837 struct bpf_program *prog; 7838 size_t i; 7839 int err; 7840 7841 for (i = 0; i < obj->nr_programs; i++) { 7842 prog = &obj->programs[i]; 7843 err = bpf_object__sanitize_prog(obj, prog); 7844 if (err) 7845 return err; 7846 } 7847 7848 for (i = 0; i < obj->nr_programs; i++) { 7849 prog = &obj->programs[i]; 7850 if (prog_is_subprog(obj, prog)) 7851 continue; 7852 if (!prog->autoload) { 7853 pr_debug("prog '%s': skipped loading\n", prog->name); 7854 continue; 7855 } 7856 prog->log_level |= log_level; 7857 7858 if (obj->gen_loader) 7859 bpf_program_record_relos(prog); 7860 7861 err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt, 7862 obj->license, obj->kern_version, &prog->fd); 7863 if (err) { 7864 pr_warn("prog '%s': failed to load: %d\n", prog->name, err); 7865 return err; 7866 } 7867 } 7868 7869 bpf_object__free_relocs(obj); 7870 return 0; 7871 } 7872 7873 static const struct bpf_sec_def *find_sec_def(const char *sec_name); 7874 7875 static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts) 7876 { 7877 struct bpf_program *prog; 7878 int err; 7879 7880 bpf_object__for_each_program(prog, obj) { 7881 prog->sec_def = find_sec_def(prog->sec_name); 7882 if (!prog->sec_def) { 7883 /* couldn't guess, but user might manually specify */ 7884 pr_debug("prog '%s': unrecognized ELF section name '%s'\n", 7885 prog->name, prog->sec_name); 7886 continue; 7887 } 7888 7889 prog->type = prog->sec_def->prog_type; 7890 prog->expected_attach_type = prog->sec_def->expected_attach_type; 7891 7892 /* sec_def can have custom callback which should be called 7893 * after bpf_program is initialized to adjust its properties 7894 */ 7895 if (prog->sec_def->prog_setup_fn) { 7896 err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie); 7897 if (err < 0) { 7898 pr_warn("prog '%s': failed to initialize: %d\n", 7899 prog->name, err); 7900 return err; 7901 } 7902 } 7903 } 7904 7905 return 0; 7906 } 7907 7908 static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, 7909 const struct bpf_object_open_opts *opts) 7910 { 7911 const char *obj_name, *kconfig, *btf_tmp_path, *token_path; 7912 struct bpf_object *obj; 7913 char tmp_name[64]; 7914 int err; 7915 char *log_buf; 7916 size_t log_size; 7917 __u32 log_level; 7918 7919 if (elf_version(EV_CURRENT) == EV_NONE) { 7920 pr_warn("failed to init libelf for %s\n", 7921 path ? 
: "(mem buf)"); 7922 return ERR_PTR(-LIBBPF_ERRNO__LIBELF); 7923 } 7924 7925 if (!OPTS_VALID(opts, bpf_object_open_opts)) 7926 return ERR_PTR(-EINVAL); 7927 7928 obj_name = OPTS_GET(opts, object_name, NULL); 7929 if (obj_buf) { 7930 if (!obj_name) { 7931 snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", 7932 (unsigned long)obj_buf, 7933 (unsigned long)obj_buf_sz); 7934 obj_name = tmp_name; 7935 } 7936 path = obj_name; 7937 pr_debug("loading object '%s' from buffer\n", obj_name); 7938 } 7939 7940 log_buf = OPTS_GET(opts, kernel_log_buf, NULL); 7941 log_size = OPTS_GET(opts, kernel_log_size, 0); 7942 log_level = OPTS_GET(opts, kernel_log_level, 0); 7943 if (log_size > UINT_MAX) 7944 return ERR_PTR(-EINVAL); 7945 if (log_size && !log_buf) 7946 return ERR_PTR(-EINVAL); 7947 7948 token_path = OPTS_GET(opts, bpf_token_path, NULL); 7949 /* if user didn't specify bpf_token_path explicitly, check if 7950 * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path 7951 * option 7952 */ 7953 if (!token_path) 7954 token_path = getenv("LIBBPF_BPF_TOKEN_PATH"); 7955 if (token_path && strlen(token_path) >= PATH_MAX) 7956 return ERR_PTR(-ENAMETOOLONG); 7957 7958 obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); 7959 if (IS_ERR(obj)) 7960 return obj; 7961 7962 obj->log_buf = log_buf; 7963 obj->log_size = log_size; 7964 obj->log_level = log_level; 7965 7966 if (token_path) { 7967 obj->token_path = strdup(token_path); 7968 if (!obj->token_path) { 7969 err = -ENOMEM; 7970 goto out; 7971 } 7972 } 7973 7974 btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); 7975 if (btf_tmp_path) { 7976 if (strlen(btf_tmp_path) >= PATH_MAX) { 7977 err = -ENAMETOOLONG; 7978 goto out; 7979 } 7980 obj->btf_custom_path = strdup(btf_tmp_path); 7981 if (!obj->btf_custom_path) { 7982 err = -ENOMEM; 7983 goto out; 7984 } 7985 } 7986 7987 kconfig = OPTS_GET(opts, kconfig, NULL); 7988 if (kconfig) { 7989 obj->kconfig = strdup(kconfig); 7990 if (!obj->kconfig) { 7991 err = -ENOMEM; 7992 goto out; 7993 } 7994 } 7995 7996 err = bpf_object__elf_init(obj); 7997 err = err ? : bpf_object__check_endianness(obj); 7998 err = err ? : bpf_object__elf_collect(obj); 7999 err = err ? : bpf_object__collect_externs(obj); 8000 err = err ? : bpf_object_fixup_btf(obj); 8001 err = err ? : bpf_object__init_maps(obj, opts); 8002 err = err ? : bpf_object_init_progs(obj, opts); 8003 err = err ? 
: bpf_object__collect_relos(obj); 8004 if (err) 8005 goto out; 8006 8007 bpf_object__elf_finish(obj); 8008 8009 return obj; 8010 out: 8011 bpf_object__close(obj); 8012 return ERR_PTR(err); 8013 } 8014 8015 struct bpf_object * 8016 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) 8017 { 8018 if (!path) 8019 return libbpf_err_ptr(-EINVAL); 8020 8021 pr_debug("loading %s\n", path); 8022 8023 return libbpf_ptr(bpf_object_open(path, NULL, 0, opts)); 8024 } 8025 8026 struct bpf_object *bpf_object__open(const char *path) 8027 { 8028 return bpf_object__open_file(path, NULL); 8029 } 8030 8031 struct bpf_object * 8032 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, 8033 const struct bpf_object_open_opts *opts) 8034 { 8035 if (!obj_buf || obj_buf_sz == 0) 8036 return libbpf_err_ptr(-EINVAL); 8037 8038 return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts)); 8039 } 8040 8041 static int bpf_object_unload(struct bpf_object *obj) 8042 { 8043 size_t i; 8044 8045 if (!obj) 8046 return libbpf_err(-EINVAL); 8047 8048 for (i = 0; i < obj->nr_maps; i++) { 8049 zclose(obj->maps[i].fd); 8050 if (obj->maps[i].st_ops) 8051 zfree(&obj->maps[i].st_ops->kern_vdata); 8052 } 8053 8054 for (i = 0; i < obj->nr_programs; i++) 8055 bpf_program__unload(&obj->programs[i]); 8056 8057 return 0; 8058 } 8059 8060 static int bpf_object__sanitize_maps(struct bpf_object *obj) 8061 { 8062 struct bpf_map *m; 8063 8064 bpf_object__for_each_map(m, obj) { 8065 if (!bpf_map__is_internal(m)) 8066 continue; 8067 if (!kernel_supports(obj, FEAT_ARRAY_MMAP)) 8068 m->def.map_flags &= ~BPF_F_MMAPABLE; 8069 } 8070 8071 return 0; 8072 } 8073 8074 typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type, 8075 const char *sym_name, void *ctx); 8076 8077 static int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx) 8078 { 8079 char sym_type, sym_name[500]; 8080 unsigned long long sym_addr; 8081 int ret, err = 0; 8082 FILE *f; 8083 8084 f = fopen("/proc/kallsyms", "re"); 8085 if (!f) { 8086 err = -errno; 8087 pr_warn("failed to open /proc/kallsyms: %d\n", err); 8088 return err; 8089 } 8090 8091 while (true) { 8092 ret = fscanf(f, "%llx %c %499s%*[^\n]\n", 8093 &sym_addr, &sym_type, sym_name); 8094 if (ret == EOF && feof(f)) 8095 break; 8096 if (ret != 3) { 8097 pr_warn("failed to read kallsyms entry: %d\n", ret); 8098 err = -EINVAL; 8099 break; 8100 } 8101 8102 err = cb(sym_addr, sym_type, sym_name, ctx); 8103 if (err) 8104 break; 8105 } 8106 8107 fclose(f); 8108 return err; 8109 } 8110 8111 static int kallsyms_cb(unsigned long long sym_addr, char sym_type, 8112 const char *sym_name, void *ctx) 8113 { 8114 struct bpf_object *obj = ctx; 8115 const struct btf_type *t; 8116 struct extern_desc *ext; 8117 char *res; 8118 8119 res = strstr(sym_name, ".llvm."); 8120 if (sym_type == 'd' && res) 8121 ext = find_extern_by_name_with_len(obj, sym_name, res - sym_name); 8122 else 8123 ext = find_extern_by_name(obj, sym_name); 8124 if (!ext || ext->type != EXT_KSYM) 8125 return 0; 8126 8127 t = btf__type_by_id(obj->btf, ext->btf_id); 8128 if (!btf_is_var(t)) 8129 return 0; 8130 8131 if (ext->is_set && ext->ksym.addr != sym_addr) { 8132 pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n", 8133 sym_name, ext->ksym.addr, sym_addr); 8134 return -EINVAL; 8135 } 8136 if (!ext->is_set) { 8137 ext->is_set = true; 8138 ext->ksym.addr = sym_addr; 8139 pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr); 8140 } 8141 return 0; 8142 } 8143 8144 static int 
bpf_object__read_kallsyms_file(struct bpf_object *obj) 8145 { 8146 return libbpf_kallsyms_parse(kallsyms_cb, obj); 8147 } 8148 8149 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, 8150 __u16 kind, struct btf **res_btf, 8151 struct module_btf **res_mod_btf) 8152 { 8153 struct module_btf *mod_btf; 8154 struct btf *btf; 8155 int i, id, err; 8156 8157 btf = obj->btf_vmlinux; 8158 mod_btf = NULL; 8159 id = btf__find_by_name_kind(btf, ksym_name, kind); 8160 8161 if (id == -ENOENT) { 8162 err = load_module_btfs(obj); 8163 if (err) 8164 return err; 8165 8166 for (i = 0; i < obj->btf_module_cnt; i++) { 8167 /* we assume module_btf's BTF FD is always >0 */ 8168 mod_btf = &obj->btf_modules[i]; 8169 btf = mod_btf->btf; 8170 id = btf__find_by_name_kind_own(btf, ksym_name, kind); 8171 if (id != -ENOENT) 8172 break; 8173 } 8174 } 8175 if (id <= 0) 8176 return -ESRCH; 8177 8178 *res_btf = btf; 8179 *res_mod_btf = mod_btf; 8180 return id; 8181 } 8182 8183 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, 8184 struct extern_desc *ext) 8185 { 8186 const struct btf_type *targ_var, *targ_type; 8187 __u32 targ_type_id, local_type_id; 8188 struct module_btf *mod_btf = NULL; 8189 const char *targ_var_name; 8190 struct btf *btf = NULL; 8191 int id, err; 8192 8193 id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf); 8194 if (id < 0) { 8195 if (id == -ESRCH && ext->is_weak) 8196 return 0; 8197 pr_warn("extern (var ksym) '%s': not found in kernel BTF\n", 8198 ext->name); 8199 return id; 8200 } 8201 8202 /* find local type_id */ 8203 local_type_id = ext->ksym.type_id; 8204 8205 /* find target type_id */ 8206 targ_var = btf__type_by_id(btf, id); 8207 targ_var_name = btf__name_by_offset(btf, targ_var->name_off); 8208 targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id); 8209 8210 err = bpf_core_types_are_compat(obj->btf, local_type_id, 8211 btf, targ_type_id); 8212 if (err <= 0) { 8213 const struct btf_type *local_type; 8214 const char *targ_name, *local_name; 8215 8216 local_type = btf__type_by_id(obj->btf, local_type_id); 8217 local_name = btf__name_by_offset(obj->btf, local_type->name_off); 8218 targ_name = btf__name_by_offset(btf, targ_type->name_off); 8219 8220 pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n", 8221 ext->name, local_type_id, 8222 btf_kind_str(local_type), local_name, targ_type_id, 8223 btf_kind_str(targ_type), targ_name); 8224 return -EINVAL; 8225 } 8226 8227 ext->is_set = true; 8228 ext->ksym.kernel_btf_obj_fd = mod_btf ? 
mod_btf->fd : 0; 8229 ext->ksym.kernel_btf_id = id; 8230 pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n", 8231 ext->name, id, btf_kind_str(targ_var), targ_var_name); 8232 8233 return 0; 8234 } 8235 8236 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, 8237 struct extern_desc *ext) 8238 { 8239 int local_func_proto_id, kfunc_proto_id, kfunc_id; 8240 struct module_btf *mod_btf = NULL; 8241 const struct btf_type *kern_func; 8242 struct btf *kern_btf = NULL; 8243 int ret; 8244 8245 local_func_proto_id = ext->ksym.type_id; 8246 8247 kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf, 8248 &mod_btf); 8249 if (kfunc_id < 0) { 8250 if (kfunc_id == -ESRCH && ext->is_weak) 8251 return 0; 8252 pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n", 8253 ext->name); 8254 return kfunc_id; 8255 } 8256 8257 kern_func = btf__type_by_id(kern_btf, kfunc_id); 8258 kfunc_proto_id = kern_func->type; 8259 8260 ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id, 8261 kern_btf, kfunc_proto_id); 8262 if (ret <= 0) { 8263 if (ext->is_weak) 8264 return 0; 8265 8266 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n", 8267 ext->name, local_func_proto_id, 8268 mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id); 8269 return -EINVAL; 8270 } 8271 8272 /* set index for module BTF fd in fd_array, if unset */ 8273 if (mod_btf && !mod_btf->fd_array_idx) { 8274 /* insn->off is s16 */ 8275 if (obj->fd_array_cnt == INT16_MAX) { 8276 pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n", 8277 ext->name, mod_btf->fd_array_idx); 8278 return -E2BIG; 8279 } 8280 /* Cannot use index 0 for module BTF fd */ 8281 if (!obj->fd_array_cnt) 8282 obj->fd_array_cnt = 1; 8283 8284 ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int), 8285 obj->fd_array_cnt + 1); 8286 if (ret) 8287 return ret; 8288 mod_btf->fd_array_idx = obj->fd_array_cnt; 8289 /* we assume module BTF FD is always >0 */ 8290 obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd; 8291 } 8292 8293 ext->is_set = true; 8294 ext->ksym.kernel_btf_id = kfunc_id; 8295 ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0; 8296 /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data() 8297 * populates FD into ld_imm64 insn when it's used to point to kfunc. 8298 * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call. 8299 * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64. 8300 */ 8301 ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0; 8302 pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n", 8303 ext->name, mod_btf ? 
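/*
 * A minimal BPF-side sketch of the __ksym externs these resolvers service;
 * the symbol names are only examples. Typed externs (with BTF type info) are
 * matched against vmlinux/module BTF here, while typeless ones are resolved
 * to addresses from /proc/kallsyms instead.
 *
 *	// BPF-side C, using macros from bpf/bpf_helpers.h
 *	extern const struct rq runqueues __ksym;		// typed variable ksym
 *	extern void bpf_rcu_read_lock(void) __ksym __weak;	// kfunc; weak, so a
 *								// missing symbol is tolerated
 */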
mod_btf->name : "vmlinux", kfunc_id); 8304 8305 return 0; 8306 } 8307 8308 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) 8309 { 8310 const struct btf_type *t; 8311 struct extern_desc *ext; 8312 int i, err; 8313 8314 for (i = 0; i < obj->nr_extern; i++) { 8315 ext = &obj->externs[i]; 8316 if (ext->type != EXT_KSYM || !ext->ksym.type_id) 8317 continue; 8318 8319 if (obj->gen_loader) { 8320 ext->is_set = true; 8321 ext->ksym.kernel_btf_obj_fd = 0; 8322 ext->ksym.kernel_btf_id = 0; 8323 continue; 8324 } 8325 t = btf__type_by_id(obj->btf, ext->btf_id); 8326 if (btf_is_var(t)) 8327 err = bpf_object__resolve_ksym_var_btf_id(obj, ext); 8328 else 8329 err = bpf_object__resolve_ksym_func_btf_id(obj, ext); 8330 if (err) 8331 return err; 8332 } 8333 return 0; 8334 } 8335 8336 static int bpf_object__resolve_externs(struct bpf_object *obj, 8337 const char *extra_kconfig) 8338 { 8339 bool need_config = false, need_kallsyms = false; 8340 bool need_vmlinux_btf = false; 8341 struct extern_desc *ext; 8342 void *kcfg_data = NULL; 8343 int err, i; 8344 8345 if (obj->nr_extern == 0) 8346 return 0; 8347 8348 if (obj->kconfig_map_idx >= 0) 8349 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped; 8350 8351 for (i = 0; i < obj->nr_extern; i++) { 8352 ext = &obj->externs[i]; 8353 8354 if (ext->type == EXT_KSYM) { 8355 if (ext->ksym.type_id) 8356 need_vmlinux_btf = true; 8357 else 8358 need_kallsyms = true; 8359 continue; 8360 } else if (ext->type == EXT_KCFG) { 8361 void *ext_ptr = kcfg_data + ext->kcfg.data_off; 8362 __u64 value = 0; 8363 8364 /* Kconfig externs need actual /proc/config.gz */ 8365 if (str_has_pfx(ext->name, "CONFIG_")) { 8366 need_config = true; 8367 continue; 8368 } 8369 8370 /* Virtual kcfg externs are customly handled by libbpf */ 8371 if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { 8372 value = get_kernel_version(); 8373 if (!value) { 8374 pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name); 8375 return -EINVAL; 8376 } 8377 } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) { 8378 value = kernel_supports(obj, FEAT_BPF_COOKIE); 8379 } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) { 8380 value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER); 8381 } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) { 8382 /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed 8383 * __kconfig externs, where LINUX_ ones are virtual and filled out 8384 * customly by libbpf (their values don't come from Kconfig). 8385 * If LINUX_xxx variable is not recognized by libbpf, but is marked 8386 * __weak, it defaults to zero value, just like for CONFIG_xxx 8387 * externs. 
8388 */ 8389 pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name); 8390 return -EINVAL; 8391 } 8392 8393 err = set_kcfg_value_num(ext, ext_ptr, value); 8394 if (err) 8395 return err; 8396 pr_debug("extern (kcfg) '%s': set to 0x%llx\n", 8397 ext->name, (long long)value); 8398 } else { 8399 pr_warn("extern '%s': unrecognized extern kind\n", ext->name); 8400 return -EINVAL; 8401 } 8402 } 8403 if (need_config && extra_kconfig) { 8404 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data); 8405 if (err) 8406 return -EINVAL; 8407 need_config = false; 8408 for (i = 0; i < obj->nr_extern; i++) { 8409 ext = &obj->externs[i]; 8410 if (ext->type == EXT_KCFG && !ext->is_set) { 8411 need_config = true; 8412 break; 8413 } 8414 } 8415 } 8416 if (need_config) { 8417 err = bpf_object__read_kconfig_file(obj, kcfg_data); 8418 if (err) 8419 return -EINVAL; 8420 } 8421 if (need_kallsyms) { 8422 err = bpf_object__read_kallsyms_file(obj); 8423 if (err) 8424 return -EINVAL; 8425 } 8426 if (need_vmlinux_btf) { 8427 err = bpf_object__resolve_ksyms_btf_id(obj); 8428 if (err) 8429 return -EINVAL; 8430 } 8431 for (i = 0; i < obj->nr_extern; i++) { 8432 ext = &obj->externs[i]; 8433 8434 if (!ext->is_set && !ext->is_weak) { 8435 pr_warn("extern '%s' (strong): not resolved\n", ext->name); 8436 return -ESRCH; 8437 } else if (!ext->is_set) { 8438 pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n", 8439 ext->name); 8440 } 8441 } 8442 8443 return 0; 8444 } 8445 8446 static void bpf_map_prepare_vdata(const struct bpf_map *map) 8447 { 8448 struct bpf_struct_ops *st_ops; 8449 __u32 i; 8450 8451 st_ops = map->st_ops; 8452 for (i = 0; i < btf_vlen(st_ops->type); i++) { 8453 struct bpf_program *prog = st_ops->progs[i]; 8454 void *kern_data; 8455 int prog_fd; 8456 8457 if (!prog) 8458 continue; 8459 8460 prog_fd = bpf_program__fd(prog); 8461 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; 8462 *(unsigned long *)kern_data = prog_fd; 8463 } 8464 } 8465 8466 static int bpf_object_prepare_struct_ops(struct bpf_object *obj) 8467 { 8468 struct bpf_map *map; 8469 int i; 8470 8471 for (i = 0; i < obj->nr_maps; i++) { 8472 map = &obj->maps[i]; 8473 8474 if (!bpf_map__is_struct_ops(map)) 8475 continue; 8476 8477 if (!map->autocreate) 8478 continue; 8479 8480 bpf_map_prepare_vdata(map); 8481 } 8482 8483 return 0; 8484 } 8485 8486 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) 8487 { 8488 int err, i; 8489 8490 if (!obj) 8491 return libbpf_err(-EINVAL); 8492 8493 if (obj->loaded) { 8494 pr_warn("object '%s': load can't be attempted twice\n", obj->name); 8495 return libbpf_err(-EINVAL); 8496 } 8497 8498 if (obj->gen_loader) 8499 bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); 8500 8501 err = bpf_object_prepare_token(obj); 8502 err = err ? : bpf_object__probe_loading(obj); 8503 err = err ? : bpf_object__load_vmlinux_btf(obj, false); 8504 err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); 8505 err = err ? : bpf_object__sanitize_maps(obj); 8506 err = err ? : bpf_object__init_kern_struct_ops_maps(obj); 8507 err = err ? : bpf_object_adjust_struct_ops_autoload(obj); 8508 err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); 8509 err = err ? : bpf_object__sanitize_and_load_btf(obj); 8510 err = err ? : bpf_object__create_maps(obj); 8511 err = err ? : bpf_object__load_progs(obj, extra_log_level); 8512 err = err ? : bpf_object_init_prog_arrays(obj); 8513 err = err ? 
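/*
 * A minimal sketch of the caller-side lifecycle that the err chain above
 * implements, with hypothetical file and program names: open, optionally
 * adjust programs/maps, load, then attach.
 *
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	if (!obj)
 *		return -errno;
 *	if (bpf_object__load(obj)) {
 *		bpf_object__close(obj);
 *		return -errno;
 *	}
 *	struct bpf_program *prog = bpf_object__find_program_by_name(obj, "my_prog");
 *	struct bpf_link *link = bpf_program__attach(prog);	// uses the SEC() definition
 *	...
 *	bpf_link__destroy(link);
 *	bpf_object__close(obj);
 */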
: bpf_object_prepare_struct_ops(obj); 8514 8515 if (obj->gen_loader) { 8516 /* reset FDs */ 8517 if (obj->btf) 8518 btf__set_fd(obj->btf, -1); 8519 if (!err) 8520 err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); 8521 } 8522 8523 /* clean up fd_array */ 8524 zfree(&obj->fd_array); 8525 8526 /* clean up module BTFs */ 8527 for (i = 0; i < obj->btf_module_cnt; i++) { 8528 close(obj->btf_modules[i].fd); 8529 btf__free(obj->btf_modules[i].btf); 8530 free(obj->btf_modules[i].name); 8531 } 8532 free(obj->btf_modules); 8533 8534 /* clean up vmlinux BTF */ 8535 btf__free(obj->btf_vmlinux); 8536 obj->btf_vmlinux = NULL; 8537 8538 obj->loaded = true; /* doesn't matter if successfully or not */ 8539 8540 if (err) 8541 goto out; 8542 8543 return 0; 8544 out: 8545 /* unpin any maps that were auto-pinned during load */ 8546 for (i = 0; i < obj->nr_maps; i++) 8547 if (obj->maps[i].pinned && !obj->maps[i].reused) 8548 bpf_map__unpin(&obj->maps[i], NULL); 8549 8550 bpf_object_unload(obj); 8551 pr_warn("failed to load object '%s'\n", obj->path); 8552 return libbpf_err(err); 8553 } 8554 8555 int bpf_object__load(struct bpf_object *obj) 8556 { 8557 return bpf_object_load(obj, 0, NULL); 8558 } 8559 8560 static int make_parent_dir(const char *path) 8561 { 8562 char *cp, errmsg[STRERR_BUFSIZE]; 8563 char *dname, *dir; 8564 int err = 0; 8565 8566 dname = strdup(path); 8567 if (dname == NULL) 8568 return -ENOMEM; 8569 8570 dir = dirname(dname); 8571 if (mkdir(dir, 0700) && errno != EEXIST) 8572 err = -errno; 8573 8574 free(dname); 8575 if (err) { 8576 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); 8577 pr_warn("failed to mkdir %s: %s\n", path, cp); 8578 } 8579 return err; 8580 } 8581 8582 static int check_path(const char *path) 8583 { 8584 char *cp, errmsg[STRERR_BUFSIZE]; 8585 struct statfs st_fs; 8586 char *dname, *dir; 8587 int err = 0; 8588 8589 if (path == NULL) 8590 return -EINVAL; 8591 8592 dname = strdup(path); 8593 if (dname == NULL) 8594 return -ENOMEM; 8595 8596 dir = dirname(dname); 8597 if (statfs(dir, &st_fs)) { 8598 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); 8599 pr_warn("failed to statfs %s: %s\n", dir, cp); 8600 err = -errno; 8601 } 8602 free(dname); 8603 8604 if (!err && st_fs.f_type != BPF_FS_MAGIC) { 8605 pr_warn("specified path %s is not on BPF FS\n", path); 8606 err = -EINVAL; 8607 } 8608 8609 return err; 8610 } 8611 8612 int bpf_program__pin(struct bpf_program *prog, const char *path) 8613 { 8614 char *cp, errmsg[STRERR_BUFSIZE]; 8615 int err; 8616 8617 if (prog->fd < 0) { 8618 pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name); 8619 return libbpf_err(-EINVAL); 8620 } 8621 8622 err = make_parent_dir(path); 8623 if (err) 8624 return libbpf_err(err); 8625 8626 err = check_path(path); 8627 if (err) 8628 return libbpf_err(err); 8629 8630 if (bpf_obj_pin(prog->fd, path)) { 8631 err = -errno; 8632 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 8633 pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp); 8634 return libbpf_err(err); 8635 } 8636 8637 pr_debug("prog '%s': pinned at '%s'\n", prog->name, path); 8638 return 0; 8639 } 8640 8641 int bpf_program__unpin(struct bpf_program *prog, const char *path) 8642 { 8643 int err; 8644 8645 if (prog->fd < 0) { 8646 pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name); 8647 return libbpf_err(-EINVAL); 8648 } 8649 8650 err = check_path(path); 8651 if (err) 8652 return libbpf_err(err); 8653 8654 err = unlink(path); 8655 if (err) 8656 return 
libbpf_err(-errno); 8657 8658 pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path); 8659 return 0; 8660 } 8661 8662 int bpf_map__pin(struct bpf_map *map, const char *path) 8663 { 8664 char *cp, errmsg[STRERR_BUFSIZE]; 8665 int err; 8666 8667 if (map == NULL) { 8668 pr_warn("invalid map pointer\n"); 8669 return libbpf_err(-EINVAL); 8670 } 8671 8672 if (map->fd < 0) { 8673 pr_warn("map '%s': can't pin BPF map without FD (was it created?)\n", map->name); 8674 return libbpf_err(-EINVAL); 8675 } 8676 8677 if (map->pin_path) { 8678 if (path && strcmp(path, map->pin_path)) { 8679 pr_warn("map '%s' already has pin path '%s' different from '%s'\n", 8680 bpf_map__name(map), map->pin_path, path); 8681 return libbpf_err(-EINVAL); 8682 } else if (map->pinned) { 8683 pr_debug("map '%s' already pinned at '%s'; not re-pinning\n", 8684 bpf_map__name(map), map->pin_path); 8685 return 0; 8686 } 8687 } else { 8688 if (!path) { 8689 pr_warn("missing a path to pin map '%s' at\n", 8690 bpf_map__name(map)); 8691 return libbpf_err(-EINVAL); 8692 } else if (map->pinned) { 8693 pr_warn("map '%s' already pinned\n", bpf_map__name(map)); 8694 return libbpf_err(-EEXIST); 8695 } 8696 8697 map->pin_path = strdup(path); 8698 if (!map->pin_path) { 8699 err = -errno; 8700 goto out_err; 8701 } 8702 } 8703 8704 err = make_parent_dir(map->pin_path); 8705 if (err) 8706 return libbpf_err(err); 8707 8708 err = check_path(map->pin_path); 8709 if (err) 8710 return libbpf_err(err); 8711 8712 if (bpf_obj_pin(map->fd, map->pin_path)) { 8713 err = -errno; 8714 goto out_err; 8715 } 8716 8717 map->pinned = true; 8718 pr_debug("pinned map '%s'\n", map->pin_path); 8719 8720 return 0; 8721 8722 out_err: 8723 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); 8724 pr_warn("failed to pin map: %s\n", cp); 8725 return libbpf_err(err); 8726 } 8727 8728 int bpf_map__unpin(struct bpf_map *map, const char *path) 8729 { 8730 int err; 8731 8732 if (map == NULL) { 8733 pr_warn("invalid map pointer\n"); 8734 return libbpf_err(-EINVAL); 8735 } 8736 8737 if (map->pin_path) { 8738 if (path && strcmp(path, map->pin_path)) { 8739 pr_warn("map '%s' already has pin path '%s' different from '%s'\n", 8740 bpf_map__name(map), map->pin_path, path); 8741 return libbpf_err(-EINVAL); 8742 } 8743 path = map->pin_path; 8744 } else if (!path) { 8745 pr_warn("no path to unpin map '%s' from\n", 8746 bpf_map__name(map)); 8747 return libbpf_err(-EINVAL); 8748 } 8749 8750 err = check_path(path); 8751 if (err) 8752 return libbpf_err(err); 8753 8754 err = unlink(path); 8755 if (err != 0) 8756 return libbpf_err(-errno); 8757 8758 map->pinned = false; 8759 pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path); 8760 8761 return 0; 8762 } 8763 8764 int bpf_map__set_pin_path(struct bpf_map *map, const char *path) 8765 { 8766 char *new = NULL; 8767 8768 if (path) { 8769 new = strdup(path); 8770 if (!new) 8771 return libbpf_err(-errno); 8772 } 8773 8774 free(map->pin_path); 8775 map->pin_path = new; 8776 return 0; 8777 } 8778 8779 __alias(bpf_map__pin_path) 8780 const char *bpf_map__get_pin_path(const struct bpf_map *map); 8781 8782 const char *bpf_map__pin_path(const struct bpf_map *map) 8783 { 8784 return map->pin_path; 8785 } 8786 8787 bool bpf_map__is_pinned(const struct bpf_map *map) 8788 { 8789 return map->pinned; 8790 } 8791 8792 static void sanitize_pin_path(char *s) 8793 { 8794 /* bpffs disallows periods in path names */ 8795 while (*s) { 8796 if (*s == '.') 8797 *s = '_'; 8798 s++; 8799 } 8800 } 8801 8802 int bpf_object__pin_maps(struct bpf_object 
*obj, const char *path) 8803 { 8804 struct bpf_map *map; 8805 int err; 8806 8807 if (!obj) 8808 return libbpf_err(-ENOENT); 8809 8810 if (!obj->loaded) { 8811 pr_warn("object not yet loaded; load it first\n"); 8812 return libbpf_err(-ENOENT); 8813 } 8814 8815 bpf_object__for_each_map(map, obj) { 8816 char *pin_path = NULL; 8817 char buf[PATH_MAX]; 8818 8819 if (!map->autocreate) 8820 continue; 8821 8822 if (path) { 8823 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); 8824 if (err) 8825 goto err_unpin_maps; 8826 sanitize_pin_path(buf); 8827 pin_path = buf; 8828 } else if (!map->pin_path) { 8829 continue; 8830 } 8831 8832 err = bpf_map__pin(map, pin_path); 8833 if (err) 8834 goto err_unpin_maps; 8835 } 8836 8837 return 0; 8838 8839 err_unpin_maps: 8840 while ((map = bpf_object__prev_map(obj, map))) { 8841 if (!map->pin_path) 8842 continue; 8843 8844 bpf_map__unpin(map, NULL); 8845 } 8846 8847 return libbpf_err(err); 8848 } 8849 8850 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) 8851 { 8852 struct bpf_map *map; 8853 int err; 8854 8855 if (!obj) 8856 return libbpf_err(-ENOENT); 8857 8858 bpf_object__for_each_map(map, obj) { 8859 char *pin_path = NULL; 8860 char buf[PATH_MAX]; 8861 8862 if (path) { 8863 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); 8864 if (err) 8865 return libbpf_err(err); 8866 sanitize_pin_path(buf); 8867 pin_path = buf; 8868 } else if (!map->pin_path) { 8869 continue; 8870 } 8871 8872 err = bpf_map__unpin(map, pin_path); 8873 if (err) 8874 return libbpf_err(err); 8875 } 8876 8877 return 0; 8878 } 8879 8880 int bpf_object__pin_programs(struct bpf_object *obj, const char *path) 8881 { 8882 struct bpf_program *prog; 8883 char buf[PATH_MAX]; 8884 int err; 8885 8886 if (!obj) 8887 return libbpf_err(-ENOENT); 8888 8889 if (!obj->loaded) { 8890 pr_warn("object not yet loaded; load it first\n"); 8891 return libbpf_err(-ENOENT); 8892 } 8893 8894 bpf_object__for_each_program(prog, obj) { 8895 err = pathname_concat(buf, sizeof(buf), path, prog->name); 8896 if (err) 8897 goto err_unpin_programs; 8898 8899 err = bpf_program__pin(prog, buf); 8900 if (err) 8901 goto err_unpin_programs; 8902 } 8903 8904 return 0; 8905 8906 err_unpin_programs: 8907 while ((prog = bpf_object__prev_program(obj, prog))) { 8908 if (pathname_concat(buf, sizeof(buf), path, prog->name)) 8909 continue; 8910 8911 bpf_program__unpin(prog, buf); 8912 } 8913 8914 return libbpf_err(err); 8915 } 8916 8917 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) 8918 { 8919 struct bpf_program *prog; 8920 int err; 8921 8922 if (!obj) 8923 return libbpf_err(-ENOENT); 8924 8925 bpf_object__for_each_program(prog, obj) { 8926 char buf[PATH_MAX]; 8927 8928 err = pathname_concat(buf, sizeof(buf), path, prog->name); 8929 if (err) 8930 return libbpf_err(err); 8931 8932 err = bpf_program__unpin(prog, buf); 8933 if (err) 8934 return libbpf_err(err); 8935 } 8936 8937 return 0; 8938 } 8939 8940 int bpf_object__pin(struct bpf_object *obj, const char *path) 8941 { 8942 int err; 8943 8944 err = bpf_object__pin_maps(obj, path); 8945 if (err) 8946 return libbpf_err(err); 8947 8948 err = bpf_object__pin_programs(obj, path); 8949 if (err) { 8950 bpf_object__unpin_maps(obj, path); 8951 return libbpf_err(err); 8952 } 8953 8954 return 0; 8955 } 8956 8957 int bpf_object__unpin(struct bpf_object *obj, const char *path) 8958 { 8959 int err; 8960 8961 err = bpf_object__unpin_programs(obj, path); 8962 if (err) 8963 return libbpf_err(err); 8964 8965 err = 
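/*
 * A minimal pinning sketch with a hypothetical directory: pin paths must be
 * on a BPF FS mount (check_path() verifies BPF_FS_MAGIC), and '.' in derived
 * pin names is rewritten to '_' by sanitize_pin_path().
 *
 *	// after a successful bpf_object__load(obj):
 *	err = bpf_object__pin_maps(obj, "/sys/fs/bpf/myapp");
 *	if (!err)
 *		err = bpf_object__pin_programs(obj, "/sys/fs/bpf/myapp");
 *	if (err)
 *		fprintf(stderr, "pinning failed: %d\n", err);
 *	// or, equivalently, both at once: bpf_object__pin(obj, "/sys/fs/bpf/myapp");
 */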
bpf_object__unpin_maps(obj, path); 8966 if (err) 8967 return libbpf_err(err); 8968 8969 return 0; 8970 } 8971 8972 static void bpf_map__destroy(struct bpf_map *map) 8973 { 8974 if (map->inner_map) { 8975 bpf_map__destroy(map->inner_map); 8976 zfree(&map->inner_map); 8977 } 8978 8979 zfree(&map->init_slots); 8980 map->init_slots_sz = 0; 8981 8982 if (map->mmaped && map->mmaped != map->obj->arena_data) 8983 munmap(map->mmaped, bpf_map_mmap_sz(map)); 8984 map->mmaped = NULL; 8985 8986 if (map->st_ops) { 8987 zfree(&map->st_ops->data); 8988 zfree(&map->st_ops->progs); 8989 zfree(&map->st_ops->kern_func_off); 8990 zfree(&map->st_ops); 8991 } 8992 8993 zfree(&map->name); 8994 zfree(&map->real_name); 8995 zfree(&map->pin_path); 8996 8997 if (map->fd >= 0) 8998 zclose(map->fd); 8999 } 9000 9001 void bpf_object__close(struct bpf_object *obj) 9002 { 9003 size_t i; 9004 9005 if (IS_ERR_OR_NULL(obj)) 9006 return; 9007 9008 usdt_manager_free(obj->usdt_man); 9009 obj->usdt_man = NULL; 9010 9011 bpf_gen__free(obj->gen_loader); 9012 bpf_object__elf_finish(obj); 9013 bpf_object_unload(obj); 9014 btf__free(obj->btf); 9015 btf__free(obj->btf_vmlinux); 9016 btf_ext__free(obj->btf_ext); 9017 9018 for (i = 0; i < obj->nr_maps; i++) 9019 bpf_map__destroy(&obj->maps[i]); 9020 9021 zfree(&obj->btf_custom_path); 9022 zfree(&obj->kconfig); 9023 9024 for (i = 0; i < obj->nr_extern; i++) 9025 zfree(&obj->externs[i].essent_name); 9026 9027 zfree(&obj->externs); 9028 obj->nr_extern = 0; 9029 9030 zfree(&obj->maps); 9031 obj->nr_maps = 0; 9032 9033 if (obj->programs && obj->nr_programs) { 9034 for (i = 0; i < obj->nr_programs; i++) 9035 bpf_program__exit(&obj->programs[i]); 9036 } 9037 zfree(&obj->programs); 9038 9039 zfree(&obj->feat_cache); 9040 zfree(&obj->token_path); 9041 if (obj->token_fd > 0) 9042 close(obj->token_fd); 9043 9044 zfree(&obj->arena_data); 9045 9046 free(obj); 9047 } 9048 9049 const char *bpf_object__name(const struct bpf_object *obj) 9050 { 9051 return obj ? obj->name : libbpf_err_ptr(-EINVAL); 9052 } 9053 9054 unsigned int bpf_object__kversion(const struct bpf_object *obj) 9055 { 9056 return obj ? obj->kern_version : 0; 9057 } 9058 9059 struct btf *bpf_object__btf(const struct bpf_object *obj) 9060 { 9061 return obj ? obj->btf : NULL; 9062 } 9063 9064 int bpf_object__btf_fd(const struct bpf_object *obj) 9065 { 9066 return obj->btf ? btf__fd(obj->btf) : -1; 9067 } 9068 9069 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version) 9070 { 9071 if (obj->loaded) 9072 return libbpf_err(-EINVAL); 9073 9074 obj->kern_version = kern_version; 9075 9076 return 0; 9077 } 9078 9079 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) 9080 { 9081 struct bpf_gen *gen; 9082 9083 if (!opts) 9084 return -EFAULT; 9085 if (!OPTS_VALID(opts, gen_loader_opts)) 9086 return -EINVAL; 9087 gen = calloc(sizeof(*gen), 1); 9088 if (!gen) 9089 return -ENOMEM; 9090 gen->opts = opts; 9091 obj->gen_loader = gen; 9092 return 0; 9093 } 9094 9095 static struct bpf_program * 9096 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, 9097 bool forward) 9098 { 9099 size_t nr_programs = obj->nr_programs; 9100 ssize_t idx; 9101 9102 if (!nr_programs) 9103 return NULL; 9104 9105 if (!p) 9106 /* Iter from the beginning */ 9107 return forward ? 
&obj->programs[0] : 9108 &obj->programs[nr_programs - 1]; 9109 9110 if (p->obj != obj) { 9111 pr_warn("error: program handler doesn't match object\n"); 9112 return errno = EINVAL, NULL; 9113 } 9114 9115 idx = (p - obj->programs) + (forward ? 1 : -1); 9116 if (idx >= obj->nr_programs || idx < 0) 9117 return NULL; 9118 return &obj->programs[idx]; 9119 } 9120 9121 struct bpf_program * 9122 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) 9123 { 9124 struct bpf_program *prog = prev; 9125 9126 do { 9127 prog = __bpf_program__iter(prog, obj, true); 9128 } while (prog && prog_is_subprog(obj, prog)); 9129 9130 return prog; 9131 } 9132 9133 struct bpf_program * 9134 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next) 9135 { 9136 struct bpf_program *prog = next; 9137 9138 do { 9139 prog = __bpf_program__iter(prog, obj, false); 9140 } while (prog && prog_is_subprog(obj, prog)); 9141 9142 return prog; 9143 } 9144 9145 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) 9146 { 9147 prog->prog_ifindex = ifindex; 9148 } 9149 9150 const char *bpf_program__name(const struct bpf_program *prog) 9151 { 9152 return prog->name; 9153 } 9154 9155 const char *bpf_program__section_name(const struct bpf_program *prog) 9156 { 9157 return prog->sec_name; 9158 } 9159 9160 bool bpf_program__autoload(const struct bpf_program *prog) 9161 { 9162 return prog->autoload; 9163 } 9164 9165 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) 9166 { 9167 if (prog->obj->loaded) 9168 return libbpf_err(-EINVAL); 9169 9170 prog->autoload = autoload; 9171 return 0; 9172 } 9173 9174 bool bpf_program__autoattach(const struct bpf_program *prog) 9175 { 9176 return prog->autoattach; 9177 } 9178 9179 void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach) 9180 { 9181 prog->autoattach = autoattach; 9182 } 9183 9184 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog) 9185 { 9186 return prog->insns; 9187 } 9188 9189 size_t bpf_program__insn_cnt(const struct bpf_program *prog) 9190 { 9191 return prog->insns_cnt; 9192 } 9193 9194 int bpf_program__set_insns(struct bpf_program *prog, 9195 struct bpf_insn *new_insns, size_t new_insn_cnt) 9196 { 9197 struct bpf_insn *insns; 9198 9199 if (prog->obj->loaded) 9200 return -EBUSY; 9201 9202 insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns)); 9203 /* NULL is a valid return from reallocarray if the new count is zero */ 9204 if (!insns && new_insn_cnt) { 9205 pr_warn("prog '%s': failed to realloc prog code\n", prog->name); 9206 return -ENOMEM; 9207 } 9208 memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns)); 9209 9210 prog->insns = insns; 9211 prog->insns_cnt = new_insn_cnt; 9212 return 0; 9213 } 9214 9215 int bpf_program__fd(const struct bpf_program *prog) 9216 { 9217 if (!prog) 9218 return libbpf_err(-EINVAL); 9219 9220 if (prog->fd < 0) 9221 return libbpf_err(-ENOENT); 9222 9223 return prog->fd; 9224 } 9225 9226 __alias(bpf_program__type) 9227 enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); 9228 9229 enum bpf_prog_type bpf_program__type(const struct bpf_program *prog) 9230 { 9231 return prog->type; 9232 } 9233 9234 static size_t custom_sec_def_cnt; 9235 static struct bpf_sec_def *custom_sec_defs; 9236 static struct bpf_sec_def custom_fallback_def; 9237 static bool has_custom_fallback_def; 9238 static int last_custom_sec_def_handler_id; 9239 9240 int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type 
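/*
 * A minimal sketch of per-program overrides, with hypothetical names: the
 * setters in this area (log level, log buffer, flags, type, expected attach
 * type) all reject changes once the owning object is loaded, so they belong
 * between open and load.
 *
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	struct bpf_program *prog = bpf_object__find_program_by_name(obj, "generic_prog");
 *
 *	bpf_program__set_type(prog, BPF_PROG_TYPE_SCHED_CLS);
 *	bpf_program__set_expected_attach_type(prog, BPF_TCX_INGRESS);
 *	bpf_object__load(obj);	// after this point the setters return -EBUSY
 */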
type) 9241 { 9242 if (prog->obj->loaded) 9243 return libbpf_err(-EBUSY); 9244 9245 /* if type is not changed, do nothing */ 9246 if (prog->type == type) 9247 return 0; 9248 9249 prog->type = type; 9250 9251 /* If a program type was changed, we need to reset associated SEC() 9252 * handler, as it will be invalid now. The only exception is a generic 9253 * fallback handler, which by definition is program type-agnostic and 9254 * is a catch-all custom handler, optionally set by the application, 9255 * so should be able to handle any type of BPF program. 9256 */ 9257 if (prog->sec_def != &custom_fallback_def) 9258 prog->sec_def = NULL; 9259 return 0; 9260 } 9261 9262 __alias(bpf_program__expected_attach_type) 9263 enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); 9264 9265 enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog) 9266 { 9267 return prog->expected_attach_type; 9268 } 9269 9270 int bpf_program__set_expected_attach_type(struct bpf_program *prog, 9271 enum bpf_attach_type type) 9272 { 9273 if (prog->obj->loaded) 9274 return libbpf_err(-EBUSY); 9275 9276 prog->expected_attach_type = type; 9277 return 0; 9278 } 9279 9280 __u32 bpf_program__flags(const struct bpf_program *prog) 9281 { 9282 return prog->prog_flags; 9283 } 9284 9285 int bpf_program__set_flags(struct bpf_program *prog, __u32 flags) 9286 { 9287 if (prog->obj->loaded) 9288 return libbpf_err(-EBUSY); 9289 9290 prog->prog_flags = flags; 9291 return 0; 9292 } 9293 9294 __u32 bpf_program__log_level(const struct bpf_program *prog) 9295 { 9296 return prog->log_level; 9297 } 9298 9299 int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level) 9300 { 9301 if (prog->obj->loaded) 9302 return libbpf_err(-EBUSY); 9303 9304 prog->log_level = log_level; 9305 return 0; 9306 } 9307 9308 const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size) 9309 { 9310 *log_size = prog->log_size; 9311 return prog->log_buf; 9312 } 9313 9314 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size) 9315 { 9316 if (log_size && !log_buf) 9317 return -EINVAL; 9318 if (prog->log_size > UINT_MAX) 9319 return -EINVAL; 9320 if (prog->obj->loaded) 9321 return -EBUSY; 9322 9323 prog->log_buf = log_buf; 9324 prog->log_size = log_size; 9325 return 0; 9326 } 9327 9328 #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) 
{ \ 9329 .sec = (char *)sec_pfx, \ 9330 .prog_type = BPF_PROG_TYPE_##ptype, \ 9331 .expected_attach_type = atype, \ 9332 .cookie = (long)(flags), \ 9333 .prog_prepare_load_fn = libbpf_prepare_prog_load, \ 9334 __VA_ARGS__ \ 9335 } 9336 9337 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9338 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9339 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9340 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9341 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9342 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9343 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9344 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9345 static int attach_kprobe_session(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9346 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9347 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9348 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9349 9350 static const struct bpf_sec_def section_defs[] = { 9351 SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE), 9352 SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE), 9353 SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE), 9354 SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), 9355 SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), 9356 SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), 9357 SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), 9358 SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), 9359 SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), 9360 SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 9361 SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 9362 SEC_DEF("kprobe.session+", KPROBE, BPF_TRACE_KPROBE_SESSION, SEC_NONE, attach_kprobe_session), 9363 SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), 9364 SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), 9365 SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), 9366 SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), 9367 SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), 9368 SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), 9369 SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt), 9370 SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt), 9371 SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */ 9372 SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */ 9373 SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), 9374 SEC_DEF("tcx/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), 9375 SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ 9376 SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx 
*/ 9377 SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */ 9378 SEC_DEF("netkit/primary", SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE), 9379 SEC_DEF("netkit/peer", SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE), 9380 SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp), 9381 SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp), 9382 SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), 9383 SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), 9384 SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), 9385 SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), 9386 SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), 9387 SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), 9388 SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), 9389 SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), 9390 SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), 9391 SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), 9392 SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), 9393 SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace), 9394 SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), 9395 SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), 9396 SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF), 9397 SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), 9398 SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), 9399 SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), 9400 SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), 9401 SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), 9402 SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS), 9403 SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), 9404 SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS), 9405 SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT), 9406 SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE), 9407 SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE), 9408 SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE), 9409 SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE), 9410 SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE), 9411 SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT), 9412 SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT), 9413 SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT), 9414 SEC_DEF("sk_skb/verdict", SK_SKB, BPF_SK_SKB_VERDICT, SEC_ATTACHABLE_OPT), 9415 SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE), 9416 SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT), 9417 SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT), 9418 SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT), 9419 SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT), 9420 SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT), 9421 SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE), 9422 SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE), 9423 SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE), 9424 SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, 
SEC_ATTACHABLE_OPT), 9425 SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE), 9426 SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE), 9427 SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE), 9428 SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE), 9429 SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE), 9430 SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE), 9431 SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE), 9432 SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE), 9433 SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE), 9434 SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE), 9435 SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE), 9436 SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE), 9437 SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE), 9438 SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE), 9439 SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE), 9440 SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE), 9441 SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE), 9442 SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE), 9443 SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE), 9444 SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE), 9445 SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE), 9446 SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE), 9447 SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT), 9448 SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE), 9449 SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE), 9450 SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), 9451 SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE), 9452 }; 9453 9454 int libbpf_register_prog_handler(const char *sec, 9455 enum bpf_prog_type prog_type, 9456 enum bpf_attach_type exp_attach_type, 9457 const struct libbpf_prog_handler_opts *opts) 9458 { 9459 struct bpf_sec_def *sec_def; 9460 9461 if (!OPTS_VALID(opts, libbpf_prog_handler_opts)) 9462 return libbpf_err(-EINVAL); 9463 9464 if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */ 9465 return libbpf_err(-E2BIG); 9466 9467 if (sec) { 9468 sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1, 9469 sizeof(*sec_def)); 9470 if (!sec_def) 9471 return libbpf_err(-ENOMEM); 9472 9473 custom_sec_defs = sec_def; 9474 sec_def = &custom_sec_defs[custom_sec_def_cnt]; 9475 } else { 9476 if (has_custom_fallback_def) 9477 return libbpf_err(-EBUSY); 9478 9479 sec_def = &custom_fallback_def; 9480 } 9481 9482 sec_def->sec = sec ? 
strdup(sec) : NULL;
9483 if (sec && !sec_def->sec)
9484 return libbpf_err(-ENOMEM);
9485
9486 sec_def->prog_type = prog_type;
9487 sec_def->expected_attach_type = exp_attach_type;
9488 sec_def->cookie = OPTS_GET(opts, cookie, 0);
9489
9490 sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL);
9491 sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL);
9492 sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL);
9493
9494 sec_def->handler_id = ++last_custom_sec_def_handler_id;
9495
9496 if (sec)
9497 custom_sec_def_cnt++;
9498 else
9499 has_custom_fallback_def = true;
9500
9501 return sec_def->handler_id;
9502 }
9503
9504 int libbpf_unregister_prog_handler(int handler_id)
9505 {
9506 struct bpf_sec_def *sec_defs;
9507 int i;
9508
9509 if (handler_id <= 0)
9510 return libbpf_err(-EINVAL);
9511
9512 if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) {
9513 memset(&custom_fallback_def, 0, sizeof(custom_fallback_def));
9514 has_custom_fallback_def = false;
9515 return 0;
9516 }
9517
9518 for (i = 0; i < custom_sec_def_cnt; i++) {
9519 if (custom_sec_defs[i].handler_id == handler_id)
9520 break;
9521 }
9522
9523 if (i == custom_sec_def_cnt)
9524 return libbpf_err(-ENOENT);
9525
9526 free(custom_sec_defs[i].sec);
9527 for (i = i + 1; i < custom_sec_def_cnt; i++)
9528 custom_sec_defs[i - 1] = custom_sec_defs[i];
9529 custom_sec_def_cnt--;
9530
9531 /* try to shrink the array, but it's ok if we couldn't */
9532 sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
9533 /* if new count is zero, reallocarray can return a valid NULL result;
9534 * in this case the previous pointer will be freed, so we *have to*
9535 * reassign old pointer to the new value (even if it's NULL)
9536 */
9537 if (sec_defs || custom_sec_def_cnt == 0)
9538 custom_sec_defs = sec_defs;
9539
9540 return 0;
9541 }
9542
9543 static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
9544 {
9545 size_t len = strlen(sec_def->sec);
9546
9547 /* "type/" always has to have proper SEC("type/extras") form */
9548 if (sec_def->sec[len - 1] == '/') {
9549 if (str_has_pfx(sec_name, sec_def->sec))
9550 return true;
9551 return false;
9552 }
9553
9554 /* "type+" means it can be either exact SEC("type") or
9555 * well-formed SEC("type/extras") with proper '/' separator
9556 */
9557 if (sec_def->sec[len - 1] == '+') {
9558 len--;
9559 /* not even a prefix */
9560 if (strncmp(sec_name, sec_def->sec, len) != 0)
9561 return false;
9562 /* exact match or has '/' separator */
9563 if (sec_name[len] == '\0' || sec_name[len] == '/')
9564 return true;
9565 return false;
9566 }
9567
9568 return strcmp(sec_name, sec_def->sec) == 0;
9569 }
9570
9571 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
9572 {
9573 const struct bpf_sec_def *sec_def;
9574 int i, n;
9575
9576 n = custom_sec_def_cnt;
9577 for (i = 0; i < n; i++) {
9578 sec_def = &custom_sec_defs[i];
9579 if (sec_def_matches(sec_def, sec_name))
9580 return sec_def;
9581 }
9582
9583 n = ARRAY_SIZE(section_defs);
9584 for (i = 0; i < n; i++) {
9585 sec_def = &section_defs[i];
9586 if (sec_def_matches(sec_def, sec_name))
9587 return sec_def;
9588 }
9589
9590 if (has_custom_fallback_def)
9591 return &custom_fallback_def;
9592
9593 return NULL;
9594 }
9595
9596 #define MAX_TYPE_NAME_SIZE 32
9597
9598 static char *libbpf_get_type_names(bool attach_type)
9599 {
9600 int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
9601 char *buf;
9602
9603 buf = malloc(len);
9604 if (!buf)
9605 return NULL;
9606
9607 buf[0] = '\0';
9608 /* Forge string buf with all available names */
9609 for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
9610 const struct bpf_sec_def *sec_def = &section_defs[i];
9611
9612 if (attach_type) {
9613 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
9614 continue;
9615
9616 if (!(sec_def->cookie & SEC_ATTACHABLE))
9617 continue;
9618 }
9619
9620 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
9621 free(buf);
9622 return NULL;
9623 }
9624 strcat(buf, " ");
9625 strcat(buf, section_defs[i].sec);
9626 }
9627
9628 return buf;
9629 }
9630
9631 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
9632 enum bpf_attach_type *expected_attach_type)
9633 {
9634 const struct bpf_sec_def *sec_def;
9635 char *type_names;
9636
9637 if (!name)
9638 return libbpf_err(-EINVAL);
9639
9640 sec_def = find_sec_def(name);
9641 if (sec_def) {
9642 *prog_type = sec_def->prog_type;
9643 *expected_attach_type = sec_def->expected_attach_type;
9644 return 0;
9645 }
9646
9647 pr_debug("failed to guess program type from ELF section '%s'\n", name);
9648 type_names = libbpf_get_type_names(false);
9649 if (type_names != NULL) {
9650 pr_debug("supported section(type) names are:%s\n", type_names);
9651 free(type_names);
9652 }
9653
9654 return libbpf_err(-ESRCH);
9655 }
9656
9657 const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
9658 {
9659 if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
9660 return NULL;
9661
9662 return attach_type_name[t];
9663 }
9664
9665 const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
9666 {
9667 if (t < 0 || t >= ARRAY_SIZE(link_type_name))
9668 return NULL;
9669
9670 return link_type_name[t];
9671 }
9672
9673 const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
9674 {
9675 if (t < 0 || t >= ARRAY_SIZE(map_type_name))
9676 return NULL;
9677
9678 return map_type_name[t];
9679 }
9680
9681 const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
9682 {
9683 if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
9684 return NULL;
9685
9686 return prog_type_name[t];
9687 }
9688
9689 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
9690 int sec_idx,
9691 size_t offset)
9692 {
9693 struct bpf_map *map;
9694 size_t i;
9695
9696 for (i = 0; i < obj->nr_maps; i++) {
9697 map = &obj->maps[i];
9698 if (!bpf_map__is_struct_ops(map))
9699 continue;
9700 if (map->sec_idx == sec_idx &&
9701 map->sec_offset <= offset &&
9702 offset - map->sec_offset < map->def.value_size)
9703 return map;
9704 }
9705
9706 return NULL;
9707 }
9708
9709 /* Collect the reloc from ELF, populate the st_ops->progs[], and update
9710 * st_ops->data for shadow type.
9711 */ 9712 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, 9713 Elf64_Shdr *shdr, Elf_Data *data) 9714 { 9715 const struct btf_member *member; 9716 struct bpf_struct_ops *st_ops; 9717 struct bpf_program *prog; 9718 unsigned int shdr_idx; 9719 const struct btf *btf; 9720 struct bpf_map *map; 9721 unsigned int moff, insn_idx; 9722 const char *name; 9723 __u32 member_idx; 9724 Elf64_Sym *sym; 9725 Elf64_Rel *rel; 9726 int i, nrels; 9727 9728 btf = obj->btf; 9729 nrels = shdr->sh_size / shdr->sh_entsize; 9730 for (i = 0; i < nrels; i++) { 9731 rel = elf_rel_by_idx(data, i); 9732 if (!rel) { 9733 pr_warn("struct_ops reloc: failed to get %d reloc\n", i); 9734 return -LIBBPF_ERRNO__FORMAT; 9735 } 9736 9737 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); 9738 if (!sym) { 9739 pr_warn("struct_ops reloc: symbol %zx not found\n", 9740 (size_t)ELF64_R_SYM(rel->r_info)); 9741 return -LIBBPF_ERRNO__FORMAT; 9742 } 9743 9744 name = elf_sym_str(obj, sym->st_name) ?: "<?>"; 9745 map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset); 9746 if (!map) { 9747 pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n", 9748 (size_t)rel->r_offset); 9749 return -EINVAL; 9750 } 9751 9752 moff = rel->r_offset - map->sec_offset; 9753 shdr_idx = sym->st_shndx; 9754 st_ops = map->st_ops; 9755 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", 9756 map->name, 9757 (long long)(rel->r_info >> 32), 9758 (long long)sym->st_value, 9759 shdr_idx, (size_t)rel->r_offset, 9760 map->sec_offset, sym->st_name, name); 9761 9762 if (shdr_idx >= SHN_LORESERVE) { 9763 pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n", 9764 map->name, (size_t)rel->r_offset, shdr_idx); 9765 return -LIBBPF_ERRNO__RELOC; 9766 } 9767 if (sym->st_value % BPF_INSN_SZ) { 9768 pr_warn("struct_ops reloc %s: invalid target program offset %llu\n", 9769 map->name, (unsigned long long)sym->st_value); 9770 return -LIBBPF_ERRNO__FORMAT; 9771 } 9772 insn_idx = sym->st_value / BPF_INSN_SZ; 9773 9774 member = find_member_by_offset(st_ops->type, moff * 8); 9775 if (!member) { 9776 pr_warn("struct_ops reloc %s: cannot find member at moff %u\n", 9777 map->name, moff); 9778 return -EINVAL; 9779 } 9780 member_idx = member - btf_members(st_ops->type); 9781 name = btf__name_by_offset(btf, member->name_off); 9782 9783 if (!resolve_func_ptr(btf, member->type, NULL)) { 9784 pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n", 9785 map->name, name); 9786 return -EINVAL; 9787 } 9788 9789 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx); 9790 if (!prog) { 9791 pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n", 9792 map->name, shdr_idx, name); 9793 return -EINVAL; 9794 } 9795 9796 /* prevent the use of BPF prog with invalid type */ 9797 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) { 9798 pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n", 9799 map->name, prog->name); 9800 return -EINVAL; 9801 } 9802 9803 st_ops->progs[member_idx] = prog; 9804 9805 /* st_ops->data will be exposed to users, being returned by 9806 * bpf_map__initial_value() as a pointer to the shadow 9807 * type. All function pointers in the original struct type 9808 * should be converted to a pointer to struct bpf_program 9809 * in the shadow type. 
9810 */ 9811 *((struct bpf_program **)(st_ops->data + moff)) = prog; 9812 } 9813 9814 return 0; 9815 } 9816 9817 #define BTF_TRACE_PREFIX "btf_trace_" 9818 #define BTF_LSM_PREFIX "bpf_lsm_" 9819 #define BTF_ITER_PREFIX "bpf_iter_" 9820 #define BTF_MAX_NAME_SIZE 128 9821 9822 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, 9823 const char **prefix, int *kind) 9824 { 9825 switch (attach_type) { 9826 case BPF_TRACE_RAW_TP: 9827 *prefix = BTF_TRACE_PREFIX; 9828 *kind = BTF_KIND_TYPEDEF; 9829 break; 9830 case BPF_LSM_MAC: 9831 case BPF_LSM_CGROUP: 9832 *prefix = BTF_LSM_PREFIX; 9833 *kind = BTF_KIND_FUNC; 9834 break; 9835 case BPF_TRACE_ITER: 9836 *prefix = BTF_ITER_PREFIX; 9837 *kind = BTF_KIND_FUNC; 9838 break; 9839 default: 9840 *prefix = ""; 9841 *kind = BTF_KIND_FUNC; 9842 } 9843 } 9844 9845 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, 9846 const char *name, __u32 kind) 9847 { 9848 char btf_type_name[BTF_MAX_NAME_SIZE]; 9849 int ret; 9850 9851 ret = snprintf(btf_type_name, sizeof(btf_type_name), 9852 "%s%s", prefix, name); 9853 /* snprintf returns the number of characters written excluding the 9854 * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it 9855 * indicates truncation. 9856 */ 9857 if (ret < 0 || ret >= sizeof(btf_type_name)) 9858 return -ENAMETOOLONG; 9859 return btf__find_by_name_kind(btf, btf_type_name, kind); 9860 } 9861 9862 static inline int find_attach_btf_id(struct btf *btf, const char *name, 9863 enum bpf_attach_type attach_type) 9864 { 9865 const char *prefix; 9866 int kind; 9867 9868 btf_get_kernel_prefix_kind(attach_type, &prefix, &kind); 9869 return find_btf_by_prefix_kind(btf, prefix, name, kind); 9870 } 9871 9872 int libbpf_find_vmlinux_btf_id(const char *name, 9873 enum bpf_attach_type attach_type) 9874 { 9875 struct btf *btf; 9876 int err; 9877 9878 btf = btf__load_vmlinux_btf(); 9879 err = libbpf_get_error(btf); 9880 if (err) { 9881 pr_warn("vmlinux BTF is not found\n"); 9882 return libbpf_err(err); 9883 } 9884 9885 err = find_attach_btf_id(btf, name, attach_type); 9886 if (err <= 0) 9887 pr_warn("%s is not found in vmlinux BTF\n", name); 9888 9889 btf__free(btf); 9890 return libbpf_err(err); 9891 } 9892 9893 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) 9894 { 9895 struct bpf_prog_info info; 9896 __u32 info_len = sizeof(info); 9897 struct btf *btf; 9898 int err; 9899 9900 memset(&info, 0, info_len); 9901 err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len); 9902 if (err) { 9903 pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %d\n", 9904 attach_prog_fd, err); 9905 return err; 9906 } 9907 9908 err = -EINVAL; 9909 if (!info.btf_id) { 9910 pr_warn("The target program doesn't have BTF\n"); 9911 goto out; 9912 } 9913 btf = btf__load_from_kernel_by_id(info.btf_id); 9914 err = libbpf_get_error(btf); 9915 if (err) { 9916 pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err); 9917 goto out; 9918 } 9919 err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); 9920 btf__free(btf); 9921 if (err <= 0) { 9922 pr_warn("%s is not found in prog's BTF\n", name); 9923 goto out; 9924 } 9925 out: 9926 return err; 9927 } 9928 9929 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, 9930 enum bpf_attach_type attach_type, 9931 int *btf_obj_fd, int *btf_type_id) 9932 { 9933 int ret, i, mod_len; 9934 const char *fn_name, *mod_name = NULL; 9935 9936 fn_name = strchr(attach_name, ':'); 9937 if (fn_name) { 9938 mod_name = attach_name; 9939 
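/* Illustrative example (hypothetical module/function names): for an
* attach_name of "nf_conntrack:nf_ct_delete", mod_name points at the full
* string, mod_len below becomes 12 ("nf_conntrack"), and fn_name, once
* advanced past the ':', points at "nf_ct_delete"; a bare "nf_ct_delete"
* would leave mod_name NULL, so vmlinux BTF is searched before module BTFs.
*/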
mod_len = fn_name - mod_name; 9940 fn_name++; 9941 } 9942 9943 if (!mod_name || strncmp(mod_name, "vmlinux", mod_len) == 0) { 9944 ret = find_attach_btf_id(obj->btf_vmlinux, 9945 mod_name ? fn_name : attach_name, 9946 attach_type); 9947 if (ret > 0) { 9948 *btf_obj_fd = 0; /* vmlinux BTF */ 9949 *btf_type_id = ret; 9950 return 0; 9951 } 9952 if (ret != -ENOENT) 9953 return ret; 9954 } 9955 9956 ret = load_module_btfs(obj); 9957 if (ret) 9958 return ret; 9959 9960 for (i = 0; i < obj->btf_module_cnt; i++) { 9961 const struct module_btf *mod = &obj->btf_modules[i]; 9962 9963 if (mod_name && strncmp(mod->name, mod_name, mod_len) != 0) 9964 continue; 9965 9966 ret = find_attach_btf_id(mod->btf, 9967 mod_name ? fn_name : attach_name, 9968 attach_type); 9969 if (ret > 0) { 9970 *btf_obj_fd = mod->fd; 9971 *btf_type_id = ret; 9972 return 0; 9973 } 9974 if (ret == -ENOENT) 9975 continue; 9976 9977 return ret; 9978 } 9979 9980 return -ESRCH; 9981 } 9982 9983 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, 9984 int *btf_obj_fd, int *btf_type_id) 9985 { 9986 enum bpf_attach_type attach_type = prog->expected_attach_type; 9987 __u32 attach_prog_fd = prog->attach_prog_fd; 9988 int err = 0; 9989 9990 /* BPF program's BTF ID */ 9991 if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) { 9992 if (!attach_prog_fd) { 9993 pr_warn("prog '%s': attach program FD is not set\n", prog->name); 9994 return -EINVAL; 9995 } 9996 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd); 9997 if (err < 0) { 9998 pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n", 9999 prog->name, attach_prog_fd, attach_name, err); 10000 return err; 10001 } 10002 *btf_obj_fd = 0; 10003 *btf_type_id = err; 10004 return 0; 10005 } 10006 10007 /* kernel/module BTF ID */ 10008 if (prog->obj->gen_loader) { 10009 bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type); 10010 *btf_obj_fd = 0; 10011 *btf_type_id = 1; 10012 } else { 10013 err = find_kernel_btf_id(prog->obj, attach_name, 10014 attach_type, btf_obj_fd, 10015 btf_type_id); 10016 } 10017 if (err) { 10018 pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n", 10019 prog->name, attach_name, err); 10020 return err; 10021 } 10022 return 0; 10023 } 10024 10025 int libbpf_attach_type_by_name(const char *name, 10026 enum bpf_attach_type *attach_type) 10027 { 10028 char *type_names; 10029 const struct bpf_sec_def *sec_def; 10030 10031 if (!name) 10032 return libbpf_err(-EINVAL); 10033 10034 sec_def = find_sec_def(name); 10035 if (!sec_def) { 10036 pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); 10037 type_names = libbpf_get_type_names(true); 10038 if (type_names != NULL) { 10039 pr_debug("attachable section(type) names are:%s\n", type_names); 10040 free(type_names); 10041 } 10042 10043 return libbpf_err(-EINVAL); 10044 } 10045 10046 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) 10047 return libbpf_err(-EINVAL); 10048 if (!(sec_def->cookie & SEC_ATTACHABLE)) 10049 return libbpf_err(-EINVAL); 10050 10051 *attach_type = sec_def->expected_attach_type; 10052 return 0; 10053 } 10054 10055 int bpf_map__fd(const struct bpf_map *map) 10056 { 10057 if (!map) 10058 return libbpf_err(-EINVAL); 10059 if (!map_is_created(map)) 10060 return -1; 10061 return map->fd; 10062 } 10063 10064 static bool map_uses_real_name(const struct bpf_map *map) 10065 { 10066 /* Since libbpf started to support custom .data.* and .rodata.* maps, 10067 * their user-visible 
name differs from kernel-visible name. Users see 10068 * such map's corresponding ELF section name as a map name. 10069 * This check distinguishes .data/.rodata from .data.* and .rodata.* 10070 * maps to know which name has to be returned to the user. 10071 */ 10072 if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0) 10073 return true; 10074 if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0) 10075 return true; 10076 return false; 10077 } 10078 10079 const char *bpf_map__name(const struct bpf_map *map) 10080 { 10081 if (!map) 10082 return NULL; 10083 10084 if (map_uses_real_name(map)) 10085 return map->real_name; 10086 10087 return map->name; 10088 } 10089 10090 enum bpf_map_type bpf_map__type(const struct bpf_map *map) 10091 { 10092 return map->def.type; 10093 } 10094 10095 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type) 10096 { 10097 if (map_is_created(map)) 10098 return libbpf_err(-EBUSY); 10099 map->def.type = type; 10100 return 0; 10101 } 10102 10103 __u32 bpf_map__map_flags(const struct bpf_map *map) 10104 { 10105 return map->def.map_flags; 10106 } 10107 10108 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) 10109 { 10110 if (map_is_created(map)) 10111 return libbpf_err(-EBUSY); 10112 map->def.map_flags = flags; 10113 return 0; 10114 } 10115 10116 __u64 bpf_map__map_extra(const struct bpf_map *map) 10117 { 10118 return map->map_extra; 10119 } 10120 10121 int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra) 10122 { 10123 if (map_is_created(map)) 10124 return libbpf_err(-EBUSY); 10125 map->map_extra = map_extra; 10126 return 0; 10127 } 10128 10129 __u32 bpf_map__numa_node(const struct bpf_map *map) 10130 { 10131 return map->numa_node; 10132 } 10133 10134 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node) 10135 { 10136 if (map_is_created(map)) 10137 return libbpf_err(-EBUSY); 10138 map->numa_node = numa_node; 10139 return 0; 10140 } 10141 10142 __u32 bpf_map__key_size(const struct bpf_map *map) 10143 { 10144 return map->def.key_size; 10145 } 10146 10147 int bpf_map__set_key_size(struct bpf_map *map, __u32 size) 10148 { 10149 if (map_is_created(map)) 10150 return libbpf_err(-EBUSY); 10151 map->def.key_size = size; 10152 return 0; 10153 } 10154 10155 __u32 bpf_map__value_size(const struct bpf_map *map) 10156 { 10157 return map->def.value_size; 10158 } 10159 10160 static int map_btf_datasec_resize(struct bpf_map *map, __u32 size) 10161 { 10162 struct btf *btf; 10163 struct btf_type *datasec_type, *var_type; 10164 struct btf_var_secinfo *var; 10165 const struct btf_type *array_type; 10166 const struct btf_array *array; 10167 int vlen, element_sz, new_array_id; 10168 __u32 nr_elements; 10169 10170 /* check btf existence */ 10171 btf = bpf_object__btf(map->obj); 10172 if (!btf) 10173 return -ENOENT; 10174 10175 /* verify map is datasec */ 10176 datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map)); 10177 if (!btf_is_datasec(datasec_type)) { 10178 pr_warn("map '%s': cannot be resized, map value type is not a datasec\n", 10179 bpf_map__name(map)); 10180 return -EINVAL; 10181 } 10182 10183 /* verify datasec has at least one var */ 10184 vlen = btf_vlen(datasec_type); 10185 if (vlen == 0) { 10186 pr_warn("map '%s': cannot be resized, map value datasec is empty\n", 10187 bpf_map__name(map)); 10188 return -EINVAL; 10189 } 10190 10191 /* verify last var in the datasec is an array */ 10192 var = &btf_var_secinfos(datasec_type)[vlen - 1]; 10193 var_type = 
btf_type_by_id(btf, var->type); 10194 array_type = skip_mods_and_typedefs(btf, var_type->type, NULL); 10195 if (!btf_is_array(array_type)) { 10196 pr_warn("map '%s': cannot be resized, last var must be an array\n", 10197 bpf_map__name(map)); 10198 return -EINVAL; 10199 } 10200 10201 /* verify request size aligns with array */ 10202 array = btf_array(array_type); 10203 element_sz = btf__resolve_size(btf, array->type); 10204 if (element_sz <= 0 || (size - var->offset) % element_sz != 0) { 10205 pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n", 10206 bpf_map__name(map), element_sz, size); 10207 return -EINVAL; 10208 } 10209 10210 /* create a new array based on the existing array, but with new length */ 10211 nr_elements = (size - var->offset) / element_sz; 10212 new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements); 10213 if (new_array_id < 0) 10214 return new_array_id; 10215 10216 /* adding a new btf type invalidates existing pointers to btf objects, 10217 * so refresh pointers before proceeding 10218 */ 10219 datasec_type = btf_type_by_id(btf, map->btf_value_type_id); 10220 var = &btf_var_secinfos(datasec_type)[vlen - 1]; 10221 var_type = btf_type_by_id(btf, var->type); 10222 10223 /* finally update btf info */ 10224 datasec_type->size = size; 10225 var->size = size - var->offset; 10226 var_type->type = new_array_id; 10227 10228 return 0; 10229 } 10230 10231 int bpf_map__set_value_size(struct bpf_map *map, __u32 size) 10232 { 10233 if (map->obj->loaded || map->reused) 10234 return libbpf_err(-EBUSY); 10235 10236 if (map->mmaped) { 10237 size_t mmap_old_sz, mmap_new_sz; 10238 int err; 10239 10240 if (map->def.type != BPF_MAP_TYPE_ARRAY) 10241 return -EOPNOTSUPP; 10242 10243 mmap_old_sz = bpf_map_mmap_sz(map); 10244 mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries); 10245 err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz); 10246 if (err) { 10247 pr_warn("map '%s': failed to resize memory-mapped region: %d\n", 10248 bpf_map__name(map), err); 10249 return err; 10250 } 10251 err = map_btf_datasec_resize(map, size); 10252 if (err && err != -ENOENT) { 10253 pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n", 10254 bpf_map__name(map), err); 10255 map->btf_value_type_id = 0; 10256 map->btf_key_type_id = 0; 10257 } 10258 } 10259 10260 map->def.value_size = size; 10261 return 0; 10262 } 10263 10264 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map) 10265 { 10266 return map ? map->btf_key_type_id : 0; 10267 } 10268 10269 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map) 10270 { 10271 return map ? 
map->btf_value_type_id : 0; 10272 } 10273 10274 int bpf_map__set_initial_value(struct bpf_map *map, 10275 const void *data, size_t size) 10276 { 10277 size_t actual_sz; 10278 10279 if (map->obj->loaded || map->reused) 10280 return libbpf_err(-EBUSY); 10281 10282 if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG) 10283 return libbpf_err(-EINVAL); 10284 10285 if (map->def.type == BPF_MAP_TYPE_ARENA) 10286 actual_sz = map->obj->arena_data_sz; 10287 else 10288 actual_sz = map->def.value_size; 10289 if (size != actual_sz) 10290 return libbpf_err(-EINVAL); 10291 10292 memcpy(map->mmaped, data, size); 10293 return 0; 10294 } 10295 10296 void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize) 10297 { 10298 if (bpf_map__is_struct_ops(map)) { 10299 if (psize) 10300 *psize = map->def.value_size; 10301 return map->st_ops->data; 10302 } 10303 10304 if (!map->mmaped) 10305 return NULL; 10306 10307 if (map->def.type == BPF_MAP_TYPE_ARENA) 10308 *psize = map->obj->arena_data_sz; 10309 else 10310 *psize = map->def.value_size; 10311 10312 return map->mmaped; 10313 } 10314 10315 bool bpf_map__is_internal(const struct bpf_map *map) 10316 { 10317 return map->libbpf_type != LIBBPF_MAP_UNSPEC; 10318 } 10319 10320 __u32 bpf_map__ifindex(const struct bpf_map *map) 10321 { 10322 return map->map_ifindex; 10323 } 10324 10325 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) 10326 { 10327 if (map_is_created(map)) 10328 return libbpf_err(-EBUSY); 10329 map->map_ifindex = ifindex; 10330 return 0; 10331 } 10332 10333 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) 10334 { 10335 if (!bpf_map_type__is_map_in_map(map->def.type)) { 10336 pr_warn("error: unsupported map type\n"); 10337 return libbpf_err(-EINVAL); 10338 } 10339 if (map->inner_map_fd != -1) { 10340 pr_warn("error: inner_map_fd already specified\n"); 10341 return libbpf_err(-EINVAL); 10342 } 10343 if (map->inner_map) { 10344 bpf_map__destroy(map->inner_map); 10345 zfree(&map->inner_map); 10346 } 10347 map->inner_map_fd = fd; 10348 return 0; 10349 } 10350 10351 static struct bpf_map * 10352 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) 10353 { 10354 ssize_t idx; 10355 struct bpf_map *s, *e; 10356 10357 if (!obj || !obj->maps) 10358 return errno = EINVAL, NULL; 10359 10360 s = obj->maps; 10361 e = obj->maps + obj->nr_maps; 10362 10363 if ((m < s) || (m >= e)) { 10364 pr_warn("error in %s: map handler doesn't belong to object\n", 10365 __func__); 10366 return errno = EINVAL, NULL; 10367 } 10368 10369 idx = (m - obj->maps) + i; 10370 if (idx >= obj->nr_maps || idx < 0) 10371 return NULL; 10372 return &obj->maps[idx]; 10373 } 10374 10375 struct bpf_map * 10376 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) 10377 { 10378 if (prev == NULL && obj != NULL) 10379 return obj->maps; 10380 10381 return __bpf_map__iter(prev, obj, 1); 10382 } 10383 10384 struct bpf_map * 10385 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next) 10386 { 10387 if (next == NULL && obj != NULL) { 10388 if (!obj->nr_maps) 10389 return NULL; 10390 return obj->maps + obj->nr_maps - 1; 10391 } 10392 10393 return __bpf_map__iter(next, obj, -1); 10394 } 10395 10396 struct bpf_map * 10397 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name) 10398 { 10399 struct bpf_map *pos; 10400 10401 bpf_object__for_each_map(pos, obj) { 10402 /* if it's a special internal map name (which always starts 10403 * with dot) then check if that special name matches 
the 10404 * real map name (ELF section name) 10405 */ 10406 if (name[0] == '.') { 10407 if (pos->real_name && strcmp(pos->real_name, name) == 0) 10408 return pos; 10409 continue; 10410 } 10411 /* otherwise map name has to be an exact match */ 10412 if (map_uses_real_name(pos)) { 10413 if (strcmp(pos->real_name, name) == 0) 10414 return pos; 10415 continue; 10416 } 10417 if (strcmp(pos->name, name) == 0) 10418 return pos; 10419 } 10420 return errno = ENOENT, NULL; 10421 } 10422 10423 int 10424 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name) 10425 { 10426 return bpf_map__fd(bpf_object__find_map_by_name(obj, name)); 10427 } 10428 10429 static int validate_map_op(const struct bpf_map *map, size_t key_sz, 10430 size_t value_sz, bool check_value_sz) 10431 { 10432 if (!map_is_created(map)) /* map is not yet created */ 10433 return -ENOENT; 10434 10435 if (map->def.key_size != key_sz) { 10436 pr_warn("map '%s': unexpected key size %zu provided, expected %u\n", 10437 map->name, key_sz, map->def.key_size); 10438 return -EINVAL; 10439 } 10440 10441 if (map->fd < 0) { 10442 pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name); 10443 return -EINVAL; 10444 } 10445 10446 if (!check_value_sz) 10447 return 0; 10448 10449 switch (map->def.type) { 10450 case BPF_MAP_TYPE_PERCPU_ARRAY: 10451 case BPF_MAP_TYPE_PERCPU_HASH: 10452 case BPF_MAP_TYPE_LRU_PERCPU_HASH: 10453 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: { 10454 int num_cpu = libbpf_num_possible_cpus(); 10455 size_t elem_sz = roundup(map->def.value_size, 8); 10456 10457 if (value_sz != num_cpu * elem_sz) { 10458 pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n", 10459 map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz); 10460 return -EINVAL; 10461 } 10462 break; 10463 } 10464 default: 10465 if (map->def.value_size != value_sz) { 10466 pr_warn("map '%s': unexpected value size %zu provided, expected %u\n", 10467 map->name, value_sz, map->def.value_size); 10468 return -EINVAL; 10469 } 10470 break; 10471 } 10472 return 0; 10473 } 10474 10475 int bpf_map__lookup_elem(const struct bpf_map *map, 10476 const void *key, size_t key_sz, 10477 void *value, size_t value_sz, __u64 flags) 10478 { 10479 int err; 10480 10481 err = validate_map_op(map, key_sz, value_sz, true); 10482 if (err) 10483 return libbpf_err(err); 10484 10485 return bpf_map_lookup_elem_flags(map->fd, key, value, flags); 10486 } 10487 10488 int bpf_map__update_elem(const struct bpf_map *map, 10489 const void *key, size_t key_sz, 10490 const void *value, size_t value_sz, __u64 flags) 10491 { 10492 int err; 10493 10494 err = validate_map_op(map, key_sz, value_sz, true); 10495 if (err) 10496 return libbpf_err(err); 10497 10498 return bpf_map_update_elem(map->fd, key, value, flags); 10499 } 10500 10501 int bpf_map__delete_elem(const struct bpf_map *map, 10502 const void *key, size_t key_sz, __u64 flags) 10503 { 10504 int err; 10505 10506 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); 10507 if (err) 10508 return libbpf_err(err); 10509 10510 return bpf_map_delete_elem_flags(map->fd, key, flags); 10511 } 10512 10513 int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, 10514 const void *key, size_t key_sz, 10515 void *value, size_t value_sz, __u64 flags) 10516 { 10517 int err; 10518 10519 err = validate_map_op(map, key_sz, value_sz, true); 10520 if (err) 10521 return libbpf_err(err); 10522 10523 return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags); 
10524 }
10525
10526 int bpf_map__get_next_key(const struct bpf_map *map,
10527 const void *cur_key, void *next_key, size_t key_sz)
10528 {
10529 int err;
10530
10531 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
10532 if (err)
10533 return libbpf_err(err);
10534
10535 return bpf_map_get_next_key(map->fd, cur_key, next_key);
10536 }
10537
10538 long libbpf_get_error(const void *ptr)
10539 {
10540 if (!IS_ERR_OR_NULL(ptr))
10541 return 0;
10542
10543 if (IS_ERR(ptr))
10544 errno = -PTR_ERR(ptr);
10545
10546 /* If ptr == NULL, then errno should be already set by the failing
10547 * API, because libbpf never returns NULL on success and it now always
10548 * sets errno on error. So no extra errno handling for ptr == NULL
10549 * case.
10550 */
10551 return -errno;
10552 }
10553
10554 /* Replace link's underlying BPF program with the new one */
10555 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
10556 {
10557 int ret;
10558 int prog_fd = bpf_program__fd(prog);
10559
10560 if (prog_fd < 0) {
10561 pr_warn("prog '%s': can't use BPF program without FD (was it loaded?)\n",
10562 prog->name);
10563 return libbpf_err(-EINVAL);
10564 }
10565
10566 ret = bpf_link_update(bpf_link__fd(link), prog_fd, NULL);
10567 return libbpf_err_errno(ret);
10568 }
10569
10570 /* Release "ownership" of the underlying BPF resource (typically, a BPF
10571 * program attached to some BPF hook, e.g., tracepoint, kprobe, etc). A
10572 * disconnected link, when destroyed through a bpf_link__destroy() call,
10573 * won't attempt to detach/unregister that BPF resource. This is useful in
10574 * situations where, say, an attached BPF program has to outlive the
10575 * userspace program that attached it. Depending on the type of BPF program,
10576 * though, additional steps (like pinning the BPF program in BPF FS) might be
10577 * necessary to ensure that exit of the userspace program doesn't trigger
10578 * automatic detachment and cleanup inside the kernel.
10579 */ 10580 void bpf_link__disconnect(struct bpf_link *link) 10581 { 10582 link->disconnected = true; 10583 } 10584 10585 int bpf_link__destroy(struct bpf_link *link) 10586 { 10587 int err = 0; 10588 10589 if (IS_ERR_OR_NULL(link)) 10590 return 0; 10591 10592 if (!link->disconnected && link->detach) 10593 err = link->detach(link); 10594 if (link->pin_path) 10595 free(link->pin_path); 10596 if (link->dealloc) 10597 link->dealloc(link); 10598 else 10599 free(link); 10600 10601 return libbpf_err(err); 10602 } 10603 10604 int bpf_link__fd(const struct bpf_link *link) 10605 { 10606 return link->fd; 10607 } 10608 10609 const char *bpf_link__pin_path(const struct bpf_link *link) 10610 { 10611 return link->pin_path; 10612 } 10613 10614 static int bpf_link__detach_fd(struct bpf_link *link) 10615 { 10616 return libbpf_err_errno(close(link->fd)); 10617 } 10618 10619 struct bpf_link *bpf_link__open(const char *path) 10620 { 10621 struct bpf_link *link; 10622 int fd; 10623 10624 fd = bpf_obj_get(path); 10625 if (fd < 0) { 10626 fd = -errno; 10627 pr_warn("failed to open link at %s: %d\n", path, fd); 10628 return libbpf_err_ptr(fd); 10629 } 10630 10631 link = calloc(1, sizeof(*link)); 10632 if (!link) { 10633 close(fd); 10634 return libbpf_err_ptr(-ENOMEM); 10635 } 10636 link->detach = &bpf_link__detach_fd; 10637 link->fd = fd; 10638 10639 link->pin_path = strdup(path); 10640 if (!link->pin_path) { 10641 bpf_link__destroy(link); 10642 return libbpf_err_ptr(-ENOMEM); 10643 } 10644 10645 return link; 10646 } 10647 10648 int bpf_link__detach(struct bpf_link *link) 10649 { 10650 return bpf_link_detach(link->fd) ? -errno : 0; 10651 } 10652 10653 int bpf_link__pin(struct bpf_link *link, const char *path) 10654 { 10655 int err; 10656 10657 if (link->pin_path) 10658 return libbpf_err(-EBUSY); 10659 err = make_parent_dir(path); 10660 if (err) 10661 return libbpf_err(err); 10662 err = check_path(path); 10663 if (err) 10664 return libbpf_err(err); 10665 10666 link->pin_path = strdup(path); 10667 if (!link->pin_path) 10668 return libbpf_err(-ENOMEM); 10669 10670 if (bpf_obj_pin(link->fd, link->pin_path)) { 10671 err = -errno; 10672 zfree(&link->pin_path); 10673 return libbpf_err(err); 10674 } 10675 10676 pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path); 10677 return 0; 10678 } 10679 10680 int bpf_link__unpin(struct bpf_link *link) 10681 { 10682 int err; 10683 10684 if (!link->pin_path) 10685 return libbpf_err(-EINVAL); 10686 10687 err = unlink(link->pin_path); 10688 if (err != 0) 10689 return -errno; 10690 10691 pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path); 10692 zfree(&link->pin_path); 10693 return 0; 10694 } 10695 10696 struct bpf_link_perf { 10697 struct bpf_link link; 10698 int perf_event_fd; 10699 /* legacy kprobe support: keep track of probe identifier and type */ 10700 char *legacy_probe_name; 10701 bool legacy_is_kprobe; 10702 bool legacy_is_retprobe; 10703 }; 10704 10705 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe); 10706 static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe); 10707 10708 static int bpf_link_perf_detach(struct bpf_link *link) 10709 { 10710 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 10711 int err = 0; 10712 10713 if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0) 10714 err = -errno; 10715 10716 if (perf_link->perf_event_fd != link->fd) 10717 close(perf_link->perf_event_fd); 10718 close(link->fd); 10719 10720 /* legacy uprobe/kprobe needs to 
be removed after perf event fd closure */ 10721 if (perf_link->legacy_probe_name) { 10722 if (perf_link->legacy_is_kprobe) { 10723 err = remove_kprobe_event_legacy(perf_link->legacy_probe_name, 10724 perf_link->legacy_is_retprobe); 10725 } else { 10726 err = remove_uprobe_event_legacy(perf_link->legacy_probe_name, 10727 perf_link->legacy_is_retprobe); 10728 } 10729 } 10730 10731 return err; 10732 } 10733 10734 static void bpf_link_perf_dealloc(struct bpf_link *link) 10735 { 10736 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 10737 10738 free(perf_link->legacy_probe_name); 10739 free(perf_link); 10740 } 10741 10742 struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, 10743 const struct bpf_perf_event_opts *opts) 10744 { 10745 char errmsg[STRERR_BUFSIZE]; 10746 struct bpf_link_perf *link; 10747 int prog_fd, link_fd = -1, err; 10748 bool force_ioctl_attach; 10749 10750 if (!OPTS_VALID(opts, bpf_perf_event_opts)) 10751 return libbpf_err_ptr(-EINVAL); 10752 10753 if (pfd < 0) { 10754 pr_warn("prog '%s': invalid perf event FD %d\n", 10755 prog->name, pfd); 10756 return libbpf_err_ptr(-EINVAL); 10757 } 10758 prog_fd = bpf_program__fd(prog); 10759 if (prog_fd < 0) { 10760 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", 10761 prog->name); 10762 return libbpf_err_ptr(-EINVAL); 10763 } 10764 10765 link = calloc(1, sizeof(*link)); 10766 if (!link) 10767 return libbpf_err_ptr(-ENOMEM); 10768 link->link.detach = &bpf_link_perf_detach; 10769 link->link.dealloc = &bpf_link_perf_dealloc; 10770 link->perf_event_fd = pfd; 10771 10772 force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false); 10773 if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) { 10774 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts, 10775 .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0)); 10776 10777 link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts); 10778 if (link_fd < 0) { 10779 err = -errno; 10780 pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n", 10781 prog->name, pfd, 10782 err, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10783 goto err_out; 10784 } 10785 link->link.fd = link_fd; 10786 } else { 10787 if (OPTS_GET(opts, bpf_cookie, 0)) { 10788 pr_warn("prog '%s': user context value is not supported\n", prog->name); 10789 err = -EOPNOTSUPP; 10790 goto err_out; 10791 } 10792 10793 if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { 10794 err = -errno; 10795 pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n", 10796 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10797 if (err == -EPROTO) 10798 pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", 10799 prog->name, pfd); 10800 goto err_out; 10801 } 10802 link->link.fd = pfd; 10803 } 10804 if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { 10805 err = -errno; 10806 pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", 10807 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10808 goto err_out; 10809 } 10810 10811 return &link->link; 10812 err_out: 10813 if (link_fd >= 0) 10814 close(link_fd); 10815 free(link); 10816 return libbpf_err_ptr(err); 10817 } 10818 10819 struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd) 10820 { 10821 return bpf_program__attach_perf_event_opts(prog, pfd, NULL); 10822 } 10823 10824 /* 10825 * this function is expected to parse 
integer in the range of [0, 2^31-1] from 10826 * given file using scanf format string fmt. If actual parsed value is 10827 * negative, the result might be indistinguishable from error 10828 */ 10829 static int parse_uint_from_file(const char *file, const char *fmt) 10830 { 10831 char buf[STRERR_BUFSIZE]; 10832 int err, ret; 10833 FILE *f; 10834 10835 f = fopen(file, "re"); 10836 if (!f) { 10837 err = -errno; 10838 pr_debug("failed to open '%s': %s\n", file, 10839 libbpf_strerror_r(err, buf, sizeof(buf))); 10840 return err; 10841 } 10842 err = fscanf(f, fmt, &ret); 10843 if (err != 1) { 10844 err = err == EOF ? -EIO : -errno; 10845 pr_debug("failed to parse '%s': %s\n", file, 10846 libbpf_strerror_r(err, buf, sizeof(buf))); 10847 fclose(f); 10848 return err; 10849 } 10850 fclose(f); 10851 return ret; 10852 } 10853 10854 static int determine_kprobe_perf_type(void) 10855 { 10856 const char *file = "/sys/bus/event_source/devices/kprobe/type"; 10857 10858 return parse_uint_from_file(file, "%d\n"); 10859 } 10860 10861 static int determine_uprobe_perf_type(void) 10862 { 10863 const char *file = "/sys/bus/event_source/devices/uprobe/type"; 10864 10865 return parse_uint_from_file(file, "%d\n"); 10866 } 10867 10868 static int determine_kprobe_retprobe_bit(void) 10869 { 10870 const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe"; 10871 10872 return parse_uint_from_file(file, "config:%d\n"); 10873 } 10874 10875 static int determine_uprobe_retprobe_bit(void) 10876 { 10877 const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe"; 10878 10879 return parse_uint_from_file(file, "config:%d\n"); 10880 } 10881 10882 #define PERF_UPROBE_REF_CTR_OFFSET_BITS 32 10883 #define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32 10884 10885 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, 10886 uint64_t offset, int pid, size_t ref_ctr_off) 10887 { 10888 const size_t attr_sz = sizeof(struct perf_event_attr); 10889 struct perf_event_attr attr; 10890 char errmsg[STRERR_BUFSIZE]; 10891 int type, pfd; 10892 10893 if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) 10894 return -EINVAL; 10895 10896 memset(&attr, 0, attr_sz); 10897 10898 type = uprobe ? determine_uprobe_perf_type() 10899 : determine_kprobe_perf_type(); 10900 if (type < 0) { 10901 pr_warn("failed to determine %s perf type: %s\n", 10902 uprobe ? "uprobe" : "kprobe", 10903 libbpf_strerror_r(type, errmsg, sizeof(errmsg))); 10904 return type; 10905 } 10906 if (retprobe) { 10907 int bit = uprobe ? determine_uprobe_retprobe_bit() 10908 : determine_kprobe_retprobe_bit(); 10909 10910 if (bit < 0) { 10911 pr_warn("failed to determine %s retprobe bit: %s\n", 10912 uprobe ? "uprobe" : "kprobe", 10913 libbpf_strerror_r(bit, errmsg, sizeof(errmsg))); 10914 return bit; 10915 } 10916 attr.config |= 1 << bit; 10917 } 10918 attr.size = attr_sz; 10919 attr.type = type; 10920 attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT; 10921 attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */ 10922 attr.config2 = offset; /* kprobe_addr or probe_offset */ 10923 10924 /* pid filter is meaningful only for uprobes */ 10925 pfd = syscall(__NR_perf_event_open, &attr, 10926 pid < 0 ? -1 : pid /* pid */, 10927 pid == -1 ? 0 : -1 /* cpu */, 10928 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 10929 return pfd >= 0 ? pfd : -errno; 10930 } 10931 10932 static int append_to_file(const char *file, const char *fmt, ...) 
10933 { 10934 int fd, n, err = 0; 10935 va_list ap; 10936 char buf[1024]; 10937 10938 va_start(ap, fmt); 10939 n = vsnprintf(buf, sizeof(buf), fmt, ap); 10940 va_end(ap); 10941 10942 if (n < 0 || n >= sizeof(buf)) 10943 return -EINVAL; 10944 10945 fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0); 10946 if (fd < 0) 10947 return -errno; 10948 10949 if (write(fd, buf, n) < 0) 10950 err = -errno; 10951 10952 close(fd); 10953 return err; 10954 } 10955 10956 #define DEBUGFS "/sys/kernel/debug/tracing" 10957 #define TRACEFS "/sys/kernel/tracing" 10958 10959 static bool use_debugfs(void) 10960 { 10961 static int has_debugfs = -1; 10962 10963 if (has_debugfs < 0) 10964 has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0; 10965 10966 return has_debugfs == 1; 10967 } 10968 10969 static const char *tracefs_path(void) 10970 { 10971 return use_debugfs() ? DEBUGFS : TRACEFS; 10972 } 10973 10974 static const char *tracefs_kprobe_events(void) 10975 { 10976 return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events"; 10977 } 10978 10979 static const char *tracefs_uprobe_events(void) 10980 { 10981 return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events"; 10982 } 10983 10984 static const char *tracefs_available_filter_functions(void) 10985 { 10986 return use_debugfs() ? DEBUGFS"/available_filter_functions" 10987 : TRACEFS"/available_filter_functions"; 10988 } 10989 10990 static const char *tracefs_available_filter_functions_addrs(void) 10991 { 10992 return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs" 10993 : TRACEFS"/available_filter_functions_addrs"; 10994 } 10995 10996 static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, 10997 const char *kfunc_name, size_t offset) 10998 { 10999 static int index = 0; 11000 int i; 11001 11002 snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, 11003 __sync_fetch_and_add(&index, 1)); 11004 11005 /* sanitize binary_path in the probe name */ 11006 for (i = 0; buf[i]; i++) { 11007 if (!isalnum(buf[i])) 11008 buf[i] = '_'; 11009 } 11010 } 11011 11012 static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, 11013 const char *kfunc_name, size_t offset) 11014 { 11015 return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx", 11016 retprobe ? 'r' : 'p', 11017 retprobe ? "kretprobes" : "kprobes", 11018 probe_name, kfunc_name, offset); 11019 } 11020 11021 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe) 11022 { 11023 return append_to_file(tracefs_kprobe_events(), "-:%s/%s", 11024 retprobe ? "kretprobes" : "kprobes", probe_name); 11025 } 11026 11027 static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe) 11028 { 11029 char file[256]; 11030 11031 snprintf(file, sizeof(file), "%s/events/%s/%s/id", 11032 tracefs_path(), retprobe ? 
"kretprobes" : "kprobes", probe_name); 11033 11034 return parse_uint_from_file(file, "%d\n"); 11035 } 11036 11037 static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, 11038 const char *kfunc_name, size_t offset, int pid) 11039 { 11040 const size_t attr_sz = sizeof(struct perf_event_attr); 11041 struct perf_event_attr attr; 11042 char errmsg[STRERR_BUFSIZE]; 11043 int type, pfd, err; 11044 11045 err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset); 11046 if (err < 0) { 11047 pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n", 11048 kfunc_name, offset, 11049 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11050 return err; 11051 } 11052 type = determine_kprobe_perf_type_legacy(probe_name, retprobe); 11053 if (type < 0) { 11054 err = type; 11055 pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n", 11056 kfunc_name, offset, 11057 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11058 goto err_clean_legacy; 11059 } 11060 11061 memset(&attr, 0, attr_sz); 11062 attr.size = attr_sz; 11063 attr.config = type; 11064 attr.type = PERF_TYPE_TRACEPOINT; 11065 11066 pfd = syscall(__NR_perf_event_open, &attr, 11067 pid < 0 ? -1 : pid, /* pid */ 11068 pid == -1 ? 0 : -1, /* cpu */ 11069 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 11070 if (pfd < 0) { 11071 err = -errno; 11072 pr_warn("legacy kprobe perf_event_open() failed: %s\n", 11073 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11074 goto err_clean_legacy; 11075 } 11076 return pfd; 11077 11078 err_clean_legacy: 11079 /* Clear the newly added legacy kprobe_event */ 11080 remove_kprobe_event_legacy(probe_name, retprobe); 11081 return err; 11082 } 11083 11084 static const char *arch_specific_syscall_pfx(void) 11085 { 11086 #if defined(__x86_64__) 11087 return "x64"; 11088 #elif defined(__i386__) 11089 return "ia32"; 11090 #elif defined(__s390x__) 11091 return "s390x"; 11092 #elif defined(__s390__) 11093 return "s390"; 11094 #elif defined(__arm__) 11095 return "arm"; 11096 #elif defined(__aarch64__) 11097 return "arm64"; 11098 #elif defined(__mips__) 11099 return "mips"; 11100 #elif defined(__riscv) 11101 return "riscv"; 11102 #elif defined(__powerpc__) 11103 return "powerpc"; 11104 #elif defined(__powerpc64__) 11105 return "powerpc64"; 11106 #else 11107 return NULL; 11108 #endif 11109 } 11110 11111 int probe_kern_syscall_wrapper(int token_fd) 11112 { 11113 char syscall_name[64]; 11114 const char *ksys_pfx; 11115 11116 ksys_pfx = arch_specific_syscall_pfx(); 11117 if (!ksys_pfx) 11118 return 0; 11119 11120 snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); 11121 11122 if (determine_kprobe_perf_type() >= 0) { 11123 int pfd; 11124 11125 pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0); 11126 if (pfd >= 0) 11127 close(pfd); 11128 11129 return pfd >= 0 ? 
1 : 0; 11130 } else { /* legacy mode */ 11131 char probe_name[128]; 11132 11133 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); 11134 if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) 11135 return 0; 11136 11137 (void)remove_kprobe_event_legacy(probe_name, false); 11138 return 1; 11139 } 11140 } 11141 11142 struct bpf_link * 11143 bpf_program__attach_kprobe_opts(const struct bpf_program *prog, 11144 const char *func_name, 11145 const struct bpf_kprobe_opts *opts) 11146 { 11147 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); 11148 enum probe_attach_mode attach_mode; 11149 char errmsg[STRERR_BUFSIZE]; 11150 char *legacy_probe = NULL; 11151 struct bpf_link *link; 11152 size_t offset; 11153 bool retprobe, legacy; 11154 int pfd, err; 11155 11156 if (!OPTS_VALID(opts, bpf_kprobe_opts)) 11157 return libbpf_err_ptr(-EINVAL); 11158 11159 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT); 11160 retprobe = OPTS_GET(opts, retprobe, false); 11161 offset = OPTS_GET(opts, offset, 0); 11162 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 11163 11164 legacy = determine_kprobe_perf_type() < 0; 11165 switch (attach_mode) { 11166 case PROBE_ATTACH_MODE_LEGACY: 11167 legacy = true; 11168 pe_opts.force_ioctl_attach = true; 11169 break; 11170 case PROBE_ATTACH_MODE_PERF: 11171 if (legacy) 11172 return libbpf_err_ptr(-ENOTSUP); 11173 pe_opts.force_ioctl_attach = true; 11174 break; 11175 case PROBE_ATTACH_MODE_LINK: 11176 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK)) 11177 return libbpf_err_ptr(-ENOTSUP); 11178 break; 11179 case PROBE_ATTACH_MODE_DEFAULT: 11180 break; 11181 default: 11182 return libbpf_err_ptr(-EINVAL); 11183 } 11184 11185 if (!legacy) { 11186 pfd = perf_event_open_probe(false /* uprobe */, retprobe, 11187 func_name, offset, 11188 -1 /* pid */, 0 /* ref_ctr_off */); 11189 } else { 11190 char probe_name[256]; 11191 11192 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), 11193 func_name, offset); 11194 11195 legacy_probe = strdup(probe_name); 11196 if (!legacy_probe) 11197 return libbpf_err_ptr(-ENOMEM); 11198 11199 pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name, 11200 offset, -1 /* pid */); 11201 } 11202 if (pfd < 0) { 11203 err = -errno; 11204 pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n", 11205 prog->name, retprobe ? "kretprobe" : "kprobe", 11206 func_name, offset, 11207 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11208 goto err_out; 11209 } 11210 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); 11211 err = libbpf_get_error(link); 11212 if (err) { 11213 close(pfd); 11214 pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n", 11215 prog->name, retprobe ? 
"kretprobe" : "kprobe", 11216 func_name, offset, 11217 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11218 goto err_clean_legacy; 11219 } 11220 if (legacy) { 11221 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 11222 11223 perf_link->legacy_probe_name = legacy_probe; 11224 perf_link->legacy_is_kprobe = true; 11225 perf_link->legacy_is_retprobe = retprobe; 11226 } 11227 11228 return link; 11229 11230 err_clean_legacy: 11231 if (legacy) 11232 remove_kprobe_event_legacy(legacy_probe, retprobe); 11233 err_out: 11234 free(legacy_probe); 11235 return libbpf_err_ptr(err); 11236 } 11237 11238 struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog, 11239 bool retprobe, 11240 const char *func_name) 11241 { 11242 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts, 11243 .retprobe = retprobe, 11244 ); 11245 11246 return bpf_program__attach_kprobe_opts(prog, func_name, &opts); 11247 } 11248 11249 struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, 11250 const char *syscall_name, 11251 const struct bpf_ksyscall_opts *opts) 11252 { 11253 LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); 11254 char func_name[128]; 11255 11256 if (!OPTS_VALID(opts, bpf_ksyscall_opts)) 11257 return libbpf_err_ptr(-EINVAL); 11258 11259 if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) { 11260 /* arch_specific_syscall_pfx() should never return NULL here 11261 * because it is guarded by kernel_supports(). However, since 11262 * compiler does not know that we have an explicit conditional 11263 * as well. 11264 */ 11265 snprintf(func_name, sizeof(func_name), "__%s_sys_%s", 11266 arch_specific_syscall_pfx() ? : "", syscall_name); 11267 } else { 11268 snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); 11269 } 11270 11271 kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false); 11272 kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 11273 11274 return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts); 11275 } 11276 11277 /* Adapted from perf/util/string.c */ 11278 bool glob_match(const char *str, const char *pat) 11279 { 11280 while (*str && *pat && *pat != '*') { 11281 if (*pat == '?') { /* Matches any single character */ 11282 str++; 11283 pat++; 11284 continue; 11285 } 11286 if (*str != *pat) 11287 return false; 11288 str++; 11289 pat++; 11290 } 11291 /* Check wild card */ 11292 if (*pat == '*') { 11293 while (*pat == '*') 11294 pat++; 11295 if (!*pat) /* Tail wild card matches all */ 11296 return true; 11297 while (*str) 11298 if (glob_match(str++, pat)) 11299 return true; 11300 } 11301 return !*str && !*pat; 11302 } 11303 11304 struct kprobe_multi_resolve { 11305 const char *pattern; 11306 unsigned long *addrs; 11307 size_t cap; 11308 size_t cnt; 11309 }; 11310 11311 struct avail_kallsyms_data { 11312 char **syms; 11313 size_t cnt; 11314 struct kprobe_multi_resolve *res; 11315 }; 11316 11317 static int avail_func_cmp(const void *a, const void *b) 11318 { 11319 return strcmp(*(const char **)a, *(const char **)b); 11320 } 11321 11322 static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type, 11323 const char *sym_name, void *ctx) 11324 { 11325 struct avail_kallsyms_data *data = ctx; 11326 struct kprobe_multi_resolve *res = data->res; 11327 int err; 11328 11329 if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) 11330 return 0; 11331 11332 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1); 11333 if (err) 11334 return err; 11335 11336 
res->addrs[res->cnt++] = (unsigned long)sym_addr; 11337 return 0; 11338 } 11339 11340 static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res) 11341 { 11342 const char *available_functions_file = tracefs_available_filter_functions(); 11343 struct avail_kallsyms_data data; 11344 char sym_name[500]; 11345 FILE *f; 11346 int err = 0, ret, i; 11347 char **syms = NULL; 11348 size_t cap = 0, cnt = 0; 11349 11350 f = fopen(available_functions_file, "re"); 11351 if (!f) { 11352 err = -errno; 11353 pr_warn("failed to open %s: %d\n", available_functions_file, err); 11354 return err; 11355 } 11356 11357 while (true) { 11358 char *name; 11359 11360 ret = fscanf(f, "%499s%*[^\n]\n", sym_name); 11361 if (ret == EOF && feof(f)) 11362 break; 11363 11364 if (ret != 1) { 11365 pr_warn("failed to parse available_filter_functions entry: %d\n", ret); 11366 err = -EINVAL; 11367 goto cleanup; 11368 } 11369 11370 if (!glob_match(sym_name, res->pattern)) 11371 continue; 11372 11373 err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1); 11374 if (err) 11375 goto cleanup; 11376 11377 name = strdup(sym_name); 11378 if (!name) { 11379 err = -errno; 11380 goto cleanup; 11381 } 11382 11383 syms[cnt++] = name; 11384 } 11385 11386 /* no entries found, bail out */ 11387 if (cnt == 0) { 11388 err = -ENOENT; 11389 goto cleanup; 11390 } 11391 11392 /* sort available functions */ 11393 qsort(syms, cnt, sizeof(*syms), avail_func_cmp); 11394 11395 data.syms = syms; 11396 data.res = res; 11397 data.cnt = cnt; 11398 libbpf_kallsyms_parse(avail_kallsyms_cb, &data); 11399 11400 if (res->cnt == 0) 11401 err = -ENOENT; 11402 11403 cleanup: 11404 for (i = 0; i < cnt; i++) 11405 free((char *)syms[i]); 11406 free(syms); 11407 11408 fclose(f); 11409 return err; 11410 } 11411 11412 static bool has_available_filter_functions_addrs(void) 11413 { 11414 return access(tracefs_available_filter_functions_addrs(), R_OK) != -1; 11415 } 11416 11417 static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res) 11418 { 11419 const char *available_path = tracefs_available_filter_functions_addrs(); 11420 char sym_name[500]; 11421 FILE *f; 11422 int ret, err = 0; 11423 unsigned long long sym_addr; 11424 11425 f = fopen(available_path, "re"); 11426 if (!f) { 11427 err = -errno; 11428 pr_warn("failed to open %s: %d\n", available_path, err); 11429 return err; 11430 } 11431 11432 while (true) { 11433 ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name); 11434 if (ret == EOF && feof(f)) 11435 break; 11436 11437 if (ret != 2) { 11438 pr_warn("failed to parse available_filter_functions_addrs entry: %d\n", 11439 ret); 11440 err = -EINVAL; 11441 goto cleanup; 11442 } 11443 11444 if (!glob_match(sym_name, res->pattern)) 11445 continue; 11446 11447 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, 11448 sizeof(*res->addrs), res->cnt + 1); 11449 if (err) 11450 goto cleanup; 11451 11452 res->addrs[res->cnt++] = (unsigned long)sym_addr; 11453 } 11454 11455 if (res->cnt == 0) 11456 err = -ENOENT; 11457 11458 cleanup: 11459 fclose(f); 11460 return err; 11461 } 11462 11463 struct bpf_link * 11464 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, 11465 const char *pattern, 11466 const struct bpf_kprobe_multi_opts *opts) 11467 { 11468 LIBBPF_OPTS(bpf_link_create_opts, lopts); 11469 struct kprobe_multi_resolve res = { 11470 .pattern = pattern, 11471 }; 11472 enum bpf_attach_type attach_type; 11473 struct bpf_link *link = NULL; 11474 char errmsg[STRERR_BUFSIZE]; 11475 const unsigned long 
*addrs;
11476 int err, link_fd, prog_fd;
11477 bool retprobe, session;
11478 const __u64 *cookies;
11479 const char **syms;
11480 size_t cnt;
11481
11482 if (!OPTS_VALID(opts, bpf_kprobe_multi_opts))
11483 return libbpf_err_ptr(-EINVAL);
11484
11485 prog_fd = bpf_program__fd(prog);
11486 if (prog_fd < 0) {
11487 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
11488 prog->name);
11489 return libbpf_err_ptr(-EINVAL);
11490 }
11491
11492 syms = OPTS_GET(opts, syms, false);
11493 addrs = OPTS_GET(opts, addrs, false);
11494 cnt = OPTS_GET(opts, cnt, false);
11495 cookies = OPTS_GET(opts, cookies, false);
11496
11497 if (!pattern && !addrs && !syms)
11498 return libbpf_err_ptr(-EINVAL);
11499 if (pattern && (addrs || syms || cookies || cnt))
11500 return libbpf_err_ptr(-EINVAL);
11501 if (!pattern && !cnt)
11502 return libbpf_err_ptr(-EINVAL);
11503 if (addrs && syms)
11504 return libbpf_err_ptr(-EINVAL);
11505
11506 if (pattern) {
11507 if (has_available_filter_functions_addrs())
11508 err = libbpf_available_kprobes_parse(&res);
11509 else
11510 err = libbpf_available_kallsyms_parse(&res);
11511 if (err)
11512 goto error;
11513 addrs = res.addrs;
11514 cnt = res.cnt;
11515 }
11516
11517 retprobe = OPTS_GET(opts, retprobe, false);
11518 session = OPTS_GET(opts, session, false);
11519
11520 if (retprobe && session) {
11521 err = -EINVAL; goto error; /* don't leak res.addrs resolved from pattern */ }
11522
11523 attach_type = session ? BPF_TRACE_KPROBE_SESSION : BPF_TRACE_KPROBE_MULTI;
11524
11525 lopts.kprobe_multi.syms = syms;
11526 lopts.kprobe_multi.addrs = addrs;
11527 lopts.kprobe_multi.cookies = cookies;
11528 lopts.kprobe_multi.cnt = cnt;
11529 lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0;
11530
11531 link = calloc(1, sizeof(*link));
11532 if (!link) {
11533 err = -ENOMEM;
11534 goto error;
11535 }
11536 link->detach = &bpf_link__detach_fd;
11537
11538 link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts);
11539 if (link_fd < 0) {
11540 err = -errno;
11541 pr_warn("prog '%s': failed to attach: %s\n",
11542 prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11543 goto error;
11544 }
11545 link->fd = link_fd;
11546 free(res.addrs);
11547 return link;
11548
11549 error:
11550 free(link);
11551 free(res.addrs);
11552 return libbpf_err_ptr(err);
11553 }
11554
11555 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11556 {
11557 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
11558 unsigned long offset = 0;
11559 const char *func_name;
11560 char *func;
11561 int n;
11562
11563 *link = NULL;
11564
11565 /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
11566 if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
11567 return 0;
11568
11569 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
11570 if (opts.retprobe)
11571 func_name = prog->sec_name + sizeof("kretprobe/") - 1;
11572 else
11573 func_name = prog->sec_name + sizeof("kprobe/") - 1;
11574
11575 n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
11576 if (n < 1) {
11577 pr_warn("kprobe name is invalid: %s\n", func_name);
11578 return -EINVAL;
11579 }
11580 if (opts.retprobe && offset != 0) {
11581 free(func);
11582 pr_warn("kretprobes do not support offset specification\n");
11583 return -EINVAL;
11584 }
11585
11586 opts.offset = offset;
11587 *link = bpf_program__attach_kprobe_opts(prog, func, &opts);
11588 free(func);
11589 return libbpf_get_error(*link);
11590 }
11591
11592 static int attach_ksyscall(const struct
bpf_program *prog, long cookie, struct bpf_link **link) 11593 { 11594 LIBBPF_OPTS(bpf_ksyscall_opts, opts); 11595 const char *syscall_name; 11596 11597 *link = NULL; 11598 11599 /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */ 11600 if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0) 11601 return 0; 11602 11603 opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/"); 11604 if (opts.retprobe) 11605 syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1; 11606 else 11607 syscall_name = prog->sec_name + sizeof("ksyscall/") - 1; 11608 11609 *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts); 11610 return *link ? 0 : -errno; 11611 } 11612 11613 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11614 { 11615 LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); 11616 const char *spec; 11617 char *pattern; 11618 int n; 11619 11620 *link = NULL; 11621 11622 /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */ 11623 if (strcmp(prog->sec_name, "kprobe.multi") == 0 || 11624 strcmp(prog->sec_name, "kretprobe.multi") == 0) 11625 return 0; 11626 11627 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/"); 11628 if (opts.retprobe) 11629 spec = prog->sec_name + sizeof("kretprobe.multi/") - 1; 11630 else 11631 spec = prog->sec_name + sizeof("kprobe.multi/") - 1; 11632 11633 n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern); 11634 if (n < 1) { 11635 pr_warn("kprobe multi pattern is invalid: %s\n", spec); 11636 return -EINVAL; 11637 } 11638 11639 *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts); 11640 free(pattern); 11641 return libbpf_get_error(*link); 11642 } 11643 11644 static int attach_kprobe_session(const struct bpf_program *prog, long cookie, 11645 struct bpf_link **link) 11646 { 11647 LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, .session = true); 11648 const char *spec; 11649 char *pattern; 11650 int n; 11651 11652 *link = NULL; 11653 11654 /* no auto-attach for SEC("kprobe.session") */ 11655 if (strcmp(prog->sec_name, "kprobe.session") == 0) 11656 return 0; 11657 11658 spec = prog->sec_name + sizeof("kprobe.session/") - 1; 11659 n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern); 11660 if (n < 1) { 11661 pr_warn("kprobe session pattern is invalid: %s\n", spec); 11662 return -EINVAL; 11663 } 11664 11665 *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts); 11666 free(pattern); 11667 return *link ? 0 : -errno; 11668 } 11669 11670 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11671 { 11672 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; 11673 LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); 11674 int n, ret = -EINVAL; 11675 11676 *link = NULL; 11677 11678 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", 11679 &probe_type, &binary_path, &func_name); 11680 switch (n) { 11681 case 1: 11682 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. 
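 * For this .multi variant that means a bare SEC("uprobe.multi") or SEC("uretprobe.multi"); something like SEC("uprobe.multi//usr/lib64/libc.so.6:malloc*") (illustrative path and pattern) is handled by case 3 below instead.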
*/ 11683 ret = 0; 11684 break; 11685 case 3: 11686 opts.retprobe = strcmp(probe_type, "uretprobe.multi") == 0; 11687 *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts); 11688 ret = libbpf_get_error(*link); 11689 break; 11690 default: 11691 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, 11692 prog->sec_name); 11693 break; 11694 } 11695 free(probe_type); 11696 free(binary_path); 11697 free(func_name); 11698 return ret; 11699 } 11700 11701 static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, 11702 const char *binary_path, uint64_t offset) 11703 { 11704 int i; 11705 11706 snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset); 11707 11708 /* sanitize binary_path in the probe name */ 11709 for (i = 0; buf[i]; i++) { 11710 if (!isalnum(buf[i])) 11711 buf[i] = '_'; 11712 } 11713 } 11714 11715 static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, 11716 const char *binary_path, size_t offset) 11717 { 11718 return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx", 11719 retprobe ? 'r' : 'p', 11720 retprobe ? "uretprobes" : "uprobes", 11721 probe_name, binary_path, offset); 11722 } 11723 11724 static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe) 11725 { 11726 return append_to_file(tracefs_uprobe_events(), "-:%s/%s", 11727 retprobe ? "uretprobes" : "uprobes", probe_name); 11728 } 11729 11730 static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe) 11731 { 11732 char file[512]; 11733 11734 snprintf(file, sizeof(file), "%s/events/%s/%s/id", 11735 tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name); 11736 11737 return parse_uint_from_file(file, "%d\n"); 11738 } 11739 11740 static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, 11741 const char *binary_path, size_t offset, int pid) 11742 { 11743 const size_t attr_sz = sizeof(struct perf_event_attr); 11744 struct perf_event_attr attr; 11745 int type, pfd, err; 11746 11747 err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset); 11748 if (err < 0) { 11749 pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n", 11750 binary_path, (size_t)offset, err); 11751 return err; 11752 } 11753 type = determine_uprobe_perf_type_legacy(probe_name, retprobe); 11754 if (type < 0) { 11755 err = type; 11756 pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n", 11757 binary_path, offset, err); 11758 goto err_clean_legacy; 11759 } 11760 11761 memset(&attr, 0, attr_sz); 11762 attr.size = attr_sz; 11763 attr.config = type; 11764 attr.type = PERF_TYPE_TRACEPOINT; 11765 11766 pfd = syscall(__NR_perf_event_open, &attr, 11767 pid < 0 ? -1 : pid, /* pid */ 11768 pid == -1 ? 0 : -1, /* cpu */ 11769 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 11770 if (pfd < 0) { 11771 err = -errno; 11772 pr_warn("legacy uprobe perf_event_open() failed: %d\n", err); 11773 goto err_clean_legacy; 11774 } 11775 return pfd; 11776 11777 err_clean_legacy: 11778 /* Clear the newly added legacy uprobe_event */ 11779 remove_uprobe_event_legacy(probe_name, retprobe); 11780 return err; 11781 } 11782 11783 /* Find offset of function name in archive specified by path. Currently 11784 * supported are .zip files that do not compress their contents, as used on 11785 * Android in the form of APKs, for example. "file_name" is the name of the ELF 11786 * file inside the archive. 
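 * For example, file_name might look like "lib/arm64-v8a/libfoo.so" inside an APK (purely illustrative).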
"func_name" matches symbol name or name@@LIB for 11787 * library functions. 11788 * 11789 * An overview of the APK format specifically provided here: 11790 * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents 11791 */ 11792 static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name, 11793 const char *func_name) 11794 { 11795 struct zip_archive *archive; 11796 struct zip_entry entry; 11797 long ret; 11798 Elf *elf; 11799 11800 archive = zip_archive_open(archive_path); 11801 if (IS_ERR(archive)) { 11802 ret = PTR_ERR(archive); 11803 pr_warn("zip: failed to open %s: %ld\n", archive_path, ret); 11804 return ret; 11805 } 11806 11807 ret = zip_archive_find_entry(archive, file_name, &entry); 11808 if (ret) { 11809 pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name, 11810 archive_path, ret); 11811 goto out; 11812 } 11813 pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path, 11814 (unsigned long)entry.data_offset); 11815 11816 if (entry.compression) { 11817 pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name, 11818 archive_path); 11819 ret = -LIBBPF_ERRNO__FORMAT; 11820 goto out; 11821 } 11822 11823 elf = elf_memory((void *)entry.data, entry.data_length); 11824 if (!elf) { 11825 pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path, 11826 elf_errmsg(-1)); 11827 ret = -LIBBPF_ERRNO__LIBELF; 11828 goto out; 11829 } 11830 11831 ret = elf_find_func_offset(elf, file_name, func_name); 11832 if (ret > 0) { 11833 pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n", 11834 func_name, file_name, archive_path, entry.data_offset, ret, 11835 ret + entry.data_offset); 11836 ret += entry.data_offset; 11837 } 11838 elf_end(elf); 11839 11840 out: 11841 zip_archive_close(archive); 11842 return ret; 11843 } 11844 11845 static const char *arch_specific_lib_paths(void) 11846 { 11847 /* 11848 * Based on https://packages.debian.org/sid/libc6. 11849 * 11850 * Assume that the traced program is built for the same architecture 11851 * as libbpf, which should cover the vast majority of cases. 11852 */ 11853 #if defined(__x86_64__) 11854 return "/lib/x86_64-linux-gnu"; 11855 #elif defined(__i386__) 11856 return "/lib/i386-linux-gnu"; 11857 #elif defined(__s390x__) 11858 return "/lib/s390x-linux-gnu"; 11859 #elif defined(__s390__) 11860 return "/lib/s390-linux-gnu"; 11861 #elif defined(__arm__) && defined(__SOFTFP__) 11862 return "/lib/arm-linux-gnueabi"; 11863 #elif defined(__arm__) && !defined(__SOFTFP__) 11864 return "/lib/arm-linux-gnueabihf"; 11865 #elif defined(__aarch64__) 11866 return "/lib/aarch64-linux-gnu"; 11867 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64 11868 return "/lib/mips64el-linux-gnuabi64"; 11869 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32 11870 return "/lib/mipsel-linux-gnu"; 11871 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 11872 return "/lib/powerpc64le-linux-gnu"; 11873 #elif defined(__sparc__) && defined(__arch64__) 11874 return "/lib/sparc64-linux-gnu"; 11875 #elif defined(__riscv) && __riscv_xlen == 64 11876 return "/lib/riscv64-linux-gnu"; 11877 #else 11878 return NULL; 11879 #endif 11880 } 11881 11882 /* Get full path to program/shared library. 
*/ 11883 static int resolve_full_path(const char *file, char *result, size_t result_sz) 11884 { 11885 const char *search_paths[3] = {}; 11886 int i, perm; 11887 11888 if (str_has_sfx(file, ".so") || strstr(file, ".so.")) { 11889 search_paths[0] = getenv("LD_LIBRARY_PATH"); 11890 search_paths[1] = "/usr/lib64:/usr/lib"; 11891 search_paths[2] = arch_specific_lib_paths(); 11892 perm = R_OK; 11893 } else { 11894 search_paths[0] = getenv("PATH"); 11895 search_paths[1] = "/usr/bin:/usr/sbin"; 11896 perm = R_OK | X_OK; 11897 } 11898 11899 for (i = 0; i < ARRAY_SIZE(search_paths); i++) { 11900 const char *s; 11901 11902 if (!search_paths[i]) 11903 continue; 11904 for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) { 11905 char *next_path; 11906 int seg_len; 11907 11908 if (s[0] == ':') 11909 s++; 11910 next_path = strchr(s, ':'); 11911 seg_len = next_path ? next_path - s : strlen(s); 11912 if (!seg_len) 11913 continue; 11914 snprintf(result, result_sz, "%.*s/%s", seg_len, s, file); 11915 /* ensure it has required permissions */ 11916 if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0) 11917 continue; 11918 pr_debug("resolved '%s' to '%s'\n", file, result); 11919 return 0; 11920 } 11921 } 11922 return -ENOENT; 11923 } 11924 11925 struct bpf_link * 11926 bpf_program__attach_uprobe_multi(const struct bpf_program *prog, 11927 pid_t pid, 11928 const char *path, 11929 const char *func_pattern, 11930 const struct bpf_uprobe_multi_opts *opts) 11931 { 11932 const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL; 11933 LIBBPF_OPTS(bpf_link_create_opts, lopts); 11934 unsigned long *resolved_offsets = NULL; 11935 int err = 0, link_fd, prog_fd; 11936 struct bpf_link *link = NULL; 11937 char errmsg[STRERR_BUFSIZE]; 11938 char full_path[PATH_MAX]; 11939 const __u64 *cookies; 11940 const char **syms; 11941 size_t cnt; 11942 11943 if (!OPTS_VALID(opts, bpf_uprobe_multi_opts)) 11944 return libbpf_err_ptr(-EINVAL); 11945 11946 prog_fd = bpf_program__fd(prog); 11947 if (prog_fd < 0) { 11948 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", 11949 prog->name); 11950 return libbpf_err_ptr(-EINVAL); 11951 } 11952 11953 syms = OPTS_GET(opts, syms, NULL); 11954 offsets = OPTS_GET(opts, offsets, NULL); 11955 ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL); 11956 cookies = OPTS_GET(opts, cookies, NULL); 11957 cnt = OPTS_GET(opts, cnt, 0); 11958 11959 /* 11960 * User can specify 2 mutually exclusive set of inputs: 11961 * 11962 * 1) use only path/func_pattern/pid arguments 11963 * 11964 * 2) use path/pid with allowed combinations of: 11965 * syms/offsets/ref_ctr_offsets/cookies/cnt 11966 * 11967 * - syms and offsets are mutually exclusive 11968 * - ref_ctr_offsets and cookies are optional 11969 * 11970 * Any other usage results in error. 
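 * For example (illustrative values): pattern mode is bpf_program__attach_uprobe_multi(prog, -1, "libc.so.6", "pthread_*", NULL), while syms mode passes func_pattern == NULL and fills opts.syms and opts.cnt (opts.cookies and opts.ref_ctr_offsets are optional).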
11971 */ 11972 11973 if (!path) 11974 return libbpf_err_ptr(-EINVAL); 11975 if (!func_pattern && cnt == 0) 11976 return libbpf_err_ptr(-EINVAL); 11977 11978 if (func_pattern) { 11979 if (syms || offsets || ref_ctr_offsets || cookies || cnt) 11980 return libbpf_err_ptr(-EINVAL); 11981 } else { 11982 if (!!syms == !!offsets) 11983 return libbpf_err_ptr(-EINVAL); 11984 } 11985 11986 if (func_pattern) { 11987 if (!strchr(path, '/')) { 11988 err = resolve_full_path(path, full_path, sizeof(full_path)); 11989 if (err) { 11990 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", 11991 prog->name, path, err); 11992 return libbpf_err_ptr(err); 11993 } 11994 path = full_path; 11995 } 11996 11997 err = elf_resolve_pattern_offsets(path, func_pattern, 11998 &resolved_offsets, &cnt); 11999 if (err < 0) 12000 return libbpf_err_ptr(err); 12001 offsets = resolved_offsets; 12002 } else if (syms) { 12003 err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC); 12004 if (err < 0) 12005 return libbpf_err_ptr(err); 12006 offsets = resolved_offsets; 12007 } 12008 12009 lopts.uprobe_multi.path = path; 12010 lopts.uprobe_multi.offsets = offsets; 12011 lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets; 12012 lopts.uprobe_multi.cookies = cookies; 12013 lopts.uprobe_multi.cnt = cnt; 12014 lopts.uprobe_multi.flags = OPTS_GET(opts, retprobe, false) ? BPF_F_UPROBE_MULTI_RETURN : 0; 12015 12016 if (pid == 0) 12017 pid = getpid(); 12018 if (pid > 0) 12019 lopts.uprobe_multi.pid = pid; 12020 12021 link = calloc(1, sizeof(*link)); 12022 if (!link) { 12023 err = -ENOMEM; 12024 goto error; 12025 } 12026 link->detach = &bpf_link__detach_fd; 12027 12028 link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &lopts); 12029 if (link_fd < 0) { 12030 err = -errno; 12031 pr_warn("prog '%s': failed to attach multi-uprobe: %s\n", 12032 prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 12033 goto error; 12034 } 12035 link->fd = link_fd; 12036 free(resolved_offsets); 12037 return link; 12038 12039 error: 12040 free(resolved_offsets); 12041 free(link); 12042 return libbpf_err_ptr(err); 12043 } 12044 12045 LIBBPF_API struct bpf_link * 12046 bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, 12047 const char *binary_path, size_t func_offset, 12048 const struct bpf_uprobe_opts *opts) 12049 { 12050 const char *archive_path = NULL, *archive_sep = NULL; 12051 char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; 12052 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); 12053 enum probe_attach_mode attach_mode; 12054 char full_path[PATH_MAX]; 12055 struct bpf_link *link; 12056 size_t ref_ctr_off; 12057 int pfd, err; 12058 bool retprobe, legacy; 12059 const char *func_name; 12060 12061 if (!OPTS_VALID(opts, bpf_uprobe_opts)) 12062 return libbpf_err_ptr(-EINVAL); 12063 12064 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT); 12065 retprobe = OPTS_GET(opts, retprobe, false); 12066 ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); 12067 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 12068 12069 if (!binary_path) 12070 return libbpf_err_ptr(-EINVAL); 12071 12072 /* Check if "binary_path" refers to an archive. 
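 * An archive path uses the "ARCHIVE!/MEMBER" convention, e.g. "/data/app/Foo.apk!/lib/arm64-v8a/libfoo.so" (illustrative path): the part before "!/" names the zip archive and the part after names the ELF member inside it.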
*/ 12073 archive_sep = strstr(binary_path, "!/"); 12074 if (archive_sep) { 12075 full_path[0] = '\0'; 12076 libbpf_strlcpy(full_path, binary_path, 12077 min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1))); 12078 archive_path = full_path; 12079 binary_path = archive_sep + 2; 12080 } else if (!strchr(binary_path, '/')) { 12081 err = resolve_full_path(binary_path, full_path, sizeof(full_path)); 12082 if (err) { 12083 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", 12084 prog->name, binary_path, err); 12085 return libbpf_err_ptr(err); 12086 } 12087 binary_path = full_path; 12088 } 12089 func_name = OPTS_GET(opts, func_name, NULL); 12090 if (func_name) { 12091 long sym_off; 12092 12093 if (archive_path) { 12094 sym_off = elf_find_func_offset_from_archive(archive_path, binary_path, 12095 func_name); 12096 binary_path = archive_path; 12097 } else { 12098 sym_off = elf_find_func_offset_from_file(binary_path, func_name); 12099 } 12100 if (sym_off < 0) 12101 return libbpf_err_ptr(sym_off); 12102 func_offset += sym_off; 12103 } 12104 12105 legacy = determine_uprobe_perf_type() < 0; 12106 switch (attach_mode) { 12107 case PROBE_ATTACH_MODE_LEGACY: 12108 legacy = true; 12109 pe_opts.force_ioctl_attach = true; 12110 break; 12111 case PROBE_ATTACH_MODE_PERF: 12112 if (legacy) 12113 return libbpf_err_ptr(-ENOTSUP); 12114 pe_opts.force_ioctl_attach = true; 12115 break; 12116 case PROBE_ATTACH_MODE_LINK: 12117 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK)) 12118 return libbpf_err_ptr(-ENOTSUP); 12119 break; 12120 case PROBE_ATTACH_MODE_DEFAULT: 12121 break; 12122 default: 12123 return libbpf_err_ptr(-EINVAL); 12124 } 12125 12126 if (!legacy) { 12127 pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, 12128 func_offset, pid, ref_ctr_off); 12129 } else { 12130 char probe_name[PATH_MAX + 64]; 12131 12132 if (ref_ctr_off) 12133 return libbpf_err_ptr(-EINVAL); 12134 12135 gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name), 12136 binary_path, func_offset); 12137 12138 legacy_probe = strdup(probe_name); 12139 if (!legacy_probe) 12140 return libbpf_err_ptr(-ENOMEM); 12141 12142 pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe, 12143 binary_path, func_offset, pid); 12144 } 12145 if (pfd < 0) { 12146 err = -errno; 12147 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", 12148 prog->name, retprobe ? "uretprobe" : "uprobe", 12149 binary_path, func_offset, 12150 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 12151 goto err_out; 12152 } 12153 12154 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); 12155 err = libbpf_get_error(link); 12156 if (err) { 12157 close(pfd); 12158 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n", 12159 prog->name, retprobe ? 
"uretprobe" : "uprobe", 12160 binary_path, func_offset, 12161 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 12162 goto err_clean_legacy; 12163 } 12164 if (legacy) { 12165 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 12166 12167 perf_link->legacy_probe_name = legacy_probe; 12168 perf_link->legacy_is_kprobe = false; 12169 perf_link->legacy_is_retprobe = retprobe; 12170 } 12171 return link; 12172 12173 err_clean_legacy: 12174 if (legacy) 12175 remove_uprobe_event_legacy(legacy_probe, retprobe); 12176 err_out: 12177 free(legacy_probe); 12178 return libbpf_err_ptr(err); 12179 } 12180 12181 /* Format of u[ret]probe section definition supporting auto-attach: 12182 * u[ret]probe/binary:function[+offset] 12183 * 12184 * binary can be an absolute/relative path or a filename; the latter is resolved to a 12185 * full binary path via bpf_program__attach_uprobe_opts. 12186 * 12187 * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be 12188 * specified (and auto-attach is not possible) or the above format is specified for 12189 * auto-attach. 12190 */ 12191 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12192 { 12193 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); 12194 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off; 12195 int n, c, ret = -EINVAL; 12196 long offset = 0; 12197 12198 *link = NULL; 12199 12200 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", 12201 &probe_type, &binary_path, &func_name); 12202 switch (n) { 12203 case 1: 12204 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ 12205 ret = 0; 12206 break; 12207 case 2: 12208 pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n", 12209 prog->name, prog->sec_name); 12210 break; 12211 case 3: 12212 /* check if user specifies `+offset`, if yes, this should be 12213 * the last part of the string, make sure sscanf read to EOL 12214 */ 12215 func_off = strrchr(func_name, '+'); 12216 if (func_off) { 12217 n = sscanf(func_off, "+%li%n", &offset, &c); 12218 if (n == 1 && *(func_off + c) == '\0') 12219 func_off[0] = '\0'; 12220 else 12221 offset = 0; 12222 } 12223 opts.retprobe = strcmp(probe_type, "uretprobe") == 0 || 12224 strcmp(probe_type, "uretprobe.s") == 0; 12225 if (opts.retprobe && offset != 0) { 12226 pr_warn("prog '%s': uretprobes do not support offset specification\n", 12227 prog->name); 12228 break; 12229 } 12230 opts.func_name = func_name; 12231 *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts); 12232 ret = libbpf_get_error(*link); 12233 break; 12234 default: 12235 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, 12236 prog->sec_name); 12237 break; 12238 } 12239 free(probe_type); 12240 free(binary_path); 12241 free(func_name); 12242 12243 return ret; 12244 } 12245 12246 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, 12247 bool retprobe, pid_t pid, 12248 const char *binary_path, 12249 size_t func_offset) 12250 { 12251 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe); 12252 12253 return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts); 12254 } 12255 12256 struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, 12257 pid_t pid, const char *binary_path, 12258 const char *usdt_provider, const char *usdt_name, 12259 const struct bpf_usdt_opts *opts) 12260 { 12261 char resolved_path[512]; 12262 struct 
bpf_object *obj = prog->obj;
12263 struct bpf_link *link;
12264 __u64 usdt_cookie;
12265 int err;
12266
12267 if (!OPTS_VALID(opts, bpf_usdt_opts))
12268 return libbpf_err_ptr(-EINVAL);
12269
12270 if (bpf_program__fd(prog) < 0) {
12271 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
12272 prog->name);
12273 return libbpf_err_ptr(-EINVAL);
12274 }
12275
12276 if (!binary_path)
12277 return libbpf_err_ptr(-EINVAL);
12278
12279 if (!strchr(binary_path, '/')) {
12280 err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
12281 if (err) {
12282 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
12283 prog->name, binary_path, err);
12284 return libbpf_err_ptr(err);
12285 }
12286 binary_path = resolved_path;
12287 }
12288
12289 /* USDT manager is instantiated lazily on first USDT attach. It will
12290 * be destroyed together with BPF object in bpf_object__close().
12291 */
12292 if (IS_ERR(obj->usdt_man))
12293 return libbpf_ptr(obj->usdt_man);
12294 if (!obj->usdt_man) {
12295 obj->usdt_man = usdt_manager_new(obj);
12296 if (IS_ERR(obj->usdt_man))
12297 return libbpf_ptr(obj->usdt_man);
12298 }
12299
12300 usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
12301 link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
12302 usdt_provider, usdt_name, usdt_cookie);
12303 err = libbpf_get_error(link);
12304 if (err)
12305 return libbpf_err_ptr(err);
12306 return link;
12307 }
12308
12309 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12310 {
12311 char *path = NULL, *provider = NULL, *name = NULL;
12312 const char *sec_name;
12313 int n, err;
12314
12315 sec_name = bpf_program__section_name(prog);
12316 if (strcmp(sec_name, "usdt") == 0) {
12317 /* no auto-attach for just SEC("usdt") */
12318 *link = NULL;
12319 return 0;
12320 }
12321
12322 n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
12323 if (n != 3) {
12324 pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
12325 sec_name);
12326 err = -EINVAL;
12327 } else {
12328 *link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
12329 provider, name, NULL);
12330 err = libbpf_get_error(*link);
12331 }
12332 free(path);
12333 free(provider);
12334 free(name);
12335 return err;
12336 }
12337
12338 static int determine_tracepoint_id(const char *tp_category,
12339 const char *tp_name)
12340 {
12341 char file[PATH_MAX];
12342 int ret;
12343
12344 ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
12345 tracefs_path(), tp_category, tp_name);
12346 if (ret < 0)
12347 return -errno;
12348 if (ret >= sizeof(file)) {
12349 pr_debug("tracepoint %s/%s path is too long\n",
12350 tp_category, tp_name);
12351 return -E2BIG;
12352 }
12353 return parse_uint_from_file(file, "%d\n");
12354 }
12355
12356 static int perf_event_open_tracepoint(const char *tp_category,
12357 const char *tp_name)
12358 {
12359 const size_t attr_sz = sizeof(struct perf_event_attr);
12360 struct perf_event_attr attr;
12361 char errmsg[STRERR_BUFSIZE];
12362 int tp_id, pfd, err;
12363
12364 tp_id = determine_tracepoint_id(tp_category, tp_name);
12365 if (tp_id < 0) {
12366 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
12367 tp_category, tp_name,
12368 libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
12369 return tp_id;
12370 }
12371
12372 memset(&attr, 0, attr_sz);
12373 attr.type = PERF_TYPE_TRACEPOINT;
12374 attr.size = attr_sz;
12375 attr.config = tp_id;
12376
12377 pfd
= syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */, 12378 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 12379 if (pfd < 0) { 12380 err = -errno; 12381 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n", 12382 tp_category, tp_name, 12383 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 12384 return err; 12385 } 12386 return pfd; 12387 } 12388 12389 struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, 12390 const char *tp_category, 12391 const char *tp_name, 12392 const struct bpf_tracepoint_opts *opts) 12393 { 12394 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); 12395 char errmsg[STRERR_BUFSIZE]; 12396 struct bpf_link *link; 12397 int pfd, err; 12398 12399 if (!OPTS_VALID(opts, bpf_tracepoint_opts)) 12400 return libbpf_err_ptr(-EINVAL); 12401 12402 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 12403 12404 pfd = perf_event_open_tracepoint(tp_category, tp_name); 12405 if (pfd < 0) { 12406 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n", 12407 prog->name, tp_category, tp_name, 12408 libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); 12409 return libbpf_err_ptr(pfd); 12410 } 12411 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); 12412 err = libbpf_get_error(link); 12413 if (err) { 12414 close(pfd); 12415 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n", 12416 prog->name, tp_category, tp_name, 12417 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 12418 return libbpf_err_ptr(err); 12419 } 12420 return link; 12421 } 12422 12423 struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog, 12424 const char *tp_category, 12425 const char *tp_name) 12426 { 12427 return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL); 12428 } 12429 12430 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12431 { 12432 char *sec_name, *tp_cat, *tp_name; 12433 12434 *link = NULL; 12435 12436 /* no auto-attach for SEC("tp") or SEC("tracepoint") */ 12437 if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0) 12438 return 0; 12439 12440 sec_name = strdup(prog->sec_name); 12441 if (!sec_name) 12442 return -ENOMEM; 12443 12444 /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */ 12445 if (str_has_pfx(prog->sec_name, "tp/")) 12446 tp_cat = sec_name + sizeof("tp/") - 1; 12447 else 12448 tp_cat = sec_name + sizeof("tracepoint/") - 1; 12449 tp_name = strchr(tp_cat, '/'); 12450 if (!tp_name) { 12451 free(sec_name); 12452 return -EINVAL; 12453 } 12454 *tp_name = '\0'; 12455 tp_name++; 12456 12457 *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name); 12458 free(sec_name); 12459 return libbpf_get_error(*link); 12460 } 12461 12462 struct bpf_link * 12463 bpf_program__attach_raw_tracepoint_opts(const struct bpf_program *prog, 12464 const char *tp_name, 12465 struct bpf_raw_tracepoint_opts *opts) 12466 { 12467 LIBBPF_OPTS(bpf_raw_tp_opts, raw_opts); 12468 char errmsg[STRERR_BUFSIZE]; 12469 struct bpf_link *link; 12470 int prog_fd, pfd; 12471 12472 if (!OPTS_VALID(opts, bpf_raw_tracepoint_opts)) 12473 return libbpf_err_ptr(-EINVAL); 12474 12475 prog_fd = bpf_program__fd(prog); 12476 if (prog_fd < 0) { 12477 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12478 return libbpf_err_ptr(-EINVAL); 12479 } 12480 12481 link = calloc(1, sizeof(*link)); 12482 if (!link) 12483 return libbpf_err_ptr(-ENOMEM); 12484 link->detach = &bpf_link__detach_fd; 12485 12486 raw_opts.tp_name = 
tp_name; 12487 raw_opts.cookie = OPTS_GET(opts, cookie, 0); 12488 pfd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_opts); 12489 if (pfd < 0) { 12490 pfd = -errno; 12491 free(link); 12492 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n", 12493 prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); 12494 return libbpf_err_ptr(pfd); 12495 } 12496 link->fd = pfd; 12497 return link; 12498 } 12499 12500 struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, 12501 const char *tp_name) 12502 { 12503 return bpf_program__attach_raw_tracepoint_opts(prog, tp_name, NULL); 12504 } 12505 12506 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12507 { 12508 static const char *const prefixes[] = { 12509 "raw_tp", 12510 "raw_tracepoint", 12511 "raw_tp.w", 12512 "raw_tracepoint.w", 12513 }; 12514 size_t i; 12515 const char *tp_name = NULL; 12516 12517 *link = NULL; 12518 12519 for (i = 0; i < ARRAY_SIZE(prefixes); i++) { 12520 size_t pfx_len; 12521 12522 if (!str_has_pfx(prog->sec_name, prefixes[i])) 12523 continue; 12524 12525 pfx_len = strlen(prefixes[i]); 12526 /* no auto-attach case of, e.g., SEC("raw_tp") */ 12527 if (prog->sec_name[pfx_len] == '\0') 12528 return 0; 12529 12530 if (prog->sec_name[pfx_len] != '/') 12531 continue; 12532 12533 tp_name = prog->sec_name + pfx_len + 1; 12534 break; 12535 } 12536 12537 if (!tp_name) { 12538 pr_warn("prog '%s': invalid section name '%s'\n", 12539 prog->name, prog->sec_name); 12540 return -EINVAL; 12541 } 12542 12543 *link = bpf_program__attach_raw_tracepoint(prog, tp_name); 12544 return libbpf_get_error(*link); 12545 } 12546 12547 /* Common logic for all BPF program types that attach to a btf_id */ 12548 static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog, 12549 const struct bpf_trace_opts *opts) 12550 { 12551 LIBBPF_OPTS(bpf_link_create_opts, link_opts); 12552 char errmsg[STRERR_BUFSIZE]; 12553 struct bpf_link *link; 12554 int prog_fd, pfd; 12555 12556 if (!OPTS_VALID(opts, bpf_trace_opts)) 12557 return libbpf_err_ptr(-EINVAL); 12558 12559 prog_fd = bpf_program__fd(prog); 12560 if (prog_fd < 0) { 12561 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12562 return libbpf_err_ptr(-EINVAL); 12563 } 12564 12565 link = calloc(1, sizeof(*link)); 12566 if (!link) 12567 return libbpf_err_ptr(-ENOMEM); 12568 link->detach = &bpf_link__detach_fd; 12569 12570 /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */ 12571 link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0); 12572 pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts); 12573 if (pfd < 0) { 12574 pfd = -errno; 12575 free(link); 12576 pr_warn("prog '%s': failed to attach: %s\n", 12577 prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); 12578 return libbpf_err_ptr(pfd); 12579 } 12580 link->fd = pfd; 12581 return link; 12582 } 12583 12584 struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog) 12585 { 12586 return bpf_program__attach_btf_id(prog, NULL); 12587 } 12588 12589 struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog, 12590 const struct bpf_trace_opts *opts) 12591 { 12592 return bpf_program__attach_btf_id(prog, opts); 12593 } 12594 12595 struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog) 12596 { 12597 return bpf_program__attach_btf_id(prog, NULL); 12598 } 12599 12600 static int attach_trace(const struct bpf_program *prog, 
long cookie, struct bpf_link **link) 12601 { 12602 *link = bpf_program__attach_trace(prog); 12603 return libbpf_get_error(*link); 12604 } 12605 12606 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12607 { 12608 *link = bpf_program__attach_lsm(prog); 12609 return libbpf_get_error(*link); 12610 } 12611 12612 static struct bpf_link * 12613 bpf_program_attach_fd(const struct bpf_program *prog, 12614 int target_fd, const char *target_name, 12615 const struct bpf_link_create_opts *opts) 12616 { 12617 enum bpf_attach_type attach_type; 12618 char errmsg[STRERR_BUFSIZE]; 12619 struct bpf_link *link; 12620 int prog_fd, link_fd; 12621 12622 prog_fd = bpf_program__fd(prog); 12623 if (prog_fd < 0) { 12624 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12625 return libbpf_err_ptr(-EINVAL); 12626 } 12627 12628 link = calloc(1, sizeof(*link)); 12629 if (!link) 12630 return libbpf_err_ptr(-ENOMEM); 12631 link->detach = &bpf_link__detach_fd; 12632 12633 attach_type = bpf_program__expected_attach_type(prog); 12634 link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts); 12635 if (link_fd < 0) { 12636 link_fd = -errno; 12637 free(link); 12638 pr_warn("prog '%s': failed to attach to %s: %s\n", 12639 prog->name, target_name, 12640 libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); 12641 return libbpf_err_ptr(link_fd); 12642 } 12643 link->fd = link_fd; 12644 return link; 12645 } 12646 12647 struct bpf_link * 12648 bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd) 12649 { 12650 return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL); 12651 } 12652 12653 struct bpf_link * 12654 bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd) 12655 { 12656 return bpf_program_attach_fd(prog, netns_fd, "netns", NULL); 12657 } 12658 12659 struct bpf_link * 12660 bpf_program__attach_sockmap(const struct bpf_program *prog, int map_fd) 12661 { 12662 return bpf_program_attach_fd(prog, map_fd, "sockmap", NULL); 12663 } 12664 12665 struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex) 12666 { 12667 /* target_fd/target_ifindex use the same field in LINK_CREATE */ 12668 return bpf_program_attach_fd(prog, ifindex, "xdp", NULL); 12669 } 12670 12671 struct bpf_link * 12672 bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex, 12673 const struct bpf_tcx_opts *opts) 12674 { 12675 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); 12676 __u32 relative_id; 12677 int relative_fd; 12678 12679 if (!OPTS_VALID(opts, bpf_tcx_opts)) 12680 return libbpf_err_ptr(-EINVAL); 12681 12682 relative_id = OPTS_GET(opts, relative_id, 0); 12683 relative_fd = OPTS_GET(opts, relative_fd, 0); 12684 12685 /* validate we don't have unexpected combinations of non-zero fields */ 12686 if (!ifindex) { 12687 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n", 12688 prog->name); 12689 return libbpf_err_ptr(-EINVAL); 12690 } 12691 if (relative_fd && relative_id) { 12692 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", 12693 prog->name); 12694 return libbpf_err_ptr(-EINVAL); 12695 } 12696 12697 link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0); 12698 link_create_opts.tcx.relative_fd = relative_fd; 12699 link_create_opts.tcx.relative_id = relative_id; 12700 link_create_opts.flags = OPTS_GET(opts, flags, 0); 12701 12702 /* target_fd/target_ifindex use the same field in LINK_CREATE */ 12703 return bpf_program_attach_fd(prog, ifindex, 
"tcx", &link_create_opts); 12704 } 12705 12706 struct bpf_link * 12707 bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex, 12708 const struct bpf_netkit_opts *opts) 12709 { 12710 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); 12711 __u32 relative_id; 12712 int relative_fd; 12713 12714 if (!OPTS_VALID(opts, bpf_netkit_opts)) 12715 return libbpf_err_ptr(-EINVAL); 12716 12717 relative_id = OPTS_GET(opts, relative_id, 0); 12718 relative_fd = OPTS_GET(opts, relative_fd, 0); 12719 12720 /* validate we don't have unexpected combinations of non-zero fields */ 12721 if (!ifindex) { 12722 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n", 12723 prog->name); 12724 return libbpf_err_ptr(-EINVAL); 12725 } 12726 if (relative_fd && relative_id) { 12727 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", 12728 prog->name); 12729 return libbpf_err_ptr(-EINVAL); 12730 } 12731 12732 link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0); 12733 link_create_opts.netkit.relative_fd = relative_fd; 12734 link_create_opts.netkit.relative_id = relative_id; 12735 link_create_opts.flags = OPTS_GET(opts, flags, 0); 12736 12737 return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts); 12738 } 12739 12740 struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, 12741 int target_fd, 12742 const char *attach_func_name) 12743 { 12744 int btf_id; 12745 12746 if (!!target_fd != !!attach_func_name) { 12747 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n", 12748 prog->name); 12749 return libbpf_err_ptr(-EINVAL); 12750 } 12751 12752 if (prog->type != BPF_PROG_TYPE_EXT) { 12753 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace", 12754 prog->name); 12755 return libbpf_err_ptr(-EINVAL); 12756 } 12757 12758 if (target_fd) { 12759 LIBBPF_OPTS(bpf_link_create_opts, target_opts); 12760 12761 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd); 12762 if (btf_id < 0) 12763 return libbpf_err_ptr(btf_id); 12764 12765 target_opts.target_btf_id = btf_id; 12766 12767 return bpf_program_attach_fd(prog, target_fd, "freplace", 12768 &target_opts); 12769 } else { 12770 /* no target, so use raw_tracepoint_open for compatibility 12771 * with old kernels 12772 */ 12773 return bpf_program__attach_trace(prog); 12774 } 12775 } 12776 12777 struct bpf_link * 12778 bpf_program__attach_iter(const struct bpf_program *prog, 12779 const struct bpf_iter_attach_opts *opts) 12780 { 12781 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); 12782 char errmsg[STRERR_BUFSIZE]; 12783 struct bpf_link *link; 12784 int prog_fd, link_fd; 12785 __u32 target_fd = 0; 12786 12787 if (!OPTS_VALID(opts, bpf_iter_attach_opts)) 12788 return libbpf_err_ptr(-EINVAL); 12789 12790 link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0); 12791 link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0); 12792 12793 prog_fd = bpf_program__fd(prog); 12794 if (prog_fd < 0) { 12795 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12796 return libbpf_err_ptr(-EINVAL); 12797 } 12798 12799 link = calloc(1, sizeof(*link)); 12800 if (!link) 12801 return libbpf_err_ptr(-ENOMEM); 12802 link->detach = &bpf_link__detach_fd; 12803 12804 link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER, 12805 &link_create_opts); 12806 if (link_fd < 0) { 12807 link_fd = -errno; 12808 free(link); 12809 pr_warn("prog '%s': failed to attach to iterator: %s\n", 12810 prog->name, 
libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); 12811 return libbpf_err_ptr(link_fd); 12812 } 12813 link->fd = link_fd; 12814 return link; 12815 } 12816 12817 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12818 { 12819 *link = bpf_program__attach_iter(prog, NULL); 12820 return libbpf_get_error(*link); 12821 } 12822 12823 struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog, 12824 const struct bpf_netfilter_opts *opts) 12825 { 12826 LIBBPF_OPTS(bpf_link_create_opts, lopts); 12827 struct bpf_link *link; 12828 int prog_fd, link_fd; 12829 12830 if (!OPTS_VALID(opts, bpf_netfilter_opts)) 12831 return libbpf_err_ptr(-EINVAL); 12832 12833 prog_fd = bpf_program__fd(prog); 12834 if (prog_fd < 0) { 12835 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12836 return libbpf_err_ptr(-EINVAL); 12837 } 12838 12839 link = calloc(1, sizeof(*link)); 12840 if (!link) 12841 return libbpf_err_ptr(-ENOMEM); 12842 12843 link->detach = &bpf_link__detach_fd; 12844 12845 lopts.netfilter.pf = OPTS_GET(opts, pf, 0); 12846 lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0); 12847 lopts.netfilter.priority = OPTS_GET(opts, priority, 0); 12848 lopts.netfilter.flags = OPTS_GET(opts, flags, 0); 12849 12850 link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts); 12851 if (link_fd < 0) { 12852 char errmsg[STRERR_BUFSIZE]; 12853 12854 link_fd = -errno; 12855 free(link); 12856 pr_warn("prog '%s': failed to attach to netfilter: %s\n", 12857 prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); 12858 return libbpf_err_ptr(link_fd); 12859 } 12860 link->fd = link_fd; 12861 12862 return link; 12863 } 12864 12865 struct bpf_link *bpf_program__attach(const struct bpf_program *prog) 12866 { 12867 struct bpf_link *link = NULL; 12868 int err; 12869 12870 if (!prog->sec_def || !prog->sec_def->prog_attach_fn) 12871 return libbpf_err_ptr(-EOPNOTSUPP); 12872 12873 if (bpf_program__fd(prog) < 0) { 12874 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", 12875 prog->name); 12876 return libbpf_err_ptr(-EINVAL); 12877 } 12878 12879 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link); 12880 if (err) 12881 return libbpf_err_ptr(err); 12882 12883 /* When calling bpf_program__attach() explicitly, auto-attach support 12884 * is expected to work, so NULL returned link is considered an error. 12885 * This is different for skeleton's attach, see comment in 12886 * bpf_object__attach_skeleton(). 
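 * For example, a bare SEC("kprobe") program has no auto-attach target: its attach fn succeeds but leaves link NULL, so this function returns -EOPNOTSUPP, whereas skeleton attach simply skips such programs.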
12887 */
12888 if (!link)
12889 return libbpf_err_ptr(-EOPNOTSUPP);
12890
12891 return link;
12892 }
12893
12894 struct bpf_link_struct_ops {
12895 struct bpf_link link;
12896 int map_fd;
12897 };
12898
12899 static int bpf_link__detach_struct_ops(struct bpf_link *link)
12900 {
12901 struct bpf_link_struct_ops *st_link;
12902 __u32 zero = 0;
12903
12904 st_link = container_of(link, struct bpf_link_struct_ops, link);
12905
12906 if (st_link->map_fd < 0)
12907 /* w/o a real link */
12908 return bpf_map_delete_elem(link->fd, &zero);
12909
12910 return close(link->fd);
12911 }
12912
12913 struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
12914 {
12915 struct bpf_link_struct_ops *link;
12916 __u32 zero = 0;
12917 int err, fd;
12918
12919 if (!bpf_map__is_struct_ops(map)) {
12920 pr_warn("map '%s': can't attach non-struct_ops map\n", map->name);
12921 return libbpf_err_ptr(-EINVAL);
12922 }
12923
12924 if (map->fd < 0) {
12925 pr_warn("map '%s': can't attach BPF map without FD (was it created?)\n", map->name);
12926 return libbpf_err_ptr(-EINVAL);
12927 }
12928
12929 link = calloc(1, sizeof(*link));
12930 if (!link)
12931 return libbpf_err_ptr(-ENOMEM);
12932
12933 /* kern_vdata should be prepared during the loading phase. */
12934 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
12935 /* It can be EBUSY if the map has been used to create or
12936 * update a link before. We don't allow updating the value of
12937 * a struct_ops once it is set. That ensures that the value
12938 * never changes. So, it is safe to skip EBUSY.
12939 */
12940 if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
12941 free(link);
12942 return libbpf_err_ptr(err);
12943 }
12944
12945 link->link.detach = bpf_link__detach_struct_ops;
12946
12947 if (!(map->def.map_flags & BPF_F_LINK)) {
12948 /* w/o a real link */
12949 link->link.fd = map->fd;
12950 link->map_fd = -1;
12951 return &link->link;
12952 }
12953
12954 fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
12955 if (fd < 0) {
12956 free(link);
12957 return libbpf_err_ptr(fd);
12958 }
12959
12960 link->link.fd = fd;
12961 link->map_fd = map->fd;
12962
12963 return &link->link;
12964 }
12965
12966 /*
12967 * Swap the struct_ops map backing a link with a new struct_ops map.
12968 */
12969 int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
12970 {
12971 struct bpf_link_struct_ops *st_ops_link;
12972 __u32 zero = 0;
12973 int err;
12974
12975 if (!bpf_map__is_struct_ops(map))
12976 return -EINVAL;
12977
12978 if (map->fd < 0) {
12979 pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name);
12980 return -EINVAL;
12981 }
12982
12983 st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
12984 /* Ensure the type of a link is correct */
12985 if (st_ops_link->map_fd < 0)
12986 return -EINVAL;
12987
12988 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
12989 /* It can be EBUSY if the map has been used to create or
12990 * update a link before. We don't allow updating the value of
12991 * a struct_ops once it is set. That ensures that the value
12992 * never changes. So, it is safe to skip EBUSY.
12993 */
12994 if (err && err != -EBUSY)
12995 return err;
12996
12997 err = bpf_link_update(link->fd, map->fd, NULL);
12998 if (err < 0)
12999 return err;
13000
13001 st_ops_link->map_fd = map->fd;
13002
13003 return 0;
13004 }
13005
13006 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
13007 void *private_data);
13008
13009 static enum bpf_perf_event_ret
13010 perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
13011 void **copy_mem, size_t *copy_size,
13012 bpf_perf_event_print_t fn, void *private_data)
13013 {
13014 struct perf_event_mmap_page *header = mmap_mem;
13015 __u64 data_head = ring_buffer_read_head(header);
13016 __u64 data_tail = header->data_tail;
13017 void *base = ((__u8 *)header) + page_size;
13018 int ret = LIBBPF_PERF_EVENT_CONT;
13019 struct perf_event_header *ehdr;
13020 size_t ehdr_size;
13021
13022 while (data_head != data_tail) {
13023 ehdr = base + (data_tail & (mmap_size - 1));
13024 ehdr_size = ehdr->size;
13025
13026 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
13027 void *copy_start = ehdr;
13028 size_t len_first = base + mmap_size - copy_start;
13029 size_t len_secnd = ehdr_size - len_first;
13030
13031 if (*copy_size < ehdr_size) {
13032 free(*copy_mem);
13033 *copy_mem = malloc(ehdr_size);
13034 if (!*copy_mem) {
13035 *copy_size = 0;
13036 ret = LIBBPF_PERF_EVENT_ERROR;
13037 break;
13038 }
13039 *copy_size = ehdr_size;
13040 }
13041
13042 memcpy(*copy_mem, copy_start, len_first);
13043 memcpy(*copy_mem + len_first, base, len_secnd);
13044 ehdr = *copy_mem;
13045 }
13046
13047 ret = fn(ehdr, private_data);
13048 data_tail += ehdr_size;
13049 if (ret != LIBBPF_PERF_EVENT_CONT)
13050 break;
13051 }
13052
13053 ring_buffer_write_tail(header, data_tail);
13054 return libbpf_err(ret);
13055 }
13056
13057 struct perf_buffer;
13058
13059 struct perf_buffer_params {
13060 struct perf_event_attr *attr;
13061 /* if event_cb is specified, it takes precedence */
13062 perf_buffer_event_fn event_cb;
13063 /* sample_cb and lost_cb are higher-level common-case callbacks */
13064 perf_buffer_sample_fn sample_cb;
13065 perf_buffer_lost_fn lost_cb;
13066 void *ctx;
13067 int cpu_cnt;
13068 int *cpus;
13069 int *map_keys;
13070 };
13071
13072 struct perf_cpu_buf {
13073 struct perf_buffer *pb;
13074 void *base; /* mmap()'ed memory */
13075 void *buf; /* for reconstructing segmented data */
13076 size_t buf_size;
13077 int fd;
13078 int cpu;
13079 int map_key;
13080 };
13081
13082 struct perf_buffer {
13083 perf_buffer_event_fn event_cb;
13084 perf_buffer_sample_fn sample_cb;
13085 perf_buffer_lost_fn lost_cb;
13086 void *ctx; /* passed into callbacks */
13087
13088 size_t page_size;
13089 size_t mmap_size;
13090 struct perf_cpu_buf **cpu_bufs;
13091 struct epoll_event *events;
13092 int cpu_cnt; /* number of allocated CPU buffers */
13093 int epoll_fd; /* epoll instance FD */
13094 int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
13095 };
13096
13097 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
13098 struct perf_cpu_buf *cpu_buf)
13099 {
13100 if (!cpu_buf)
13101 return;
13102 if (cpu_buf->base &&
13103 munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
13104 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
13105 if (cpu_buf->fd >= 0) {
13106 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
13107 close(cpu_buf->fd);
13108 }
13109 free(cpu_buf->buf);
13110 free(cpu_buf);
13111 }
13112
13113 void perf_buffer__free(struct perf_buffer *pb)
13114 {
13115 int i;
13116
13117 if
(IS_ERR_OR_NULL(pb)) 13118 return; 13119 if (pb->cpu_bufs) { 13120 for (i = 0; i < pb->cpu_cnt; i++) { 13121 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i]; 13122 13123 if (!cpu_buf) 13124 continue; 13125 13126 bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key); 13127 perf_buffer__free_cpu_buf(pb, cpu_buf); 13128 } 13129 free(pb->cpu_bufs); 13130 } 13131 if (pb->epoll_fd >= 0) 13132 close(pb->epoll_fd); 13133 free(pb->events); 13134 free(pb); 13135 } 13136 13137 static struct perf_cpu_buf * 13138 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, 13139 int cpu, int map_key) 13140 { 13141 struct perf_cpu_buf *cpu_buf; 13142 char msg[STRERR_BUFSIZE]; 13143 int err; 13144 13145 cpu_buf = calloc(1, sizeof(*cpu_buf)); 13146 if (!cpu_buf) 13147 return ERR_PTR(-ENOMEM); 13148 13149 cpu_buf->pb = pb; 13150 cpu_buf->cpu = cpu; 13151 cpu_buf->map_key = map_key; 13152 13153 cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu, 13154 -1, PERF_FLAG_FD_CLOEXEC); 13155 if (cpu_buf->fd < 0) { 13156 err = -errno; 13157 pr_warn("failed to open perf buffer event on cpu #%d: %s\n", 13158 cpu, libbpf_strerror_r(err, msg, sizeof(msg))); 13159 goto error; 13160 } 13161 13162 cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size, 13163 PROT_READ | PROT_WRITE, MAP_SHARED, 13164 cpu_buf->fd, 0); 13165 if (cpu_buf->base == MAP_FAILED) { 13166 cpu_buf->base = NULL; 13167 err = -errno; 13168 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n", 13169 cpu, libbpf_strerror_r(err, msg, sizeof(msg))); 13170 goto error; 13171 } 13172 13173 if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) { 13174 err = -errno; 13175 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n", 13176 cpu, libbpf_strerror_r(err, msg, sizeof(msg))); 13177 goto error; 13178 } 13179 13180 return cpu_buf; 13181 13182 error: 13183 perf_buffer__free_cpu_buf(pb, cpu_buf); 13184 return (struct perf_cpu_buf *)ERR_PTR(err); 13185 } 13186 13187 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, 13188 struct perf_buffer_params *p); 13189 13190 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, 13191 perf_buffer_sample_fn sample_cb, 13192 perf_buffer_lost_fn lost_cb, 13193 void *ctx, 13194 const struct perf_buffer_opts *opts) 13195 { 13196 const size_t attr_sz = sizeof(struct perf_event_attr); 13197 struct perf_buffer_params p = {}; 13198 struct perf_event_attr attr; 13199 __u32 sample_period; 13200 13201 if (!OPTS_VALID(opts, perf_buffer_opts)) 13202 return libbpf_err_ptr(-EINVAL); 13203 13204 sample_period = OPTS_GET(opts, sample_period, 1); 13205 if (!sample_period) 13206 sample_period = 1; 13207 13208 memset(&attr, 0, attr_sz); 13209 attr.size = attr_sz; 13210 attr.config = PERF_COUNT_SW_BPF_OUTPUT; 13211 attr.type = PERF_TYPE_SOFTWARE; 13212 attr.sample_type = PERF_SAMPLE_RAW; 13213 attr.sample_period = sample_period; 13214 attr.wakeup_events = sample_period; 13215 13216 p.attr = &attr; 13217 p.sample_cb = sample_cb; 13218 p.lost_cb = lost_cb; 13219 p.ctx = ctx; 13220 13221 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); 13222 } 13223 13224 struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt, 13225 struct perf_event_attr *attr, 13226 perf_buffer_event_fn event_cb, void *ctx, 13227 const struct perf_buffer_raw_opts *opts) 13228 { 13229 struct perf_buffer_params p = {}; 13230 13231 if (!attr) 13232 return libbpf_err_ptr(-EINVAL); 13233 13234 if (!OPTS_VALID(opts, perf_buffer_raw_opts)) 13235 return libbpf_err_ptr(-EINVAL); 13236 
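	/* Illustrative sketch (not part of libbpf): perf_buffer__new_raw() is
	 * meant for callers that need full control over perf_event_attr and,
	 * optionally, over which PERF_EVENT_ARRAY slots get populated. A
	 * hypothetical caller might set it up roughly like this (my_event_cb
	 * and my_ctx are user-provided and not defined here):
	 *
	 *	struct perf_event_attr attr = {
	 *		.size = sizeof(attr),
	 *		.type = PERF_TYPE_SOFTWARE,
	 *		.config = PERF_COUNT_SW_BPF_OUTPUT,
	 *		.sample_type = PERF_SAMPLE_RAW,
	 *		.sample_period = 1,
	 *		.wakeup_events = 1,
	 *	};
	 *	LIBBPF_OPTS(perf_buffer_raw_opts, raw_opts);
	 *	struct perf_buffer *pb;
	 *
	 *	pb = perf_buffer__new_raw(map_fd, 64, &attr, my_event_cb, my_ctx, &raw_opts);
	 *
	 * Unlike the sample_cb/lost_cb pair set up by perf_buffer__new(),
	 * event_cb receives each struct perf_event_header unparsed.
	 */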
13237 p.attr = attr; 13238 p.event_cb = event_cb; 13239 p.ctx = ctx; 13240 p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0); 13241 p.cpus = OPTS_GET(opts, cpus, NULL); 13242 p.map_keys = OPTS_GET(opts, map_keys, NULL); 13243 13244 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); 13245 } 13246 13247 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, 13248 struct perf_buffer_params *p) 13249 { 13250 const char *online_cpus_file = "/sys/devices/system/cpu/online"; 13251 struct bpf_map_info map; 13252 char msg[STRERR_BUFSIZE]; 13253 struct perf_buffer *pb; 13254 bool *online = NULL; 13255 __u32 map_info_len; 13256 int err, i, j, n; 13257 13258 if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) { 13259 pr_warn("page count should be power of two, but is %zu\n", 13260 page_cnt); 13261 return ERR_PTR(-EINVAL); 13262 } 13263 13264 /* best-effort sanity checks */ 13265 memset(&map, 0, sizeof(map)); 13266 map_info_len = sizeof(map); 13267 err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len); 13268 if (err) { 13269 err = -errno; 13270 /* if BPF_OBJ_GET_INFO_BY_FD is supported, will return 13271 * -EBADFD, -EFAULT, or -E2BIG on real error 13272 */ 13273 if (err != -EINVAL) { 13274 pr_warn("failed to get map info for map FD %d: %s\n", 13275 map_fd, libbpf_strerror_r(err, msg, sizeof(msg))); 13276 return ERR_PTR(err); 13277 } 13278 pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n", 13279 map_fd); 13280 } else { 13281 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { 13282 pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", 13283 map.name); 13284 return ERR_PTR(-EINVAL); 13285 } 13286 } 13287 13288 pb = calloc(1, sizeof(*pb)); 13289 if (!pb) 13290 return ERR_PTR(-ENOMEM); 13291 13292 pb->event_cb = p->event_cb; 13293 pb->sample_cb = p->sample_cb; 13294 pb->lost_cb = p->lost_cb; 13295 pb->ctx = p->ctx; 13296 13297 pb->page_size = getpagesize(); 13298 pb->mmap_size = pb->page_size * page_cnt; 13299 pb->map_fd = map_fd; 13300 13301 pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); 13302 if (pb->epoll_fd < 0) { 13303 err = -errno; 13304 pr_warn("failed to create epoll instance: %s\n", 13305 libbpf_strerror_r(err, msg, sizeof(msg))); 13306 goto error; 13307 } 13308 13309 if (p->cpu_cnt > 0) { 13310 pb->cpu_cnt = p->cpu_cnt; 13311 } else { 13312 pb->cpu_cnt = libbpf_num_possible_cpus(); 13313 if (pb->cpu_cnt < 0) { 13314 err = pb->cpu_cnt; 13315 goto error; 13316 } 13317 if (map.max_entries && map.max_entries < pb->cpu_cnt) 13318 pb->cpu_cnt = map.max_entries; 13319 } 13320 13321 pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events)); 13322 if (!pb->events) { 13323 err = -ENOMEM; 13324 pr_warn("failed to allocate events: out of memory\n"); 13325 goto error; 13326 } 13327 pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs)); 13328 if (!pb->cpu_bufs) { 13329 err = -ENOMEM; 13330 pr_warn("failed to allocate buffers: out of memory\n"); 13331 goto error; 13332 } 13333 13334 err = parse_cpu_mask_file(online_cpus_file, &online, &n); 13335 if (err) { 13336 pr_warn("failed to get online CPU mask: %d\n", err); 13337 goto error; 13338 } 13339 13340 for (i = 0, j = 0; i < pb->cpu_cnt; i++) { 13341 struct perf_cpu_buf *cpu_buf; 13342 int cpu, map_key; 13343 13344 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i; 13345 map_key = p->cpu_cnt > 0 ? 
p->map_keys[i] : i; 13346 13347 /* in case the user didn't explicitly request particular CPUs to 13348 * be attached to, skip offline/not-present CPUs 13349 */ 13350 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu])) 13351 continue; 13352 13353 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key); 13354 if (IS_ERR(cpu_buf)) { 13355 err = PTR_ERR(cpu_buf); 13356 goto error; 13357 } 13358 13359 pb->cpu_bufs[j] = cpu_buf; 13360 13361 err = bpf_map_update_elem(pb->map_fd, &map_key, 13362 &cpu_buf->fd, 0); 13363 if (err) { 13364 err = -errno; 13365 pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n", 13366 cpu, map_key, cpu_buf->fd, 13367 libbpf_strerror_r(err, msg, sizeof(msg))); 13368 goto error; 13369 } 13370 13371 pb->events[j].events = EPOLLIN; 13372 pb->events[j].data.ptr = cpu_buf; 13373 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd, 13374 &pb->events[j]) < 0) { 13375 err = -errno; 13376 pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", 13377 cpu, cpu_buf->fd, 13378 libbpf_strerror_r(err, msg, sizeof(msg))); 13379 goto error; 13380 } 13381 j++; 13382 } 13383 pb->cpu_cnt = j; 13384 free(online); 13385 13386 return pb; 13387 13388 error: 13389 free(online); 13390 if (pb) 13391 perf_buffer__free(pb); 13392 return ERR_PTR(err); 13393 } 13394 13395 struct perf_sample_raw { 13396 struct perf_event_header header; 13397 uint32_t size; 13398 char data[]; 13399 }; 13400 13401 struct perf_sample_lost { 13402 struct perf_event_header header; 13403 uint64_t id; 13404 uint64_t lost; 13405 uint64_t sample_id; 13406 }; 13407 13408 static enum bpf_perf_event_ret 13409 perf_buffer__process_record(struct perf_event_header *e, void *ctx) 13410 { 13411 struct perf_cpu_buf *cpu_buf = ctx; 13412 struct perf_buffer *pb = cpu_buf->pb; 13413 void *data = e; 13414 13415 /* user wants full control over parsing perf event */ 13416 if (pb->event_cb) 13417 return pb->event_cb(pb->ctx, cpu_buf->cpu, e); 13418 13419 switch (e->type) { 13420 case PERF_RECORD_SAMPLE: { 13421 struct perf_sample_raw *s = data; 13422 13423 if (pb->sample_cb) 13424 pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size); 13425 break; 13426 } 13427 case PERF_RECORD_LOST: { 13428 struct perf_sample_lost *s = data; 13429 13430 if (pb->lost_cb) 13431 pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost); 13432 break; 13433 } 13434 default: 13435 pr_warn("unknown perf sample type %d\n", e->type); 13436 return LIBBPF_PERF_EVENT_ERROR; 13437 } 13438 return LIBBPF_PERF_EVENT_CONT; 13439 } 13440 13441 static int perf_buffer__process_records(struct perf_buffer *pb, 13442 struct perf_cpu_buf *cpu_buf) 13443 { 13444 enum bpf_perf_event_ret ret; 13445 13446 ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size, 13447 pb->page_size, &cpu_buf->buf, 13448 &cpu_buf->buf_size, 13449 perf_buffer__process_record, cpu_buf); 13450 if (ret != LIBBPF_PERF_EVENT_CONT) 13451 return ret; 13452 return 0; 13453 } 13454 13455 int perf_buffer__epoll_fd(const struct perf_buffer *pb) 13456 { 13457 return pb->epoll_fd; 13458 } 13459 13460 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) 13461 { 13462 int i, cnt, err; 13463 13464 cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms); 13465 if (cnt < 0) 13466 return -errno; 13467 13468 for (i = 0; i < cnt; i++) { 13469 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr; 13470 13471 err = perf_buffer__process_records(pb, cpu_buf); 13472 if (err) { 13473 pr_warn("error while processing records: %d\n", err); 13474 return libbpf_err(err); 13475 } 13476 } 13477 return cnt; 13478
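	/* Usage note (illustrative, not part of libbpf): the return value of
	 * perf_buffer__poll() is the number of CPU buffers that had data
	 * (0 on timeout), not the number of consumed samples; samples are
	 * delivered through the callbacks registered at creation time. A
	 * hypothetical polling loop might look like:
	 *
	 *	while (!stop) {
	 *		int n = perf_buffer__poll(pb, 100);
	 *
	 *		if (n < 0 && n != -EINTR)
	 *			break;
	 *	}
	 */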
} 13479 13480 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer 13481 * manager. 13482 */ 13483 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb) 13484 { 13485 return pb->cpu_cnt; 13486 } 13487 13488 /* 13489 * Return perf_event FD of a ring buffer in *buf_idx* slot of 13490 * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using 13491 * select()/poll()/epoll() Linux syscalls. 13492 */ 13493 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx) 13494 { 13495 struct perf_cpu_buf *cpu_buf; 13496 13497 if (buf_idx >= pb->cpu_cnt) 13498 return libbpf_err(-EINVAL); 13499 13500 cpu_buf = pb->cpu_bufs[buf_idx]; 13501 if (!cpu_buf) 13502 return libbpf_err(-ENOENT); 13503 13504 return cpu_buf->fd; 13505 } 13506 13507 int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size) 13508 { 13509 struct perf_cpu_buf *cpu_buf; 13510 13511 if (buf_idx >= pb->cpu_cnt) 13512 return libbpf_err(-EINVAL); 13513 13514 cpu_buf = pb->cpu_bufs[buf_idx]; 13515 if (!cpu_buf) 13516 return libbpf_err(-ENOENT); 13517 13518 *buf = cpu_buf->base; 13519 *buf_size = pb->mmap_size; 13520 return 0; 13521 } 13522 13523 /* 13524 * Consume data from perf ring buffer corresponding to slot *buf_idx* in 13525 * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to 13526 * consume, do nothing and return success. 13527 * Returns: 13528 * - 0 on success; 13529 * - <0 on failure. 13530 */ 13531 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx) 13532 { 13533 struct perf_cpu_buf *cpu_buf; 13534 13535 if (buf_idx >= pb->cpu_cnt) 13536 return libbpf_err(-EINVAL); 13537 13538 cpu_buf = pb->cpu_bufs[buf_idx]; 13539 if (!cpu_buf) 13540 return libbpf_err(-ENOENT); 13541 13542 return perf_buffer__process_records(pb, cpu_buf); 13543 } 13544 13545 int perf_buffer__consume(struct perf_buffer *pb) 13546 { 13547 int i, err; 13548 13549 for (i = 0; i < pb->cpu_cnt; i++) { 13550 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i]; 13551 13552 if (!cpu_buf) 13553 continue; 13554 13555 err = perf_buffer__process_records(pb, cpu_buf); 13556 if (err) { 13557 pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err); 13558 return libbpf_err(err); 13559 } 13560 } 13561 return 0; 13562 } 13563 13564 int bpf_program__set_attach_target(struct bpf_program *prog, 13565 int attach_prog_fd, 13566 const char *attach_func_name) 13567 { 13568 int btf_obj_fd = 0, btf_id = 0, err; 13569 13570 if (!prog || attach_prog_fd < 0) 13571 return libbpf_err(-EINVAL); 13572 13573 if (prog->obj->loaded) 13574 return libbpf_err(-EINVAL); 13575 13576 if (attach_prog_fd && !attach_func_name) { 13577 /* remember attach_prog_fd and let bpf_program__load() find 13578 * BTF ID during the program load 13579 */ 13580 prog->attach_prog_fd = attach_prog_fd; 13581 return 0; 13582 } 13583 13584 if (attach_prog_fd) { 13585 btf_id = libbpf_find_prog_btf_id(attach_func_name, 13586 attach_prog_fd); 13587 if (btf_id < 0) 13588 return libbpf_err(btf_id); 13589 } else { 13590 if (!attach_func_name) 13591 return libbpf_err(-EINVAL); 13592 13593 /* load btf_vmlinux, if not yet */ 13594 err = bpf_object__load_vmlinux_btf(prog->obj, true); 13595 if (err) 13596 return libbpf_err(err); 13597 err = find_kernel_btf_id(prog->obj, attach_func_name, 13598 prog->expected_attach_type, 13599 &btf_obj_fd, &btf_id); 13600 if (err) 13601 return libbpf_err(err); 13602 } 13603 13604 prog->attach_btf_id = btf_id; 13605 prog->attach_btf_obj_fd = btf_obj_fd; 13606 
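	/* Illustrative sketch (not part of libbpf): a typical caller sets the
	 * attach target after bpf_object__open_file() but before loading, e.g.
	 * to point a hypothetical freplace program "new_func" at a function in
	 * an already-loaded target program (target_prog_fd is assumed to be a
	 * valid program FD):
	 *
	 *	prog = bpf_object__find_program_by_name(obj, "new_func");
	 *	err = bpf_program__set_attach_target(prog, target_prog_fd, "orig_func");
	 *	if (!err)
	 *		err = bpf_object__load(obj);
	 *
	 * Passing attach_prog_fd == 0 together with a function name resolves
	 * the name against kernel (vmlinux or module) BTF instead.
	 */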
prog->attach_prog_fd = attach_prog_fd; 13607 return 0; 13608 } 13609 13610 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz) 13611 { 13612 int err = 0, n, len, start, end = -1; 13613 bool *tmp; 13614 13615 *mask = NULL; 13616 *mask_sz = 0; 13617 13618 /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ 13619 while (*s) { 13620 if (*s == ',' || *s == '\n') { 13621 s++; 13622 continue; 13623 } 13624 n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len); 13625 if (n <= 0 || n > 2) { 13626 pr_warn("Failed to get CPU range %s: %d\n", s, n); 13627 err = -EINVAL; 13628 goto cleanup; 13629 } else if (n == 1) { 13630 end = start; 13631 } 13632 if (start < 0 || start > end) { 13633 pr_warn("Invalid CPU range [%d,%d] in %s\n", 13634 start, end, s); 13635 err = -EINVAL; 13636 goto cleanup; 13637 } 13638 tmp = realloc(*mask, end + 1); 13639 if (!tmp) { 13640 err = -ENOMEM; 13641 goto cleanup; 13642 } 13643 *mask = tmp; 13644 memset(tmp + *mask_sz, 0, start - *mask_sz); 13645 memset(tmp + start, 1, end - start + 1); 13646 *mask_sz = end + 1; 13647 s += len; 13648 } 13649 if (!*mask_sz) { 13650 pr_warn("Empty CPU range\n"); 13651 return -EINVAL; 13652 } 13653 return 0; 13654 cleanup: 13655 free(*mask); 13656 *mask = NULL; 13657 return err; 13658 } 13659 13660 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) 13661 { 13662 int fd, err = 0, len; 13663 char buf[128]; 13664 13665 fd = open(fcpu, O_RDONLY | O_CLOEXEC); 13666 if (fd < 0) { 13667 err = -errno; 13668 pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err); 13669 return err; 13670 } 13671 len = read(fd, buf, sizeof(buf)); 13672 close(fd); 13673 if (len <= 0) { 13674 err = len ? -errno : -EINVAL; 13675 pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err); 13676 return err; 13677 } 13678 if (len >= sizeof(buf)) { 13679 pr_warn("CPU mask is too big in file %s\n", fcpu); 13680 return -E2BIG; 13681 } 13682 buf[len] = '\0'; 13683 13684 return parse_cpu_mask_str(buf, mask, mask_sz); 13685 } 13686 13687 int libbpf_num_possible_cpus(void) 13688 { 13689 static const char *fcpu = "/sys/devices/system/cpu/possible"; 13690 static int cpus; 13691 int err, n, i, tmp_cpus; 13692 bool *mask; 13693 13694 tmp_cpus = READ_ONCE(cpus); 13695 if (tmp_cpus > 0) 13696 return tmp_cpus; 13697 13698 err = parse_cpu_mask_file(fcpu, &mask, &n); 13699 if (err) 13700 return libbpf_err(err); 13701 13702 tmp_cpus = 0; 13703 for (i = 0; i < n; i++) { 13704 if (mask[i]) 13705 tmp_cpus++; 13706 } 13707 free(mask); 13708 13709 WRITE_ONCE(cpus, tmp_cpus); 13710 return tmp_cpus; 13711 } 13712 13713 static int populate_skeleton_maps(const struct bpf_object *obj, 13714 struct bpf_map_skeleton *maps, 13715 size_t map_cnt, size_t map_skel_sz) 13716 { 13717 int i; 13718 13719 for (i = 0; i < map_cnt; i++) { 13720 struct bpf_map_skeleton *map_skel = (void *)maps + i * map_skel_sz; 13721 struct bpf_map **map = map_skel->map; 13722 const char *name = map_skel->name; 13723 void **mmaped = map_skel->mmaped; 13724 13725 *map = bpf_object__find_map_by_name(obj, name); 13726 if (!*map) { 13727 pr_warn("failed to find skeleton map '%s'\n", name); 13728 return -ESRCH; 13729 } 13730 13731 /* externs shouldn't be pre-setup from user code */ 13732 if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG) 13733 *mmaped = (*map)->mmaped; 13734 } 13735 return 0; 13736 } 13737 13738 static int populate_skeleton_progs(const struct bpf_object *obj, 13739 struct bpf_prog_skeleton *progs, 13740 size_t prog_cnt, size_t prog_skel_sz) 13741 { 13742 int i; 
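	/* Illustrative note (not part of libbpf): the progs array here is
	 * bpftool-generated; each element records a program name plus pointers
	 * into the skeleton struct, along the lines of this hypothetical
	 * generated code:
	 *
	 *	s->progs[0].name = "handle_exec";
	 *	s->progs[0].prog = &obj->progs.handle_exec;
	 *	s->progs[0].link = &obj->links.handle_exec;
	 *
	 * Iterating by prog_skel_sz instead of sizeof(struct bpf_prog_skeleton)
	 * keeps this loop compatible with skeletons emitted by older or newer
	 * bpftool versions, whose per-element size may differ.
	 */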
13743 13744 for (i = 0; i < prog_cnt; i++) { 13745 struct bpf_prog_skeleton *prog_skel = (void *)progs + i * prog_skel_sz; 13746 struct bpf_program **prog = prog_skel->prog; 13747 const char *name = prog_skel->name; 13748 13749 *prog = bpf_object__find_program_by_name(obj, name); 13750 if (!*prog) { 13751 pr_warn("failed to find skeleton program '%s'\n", name); 13752 return -ESRCH; 13753 } 13754 } 13755 return 0; 13756 } 13757 13758 int bpf_object__open_skeleton(struct bpf_object_skeleton *s, 13759 const struct bpf_object_open_opts *opts) 13760 { 13761 DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts, 13762 .object_name = s->name, 13763 ); 13764 struct bpf_object *obj; 13765 int err; 13766 13767 /* Attempt to preserve opts->object_name, unless explicitly overridden 13768 * by the user. Overwriting the object name for skeletons is discouraged, 13769 * as it breaks global data maps, whose map names use the object name 13770 * as their prefix. When the skeleton is generated, 13771 * bpftool assumes that this name will stay the same. 13772 */ 13773 if (opts) { 13774 memcpy(&skel_opts, opts, sizeof(*opts)); 13775 if (!opts->object_name) 13776 skel_opts.object_name = s->name; 13777 } 13778 13779 obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts); 13780 err = libbpf_get_error(obj); 13781 if (err) { 13782 pr_warn("failed to initialize skeleton BPF object '%s': %d\n", 13783 s->name, err); 13784 return libbpf_err(err); 13785 } 13786 13787 *s->obj = obj; 13788 err = populate_skeleton_maps(obj, s->maps, s->map_cnt, s->map_skel_sz); 13789 if (err) { 13790 pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err); 13791 return libbpf_err(err); 13792 } 13793 13794 err = populate_skeleton_progs(obj, s->progs, s->prog_cnt, s->prog_skel_sz); 13795 if (err) { 13796 pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err); 13797 return libbpf_err(err); 13798 } 13799 13800 return 0; 13801 } 13802 13803 int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s) 13804 { 13805 int err, len, var_idx, i; 13806 const char *var_name; 13807 const struct bpf_map *map; 13808 struct btf *btf; 13809 __u32 map_type_id; 13810 const struct btf_type *map_type, *var_type; 13811 const struct bpf_var_skeleton *var_skel; 13812 struct btf_var_secinfo *var; 13813 13814 if (!s->obj) 13815 return libbpf_err(-EINVAL); 13816 13817 btf = bpf_object__btf(s->obj); 13818 if (!btf) { 13819 pr_warn("subskeletons require BTF at runtime (object %s)\n", 13820 bpf_object__name(s->obj)); 13821 return libbpf_err(-errno); 13822 } 13823 13824 err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt, s->map_skel_sz); 13825 if (err) { 13826 pr_warn("failed to populate subskeleton maps: %d\n", err); 13827 return libbpf_err(err); 13828 } 13829 13830 err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt, s->prog_skel_sz); 13831 if (err) { 13832 pr_warn("failed to populate subskeleton progs: %d\n", err); 13833 return libbpf_err(err); 13834 } 13835 13836 for (var_idx = 0; var_idx < s->var_cnt; var_idx++) { 13837 var_skel = (void *)s->vars + var_idx * s->var_skel_sz; 13838 map = *var_skel->map; 13839 map_type_id = bpf_map__btf_value_type_id(map); 13840 map_type = btf__type_by_id(btf, map_type_id); 13841 13842 if (!btf_is_datasec(map_type)) { 13843 pr_warn("type for map '%1$s' is not a datasec: %2$s\n", 13844 bpf_map__name(map), 13845 __btf_kind_str(btf_kind(map_type))); 13846 return libbpf_err(-EINVAL); 13847 } 13848 13849 len = btf_vlen(map_type); 13850 var = 
btf_var_secinfos(map_type); 13851 for (i = 0; i < len; i++, var++) { 13852 var_type = btf__type_by_id(btf, var->type); 13853 var_name = btf__name_by_offset(btf, var_type->name_off); 13854 if (strcmp(var_name, var_skel->name) == 0) { 13855 *var_skel->addr = map->mmaped + var->offset; 13856 break; 13857 } 13858 } 13859 } 13860 return 0; 13861 } 13862 13863 void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s) 13864 { 13865 if (!s) 13866 return; 13867 free(s->maps); 13868 free(s->progs); 13869 free(s->vars); 13870 free(s); 13871 } 13872 13873 int bpf_object__load_skeleton(struct bpf_object_skeleton *s) 13874 { 13875 int i, err; 13876 13877 err = bpf_object__load(*s->obj); 13878 if (err) { 13879 pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err); 13880 return libbpf_err(err); 13881 } 13882 13883 for (i = 0; i < s->map_cnt; i++) { 13884 struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; 13885 struct bpf_map *map = *map_skel->map; 13886 size_t mmap_sz = bpf_map_mmap_sz(map); 13887 int prot, map_fd = map->fd; 13888 void **mmaped = map_skel->mmaped; 13889 13890 if (!mmaped) 13891 continue; 13892 13893 if (!(map->def.map_flags & BPF_F_MMAPABLE)) { 13894 *mmaped = NULL; 13895 continue; 13896 } 13897 13898 if (map->def.type == BPF_MAP_TYPE_ARENA) { 13899 *mmaped = map->mmaped; 13900 continue; 13901 } 13902 13903 if (map->def.map_flags & BPF_F_RDONLY_PROG) 13904 prot = PROT_READ; 13905 else 13906 prot = PROT_READ | PROT_WRITE; 13907 13908 /* Remap anonymous mmap()-ed "map initialization image" as 13909 * a BPF map-backed mmap()-ed memory, but preserving the same 13910 * memory address. This will cause kernel to change process' 13911 * page table to point to a different piece of kernel memory, 13912 * but from userspace point of view memory address (and its 13913 * contents, being identical at this point) will stay the 13914 * same. This mapping will be released by bpf_object__close() 13915 * as per normal clean up procedure, so we don't need to worry 13916 * about it from skeleton's clean up perspective. 13917 */ 13918 *mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0); 13919 if (*mmaped == MAP_FAILED) { 13920 err = -errno; 13921 *mmaped = NULL; 13922 pr_warn("failed to re-mmap() map '%s': %d\n", 13923 bpf_map__name(map), err); 13924 return libbpf_err(err); 13925 } 13926 } 13927 13928 return 0; 13929 } 13930 13931 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) 13932 { 13933 int i, err; 13934 13935 for (i = 0; i < s->prog_cnt; i++) { 13936 struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz; 13937 struct bpf_program *prog = *prog_skel->prog; 13938 struct bpf_link **link = prog_skel->link; 13939 13940 if (!prog->autoload || !prog->autoattach) 13941 continue; 13942 13943 /* auto-attaching not supported for this program */ 13944 if (!prog->sec_def || !prog->sec_def->prog_attach_fn) 13945 continue; 13946 13947 /* if user already set the link manually, don't attempt auto-attach */ 13948 if (*link) 13949 continue; 13950 13951 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link); 13952 if (err) { 13953 pr_warn("prog '%s': failed to auto-attach: %d\n", 13954 bpf_program__name(prog), err); 13955 return libbpf_err(err); 13956 } 13957 13958 /* It's possible that for some SEC() definitions auto-attach 13959 * is supported in some cases (e.g., if definition completely 13960 * specifies target information), but is not in other cases. 13961 * SEC("uprobe") is one such case. 
If the user specified the target 13962 * binary and function name, such a BPF program can be 13963 * auto-attached. But if not, that shouldn't cause the skeleton's 13964 * attach to fail; it should just be skipped. 13965 * attach_fn signals such a case by returning 0 (no error) and 13966 * setting link to NULL. 13967 */ 13968 } 13969 13970 13971 for (i = 0; i < s->map_cnt; i++) { 13972 struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; 13973 struct bpf_map *map = *map_skel->map; 13974 struct bpf_link **link; 13975 13976 if (!map->autocreate || !map->autoattach) 13977 continue; 13978 13979 /* only struct_ops maps can be attached */ 13980 if (!bpf_map__is_struct_ops(map)) 13981 continue; 13982 13983 /* skeleton was created with an earlier version of bpftool; notify the user */ 13984 if (s->map_skel_sz < offsetofend(struct bpf_map_skeleton, link)) { 13985 pr_warn("map '%s': BPF skeleton version is old, skipping map auto-attachment...\n", 13986 bpf_map__name(map)); 13987 continue; 13988 } 13989 13990 link = map_skel->link; 13991 if (*link) 13992 continue; 13993 13994 *link = bpf_map__attach_struct_ops(map); 13995 if (!*link) { 13996 err = -errno; 13997 pr_warn("map '%s': failed to auto-attach: %d\n", bpf_map__name(map), err); 13998 return libbpf_err(err); 13999 } 14000 } 14001 14002 return 0; 14003 } 14004 14005 void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) 14006 { 14007 int i; 14008 14009 for (i = 0; i < s->prog_cnt; i++) { 14010 struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz; 14011 struct bpf_link **link = prog_skel->link; 14012 14013 bpf_link__destroy(*link); 14014 *link = NULL; 14015 } 14016 14017 if (s->map_skel_sz < sizeof(struct bpf_map_skeleton)) 14018 return; 14019 14020 for (i = 0; i < s->map_cnt; i++) { 14021 struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; 14022 struct bpf_link **link = map_skel->link; 14023 14024 if (link) { 14025 bpf_link__destroy(*link); 14026 *link = NULL; 14027 } 14028 } 14029 } 14030 14031 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) 14032 { 14033 if (!s) 14034 return; 14035 14036 bpf_object__detach_skeleton(s); 14037 if (s->obj) 14038 bpf_object__close(*s->obj); 14039 free(s->maps); 14040 free(s->progs); 14041 free(s); 14042 } 14043
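/* Illustrative end-to-end sketch (not part of libbpf): the skeleton APIs above
 * are normally driven through bpftool-generated wrappers rather than called
 * directly. Assuming a hypothetical skeleton header "my_prog.skel.h" produced
 * by "bpftool gen skeleton my_prog.bpf.o", a user program would typically do:
 *
 *	struct my_prog_bpf *skel;
 *
 *	skel = my_prog_bpf__open();
 *	if (!skel)
 *		return -errno;
 *	if (my_prog_bpf__load(skel))
 *		goto out;
 *	if (my_prog_bpf__attach(skel))
 *		goto out;
 *	... poll perf/ring buffers, read maps, etc. ...
 * out:
 *	my_prog_bpf__destroy(skel);
 *
 * The generated __open(), __load(), __attach() and __destroy() wrappers call
 * bpf_object__open_skeleton(), bpf_object__load_skeleton(),
 * bpf_object__attach_skeleton() and bpf_object__destroy_skeleton(),
 * respectively.
 */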