// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * Common eBPF ELF object loading operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
 * Copyright (C) 2015 Huawei Inc.
 * Copyright (C) 2017 Nicira, Inc.
 * Copyright (C) 2019 Isovalent, Inc.
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>

#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"
#include "bpf_gen_internal.h"
#include "zip.h"

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC 0xcafe4a11
#endif

#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"

#define BPF_INSN_SZ (sizeof(struct bpf_insn))

/* vsprintf() in __base_pr() uses nonliteral format string. It may break
 * compilation if user enables corresponding warning. Disable it explicitly.
 */
#pragma GCC diagnostic ignored "-Wformat-nonliteral"

#define __printf(a, b)	__attribute__((format(printf, a, b)))

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
static int map_set_def_max_entries(struct bpf_map *map);

static const char * const attach_type_name[] = {
	[BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress",
	[BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress",
	[BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create",
	[BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release",
	[BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops",
	[BPF_CGROUP_DEVICE] = "cgroup_device",
	[BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind",
	[BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind",
	[BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect",
	[BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect",
	[BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect",
	[BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind",
	[BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind",
	[BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername",
	[BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername",
	[BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername",
	[BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname",
	[BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname",
	[BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname",
	[BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg",
	[BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg",
	[BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg",
	[BPF_CGROUP_SYSCTL] = "cgroup_sysctl",
	[BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg",
	[BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg",
	[BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg",
	[BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt",
	[BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt",
	[BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
	[BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
	[BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
	[BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
	[BPF_LIRC_MODE2] = "lirc_mode2",
	[BPF_FLOW_DISSECTOR] = "flow_dissector",
	[BPF_TRACE_RAW_TP] = "trace_raw_tp",
	[BPF_TRACE_FENTRY] = "trace_fentry",
	[BPF_TRACE_FEXIT] = "trace_fexit",
	[BPF_MODIFY_RETURN] = "modify_return",
	[BPF_LSM_MAC] = "lsm_mac",
	[BPF_LSM_CGROUP] = "lsm_cgroup",
	[BPF_SK_LOOKUP] = "sk_lookup",
	[BPF_TRACE_ITER] = "trace_iter",
	[BPF_XDP_DEVMAP] = "xdp_devmap",
	[BPF_XDP_CPUMAP] = "xdp_cpumap",
	[BPF_XDP] = "xdp",
	[BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select",
	[BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate",
	[BPF_PERF_EVENT] = "perf_event",
	[BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
	[BPF_STRUCT_OPS] = "struct_ops",
	[BPF_NETFILTER] = "netfilter",
	[BPF_TCX_INGRESS] = "tcx_ingress",
	[BPF_TCX_EGRESS] = "tcx_egress",
	[BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi",
	[BPF_NETKIT_PRIMARY] = "netkit_primary",
	[BPF_NETKIT_PEER] = "netkit_peer",
};

static const char * const link_type_name[] = {
	[BPF_LINK_TYPE_UNSPEC] = "unspec",
	[BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
	[BPF_LINK_TYPE_TRACING] = "tracing",
	[BPF_LINK_TYPE_CGROUP] = "cgroup",
	[BPF_LINK_TYPE_ITER] = "iter",
	[BPF_LINK_TYPE_NETNS] = "netns",
	[BPF_LINK_TYPE_XDP] = "xdp",
	[BPF_LINK_TYPE_PERF_EVENT] = "perf_event",
	[BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
	[BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops",
	[BPF_LINK_TYPE_NETFILTER] = "netfilter",
	[BPF_LINK_TYPE_TCX] = "tcx",
	[BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi",
	[BPF_LINK_TYPE_NETKIT] = "netkit",
};

static const char * const map_type_name[] = {
	[BPF_MAP_TYPE_UNSPEC] = "unspec",
	[BPF_MAP_TYPE_HASH] = "hash",
	[BPF_MAP_TYPE_ARRAY] = "array",
	[BPF_MAP_TYPE_PROG_ARRAY] = "prog_array",
	[BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array",
	[BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash",
	[BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array",
	[BPF_MAP_TYPE_STACK_TRACE] = "stack_trace",
	[BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array",
	[BPF_MAP_TYPE_LRU_HASH] = "lru_hash",
	[BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash",
	[BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie",
	[BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
	[BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
	[BPF_MAP_TYPE_DEVMAP] = "devmap",
	[BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash",
	[BPF_MAP_TYPE_SOCKMAP] = "sockmap",
	[BPF_MAP_TYPE_CPUMAP] = "cpumap",
	[BPF_MAP_TYPE_XSKMAP] = "xskmap",
	[BPF_MAP_TYPE_SOCKHASH] = "sockhash",
	[BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
	[BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
	[BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage",
	[BPF_MAP_TYPE_QUEUE] = "queue",
	[BPF_MAP_TYPE_STACK] = "stack",
	[BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
	[BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
	[BPF_MAP_TYPE_RINGBUF] = "ringbuf",
	[BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
	[BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
	[BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
	[BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
	[BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage",
};

static const char * const prog_type_name[] = {
	[BPF_PROG_TYPE_UNSPEC] = "unspec",
	[BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
	[BPF_PROG_TYPE_KPROBE] = "kprobe",
	[BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
	[BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
	[BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
	[BPF_PROG_TYPE_XDP] = "xdp",
	[BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
	[BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
	[BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
	[BPF_PROG_TYPE_LWT_IN] = "lwt_in",
	[BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
	[BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
	[BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
	[BPF_PROG_TYPE_SK_SKB] = "sk_skb",
	[BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
	[BPF_PROG_TYPE_SK_MSG] = "sk_msg",
	[BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
	[BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
	[BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
	[BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
	[BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
	[BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
	[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
	[BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
	[BPF_PROG_TYPE_TRACING] = "tracing",
	[BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
	[BPF_PROG_TYPE_EXT] = "ext",
	[BPF_PROG_TYPE_LSM] = "lsm",
	[BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
	[BPF_PROG_TYPE_SYSCALL] = "syscall",
	[BPF_PROG_TYPE_NETFILTER] = "netfilter",
};

static int __base_pr(enum libbpf_print_level level, const char *format,
		     va_list args)
{
	if (level == LIBBPF_DEBUG)
		return 0;

	return vfprintf(stderr, format, args);
}

static libbpf_print_fn_t __libbpf_pr = __base_pr;

libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
{
	libbpf_print_fn_t old_print_fn;

	old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED);

	return old_print_fn;
}

__printf(2, 3)
void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
	va_list args;
	int old_errno;
	libbpf_print_fn_t print_fn;

	print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED);
	if (!print_fn)
		return;

	old_errno = errno;

	va_start(args, format);
	__libbpf_pr(level, format, args);
	va_end(args);

	errno = old_errno;
}

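/* Illustrative usage sketch (not part of libbpf itself): a caller can route
 * libbpf's logs through its own callback before invoking other APIs, e.g.:
 *
 *	static int my_print(enum libbpf_print_level level,
 *			    const char *fmt, va_list args)
 *	{
 *		if (level == LIBBPF_DEBUG)
 *			return 0;
 *		return vfprintf(stderr, fmt, args);
 *	}
 *
 *	libbpf_set_print(my_print);
 *
 * libbpf_set_print() returns the previously installed callback so it can be
 * restored later; passing NULL silences libbpf logging entirely, since
 * libbpf_print() above returns early when no callback is set.
 */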

static void pr_perm_msg(int err)
{
	struct rlimit limit;
	char buf[100];

	if (err != -EPERM || geteuid() != 0)
		return;

	err = getrlimit(RLIMIT_MEMLOCK, &limit);
	if (err)
		return;

	if (limit.rlim_cur == RLIM_INFINITY)
		return;

	if (limit.rlim_cur < 1024)
		snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
	else if (limit.rlim_cur < 1024*1024)
		snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
	else
		snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));

	pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
		buf);
}

#define STRERR_BUFSIZE 128

/* Copied from tools/perf/util/util.h */
#ifndef zfree
# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
#endif

#ifndef zclose
# define zclose(fd) ({			\
	int ___err = 0;			\
	if ((fd) >= 0)			\
		___err = close((fd));	\
	fd = -1;			\
	___err; })
#endif

static inline __u64 ptr_to_u64(const void *ptr)
{
	return (__u64) (unsigned long) ptr;
}

int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
{
	/* as of v1.0 libbpf_set_strict_mode() is a no-op */
	return 0;
}

__u32 libbpf_major_version(void)
{
	return LIBBPF_MAJOR_VERSION;
}

__u32 libbpf_minor_version(void)
{
	return LIBBPF_MINOR_VERSION;
}

const char *libbpf_version_string(void)
{
#define __S(X) #X
#define _S(X) __S(X)
	return "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
#undef _S
#undef __S
}

enum reloc_type {
	RELO_LD64,
	RELO_CALL,
	RELO_DATA,
	RELO_EXTERN_LD64,
	RELO_EXTERN_CALL,
	RELO_SUBPROG_ADDR,
	RELO_CORE,
};

struct reloc_desc {
	enum reloc_type type;
	int insn_idx;
	union {
		const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
		struct {
			int map_idx;
			int sym_off;
			int ext_idx;
		};
	};
};

/* stored as sec_def->cookie for all libbpf-supported SEC()s */
enum sec_def_flags {
	SEC_NONE = 0,
	/* expected_attach_type is optional, if kernel doesn't support that */
	SEC_EXP_ATTACH_OPT = 1,
	/* legacy, only used by libbpf_get_type_names() and
	 * libbpf_attach_type_by_name(), not used by libbpf itself at all.
	 * This used to be associated with cgroup (and few other) BPF programs
	 * that were attachable through BPF_PROG_ATTACH command. Pretty
	 * meaningless nowadays, though.
	 */
	SEC_ATTACHABLE = 2,
	SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
	/* attachment target is specified through BTF ID in either kernel or
	 * other BPF program's BTF object
	 */
	SEC_ATTACH_BTF = 4,
	/* BPF program type allows sleeping/blocking in kernel */
	SEC_SLEEPABLE = 8,
	/* BPF program support non-linear XDP buffer */
	SEC_XDP_FRAGS = 16,
	/* Setup proper attach type for usdt probes. */
	SEC_USDT = 32,
};

struct bpf_sec_def {
	char *sec;
	enum bpf_prog_type prog_type;
	enum bpf_attach_type expected_attach_type;
	long cookie;
	int handler_id;

	libbpf_prog_setup_fn_t prog_setup_fn;
	libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
	libbpf_prog_attach_fn_t prog_attach_fn;
};

/*
 * bpf_prog should be a better name but it has been used in
 * linux/filter.h.
 */
struct bpf_program {
	char *name;
	char *sec_name;
	size_t sec_idx;
	const struct bpf_sec_def *sec_def;
	/* this program's instruction offset (in number of instructions)
	 * within its containing ELF section
	 */
	size_t sec_insn_off;
	/* number of original instructions in ELF section belonging to this
	 * program, not taking into account subprogram instructions possible
	 * appended later during relocation
	 */
	size_t sec_insn_cnt;
	/* Offset (in number of instructions) of the start of instruction
	 * belonging to this BPF program within its containing main BPF
	 * program. For the entry-point (main) BPF program, this is always
	 * zero. For a sub-program, this gets reset before each of main BPF
	 * programs are processed and relocated and is used to determined
	 * whether sub-program was already appended to the main program, and
	 * if yes, at which instruction offset.
	 */
	size_t sub_insn_off;

	/* instructions that belong to BPF program; insns[0] is located at
	 * sec_insn_off instruction within its ELF section in ELF file, so
	 * when mapping ELF file instruction index to the local instruction,
	 * one needs to subtract sec_insn_off; and vice versa.
	 */
	struct bpf_insn *insns;
	/* actual number of instruction in this BPF program's image; for
	 * entry-point BPF programs this includes the size of main program
	 * itself plus all the used sub-programs, appended at the end
	 */
	size_t insns_cnt;

	struct reloc_desc *reloc_desc;
	int nr_reloc;

	/* BPF verifier log settings */
	char *log_buf;
	size_t log_size;
	__u32 log_level;

	struct bpf_object *obj;

	int fd;
	bool autoload;
	bool autoattach;
	bool sym_global;
	bool mark_btf_static;
	enum bpf_prog_type type;
	enum bpf_attach_type expected_attach_type;
	int exception_cb_idx;

	int prog_ifindex;
	__u32 attach_btf_obj_fd;
	__u32 attach_btf_id;
	__u32 attach_prog_fd;

	void *func_info;
	__u32 func_info_rec_size;
	__u32 func_info_cnt;

	void *line_info;
	__u32 line_info_rec_size;
	__u32 line_info_cnt;
	__u32 prog_flags;
};

struct bpf_struct_ops {
	const char *tname;
	const struct btf_type *type;
	struct bpf_program **progs;
	__u32 *kern_func_off;
	/* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
	void *data;
	/* e.g. struct bpf_struct_ops_tcp_congestion_ops in
	 * btf_vmlinux's format.
	 * struct bpf_struct_ops_tcp_congestion_ops {
	 *	[... some other kernel fields ...]
	 *	struct tcp_congestion_ops data;
	 * }
	 * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
	 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
	 * from "data".
	 */
	void *kern_vdata;
	__u32 type_id;
};

#define DATA_SEC ".data"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
#define STRUCT_OPS_LINK_SEC ".struct_ops.link"

enum libbpf_map_type {
	LIBBPF_MAP_UNSPEC,
	LIBBPF_MAP_DATA,
	LIBBPF_MAP_BSS,
	LIBBPF_MAP_RODATA,
	LIBBPF_MAP_KCONFIG,
};

struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
	unsigned int map_flags;
};

struct bpf_map {
	struct bpf_object *obj;
	char *name;
	/* real_name is defined for special internal maps (.rodata*,
	 * .data*, .bss, .kconfig) and preserves their original ELF section
	 * name. This is important to be able to find corresponding BTF
	 * DATASEC information.
	 */
	char *real_name;
	int fd;
	int sec_idx;
	size_t sec_offset;
	int map_ifindex;
	int inner_map_fd;
	struct bpf_map_def def;
	__u32 numa_node;
	__u32 btf_var_idx;
	int mod_btf_fd;
	__u32 btf_key_type_id;
	__u32 btf_value_type_id;
	__u32 btf_vmlinux_value_type_id;
	enum libbpf_map_type libbpf_type;
	void *mmaped;
	struct bpf_struct_ops *st_ops;
	struct bpf_map *inner_map;
	void **init_slots;
	int init_slots_sz;
	char *pin_path;
	bool pinned;
	bool reused;
	bool autocreate;
	__u64 map_extra;
};

enum extern_type {
	EXT_UNKNOWN,
	EXT_KCFG,
	EXT_KSYM,
};

enum kcfg_type {
	KCFG_UNKNOWN,
	KCFG_CHAR,
	KCFG_BOOL,
	KCFG_INT,
	KCFG_TRISTATE,
	KCFG_CHAR_ARR,
};

struct extern_desc {
	enum extern_type type;
	int sym_idx;
	int btf_id;
	int sec_btf_id;
	const char *name;
	char *essent_name;
	bool is_set;
	bool is_weak;
	union {
		struct {
			enum kcfg_type type;
			int sz;
			int align;
			int data_off;
			bool is_signed;
		} kcfg;
		struct {
			unsigned long long addr;

			/* target btf_id of the corresponding kernel var. */
			int kernel_btf_obj_fd;
			int kernel_btf_id;

			/* local btf_id of the ksym extern's type. */
			__u32 type_id;
			/* BTF fd index to be patched in for insn->off, this is
			 * 0 for vmlinux BTF, index in obj->fd_array for module
			 * BTF
			 */
			__s16 btf_fd_idx;
		} ksym;
	};
};

struct module_btf {
	struct btf *btf;
	char *name;
	__u32 id;
	int fd;
	int fd_array_idx;
};

enum sec_type {
	SEC_UNUSED = 0,
	SEC_RELO,
	SEC_BSS,
	SEC_DATA,
	SEC_RODATA,
};

struct elf_sec_desc {
	enum sec_type sec_type;
	Elf64_Shdr *shdr;
	Elf_Data *data;
};

struct elf_state {
	int fd;
	const void *obj_buf;
	size_t obj_buf_sz;
	Elf *elf;
	Elf64_Ehdr *ehdr;
	Elf_Data *symbols;
	Elf_Data *st_ops_data;
	Elf_Data *st_ops_link_data;
	size_t shstrndx; /* section index for section name strings */
	size_t strtabidx;
	struct elf_sec_desc *secs;
	size_t sec_cnt;
	int btf_maps_shndx;
	__u32 btf_maps_sec_btf_id;
	int text_shndx;
	int symbols_shndx;
	int st_ops_shndx;
	int st_ops_link_shndx;
};

struct usdt_manager;

struct bpf_object {
	char name[BPF_OBJ_NAME_LEN];
	char license[64];
	__u32 kern_version;

	struct bpf_program *programs;
	size_t nr_programs;
	struct bpf_map *maps;
	size_t nr_maps;
	size_t maps_cap;

	char *kconfig;
	struct extern_desc *externs;
	int nr_extern;
	int kconfig_map_idx;

	bool loaded;
	bool has_subcalls;
	bool has_rodata;

	struct bpf_gen *gen_loader;

	/* Information when doing ELF related work. Only valid if efile.elf is not NULL */
	struct elf_state efile;

	struct btf *btf;
	struct btf_ext *btf_ext;

	/* Parse and load BTF vmlinux if any of the programs in the object need
	 * it at load time.
	 */
	struct btf *btf_vmlinux;
	/* Path to the custom BTF to be used for BPF CO-RE relocations as an
	 * override for vmlinux BTF.
	 */
	char *btf_custom_path;
	/* vmlinux BTF override for CO-RE relocations */
	struct btf *btf_vmlinux_override;
	/* Lazily initialized kernel module BTFs */
	struct module_btf *btf_modules;
	bool btf_modules_loaded;
	size_t btf_module_cnt;
	size_t btf_module_cap;

	/* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
	char *log_buf;
	size_t log_size;
	__u32 log_level;

	int *fd_array;
	size_t fd_array_cap;
	size_t fd_array_cnt;

	struct usdt_manager *usdt_man;

	struct kern_feature_cache *feat_cache;
	char *token_path;
	int token_fd;

	char path[];
};

static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);

void bpf_program__unload(struct bpf_program *prog)
{
	if (!prog)
		return;

	zclose(prog->fd);

	zfree(&prog->func_info);
	zfree(&prog->line_info);
}

static void bpf_program__exit(struct bpf_program *prog)
{
	if (!prog)
		return;

	bpf_program__unload(prog);
	zfree(&prog->name);
	zfree(&prog->sec_name);
	zfree(&prog->insns);
	zfree(&prog->reloc_desc);

	prog->nr_reloc = 0;
	prog->insns_cnt = 0;
	prog->sec_idx = -1;
}

static bool insn_is_subprog_call(const struct bpf_insn *insn)
{
	return BPF_CLASS(insn->code) == BPF_JMP &&
	       BPF_OP(insn->code) == BPF_CALL &&
	       BPF_SRC(insn->code) == BPF_K &&
	       insn->src_reg == BPF_PSEUDO_CALL &&
	       insn->dst_reg == 0 &&
	       insn->off == 0;
}

static bool is_call_insn(const struct bpf_insn *insn)
{
	return insn->code == (BPF_JMP | BPF_CALL);
}

static bool insn_is_pseudo_func(struct bpf_insn *insn)
{
	return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}

static int
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
		      const char *name, size_t sec_idx, const char *sec_name,
		      size_t sec_off, void *insn_data, size_t insn_data_sz)
{
	if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
		pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
			sec_name, name, sec_off, insn_data_sz);
		return -EINVAL;
	}

	memset(prog, 0, sizeof(*prog));
	prog->obj = obj;

	prog->sec_idx = sec_idx;
	prog->sec_insn_off = sec_off / BPF_INSN_SZ;
	prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
	/* insns_cnt can later be increased by appending used subprograms */
	prog->insns_cnt = prog->sec_insn_cnt;

	prog->type = BPF_PROG_TYPE_UNSPEC;
	prog->fd = -1;
	prog->exception_cb_idx = -1;

	/* libbpf's convention for SEC("?abc...") is that it's just like
	 * SEC("abc...") but the corresponding bpf_program starts out with
	 * autoload set to false.
	 */
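	/* Illustrative example (not from this file): a program defined as
	 *
	 *	SEC("?kprobe/do_unlinkat")
	 *	int handle_unlink(struct pt_regs *ctx) { ... }
	 *
	 * starts out with autoload disabled; an application can opt it back
	 * in, e.g. with bpf_program__set_autoload(prog, true), before
	 * bpf_object__load().
	 */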
	if (sec_name[0] == '?') {
		prog->autoload = false;
		/* from now on forget there was ? in section name */
		sec_name++;
	} else {
		prog->autoload = true;
	}

	prog->autoattach = true;

	/* inherit object's log_level */
	prog->log_level = obj->log_level;

	prog->sec_name = strdup(sec_name);
	if (!prog->sec_name)
		goto errout;

	prog->name = strdup(name);
	if (!prog->name)
		goto errout;

	prog->insns = malloc(insn_data_sz);
	if (!prog->insns)
		goto errout;
	memcpy(prog->insns, insn_data, insn_data_sz);

	return 0;
errout:
	pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
	bpf_program__exit(prog);
	return -ENOMEM;
}

static int
bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
			 const char *sec_name, int sec_idx)
{
	Elf_Data *symbols = obj->efile.symbols;
	struct bpf_program *prog, *progs;
	void *data = sec_data->d_buf;
	size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
	int nr_progs, err, i;
	const char *name;
	Elf64_Sym *sym;

	progs = obj->programs;
	nr_progs = obj->nr_programs;
	nr_syms = symbols->d_size / sizeof(Elf64_Sym);

	for (i = 0; i < nr_syms; i++) {
		sym = elf_sym_by_idx(obj, i);

		if (sym->st_shndx != sec_idx)
			continue;
		if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
			continue;

		prog_sz = sym->st_size;
		sec_off = sym->st_value;

		name = elf_sym_str(obj, sym->st_name);
		if (!name) {
			pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
				sec_name, sec_off);
			return -LIBBPF_ERRNO__FORMAT;
		}

		if (sec_off + prog_sz > sec_sz) {
			pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
				sec_name, sec_off);
			return -LIBBPF_ERRNO__FORMAT;
		}

		if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
			pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
			return -ENOTSUP;
		}

		pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
			 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);

		progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
		if (!progs) {
			/*
			 * In this case the original obj->programs
			 * is still valid, so don't need special treat for
			 * bpf_close_object().
			 */
			pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
				sec_name, name);
			return -ENOMEM;
		}
		obj->programs = progs;

		prog = &progs[nr_progs];

		err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
					    sec_off, data + sec_off, prog_sz);
		if (err)
			return err;

		if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL)
			prog->sym_global = true;

		/* if function is a global/weak symbol, but has restricted
		 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
		 * as static to enable more permissive BPF verification mode
		 * with more outside context available to BPF verifier
		 */
		if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
		    || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
			prog->mark_btf_static = true;

		nr_progs++;
		obj->nr_programs = nr_progs;
	}

	return 0;
}

static const struct btf_member *
find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
{
	struct btf_member *m;
	int i;

	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
		if (btf_member_bit_offset(t, i) == bit_offset)
			return m;
	}

	return NULL;
}

static const struct btf_member *
find_member_by_name(const struct btf *btf, const struct btf_type *t,
		    const char *name)
{
	struct btf_member *m;
	int i;

	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
		if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
			return m;
	}

	return NULL;
}

static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
			    __u16 kind, struct btf **res_btf,
			    struct module_btf **res_mod_btf);

#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
				   const char *name, __u32 kind);

static int
find_struct_ops_kern_types(struct bpf_object *obj, const char *tname,
			   struct module_btf **mod_btf,
			   const struct btf_type **type, __u32 *type_id,
			   const struct btf_type **vtype, __u32 *vtype_id,
			   const struct btf_member **data_member)
{
	const struct btf_type *kern_type, *kern_vtype;
	const struct btf_member *kern_data_member;
	struct btf *btf;
	__s32 kern_vtype_id, kern_type_id;
	__u32 i;

	kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT,
					&btf, mod_btf);
	if (kern_type_id < 0) {
		pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
			tname);
		return kern_type_id;
	}
	kern_type = btf__type_by_id(btf, kern_type_id);

	/* Find the corresponding "map_value" type that will be used
	 * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example,
	 * find "struct bpf_struct_ops_tcp_congestion_ops" from the
	 * btf_vmlinux.
	 */
	kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
						tname, BTF_KIND_STRUCT);
	if (kern_vtype_id < 0) {
		pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
			STRUCT_OPS_VALUE_PREFIX, tname);
		return kern_vtype_id;
	}
	kern_vtype = btf__type_by_id(btf, kern_vtype_id);

	/* Find "struct tcp_congestion_ops" from
	 * struct bpf_struct_ops_tcp_congestion_ops {
	 *	[ ... ]
	 *	struct tcp_congestion_ops data;
	 * }
	 */
	kern_data_member = btf_members(kern_vtype);
	for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
		if (kern_data_member->type == kern_type_id)
			break;
	}
	if (i == btf_vlen(kern_vtype)) {
		pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
			tname, STRUCT_OPS_VALUE_PREFIX, tname);
		return -EINVAL;
	}

	*type = kern_type;
	*type_id = kern_type_id;
	*vtype = kern_vtype;
	*vtype_id = kern_vtype_id;
	*data_member = kern_data_member;

	return 0;
}

static bool bpf_map__is_struct_ops(const struct bpf_map *map)
{
	return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
}

/* Init the map's fields that depend on kern_btf */
static int bpf_map__init_kern_struct_ops(struct bpf_map *map)
{
	const struct btf_member *member, *kern_member, *kern_data_member;
	const struct btf_type *type, *kern_type, *kern_vtype;
	__u32 i, kern_type_id, kern_vtype_id, kern_data_off;
	struct bpf_object *obj = map->obj;
	const struct btf *btf = obj->btf;
	struct bpf_struct_ops *st_ops;
	const struct btf *kern_btf;
	struct module_btf *mod_btf;
	void *data, *kern_data;
	const char *tname;
	int err;

	st_ops = map->st_ops;
	type = st_ops->type;
	tname = st_ops->tname;
	err = find_struct_ops_kern_types(obj, tname, &mod_btf,
					 &kern_type, &kern_type_id,
					 &kern_vtype, &kern_vtype_id,
					 &kern_data_member);
	if (err)
		return err;

	kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux;

	pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
		 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);

	map->mod_btf_fd = mod_btf ? mod_btf->fd : -1;
	map->def.value_size = kern_vtype->size;
	map->btf_vmlinux_value_type_id = kern_vtype_id;

	st_ops->kern_vdata = calloc(1, kern_vtype->size);
	if (!st_ops->kern_vdata)
		return -ENOMEM;

	data = st_ops->data;
	kern_data_off = kern_data_member->offset / 8;
	kern_data = st_ops->kern_vdata + kern_data_off;

	member = btf_members(type);
	for (i = 0; i < btf_vlen(type); i++, member++) {
		const struct btf_type *mtype, *kern_mtype;
		__u32 mtype_id, kern_mtype_id;
		void *mdata, *kern_mdata;
		__s64 msize, kern_msize;
		__u32 moff, kern_moff;
		__u32 kern_member_idx;
		const char *mname;

		mname = btf__name_by_offset(btf, member->name_off);
		kern_member = find_member_by_name(kern_btf, kern_type, mname);
		if (!kern_member) {
			pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
				map->name, mname);
			return -ENOTSUP;
		}

		kern_member_idx = kern_member - btf_members(kern_type);
		if (btf_member_bitfield_size(type, i) ||
		    btf_member_bitfield_size(kern_type, kern_member_idx)) {
			pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
				map->name, mname);
			return -ENOTSUP;
		}

		moff = member->offset / 8;
		kern_moff = kern_member->offset / 8;

		mdata = data + moff;
		kern_mdata = kern_data + kern_moff;

		mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
		kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
						    &kern_mtype_id);
		if (BTF_INFO_KIND(mtype->info) !=
		    BTF_INFO_KIND(kern_mtype->info)) {
			pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
				map->name, mname, BTF_INFO_KIND(mtype->info),
				BTF_INFO_KIND(kern_mtype->info));
			return -ENOTSUP;
		}

		if (btf_is_ptr(mtype)) {
			struct bpf_program *prog;

			prog = st_ops->progs[i];
			if (!prog)
				continue;

			kern_mtype = skip_mods_and_typedefs(kern_btf,
							    kern_mtype->type,
							    &kern_mtype_id);

			/* mtype->type must be a func_proto which was
			 * guaranteed in bpf_object__collect_st_ops_relos(),
			 * so only check kern_mtype for func_proto here.
			 */
			if (!btf_is_func_proto(kern_mtype)) {
				pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
					map->name, mname);
				return -ENOTSUP;
			}

			if (mod_btf)
				prog->attach_btf_obj_fd = mod_btf->fd;
			prog->attach_btf_id = kern_type_id;
			prog->expected_attach_type = kern_member_idx;

			st_ops->kern_func_off[i] = kern_data_off + kern_moff;

			pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
				 map->name, mname, prog->name, moff,
				 kern_moff);

			continue;
		}

		msize = btf__resolve_size(btf, mtype_id);
		kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
		if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
			pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
				map->name, mname, (ssize_t)msize,
				(ssize_t)kern_msize);
			return -ENOTSUP;
		}

		pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
			 map->name, mname, (unsigned int)msize,
			 moff, kern_moff);
		memcpy(kern_mdata, mdata, msize);
	}

	return 0;
}

static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
{
	struct bpf_map *map;
	size_t i;
	int err;

	for (i = 0; i < obj->nr_maps; i++) {
		map = &obj->maps[i];

		if (!bpf_map__is_struct_ops(map))
			continue;

		err = bpf_map__init_kern_struct_ops(map);
		if (err)
			return err;
	}

	return 0;
}

static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
				int shndx, Elf_Data *data, __u32 map_flags)
{
	const struct btf_type *type, *datasec;
	const struct btf_var_secinfo *vsi;
	struct bpf_struct_ops *st_ops;
	const char *tname, *var_name;
	__s32 type_id, datasec_id;
	const struct btf *btf;
	struct bpf_map *map;
	__u32 i;

	if (shndx == -1)
		return 0;

	btf = obj->btf;
	datasec_id = btf__find_by_name_kind(btf, sec_name,
					    BTF_KIND_DATASEC);
	if (datasec_id < 0) {
		pr_warn("struct_ops init: DATASEC %s not found\n",
			sec_name);
		return -EINVAL;
	}

	datasec = btf__type_by_id(btf, datasec_id);
	vsi = btf_var_secinfos(datasec);
	for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
		type = btf__type_by_id(obj->btf, vsi->type);
		var_name = btf__name_by_offset(obj->btf, type->name_off);

		type_id = btf__resolve_type(obj->btf, vsi->type);
		if (type_id < 0) {
			pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
				vsi->type, sec_name);
			return -EINVAL;
		}

		type = btf__type_by_id(obj->btf, type_id);
		tname = btf__name_by_offset(obj->btf, type->name_off);
		if (!tname[0]) {
			pr_warn("struct_ops init: anonymous type is not supported\n");
			return -ENOTSUP;
		}
		if (!btf_is_struct(type)) {
			pr_warn("struct_ops init: %s is not a struct\n", tname);
			return -EINVAL;
		}

		map = bpf_object__add_map(obj);
		if (IS_ERR(map))
			return PTR_ERR(map);

		map->sec_idx = shndx;
		map->sec_offset = vsi->offset;
		map->name = strdup(var_name);
		if (!map->name)
			return -ENOMEM;

		map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
		map->def.key_size = sizeof(int);
		map->def.value_size = type->size;
		map->def.max_entries = 1;
		map->def.map_flags = map_flags;

		map->st_ops = calloc(1, sizeof(*map->st_ops));
		if (!map->st_ops)
			return -ENOMEM;
		st_ops = map->st_ops;
		st_ops->data = malloc(type->size);
		st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
		st_ops->kern_func_off = malloc(btf_vlen(type) *
					       sizeof(*st_ops->kern_func_off));
		if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
			return -ENOMEM;

		if (vsi->offset + type->size > data->d_size) {
			pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
				var_name, sec_name);
			return -EINVAL;
		}

		memcpy(st_ops->data,
		       data->d_buf + vsi->offset,
		       type->size);
		st_ops->tname = tname;
		st_ops->type = type;
		st_ops->type_id = type_id;

		pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
			 tname, type_id, var_name, vsi->offset);
	}

	return 0;
}

static int bpf_object_init_struct_ops(struct bpf_object *obj)
{
	int err;

	err = init_struct_ops_maps(obj, STRUCT_OPS_SEC, obj->efile.st_ops_shndx,
				   obj->efile.st_ops_data, 0);
	err = err ?: init_struct_ops_maps(obj, STRUCT_OPS_LINK_SEC,
					  obj->efile.st_ops_link_shndx,
					  obj->efile.st_ops_link_data,
					  BPF_F_LINK);
	return err;
}

static struct bpf_object *bpf_object__new(const char *path,
					  const void *obj_buf,
					  size_t obj_buf_sz,
					  const char *obj_name)
{
	struct bpf_object *obj;
	char *end;

	obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
	if (!obj) {
		pr_warn("alloc memory failed for %s\n", path);
		return ERR_PTR(-ENOMEM);
	}

	strcpy(obj->path, path);
	if (obj_name) {
		libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
	} else {
		/* Using basename() GNU version which doesn't modify arg. */
		libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
		end = strchr(obj->name, '.');
		if (end)
			*end = 0;
	}

	obj->efile.fd = -1;
	/*
	 * Caller of this function should also call
	 * bpf_object__elf_finish() after data collection to return
	 * obj_buf to user. If not, we should duplicate the buffer to
	 * avoid user freeing them before elf finish.
	 */
	obj->efile.obj_buf = obj_buf;
	obj->efile.obj_buf_sz = obj_buf_sz;
	obj->efile.btf_maps_shndx = -1;
	obj->efile.st_ops_shndx = -1;
	obj->efile.st_ops_link_shndx = -1;
	obj->kconfig_map_idx = -1;

	obj->kern_version = get_kernel_version();
	obj->loaded = false;

	return obj;
}

static void bpf_object__elf_finish(struct bpf_object *obj)
{
	if (!obj->efile.elf)
		return;

	elf_end(obj->efile.elf);
	obj->efile.elf = NULL;
	obj->efile.symbols = NULL;
	obj->efile.st_ops_data = NULL;
	obj->efile.st_ops_link_data = NULL;

	zfree(&obj->efile.secs);
	obj->efile.sec_cnt = 0;
	zclose(obj->efile.fd);
	obj->efile.obj_buf = NULL;
	obj->efile.obj_buf_sz = 0;
}

static int bpf_object__elf_init(struct bpf_object *obj)
{
	Elf64_Ehdr *ehdr;
	int err = 0;
	Elf *elf;

	if (obj->efile.elf) {
		pr_warn("elf: init internal error\n");
		return -LIBBPF_ERRNO__LIBELF;
	}

	if (obj->efile.obj_buf_sz > 0) {
		/* obj_buf should have been validated by bpf_object__open_mem(). */
		elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
	} else {
		obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
		if (obj->efile.fd < 0) {
			char errmsg[STRERR_BUFSIZE], *cp;

			err = -errno;
			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
			pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
			return err;
		}

		elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
	}

	if (!elf) {
		pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__LIBELF;
		goto errout;
	}

	obj->efile.elf = elf;

	if (elf_kind(elf) != ELF_K_ELF) {
		err = -LIBBPF_ERRNO__FORMAT;
		pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
		goto errout;
	}

	if (gelf_getclass(elf) != ELFCLASS64) {
		err = -LIBBPF_ERRNO__FORMAT;
		pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
		goto errout;
	}

	obj->efile.ehdr = ehdr = elf64_getehdr(elf);
	if (!obj->efile.ehdr) {
		pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
		pr_warn("elf: failed to get section names section index for %s: %s\n",
			obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	/* ELF is corrupted/truncated, avoid calling elf_strptr. */
	if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
		pr_warn("elf: failed to get section names strings from %s: %s\n",
			obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	/* Old LLVM set e_machine to EM_NONE */
	if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
		pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	return 0;
errout:
	bpf_object__elf_finish(obj);
	return err;
}

static int bpf_object__check_endianness(struct bpf_object *obj)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
		return 0;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
		return 0;
#else
# error "Unrecognized __BYTE_ORDER__"
#endif
	pr_warn("elf: endianness mismatch in %s.\n", obj->path);
	return -LIBBPF_ERRNO__ENDIAN;
}

static int
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
{
	if (!data) {
		pr_warn("invalid license section in %s\n", obj->path);
		return -LIBBPF_ERRNO__FORMAT;
	}
	/* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't
	 * go over allowed ELF data section buffer
	 */
	libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
	pr_debug("license of %s is %s\n", obj->path, obj->license);
	return 0;
}

static int
bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
{
	__u32 kver;

	if (!data || size != sizeof(kver)) {
		pr_warn("invalid kver section in %s\n", obj->path);
		return -LIBBPF_ERRNO__FORMAT;
	}
	memcpy(&kver, data, sizeof(kver));
	obj->kern_version = kver;
	pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
	return 0;
}

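/* Illustrative note (not part of libbpf): the "license" and legacy "version"
 * ELF sections parsed above typically come from declarations like these in
 * the BPF object's source code:
 *
 *	char LICENSE[] SEC("license") = "Dual BSD/GPL";
 *	__u32 _version SEC("version") = LINUX_VERSION_CODE;   (legacy, optional)
 */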

static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
{
	if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
	    type == BPF_MAP_TYPE_HASH_OF_MAPS)
		return true;
	return false;
}

static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
{
	Elf_Data *data;
	Elf_Scn *scn;

	if (!name)
		return -EINVAL;

	scn = elf_sec_by_name(obj, name);
	data = elf_sec_data(obj, scn);
	if (data) {
		*size = data->d_size;
		return 0; /* found it */
	}

	return -ENOENT;
}

static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name)
{
	Elf_Data *symbols = obj->efile.symbols;
	const char *sname;
	size_t si;

	for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
		Elf64_Sym *sym = elf_sym_by_idx(obj, si);

		if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
			continue;

		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
		    ELF64_ST_BIND(sym->st_info) != STB_WEAK)
			continue;

		sname = elf_sym_str(obj, sym->st_name);
		if (!sname) {
			pr_warn("failed to get sym name string for var %s\n", name);
			return ERR_PTR(-EIO);
		}
		if (strcmp(name, sname) == 0)
			return sym;
	}

	return ERR_PTR(-ENOENT);
}

static int create_placeholder_fd(void)
{
	int fd;

	fd = ensure_good_fd(memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC));
	if (fd < 0)
		return -errno;
	return fd;
}

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
{
	struct bpf_map *map;
	int err;

	err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
				sizeof(*obj->maps), obj->nr_maps + 1);
	if (err)
		return ERR_PTR(err);

	map = &obj->maps[obj->nr_maps++];
	map->obj = obj;
	/* Preallocate map FD without actually creating BPF map just yet.
	 * These map FD "placeholders" will be reused later without changing
	 * FD value when map is actually created in the kernel.
	 *
	 * This is useful to be able to perform BPF program relocations
	 * without having to create BPF maps before that step. This allows us
	 * to finalize and load BTF very late in BPF object's loading phase,
	 * right before BPF maps have to be created and BPF programs have to
	 * be loaded. By having these map FD placeholders we can perform all
	 * the sanitizations, relocations, and any other adjustments before we
	 * start creating actual BPF kernel objects (BTF, maps, progs).
	 */
	map->fd = create_placeholder_fd();
	if (map->fd < 0)
		return ERR_PTR(map->fd);
	map->inner_map_fd = -1;
	map->autocreate = true;

	return map;
}

static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
{
	const long page_sz = sysconf(_SC_PAGE_SIZE);
	size_t map_sz;

	map_sz = (size_t)roundup(value_sz, 8) * max_entries;
	map_sz = roundup(map_sz, page_sz);
	return map_sz;
}

static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
{
	void *mmaped;

	if (!map->mmaped)
		return -EINVAL;

	if (old_sz == new_sz)
		return 0;

	mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (mmaped == MAP_FAILED)
		return -errno;

	memcpy(mmaped, map->mmaped, min(old_sz, new_sz));
	munmap(map->mmaped, old_sz);
	map->mmaped = mmaped;
	return 0;
}

static char *internal_map_name(struct bpf_object *obj, const char *real_name)
{
	char map_name[BPF_OBJ_NAME_LEN], *p;
	int pfx_len, sfx_len = max((size_t)7, strlen(real_name));

	/* This is one of the more confusing parts of libbpf for various
	 * reasons, some of which are historical. The original idea for naming
	 * internal names was to include as much of BPF object name prefix as
	 * possible, so that it can be distinguished from similar internal
	 * maps of a different BPF object.
	 * As an example, let's say we have bpf_object named 'my_object_name'
	 * and internal map corresponding to '.rodata' ELF section. The final
	 * map name advertised to user and to the kernel will be
	 * 'my_objec.rodata', taking first 8 characters of object name and
	 * entire 7 characters of '.rodata'.
	 * Somewhat confusingly, if internal map ELF section name is shorter
	 * than 7 characters, e.g., '.bss', we still reserve 7 characters
	 * for the suffix, even though we only have 4 actual characters, and
	 * resulting map will be called 'my_objec.bss', not even using all 15
	 * characters allowed by the kernel. Oh well, at least the truncated
	 * object name is somewhat consistent in this case. But if the map
	 * name is '.kconfig', we'll still have entirety of '.kconfig' added
	 * (8 chars) and thus will be left with only first 7 characters of the
	 * object name ('my_obje'). Happy guessing, user, that the final map
	 * name will be "my_obje.kconfig".
	 * Now, with libbpf starting to support arbitrarily named .rodata.*
	 * and .data.* data sections, it's possible that ELF section name is
	 * longer than allowed 15 chars, so we now need to be careful to take
	 * only up to 15 first characters of ELF name, taking no BPF object
	 * name characters at all. So '.rodata.abracadabra' will result in
	 * '.rodata.abracad' kernel and user-visible name.
	 * We need to keep this convoluted logic intact for .data, .bss and
	 * .rodata maps, but for new custom .data.custom and .rodata.custom
	 * maps we use their ELF names as is, not prepending bpf_object name
	 * in front. We still need to truncate them to 15 characters for the
	 * kernel. Full name can be recovered for such maps by using DATASEC
	 * BTF type associated with such map's value type, though.
	 */
	if (sfx_len >= BPF_OBJ_NAME_LEN)
		sfx_len = BPF_OBJ_NAME_LEN - 1;

	/* if there are two or more dots in map name, it's a custom dot map */
	if (strchr(real_name + 1, '.') != NULL)
		pfx_len = 0;
	else
		pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));

	snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
		 sfx_len, real_name);

	/* sanitise map name to characters allowed by kernel */
	for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
		if (!isalnum(*p) && *p != '_' && *p != '.')
			*p = '_';

	return strdup(map_name);
}

static int
map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);

/* Internal BPF map is mmap()'able only if at least one of corresponding
 * DATASEC's VARs are to be exposed through BPF skeleton. I.e., it's a GLOBAL
 * variable and it's not marked as __hidden (which turns it into, effectively,
 * a STATIC variable).
 */
static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
{
	const struct btf_type *t, *vt;
	struct btf_var_secinfo *vsi;
	int i, n;

	if (!map->btf_value_type_id)
		return false;

	t = btf__type_by_id(obj->btf, map->btf_value_type_id);
	if (!btf_is_datasec(t))
		return false;

	vsi = btf_var_secinfos(t);
	for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) {
		vt = btf__type_by_id(obj->btf, vsi->type);
		if (!btf_is_var(vt))
			continue;

		if (btf_var(vt)->linkage != BTF_VAR_STATIC)
			return true;
	}

	return false;
}

static int
bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
			      const char *real_name, int sec_idx, void *data, size_t data_sz)
{
	struct bpf_map_def *def;
	struct bpf_map *map;
	size_t mmap_sz;
	int err;

	map = bpf_object__add_map(obj);
	if (IS_ERR(map))
		return PTR_ERR(map);

	map->libbpf_type = type;
	map->sec_idx = sec_idx;
	map->sec_offset = 0;
	map->real_name = strdup(real_name);
	map->name = internal_map_name(obj, real_name);
	if (!map->real_name || !map->name) {
		zfree(&map->real_name);
		zfree(&map->name);
		return -ENOMEM;
	}

	def = &map->def;
	def->type = BPF_MAP_TYPE_ARRAY;
	def->key_size = sizeof(int);
	def->value_size = data_sz;
	def->max_entries = 1;
	def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
			 ? BPF_F_RDONLY_PROG : 0;

	/* failures are fine because of maps like .rodata.str1.1 */
	(void) map_fill_btf_type_info(obj, map);

	if (map_is_mmapable(obj, map))
		def->map_flags |= BPF_F_MMAPABLE;

	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
		 map->name, map->sec_idx, map->sec_offset, def->map_flags);

	mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
	map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
			   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (map->mmaped == MAP_FAILED) {
		err = -errno;
		map->mmaped = NULL;
		pr_warn("failed to alloc map '%s' content buffer: %d\n",
			map->name, err);
		zfree(&map->real_name);
		zfree(&map->name);
		return err;
	}

	if (data)
		memcpy(map->mmaped, data, data_sz);

	pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
	return 0;
}

static int bpf_object__init_global_data_maps(struct bpf_object *obj)
{
	struct elf_sec_desc *sec_desc;
	const char *sec_name;
	int err = 0, sec_idx;

	/*
	 * Populate obj->maps with libbpf internal maps.
	 */
	for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
		sec_desc = &obj->efile.secs[sec_idx];

		/* Skip recognized sections with size 0. */
		if (!sec_desc->data || sec_desc->data->d_size == 0)
			continue;

		switch (sec_desc->sec_type) {
		case SEC_DATA:
			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
							    sec_name, sec_idx,
							    sec_desc->data->d_buf,
							    sec_desc->data->d_size);
			break;
		case SEC_RODATA:
			obj->has_rodata = true;
			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
							    sec_name, sec_idx,
							    sec_desc->data->d_buf,
							    sec_desc->data->d_size);
			break;
		case SEC_BSS:
			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
							    sec_name, sec_idx,
							    NULL,
							    sec_desc->data->d_size);
			break;
		default:
			/* skip */
			break;
		}
		if (err)
			return err;
	}
	return 0;
}


static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
					       const void *name)
{
	int i;

	for (i = 0; i < obj->nr_extern; i++) {
		if (strcmp(obj->externs[i].name, name) == 0)
			return &obj->externs[i];
	}
	return NULL;
}

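/* Illustrative note (not part of libbpf): the kcfg handling below backs
 * externs declared on the BPF side, for example:
 *
 *	extern int CONFIG_HZ __kconfig;
 *	extern enum libbpf_tristate CONFIG_MODULES __kconfig __weak;
 *
 * Parsed Kconfig values are written into the .kconfig map at each extern's
 * kcfg.data_off, using the type-specific setters that follow.
 */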
true : false; 1826 break; 1827 case KCFG_TRISTATE: 1828 if (value == 'y') 1829 *(enum libbpf_tristate *)ext_val = TRI_YES; 1830 else if (value == 'm') 1831 *(enum libbpf_tristate *)ext_val = TRI_MODULE; 1832 else /* value == 'n' */ 1833 *(enum libbpf_tristate *)ext_val = TRI_NO; 1834 break; 1835 case KCFG_CHAR: 1836 *(char *)ext_val = value; 1837 break; 1838 case KCFG_UNKNOWN: 1839 case KCFG_INT: 1840 case KCFG_CHAR_ARR: 1841 default: 1842 pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n", 1843 ext->name, value); 1844 return -EINVAL; 1845 } 1846 ext->is_set = true; 1847 return 0; 1848 } 1849 1850 static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, 1851 const char *value) 1852 { 1853 size_t len; 1854 1855 if (ext->kcfg.type != KCFG_CHAR_ARR) { 1856 pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n", 1857 ext->name, value); 1858 return -EINVAL; 1859 } 1860 1861 len = strlen(value); 1862 if (value[len - 1] != '"') { 1863 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n", 1864 ext->name, value); 1865 return -EINVAL; 1866 } 1867 1868 /* strip quotes */ 1869 len -= 2; 1870 if (len >= ext->kcfg.sz) { 1871 pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n", 1872 ext->name, value, len, ext->kcfg.sz - 1); 1873 len = ext->kcfg.sz - 1; 1874 } 1875 memcpy(ext_val, value + 1, len); 1876 ext_val[len] = '\0'; 1877 ext->is_set = true; 1878 return 0; 1879 } 1880 1881 static int parse_u64(const char *value, __u64 *res) 1882 { 1883 char *value_end; 1884 int err; 1885 1886 errno = 0; 1887 *res = strtoull(value, &value_end, 0); 1888 if (errno) { 1889 err = -errno; 1890 pr_warn("failed to parse '%s' as integer: %d\n", value, err); 1891 return err; 1892 } 1893 if (*value_end) { 1894 pr_warn("failed to parse '%s' as integer completely\n", value); 1895 return -EINVAL; 1896 } 1897 return 0; 1898 } 1899 1900 static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v) 1901 { 1902 int bit_sz = ext->kcfg.sz * 8; 1903 1904 if (ext->kcfg.sz == 8) 1905 return true; 1906 1907 /* Validate that value stored in u64 fits in integer of `ext->sz` 1908 * bytes size without any loss of information. If the target integer 1909 * is signed, we rely on the following limits of integer type of 1910 * Y bits and subsequent transformation: 1911 * 1912 * -2^(Y-1) <= X <= 2^(Y-1) - 1 1913 * 0 <= X + 2^(Y-1) <= 2^Y - 1 1914 * 0 <= X + 2^(Y-1) < 2^Y 1915 * 1916 * For unsigned target integer, check that all the (64 - Y) bits are 1917 * zero. 
1918 */ 1919 if (ext->kcfg.is_signed) 1920 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz); 1921 else 1922 return (v >> bit_sz) == 0; 1923 } 1924 1925 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val, 1926 __u64 value) 1927 { 1928 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR && 1929 ext->kcfg.type != KCFG_BOOL) { 1930 pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n", 1931 ext->name, (unsigned long long)value); 1932 return -EINVAL; 1933 } 1934 if (ext->kcfg.type == KCFG_BOOL && value > 1) { 1935 pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n", 1936 ext->name, (unsigned long long)value); 1937 return -EINVAL; 1938 1939 } 1940 if (!is_kcfg_value_in_range(ext, value)) { 1941 pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n", 1942 ext->name, (unsigned long long)value, ext->kcfg.sz); 1943 return -ERANGE; 1944 } 1945 switch (ext->kcfg.sz) { 1946 case 1: 1947 *(__u8 *)ext_val = value; 1948 break; 1949 case 2: 1950 *(__u16 *)ext_val = value; 1951 break; 1952 case 4: 1953 *(__u32 *)ext_val = value; 1954 break; 1955 case 8: 1956 *(__u64 *)ext_val = value; 1957 break; 1958 default: 1959 return -EINVAL; 1960 } 1961 ext->is_set = true; 1962 return 0; 1963 } 1964 1965 static int bpf_object__process_kconfig_line(struct bpf_object *obj, 1966 char *buf, void *data) 1967 { 1968 struct extern_desc *ext; 1969 char *sep, *value; 1970 int len, err = 0; 1971 void *ext_val; 1972 __u64 num; 1973 1974 if (!str_has_pfx(buf, "CONFIG_")) 1975 return 0; 1976 1977 sep = strchr(buf, '='); 1978 if (!sep) { 1979 pr_warn("failed to parse '%s': no separator\n", buf); 1980 return -EINVAL; 1981 } 1982 1983 /* Trim ending '\n' */ 1984 len = strlen(buf); 1985 if (buf[len - 1] == '\n') 1986 buf[len - 1] = '\0'; 1987 /* Split on '=' and ensure that a value is present. */ 1988 *sep = '\0'; 1989 if (!sep[1]) { 1990 *sep = '='; 1991 pr_warn("failed to parse '%s': no value\n", buf); 1992 return -EINVAL; 1993 } 1994 1995 ext = find_extern_by_name(obj, buf); 1996 if (!ext || ext->is_set) 1997 return 0; 1998 1999 ext_val = data + ext->kcfg.data_off; 2000 value = sep + 1; 2001 2002 switch (*value) { 2003 case 'y': case 'n': case 'm': 2004 err = set_kcfg_value_tri(ext, ext_val, *value); 2005 break; 2006 case '"': 2007 err = set_kcfg_value_str(ext, ext_val, value); 2008 break; 2009 default: 2010 /* assume integer */ 2011 err = parse_u64(value, &num); 2012 if (err) { 2013 pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value); 2014 return err; 2015 } 2016 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { 2017 pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value); 2018 return -EINVAL; 2019 } 2020 err = set_kcfg_value_num(ext, ext_val, num); 2021 break; 2022 } 2023 if (err) 2024 return err; 2025 pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value); 2026 return 0; 2027 } 2028 2029 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) 2030 { 2031 char buf[PATH_MAX]; 2032 struct utsname uts; 2033 int len, err = 0; 2034 gzFile file; 2035 2036 uname(&uts); 2037 len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release); 2038 if (len < 0) 2039 return -EINVAL; 2040 else if (len >= PATH_MAX) 2041 return -ENAMETOOLONG; 2042 2043 /* gzopen also accepts uncompressed files. 
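 * That is why the plain-text /boot/config-<release> path built above and
 * the gzipped /proc/config.gz fallback below can share the same gz* calls.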
*/ 2044 file = gzopen(buf, "re"); 2045 if (!file) 2046 file = gzopen("/proc/config.gz", "re"); 2047 2048 if (!file) { 2049 pr_warn("failed to open system Kconfig\n"); 2050 return -ENOENT; 2051 } 2052 2053 while (gzgets(file, buf, sizeof(buf))) { 2054 err = bpf_object__process_kconfig_line(obj, buf, data); 2055 if (err) { 2056 pr_warn("error parsing system Kconfig line '%s': %d\n", 2057 buf, err); 2058 goto out; 2059 } 2060 } 2061 2062 out: 2063 gzclose(file); 2064 return err; 2065 } 2066 2067 static int bpf_object__read_kconfig_mem(struct bpf_object *obj, 2068 const char *config, void *data) 2069 { 2070 char buf[PATH_MAX]; 2071 int err = 0; 2072 FILE *file; 2073 2074 file = fmemopen((void *)config, strlen(config), "r"); 2075 if (!file) { 2076 err = -errno; 2077 pr_warn("failed to open in-memory Kconfig: %d\n", err); 2078 return err; 2079 } 2080 2081 while (fgets(buf, sizeof(buf), file)) { 2082 err = bpf_object__process_kconfig_line(obj, buf, data); 2083 if (err) { 2084 pr_warn("error parsing in-memory Kconfig line '%s': %d\n", 2085 buf, err); 2086 break; 2087 } 2088 } 2089 2090 fclose(file); 2091 return err; 2092 } 2093 2094 static int bpf_object__init_kconfig_map(struct bpf_object *obj) 2095 { 2096 struct extern_desc *last_ext = NULL, *ext; 2097 size_t map_sz; 2098 int i, err; 2099 2100 for (i = 0; i < obj->nr_extern; i++) { 2101 ext = &obj->externs[i]; 2102 if (ext->type == EXT_KCFG) 2103 last_ext = ext; 2104 } 2105 2106 if (!last_ext) 2107 return 0; 2108 2109 map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz; 2110 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG, 2111 ".kconfig", obj->efile.symbols_shndx, 2112 NULL, map_sz); 2113 if (err) 2114 return err; 2115 2116 obj->kconfig_map_idx = obj->nr_maps - 1; 2117 2118 return 0; 2119 } 2120 2121 const struct btf_type * 2122 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) 2123 { 2124 const struct btf_type *t = btf__type_by_id(btf, id); 2125 2126 if (res_id) 2127 *res_id = id; 2128 2129 while (btf_is_mod(t) || btf_is_typedef(t)) { 2130 if (res_id) 2131 *res_id = t->type; 2132 t = btf__type_by_id(btf, t->type); 2133 } 2134 2135 return t; 2136 } 2137 2138 static const struct btf_type * 2139 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id) 2140 { 2141 const struct btf_type *t; 2142 2143 t = skip_mods_and_typedefs(btf, id, NULL); 2144 if (!btf_is_ptr(t)) 2145 return NULL; 2146 2147 t = skip_mods_and_typedefs(btf, t->type, res_id); 2148 2149 return btf_is_func_proto(t) ? 
t : NULL; 2150 } 2151 2152 static const char *__btf_kind_str(__u16 kind) 2153 { 2154 switch (kind) { 2155 case BTF_KIND_UNKN: return "void"; 2156 case BTF_KIND_INT: return "int"; 2157 case BTF_KIND_PTR: return "ptr"; 2158 case BTF_KIND_ARRAY: return "array"; 2159 case BTF_KIND_STRUCT: return "struct"; 2160 case BTF_KIND_UNION: return "union"; 2161 case BTF_KIND_ENUM: return "enum"; 2162 case BTF_KIND_FWD: return "fwd"; 2163 case BTF_KIND_TYPEDEF: return "typedef"; 2164 case BTF_KIND_VOLATILE: return "volatile"; 2165 case BTF_KIND_CONST: return "const"; 2166 case BTF_KIND_RESTRICT: return "restrict"; 2167 case BTF_KIND_FUNC: return "func"; 2168 case BTF_KIND_FUNC_PROTO: return "func_proto"; 2169 case BTF_KIND_VAR: return "var"; 2170 case BTF_KIND_DATASEC: return "datasec"; 2171 case BTF_KIND_FLOAT: return "float"; 2172 case BTF_KIND_DECL_TAG: return "decl_tag"; 2173 case BTF_KIND_TYPE_TAG: return "type_tag"; 2174 case BTF_KIND_ENUM64: return "enum64"; 2175 default: return "unknown"; 2176 } 2177 } 2178 2179 const char *btf_kind_str(const struct btf_type *t) 2180 { 2181 return __btf_kind_str(btf_kind(t)); 2182 } 2183 2184 /* 2185 * Fetch integer attribute of BTF map definition. Such attributes are 2186 * represented using a pointer to an array, in which dimensionality of array 2187 * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY]; 2188 * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF 2189 * type definition, while using only sizeof(void *) space in ELF data section. 2190 */ 2191 static bool get_map_field_int(const char *map_name, const struct btf *btf, 2192 const struct btf_member *m, __u32 *res) 2193 { 2194 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); 2195 const char *name = btf__name_by_offset(btf, m->name_off); 2196 const struct btf_array *arr_info; 2197 const struct btf_type *arr_t; 2198 2199 if (!btf_is_ptr(t)) { 2200 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n", 2201 map_name, name, btf_kind_str(t)); 2202 return false; 2203 } 2204 2205 arr_t = btf__type_by_id(btf, t->type); 2206 if (!arr_t) { 2207 pr_warn("map '%s': attr '%s': type [%u] not found.\n", 2208 map_name, name, t->type); 2209 return false; 2210 } 2211 if (!btf_is_array(arr_t)) { 2212 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n", 2213 map_name, name, btf_kind_str(arr_t)); 2214 return false; 2215 } 2216 arr_info = btf_array(arr_t); 2217 *res = arr_info->nelems; 2218 return true; 2219 } 2220 2221 static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name) 2222 { 2223 int len; 2224 2225 len = snprintf(buf, buf_sz, "%s/%s", path, name); 2226 if (len < 0) 2227 return -EINVAL; 2228 if (len >= buf_sz) 2229 return -ENAMETOOLONG; 2230 2231 return 0; 2232 } 2233 2234 static int build_map_pin_path(struct bpf_map *map, const char *path) 2235 { 2236 char buf[PATH_MAX]; 2237 int err; 2238 2239 if (!path) 2240 path = BPF_FS_DEFAULT_PATH; 2241 2242 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); 2243 if (err) 2244 return err; 2245 2246 return bpf_map__set_pin_path(map, buf); 2247 } 2248 2249 /* should match definition in bpf_helpers.h */ 2250 enum libbpf_pin_type { 2251 LIBBPF_PIN_NONE, 2252 /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ 2253 LIBBPF_PIN_BY_NAME, 2254 }; 2255 2256 int parse_btf_map_def(const char *map_name, struct btf *btf, 2257 const struct btf_type *def_t, bool strict, 2258 struct btf_map_def *map_def, struct btf_map_def *inner_def) 2259 { 2260 const struct btf_type 
*t; 2261 const struct btf_member *m; 2262 bool is_inner = inner_def == NULL; 2263 int vlen, i; 2264 2265 vlen = btf_vlen(def_t); 2266 m = btf_members(def_t); 2267 for (i = 0; i < vlen; i++, m++) { 2268 const char *name = btf__name_by_offset(btf, m->name_off); 2269 2270 if (!name) { 2271 pr_warn("map '%s': invalid field #%d.\n", map_name, i); 2272 return -EINVAL; 2273 } 2274 if (strcmp(name, "type") == 0) { 2275 if (!get_map_field_int(map_name, btf, m, &map_def->map_type)) 2276 return -EINVAL; 2277 map_def->parts |= MAP_DEF_MAP_TYPE; 2278 } else if (strcmp(name, "max_entries") == 0) { 2279 if (!get_map_field_int(map_name, btf, m, &map_def->max_entries)) 2280 return -EINVAL; 2281 map_def->parts |= MAP_DEF_MAX_ENTRIES; 2282 } else if (strcmp(name, "map_flags") == 0) { 2283 if (!get_map_field_int(map_name, btf, m, &map_def->map_flags)) 2284 return -EINVAL; 2285 map_def->parts |= MAP_DEF_MAP_FLAGS; 2286 } else if (strcmp(name, "numa_node") == 0) { 2287 if (!get_map_field_int(map_name, btf, m, &map_def->numa_node)) 2288 return -EINVAL; 2289 map_def->parts |= MAP_DEF_NUMA_NODE; 2290 } else if (strcmp(name, "key_size") == 0) { 2291 __u32 sz; 2292 2293 if (!get_map_field_int(map_name, btf, m, &sz)) 2294 return -EINVAL; 2295 if (map_def->key_size && map_def->key_size != sz) { 2296 pr_warn("map '%s': conflicting key size %u != %u.\n", 2297 map_name, map_def->key_size, sz); 2298 return -EINVAL; 2299 } 2300 map_def->key_size = sz; 2301 map_def->parts |= MAP_DEF_KEY_SIZE; 2302 } else if (strcmp(name, "key") == 0) { 2303 __s64 sz; 2304 2305 t = btf__type_by_id(btf, m->type); 2306 if (!t) { 2307 pr_warn("map '%s': key type [%d] not found.\n", 2308 map_name, m->type); 2309 return -EINVAL; 2310 } 2311 if (!btf_is_ptr(t)) { 2312 pr_warn("map '%s': key spec is not PTR: %s.\n", 2313 map_name, btf_kind_str(t)); 2314 return -EINVAL; 2315 } 2316 sz = btf__resolve_size(btf, t->type); 2317 if (sz < 0) { 2318 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n", 2319 map_name, t->type, (ssize_t)sz); 2320 return sz; 2321 } 2322 if (map_def->key_size && map_def->key_size != sz) { 2323 pr_warn("map '%s': conflicting key size %u != %zd.\n", 2324 map_name, map_def->key_size, (ssize_t)sz); 2325 return -EINVAL; 2326 } 2327 map_def->key_size = sz; 2328 map_def->key_type_id = t->type; 2329 map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE; 2330 } else if (strcmp(name, "value_size") == 0) { 2331 __u32 sz; 2332 2333 if (!get_map_field_int(map_name, btf, m, &sz)) 2334 return -EINVAL; 2335 if (map_def->value_size && map_def->value_size != sz) { 2336 pr_warn("map '%s': conflicting value size %u != %u.\n", 2337 map_name, map_def->value_size, sz); 2338 return -EINVAL; 2339 } 2340 map_def->value_size = sz; 2341 map_def->parts |= MAP_DEF_VALUE_SIZE; 2342 } else if (strcmp(name, "value") == 0) { 2343 __s64 sz; 2344 2345 t = btf__type_by_id(btf, m->type); 2346 if (!t) { 2347 pr_warn("map '%s': value type [%d] not found.\n", 2348 map_name, m->type); 2349 return -EINVAL; 2350 } 2351 if (!btf_is_ptr(t)) { 2352 pr_warn("map '%s': value spec is not PTR: %s.\n", 2353 map_name, btf_kind_str(t)); 2354 return -EINVAL; 2355 } 2356 sz = btf__resolve_size(btf, t->type); 2357 if (sz < 0) { 2358 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n", 2359 map_name, t->type, (ssize_t)sz); 2360 return sz; 2361 } 2362 if (map_def->value_size && map_def->value_size != sz) { 2363 pr_warn("map '%s': conflicting value size %u != %zd.\n", 2364 map_name, map_def->value_size, (ssize_t)sz); 2365 return -EINVAL; 2366 } 2367 
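/* Record both the value size and its BTF type id here; fill_map_from_def()
 * below copies the type id into map->btf_value_type_id so it can be passed
 * along to the kernel at map creation time.
 */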
map_def->value_size = sz; 2368 map_def->value_type_id = t->type; 2369 map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE; 2370 } 2371 else if (strcmp(name, "values") == 0) { 2372 bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type); 2373 bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY; 2374 const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value"; 2375 char inner_map_name[128]; 2376 int err; 2377 2378 if (is_inner) { 2379 pr_warn("map '%s': multi-level inner maps not supported.\n", 2380 map_name); 2381 return -ENOTSUP; 2382 } 2383 if (i != vlen - 1) { 2384 pr_warn("map '%s': '%s' member should be last.\n", 2385 map_name, name); 2386 return -EINVAL; 2387 } 2388 if (!is_map_in_map && !is_prog_array) { 2389 pr_warn("map '%s': should be map-in-map or prog-array.\n", 2390 map_name); 2391 return -ENOTSUP; 2392 } 2393 if (map_def->value_size && map_def->value_size != 4) { 2394 pr_warn("map '%s': conflicting value size %u != 4.\n", 2395 map_name, map_def->value_size); 2396 return -EINVAL; 2397 } 2398 map_def->value_size = 4; 2399 t = btf__type_by_id(btf, m->type); 2400 if (!t) { 2401 pr_warn("map '%s': %s type [%d] not found.\n", 2402 map_name, desc, m->type); 2403 return -EINVAL; 2404 } 2405 if (!btf_is_array(t) || btf_array(t)->nelems) { 2406 pr_warn("map '%s': %s spec is not a zero-sized array.\n", 2407 map_name, desc); 2408 return -EINVAL; 2409 } 2410 t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL); 2411 if (!btf_is_ptr(t)) { 2412 pr_warn("map '%s': %s def is of unexpected kind %s.\n", 2413 map_name, desc, btf_kind_str(t)); 2414 return -EINVAL; 2415 } 2416 t = skip_mods_and_typedefs(btf, t->type, NULL); 2417 if (is_prog_array) { 2418 if (!btf_is_func_proto(t)) { 2419 pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n", 2420 map_name, btf_kind_str(t)); 2421 return -EINVAL; 2422 } 2423 continue; 2424 } 2425 if (!btf_is_struct(t)) { 2426 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", 2427 map_name, btf_kind_str(t)); 2428 return -EINVAL; 2429 } 2430 2431 snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name); 2432 err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL); 2433 if (err) 2434 return err; 2435 2436 map_def->parts |= MAP_DEF_INNER_MAP; 2437 } else if (strcmp(name, "pinning") == 0) { 2438 __u32 val; 2439 2440 if (is_inner) { 2441 pr_warn("map '%s': inner def can't be pinned.\n", map_name); 2442 return -EINVAL; 2443 } 2444 if (!get_map_field_int(map_name, btf, m, &val)) 2445 return -EINVAL; 2446 if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) { 2447 pr_warn("map '%s': invalid pinning value %u.\n", 2448 map_name, val); 2449 return -EINVAL; 2450 } 2451 map_def->pinning = val; 2452 map_def->parts |= MAP_DEF_PINNING; 2453 } else if (strcmp(name, "map_extra") == 0) { 2454 __u32 map_extra; 2455 2456 if (!get_map_field_int(map_name, btf, m, &map_extra)) 2457 return -EINVAL; 2458 map_def->map_extra = map_extra; 2459 map_def->parts |= MAP_DEF_MAP_EXTRA; 2460 } else { 2461 if (strict) { 2462 pr_warn("map '%s': unknown field '%s'.\n", map_name, name); 2463 return -ENOTSUP; 2464 } 2465 pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name); 2466 } 2467 } 2468 2469 if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) { 2470 pr_warn("map '%s': map type isn't specified.\n", map_name); 2471 return -EINVAL; 2472 } 2473 2474 return 0; 2475 } 2476 2477 static size_t adjust_ringbuf_sz(size_t sz) 2478 { 2479 __u32 page_sz = sysconf(_SC_PAGE_SIZE); 2480 
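/* Illustrative examples, assuming 4KB pages: sz = 16384 is already
 * 4 * 4096 and passes through the check below unchanged, while sz = 5000
 * is rounded up to the next page_sz * 2^n, i.e. 8192.
 */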
__u32 mul; 2481 2482 /* if user forgot to set any size, make sure they see an error */ 2483 if (sz == 0) 2484 return 0; 2485 /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be 2486 * a power-of-2 multiple of kernel's page size. If user diligently 2487 * satisfied these conditions, pass the size through. 2488 */ 2489 if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz)) 2490 return sz; 2491 2492 /* Otherwise find closest (page_sz * power_of_2) product bigger than 2493 * user-set size to satisfy both user size request and kernel 2494 * requirements and substitute correct max_entries for map creation. 2495 */ 2496 for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) { 2497 if (mul * page_sz > sz) 2498 return mul * page_sz; 2499 } 2500 2501 /* if it's impossible to satisfy the conditions (i.e., user size is 2502 * very close to UINT_MAX but is not a power-of-2 multiple of 2503 * page_size) then just return original size and let kernel reject it 2504 */ 2505 return sz; 2506 } 2507 2508 static bool map_is_ringbuf(const struct bpf_map *map) 2509 { 2510 return map->def.type == BPF_MAP_TYPE_RINGBUF || 2511 map->def.type == BPF_MAP_TYPE_USER_RINGBUF; 2512 } 2513 2514 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def) 2515 { 2516 map->def.type = def->map_type; 2517 map->def.key_size = def->key_size; 2518 map->def.value_size = def->value_size; 2519 map->def.max_entries = def->max_entries; 2520 map->def.map_flags = def->map_flags; 2521 map->map_extra = def->map_extra; 2522 2523 map->numa_node = def->numa_node; 2524 map->btf_key_type_id = def->key_type_id; 2525 map->btf_value_type_id = def->value_type_id; 2526 2527 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ 2528 if (map_is_ringbuf(map)) 2529 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); 2530 2531 if (def->parts & MAP_DEF_MAP_TYPE) 2532 pr_debug("map '%s': found type = %u.\n", map->name, def->map_type); 2533 2534 if (def->parts & MAP_DEF_KEY_TYPE) 2535 pr_debug("map '%s': found key [%u], sz = %u.\n", 2536 map->name, def->key_type_id, def->key_size); 2537 else if (def->parts & MAP_DEF_KEY_SIZE) 2538 pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size); 2539 2540 if (def->parts & MAP_DEF_VALUE_TYPE) 2541 pr_debug("map '%s': found value [%u], sz = %u.\n", 2542 map->name, def->value_type_id, def->value_size); 2543 else if (def->parts & MAP_DEF_VALUE_SIZE) 2544 pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size); 2545 2546 if (def->parts & MAP_DEF_MAX_ENTRIES) 2547 pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries); 2548 if (def->parts & MAP_DEF_MAP_FLAGS) 2549 pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags); 2550 if (def->parts & MAP_DEF_MAP_EXTRA) 2551 pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name, 2552 (unsigned long long)def->map_extra); 2553 if (def->parts & MAP_DEF_PINNING) 2554 pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning); 2555 if (def->parts & MAP_DEF_NUMA_NODE) 2556 pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node); 2557 2558 if (def->parts & MAP_DEF_INNER_MAP) 2559 pr_debug("map '%s': found inner map definition.\n", map->name); 2560 } 2561 2562 static const char *btf_var_linkage_str(__u32 linkage) 2563 { 2564 switch (linkage) { 2565 case BTF_VAR_STATIC: return "static"; 2566 case BTF_VAR_GLOBAL_ALLOCATED: return "global"; 2567 case BTF_VAR_GLOBAL_EXTERN: return "extern"; 2568 default: return "unknown"; 2569 }
2570 } 2571 2572 static int bpf_object__init_user_btf_map(struct bpf_object *obj, 2573 const struct btf_type *sec, 2574 int var_idx, int sec_idx, 2575 const Elf_Data *data, bool strict, 2576 const char *pin_root_path) 2577 { 2578 struct btf_map_def map_def = {}, inner_def = {}; 2579 const struct btf_type *var, *def; 2580 const struct btf_var_secinfo *vi; 2581 const struct btf_var *var_extra; 2582 const char *map_name; 2583 struct bpf_map *map; 2584 int err; 2585 2586 vi = btf_var_secinfos(sec) + var_idx; 2587 var = btf__type_by_id(obj->btf, vi->type); 2588 var_extra = btf_var(var); 2589 map_name = btf__name_by_offset(obj->btf, var->name_off); 2590 2591 if (map_name == NULL || map_name[0] == '\0') { 2592 pr_warn("map #%d: empty name.\n", var_idx); 2593 return -EINVAL; 2594 } 2595 if ((__u64)vi->offset + vi->size > data->d_size) { 2596 pr_warn("map '%s' BTF data is corrupted.\n", map_name); 2597 return -EINVAL; 2598 } 2599 if (!btf_is_var(var)) { 2600 pr_warn("map '%s': unexpected var kind %s.\n", 2601 map_name, btf_kind_str(var)); 2602 return -EINVAL; 2603 } 2604 if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) { 2605 pr_warn("map '%s': unsupported map linkage %s.\n", 2606 map_name, btf_var_linkage_str(var_extra->linkage)); 2607 return -EOPNOTSUPP; 2608 } 2609 2610 def = skip_mods_and_typedefs(obj->btf, var->type, NULL); 2611 if (!btf_is_struct(def)) { 2612 pr_warn("map '%s': unexpected def kind %s.\n", 2613 map_name, btf_kind_str(var)); 2614 return -EINVAL; 2615 } 2616 if (def->size > vi->size) { 2617 pr_warn("map '%s': invalid def size.\n", map_name); 2618 return -EINVAL; 2619 } 2620 2621 map = bpf_object__add_map(obj); 2622 if (IS_ERR(map)) 2623 return PTR_ERR(map); 2624 map->name = strdup(map_name); 2625 if (!map->name) { 2626 pr_warn("map '%s': failed to alloc map name.\n", map_name); 2627 return -ENOMEM; 2628 } 2629 map->libbpf_type = LIBBPF_MAP_UNSPEC; 2630 map->def.type = BPF_MAP_TYPE_UNSPEC; 2631 map->sec_idx = sec_idx; 2632 map->sec_offset = vi->offset; 2633 map->btf_var_idx = var_idx; 2634 pr_debug("map '%s': at sec_idx %d, offset %zu.\n", 2635 map_name, map->sec_idx, map->sec_offset); 2636 2637 err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def); 2638 if (err) 2639 return err; 2640 2641 fill_map_from_def(map, &map_def); 2642 2643 if (map_def.pinning == LIBBPF_PIN_BY_NAME) { 2644 err = build_map_pin_path(map, pin_root_path); 2645 if (err) { 2646 pr_warn("map '%s': couldn't build pin path.\n", map->name); 2647 return err; 2648 } 2649 } 2650 2651 if (map_def.parts & MAP_DEF_INNER_MAP) { 2652 map->inner_map = calloc(1, sizeof(*map->inner_map)); 2653 if (!map->inner_map) 2654 return -ENOMEM; 2655 map->inner_map->fd = create_placeholder_fd(); 2656 if (map->inner_map->fd < 0) 2657 return map->inner_map->fd; 2658 map->inner_map->sec_idx = sec_idx; 2659 map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1); 2660 if (!map->inner_map->name) 2661 return -ENOMEM; 2662 sprintf(map->inner_map->name, "%s.inner", map_name); 2663 2664 fill_map_from_def(map->inner_map, &inner_def); 2665 } 2666 2667 err = map_fill_btf_type_info(obj, map); 2668 if (err) 2669 return err; 2670 2671 return 0; 2672 } 2673 2674 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, 2675 const char *pin_root_path) 2676 { 2677 const struct btf_type *sec = NULL; 2678 int nr_types, i, vlen, err; 2679 const struct btf_type *t; 2680 const char *name; 2681 Elf_Data *data; 2682 Elf_Scn *scn; 2683 2684 if (obj->efile.btf_maps_shndx < 0) 2685 return 0; 2686 
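/* The maps handled here are BTF-defined maps, declared in BPF C roughly as
 * follows (an illustrative sketch using the __uint/__type macros from
 * bpf_helpers.h; the map and value type names are made up):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_HASH);
 *		__uint(max_entries, 1024);
 *		__type(key, __u32);
 *		__type(value, struct my_value);
 *	} my_map SEC(".maps");
 *
 * Each such variable appears as a VAR inside the .maps DATASEC and is
 * parsed by bpf_object__init_user_btf_map() in the loop below.
 */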
2687 scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx); 2688 data = elf_sec_data(obj, scn); 2689 if (!scn || !data) { 2690 pr_warn("elf: failed to get %s map definitions for %s\n", 2691 MAPS_ELF_SEC, obj->path); 2692 return -EINVAL; 2693 } 2694 2695 nr_types = btf__type_cnt(obj->btf); 2696 for (i = 1; i < nr_types; i++) { 2697 t = btf__type_by_id(obj->btf, i); 2698 if (!btf_is_datasec(t)) 2699 continue; 2700 name = btf__name_by_offset(obj->btf, t->name_off); 2701 if (strcmp(name, MAPS_ELF_SEC) == 0) { 2702 sec = t; 2703 obj->efile.btf_maps_sec_btf_id = i; 2704 break; 2705 } 2706 } 2707 2708 if (!sec) { 2709 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC); 2710 return -ENOENT; 2711 } 2712 2713 vlen = btf_vlen(sec); 2714 for (i = 0; i < vlen; i++) { 2715 err = bpf_object__init_user_btf_map(obj, sec, i, 2716 obj->efile.btf_maps_shndx, 2717 data, strict, 2718 pin_root_path); 2719 if (err) 2720 return err; 2721 } 2722 2723 return 0; 2724 } 2725 2726 static int bpf_object__init_maps(struct bpf_object *obj, 2727 const struct bpf_object_open_opts *opts) 2728 { 2729 const char *pin_root_path; 2730 bool strict; 2731 int err = 0; 2732 2733 strict = !OPTS_GET(opts, relaxed_maps, false); 2734 pin_root_path = OPTS_GET(opts, pin_root_path, NULL); 2735 2736 err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); 2737 err = err ?: bpf_object__init_global_data_maps(obj); 2738 err = err ?: bpf_object__init_kconfig_map(obj); 2739 err = err ?: bpf_object_init_struct_ops(obj); 2740 2741 return err; 2742 } 2743 2744 static bool section_have_execinstr(struct bpf_object *obj, int idx) 2745 { 2746 Elf64_Shdr *sh; 2747 2748 sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx)); 2749 if (!sh) 2750 return false; 2751 2752 return sh->sh_flags & SHF_EXECINSTR; 2753 } 2754 2755 static bool btf_needs_sanitization(struct bpf_object *obj) 2756 { 2757 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); 2758 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); 2759 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); 2760 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); 2761 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); 2762 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); 2763 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); 2764 2765 return !has_func || !has_datasec || !has_func_global || !has_float || 2766 !has_decl_tag || !has_type_tag || !has_enum64; 2767 } 2768 2769 static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) 2770 { 2771 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); 2772 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); 2773 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); 2774 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); 2775 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); 2776 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); 2777 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); 2778 int enum64_placeholder_id = 0; 2779 struct btf_type *t; 2780 int i, j, vlen; 2781 2782 for (i = 1; i < btf__type_cnt(btf); i++) { 2783 t = (struct btf_type *)btf__type_by_id(btf, i); 2784 2785 if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) { 2786 /* replace VAR/DECL_TAG with INT */ 2787 t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); 2788 /* 2789 * using size = 1 is the safest choice, 4 will be too 2790 * big and cause kernel BTF validation failure if 2791 * original variable took less than 4 bytes 2792 */ 2793 t->size = 1; 2794 *(int 
*)(t + 1) = BTF_INT_ENC(0, 0, 8); 2795 } else if (!has_datasec && btf_is_datasec(t)) { 2796 /* replace DATASEC with STRUCT */ 2797 const struct btf_var_secinfo *v = btf_var_secinfos(t); 2798 struct btf_member *m = btf_members(t); 2799 struct btf_type *vt; 2800 char *name; 2801 2802 name = (char *)btf__name_by_offset(btf, t->name_off); 2803 while (*name) { 2804 if (*name == '.') 2805 *name = '_'; 2806 name++; 2807 } 2808 2809 vlen = btf_vlen(t); 2810 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen); 2811 for (j = 0; j < vlen; j++, v++, m++) { 2812 /* order of field assignments is important */ 2813 m->offset = v->offset * 8; 2814 m->type = v->type; 2815 /* preserve variable name as member name */ 2816 vt = (void *)btf__type_by_id(btf, v->type); 2817 m->name_off = vt->name_off; 2818 } 2819 } else if (!has_func && btf_is_func_proto(t)) { 2820 /* replace FUNC_PROTO with ENUM */ 2821 vlen = btf_vlen(t); 2822 t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen); 2823 t->size = sizeof(__u32); /* kernel enforced */ 2824 } else if (!has_func && btf_is_func(t)) { 2825 /* replace FUNC with TYPEDEF */ 2826 t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0); 2827 } else if (!has_func_global && btf_is_func(t)) { 2828 /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */ 2829 t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0); 2830 } else if (!has_float && btf_is_float(t)) { 2831 /* replace FLOAT with an equally-sized empty STRUCT; 2832 * since C compilers do not accept e.g. "float" as a 2833 * valid struct name, make it anonymous 2834 */ 2835 t->name_off = 0; 2836 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0); 2837 } else if (!has_type_tag && btf_is_type_tag(t)) { 2838 /* replace TYPE_TAG with a CONST */ 2839 t->name_off = 0; 2840 t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0); 2841 } else if (!has_enum64 && btf_is_enum(t)) { 2842 /* clear the kflag */ 2843 t->info = btf_type_info(btf_kind(t), btf_vlen(t), false); 2844 } else if (!has_enum64 && btf_is_enum64(t)) { 2845 /* replace ENUM64 with a union */ 2846 struct btf_member *m; 2847 2848 if (enum64_placeholder_id == 0) { 2849 enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0); 2850 if (enum64_placeholder_id < 0) 2851 return enum64_placeholder_id; 2852 2853 t = (struct btf_type *)btf__type_by_id(btf, i); 2854 } 2855 2856 m = btf_members(t); 2857 vlen = btf_vlen(t); 2858 t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen); 2859 for (j = 0; j < vlen; j++, m++) { 2860 m->type = enum64_placeholder_id; 2861 m->offset = 0; 2862 } 2863 } 2864 } 2865 2866 return 0; 2867 } 2868 2869 static bool libbpf_needs_btf(const struct bpf_object *obj) 2870 { 2871 return obj->efile.btf_maps_shndx >= 0 || 2872 obj->efile.st_ops_shndx >= 0 || 2873 obj->efile.st_ops_link_shndx >= 0 || 2874 obj->nr_extern > 0; 2875 } 2876 2877 static bool kernel_needs_btf(const struct bpf_object *obj) 2878 { 2879 return obj->efile.st_ops_shndx >= 0 || obj->efile.st_ops_link_shndx >= 0; 2880 } 2881 2882 static int bpf_object__init_btf(struct bpf_object *obj, 2883 Elf_Data *btf_data, 2884 Elf_Data *btf_ext_data) 2885 { 2886 int err = -ENOENT; 2887 2888 if (btf_data) { 2889 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); 2890 err = libbpf_get_error(obj->btf); 2891 if (err) { 2892 obj->btf = NULL; 2893 pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err); 2894 goto out; 2895 } 2896 /* enforce 8-byte pointers for BPF-targeted BTFs */ 2897 btf__set_pointer_size(obj->btf, 8); 2898 } 2899 if (btf_ext_data) { 2900 struct btf_ext_info *ext_segs[3]; 2901 int seg_num, sec_num; 2902 2903 if 
(!obj->btf) { 2904 pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", 2905 BTF_EXT_ELF_SEC, BTF_ELF_SEC); 2906 goto out; 2907 } 2908 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); 2909 err = libbpf_get_error(obj->btf_ext); 2910 if (err) { 2911 pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n", 2912 BTF_EXT_ELF_SEC, err); 2913 obj->btf_ext = NULL; 2914 goto out; 2915 } 2916 2917 /* setup .BTF.ext to ELF section mapping */ 2918 ext_segs[0] = &obj->btf_ext->func_info; 2919 ext_segs[1] = &obj->btf_ext->line_info; 2920 ext_segs[2] = &obj->btf_ext->core_relo_info; 2921 for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) { 2922 struct btf_ext_info *seg = ext_segs[seg_num]; 2923 const struct btf_ext_info_sec *sec; 2924 const char *sec_name; 2925 Elf_Scn *scn; 2926 2927 if (seg->sec_cnt == 0) 2928 continue; 2929 2930 seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs)); 2931 if (!seg->sec_idxs) { 2932 err = -ENOMEM; 2933 goto out; 2934 } 2935 2936 sec_num = 0; 2937 for_each_btf_ext_sec(seg, sec) { 2938 /* preventively increment index to avoid doing 2939 * this before every continue below 2940 */ 2941 sec_num++; 2942 2943 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); 2944 if (str_is_empty(sec_name)) 2945 continue; 2946 scn = elf_sec_by_name(obj, sec_name); 2947 if (!scn) 2948 continue; 2949 2950 seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn); 2951 } 2952 } 2953 } 2954 out: 2955 if (err && libbpf_needs_btf(obj)) { 2956 pr_warn("BTF is required, but is missing or corrupted.\n"); 2957 return err; 2958 } 2959 return 0; 2960 } 2961 2962 static int compare_vsi_off(const void *_a, const void *_b) 2963 { 2964 const struct btf_var_secinfo *a = _a; 2965 const struct btf_var_secinfo *b = _b; 2966 2967 return a->offset - b->offset; 2968 } 2969 2970 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, 2971 struct btf_type *t) 2972 { 2973 __u32 size = 0, i, vars = btf_vlen(t); 2974 const char *sec_name = btf__name_by_offset(btf, t->name_off); 2975 struct btf_var_secinfo *vsi; 2976 bool fixup_offsets = false; 2977 int err; 2978 2979 if (!sec_name) { 2980 pr_debug("No name found in string section for DATASEC kind.\n"); 2981 return -ENOENT; 2982 } 2983 2984 /* Extern-backing datasecs (.ksyms, .kconfig) have their size and 2985 * variable offsets set at the previous step. Further, not every 2986 * extern BTF VAR has corresponding ELF symbol preserved, so we skip 2987 * all fixups altogether for such sections and go straight to sorting 2988 * VARs within their DATASEC. 2989 */ 2990 if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0) 2991 goto sort_vars; 2992 2993 /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to 2994 * fix this up. But BPF static linker already fixes this up and fills 2995 * all the sizes and offsets during static linking. So this step has 2996 * to be optional. But the STV_HIDDEN handling is non-optional for any 2997 * non-extern DATASEC, so the variable fixup loop below handles both 2998 * functions at the same time, paying the cost of BTF VAR <-> ELF 2999 * symbol matching just once. 
3000 */ 3001 if (t->size == 0) { 3002 err = find_elf_sec_sz(obj, sec_name, &size); 3003 if (err || !size) { 3004 pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n", 3005 sec_name, size, err); 3006 return -ENOENT; 3007 } 3008 3009 t->size = size; 3010 fixup_offsets = true; 3011 } 3012 3013 for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { 3014 const struct btf_type *t_var; 3015 struct btf_var *var; 3016 const char *var_name; 3017 Elf64_Sym *sym; 3018 3019 t_var = btf__type_by_id(btf, vsi->type); 3020 if (!t_var || !btf_is_var(t_var)) { 3021 pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name); 3022 return -EINVAL; 3023 } 3024 3025 var = btf_var(t_var); 3026 if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN) 3027 continue; 3028 3029 var_name = btf__name_by_offset(btf, t_var->name_off); 3030 if (!var_name) { 3031 pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n", 3032 sec_name, i); 3033 return -ENOENT; 3034 } 3035 3036 sym = find_elf_var_sym(obj, var_name); 3037 if (IS_ERR(sym)) { 3038 pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n", 3039 sec_name, var_name); 3040 return -ENOENT; 3041 } 3042 3043 if (fixup_offsets) 3044 vsi->offset = sym->st_value; 3045 3046 /* if variable is a global/weak symbol, but has restricted 3047 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR 3048 * as static. This follows similar logic for functions (BPF 3049 * subprogs) and influences libbpf's further decisions about 3050 * whether to make global data BPF array maps as 3051 * BPF_F_MMAPABLE. 3052 */ 3053 if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN 3054 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL) 3055 var->linkage = BTF_VAR_STATIC; 3056 } 3057 3058 sort_vars: 3059 qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); 3060 return 0; 3061 } 3062 3063 static int bpf_object_fixup_btf(struct bpf_object *obj) 3064 { 3065 int i, n, err = 0; 3066 3067 if (!obj->btf) 3068 return 0; 3069 3070 n = btf__type_cnt(obj->btf); 3071 for (i = 1; i < n; i++) { 3072 struct btf_type *t = btf_type_by_id(obj->btf, i); 3073 3074 /* Loader needs to fix up some of the things compiler 3075 * couldn't get its hands on while emitting BTF. This 3076 * is section size and global variable offset. We use 3077 * the info from the ELF itself for this purpose. 
3078 */ 3079 if (btf_is_datasec(t)) { 3080 err = btf_fixup_datasec(obj, obj->btf, t); 3081 if (err) 3082 return err; 3083 } 3084 } 3085 3086 return 0; 3087 } 3088 3089 static bool prog_needs_vmlinux_btf(struct bpf_program *prog) 3090 { 3091 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS || 3092 prog->type == BPF_PROG_TYPE_LSM) 3093 return true; 3094 3095 /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs 3096 * also need vmlinux BTF 3097 */ 3098 if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd) 3099 return true; 3100 3101 return false; 3102 } 3103 3104 static bool map_needs_vmlinux_btf(struct bpf_map *map) 3105 { 3106 return bpf_map__is_struct_ops(map); 3107 } 3108 3109 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) 3110 { 3111 struct bpf_program *prog; 3112 struct bpf_map *map; 3113 int i; 3114 3115 /* CO-RE relocations need kernel BTF, only when btf_custom_path 3116 * is not specified 3117 */ 3118 if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path) 3119 return true; 3120 3121 /* Support for typed ksyms needs kernel BTF */ 3122 for (i = 0; i < obj->nr_extern; i++) { 3123 const struct extern_desc *ext; 3124 3125 ext = &obj->externs[i]; 3126 if (ext->type == EXT_KSYM && ext->ksym.type_id) 3127 return true; 3128 } 3129 3130 bpf_object__for_each_program(prog, obj) { 3131 if (!prog->autoload) 3132 continue; 3133 if (prog_needs_vmlinux_btf(prog)) 3134 return true; 3135 } 3136 3137 bpf_object__for_each_map(map, obj) { 3138 if (map_needs_vmlinux_btf(map)) 3139 return true; 3140 } 3141 3142 return false; 3143 } 3144 3145 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) 3146 { 3147 int err; 3148 3149 /* btf_vmlinux could be loaded earlier */ 3150 if (obj->btf_vmlinux || obj->gen_loader) 3151 return 0; 3152 3153 if (!force && !obj_needs_vmlinux_btf(obj)) 3154 return 0; 3155 3156 obj->btf_vmlinux = btf__load_vmlinux_btf(); 3157 err = libbpf_get_error(obj->btf_vmlinux); 3158 if (err) { 3159 pr_warn("Error loading vmlinux BTF: %d\n", err); 3160 obj->btf_vmlinux = NULL; 3161 return err; 3162 } 3163 return 0; 3164 } 3165 3166 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) 3167 { 3168 struct btf *kern_btf = obj->btf; 3169 bool btf_mandatory, sanitize; 3170 int i, err = 0; 3171 3172 if (!obj->btf) 3173 return 0; 3174 3175 if (!kernel_supports(obj, FEAT_BTF)) { 3176 if (kernel_needs_btf(obj)) { 3177 err = -EOPNOTSUPP; 3178 goto report; 3179 } 3180 pr_debug("Kernel doesn't support BTF, skipping uploading it.\n"); 3181 return 0; 3182 } 3183 3184 /* Even though some subprogs are global/weak, user might prefer more 3185 * permissive BPF verification process that BPF verifier performs for 3186 * static functions, taking into account more context from the caller 3187 * functions. In such case, they need to mark such subprogs with 3188 * __attribute__((visibility("hidden"))) and libbpf will adjust 3189 * corresponding FUNC BTF type to be marked as static and trigger more 3190 * involved BPF verification process. 
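 * For example (illustrative prototype, not from this file):
 *
 *	__hidden int my_subprog(int x);
 *
 * where __hidden is bpf_helpers.h's shorthand for
 * __attribute__((visibility("hidden"))); the subprog stays global/weak at
 * the ELF level, but its FUNC BTF is switched to static in the loop below.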
3191 */ 3192 for (i = 0; i < obj->nr_programs; i++) { 3193 struct bpf_program *prog = &obj->programs[i]; 3194 struct btf_type *t; 3195 const char *name; 3196 int j, n; 3197 3198 if (!prog->mark_btf_static || !prog_is_subprog(obj, prog)) 3199 continue; 3200 3201 n = btf__type_cnt(obj->btf); 3202 for (j = 1; j < n; j++) { 3203 t = btf_type_by_id(obj->btf, j); 3204 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) 3205 continue; 3206 3207 name = btf__str_by_offset(obj->btf, t->name_off); 3208 if (strcmp(name, prog->name) != 0) 3209 continue; 3210 3211 t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0); 3212 break; 3213 } 3214 } 3215 3216 sanitize = btf_needs_sanitization(obj); 3217 if (sanitize) { 3218 const void *raw_data; 3219 __u32 sz; 3220 3221 /* clone BTF to sanitize a copy and leave the original intact */ 3222 raw_data = btf__raw_data(obj->btf, &sz); 3223 kern_btf = btf__new(raw_data, sz); 3224 err = libbpf_get_error(kern_btf); 3225 if (err) 3226 return err; 3227 3228 /* enforce 8-byte pointers for BPF-targeted BTFs */ 3229 btf__set_pointer_size(obj->btf, 8); 3230 err = bpf_object__sanitize_btf(obj, kern_btf); 3231 if (err) 3232 return err; 3233 } 3234 3235 if (obj->gen_loader) { 3236 __u32 raw_size = 0; 3237 const void *raw_data = btf__raw_data(kern_btf, &raw_size); 3238 3239 if (!raw_data) 3240 return -ENOMEM; 3241 bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size); 3242 /* Pretend to have valid FD to pass various fd >= 0 checks. 3243 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. 3244 */ 3245 btf__set_fd(kern_btf, 0); 3246 } else { 3247 /* currently BPF_BTF_LOAD only supports log_level 1 */ 3248 err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, 3249 obj->log_level ? 1 : 0, obj->token_fd); 3250 } 3251 if (sanitize) { 3252 if (!err) { 3253 /* move fd to libbpf's BTF */ 3254 btf__set_fd(obj->btf, btf__fd(kern_btf)); 3255 btf__set_fd(kern_btf, -1); 3256 } 3257 btf__free(kern_btf); 3258 } 3259 report: 3260 if (err) { 3261 btf_mandatory = kernel_needs_btf(obj); 3262 pr_warn("Error loading .BTF into kernel: %d. %s\n", err, 3263 btf_mandatory ? "BTF is mandatory, can't proceed." 
3264 : "BTF is optional, ignoring."); 3265 if (!btf_mandatory) 3266 err = 0; 3267 } 3268 return err; 3269 } 3270 3271 static const char *elf_sym_str(const struct bpf_object *obj, size_t off) 3272 { 3273 const char *name; 3274 3275 name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off); 3276 if (!name) { 3277 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n", 3278 off, obj->path, elf_errmsg(-1)); 3279 return NULL; 3280 } 3281 3282 return name; 3283 } 3284 3285 static const char *elf_sec_str(const struct bpf_object *obj, size_t off) 3286 { 3287 const char *name; 3288 3289 name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off); 3290 if (!name) { 3291 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n", 3292 off, obj->path, elf_errmsg(-1)); 3293 return NULL; 3294 } 3295 3296 return name; 3297 } 3298 3299 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx) 3300 { 3301 Elf_Scn *scn; 3302 3303 scn = elf_getscn(obj->efile.elf, idx); 3304 if (!scn) { 3305 pr_warn("elf: failed to get section(%zu) from %s: %s\n", 3306 idx, obj->path, elf_errmsg(-1)); 3307 return NULL; 3308 } 3309 return scn; 3310 } 3311 3312 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name) 3313 { 3314 Elf_Scn *scn = NULL; 3315 Elf *elf = obj->efile.elf; 3316 const char *sec_name; 3317 3318 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3319 sec_name = elf_sec_name(obj, scn); 3320 if (!sec_name) 3321 return NULL; 3322 3323 if (strcmp(sec_name, name) != 0) 3324 continue; 3325 3326 return scn; 3327 } 3328 return NULL; 3329 } 3330 3331 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn) 3332 { 3333 Elf64_Shdr *shdr; 3334 3335 if (!scn) 3336 return NULL; 3337 3338 shdr = elf64_getshdr(scn); 3339 if (!shdr) { 3340 pr_warn("elf: failed to get section(%zu) header from %s: %s\n", 3341 elf_ndxscn(scn), obj->path, elf_errmsg(-1)); 3342 return NULL; 3343 } 3344 3345 return shdr; 3346 } 3347 3348 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn) 3349 { 3350 const char *name; 3351 Elf64_Shdr *sh; 3352 3353 if (!scn) 3354 return NULL; 3355 3356 sh = elf_sec_hdr(obj, scn); 3357 if (!sh) 3358 return NULL; 3359 3360 name = elf_sec_str(obj, sh->sh_name); 3361 if (!name) { 3362 pr_warn("elf: failed to get section(%zu) name from %s: %s\n", 3363 elf_ndxscn(scn), obj->path, elf_errmsg(-1)); 3364 return NULL; 3365 } 3366 3367 return name; 3368 } 3369 3370 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn) 3371 { 3372 Elf_Data *data; 3373 3374 if (!scn) 3375 return NULL; 3376 3377 data = elf_getdata(scn, 0); 3378 if (!data) { 3379 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n", 3380 elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>", 3381 obj->path, elf_errmsg(-1)); 3382 return NULL; 3383 } 3384 3385 return data; 3386 } 3387 3388 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx) 3389 { 3390 if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym)) 3391 return NULL; 3392 3393 return (Elf64_Sym *)obj->efile.symbols->d_buf + idx; 3394 } 3395 3396 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx) 3397 { 3398 if (idx >= data->d_size / sizeof(Elf64_Rel)) 3399 return NULL; 3400 3401 return (Elf64_Rel *)data->d_buf + idx; 3402 } 3403 3404 static bool is_sec_name_dwarf(const char *name) 3405 { 3406 /* approximation, but the actual list is too long */ 3407 return str_has_pfx(name, ".debug_"); 3408 } 3409 3410 static bool 
ignore_elf_section(Elf64_Shdr *hdr, const char *name) 3411 { 3412 /* no special handling of .strtab */ 3413 if (hdr->sh_type == SHT_STRTAB) 3414 return true; 3415 3416 /* ignore .llvm_addrsig section as well */ 3417 if (hdr->sh_type == SHT_LLVM_ADDRSIG) 3418 return true; 3419 3420 /* no subprograms will lead to an empty .text section, ignore it */ 3421 if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 && 3422 strcmp(name, ".text") == 0) 3423 return true; 3424 3425 /* DWARF sections */ 3426 if (is_sec_name_dwarf(name)) 3427 return true; 3428 3429 if (str_has_pfx(name, ".rel")) { 3430 name += sizeof(".rel") - 1; 3431 /* DWARF section relocations */ 3432 if (is_sec_name_dwarf(name)) 3433 return true; 3434 3435 /* .BTF and .BTF.ext don't need relocations */ 3436 if (strcmp(name, BTF_ELF_SEC) == 0 || 3437 strcmp(name, BTF_EXT_ELF_SEC) == 0) 3438 return true; 3439 } 3440 3441 return false; 3442 } 3443 3444 static int cmp_progs(const void *_a, const void *_b) 3445 { 3446 const struct bpf_program *a = _a; 3447 const struct bpf_program *b = _b; 3448 3449 if (a->sec_idx != b->sec_idx) 3450 return a->sec_idx < b->sec_idx ? -1 : 1; 3451 3452 /* sec_insn_off can't be the same within the section */ 3453 return a->sec_insn_off < b->sec_insn_off ? -1 : 1; 3454 } 3455 3456 static int bpf_object__elf_collect(struct bpf_object *obj) 3457 { 3458 struct elf_sec_desc *sec_desc; 3459 Elf *elf = obj->efile.elf; 3460 Elf_Data *btf_ext_data = NULL; 3461 Elf_Data *btf_data = NULL; 3462 int idx = 0, err = 0; 3463 const char *name; 3464 Elf_Data *data; 3465 Elf_Scn *scn; 3466 Elf64_Shdr *sh; 3467 3468 /* ELF section indices are 0-based, but sec #0 is special "invalid" 3469 * section. Since section count retrieved by elf_getshdrnum() does 3470 * include sec #0, it is already the necessary size of an array to keep 3471 * all the sections. 
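 * (As a consequence, the secs array allocated below can be indexed
 * directly by ELF section index, with entry #0 simply left unused.)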
3472 */ 3473 if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) { 3474 pr_warn("elf: failed to get the number of sections for %s: %s\n", 3475 obj->path, elf_errmsg(-1)); 3476 return -LIBBPF_ERRNO__FORMAT; 3477 } 3478 obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs)); 3479 if (!obj->efile.secs) 3480 return -ENOMEM; 3481 3482 /* a bunch of ELF parsing functionality depends on processing symbols, 3483 * so do the first pass and find the symbol table 3484 */ 3485 scn = NULL; 3486 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3487 sh = elf_sec_hdr(obj, scn); 3488 if (!sh) 3489 return -LIBBPF_ERRNO__FORMAT; 3490 3491 if (sh->sh_type == SHT_SYMTAB) { 3492 if (obj->efile.symbols) { 3493 pr_warn("elf: multiple symbol tables in %s\n", obj->path); 3494 return -LIBBPF_ERRNO__FORMAT; 3495 } 3496 3497 data = elf_sec_data(obj, scn); 3498 if (!data) 3499 return -LIBBPF_ERRNO__FORMAT; 3500 3501 idx = elf_ndxscn(scn); 3502 3503 obj->efile.symbols = data; 3504 obj->efile.symbols_shndx = idx; 3505 obj->efile.strtabidx = sh->sh_link; 3506 } 3507 } 3508 3509 if (!obj->efile.symbols) { 3510 pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n", 3511 obj->path); 3512 return -ENOENT; 3513 } 3514 3515 scn = NULL; 3516 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3517 idx = elf_ndxscn(scn); 3518 sec_desc = &obj->efile.secs[idx]; 3519 3520 sh = elf_sec_hdr(obj, scn); 3521 if (!sh) 3522 return -LIBBPF_ERRNO__FORMAT; 3523 3524 name = elf_sec_str(obj, sh->sh_name); 3525 if (!name) 3526 return -LIBBPF_ERRNO__FORMAT; 3527 3528 if (ignore_elf_section(sh, name)) 3529 continue; 3530 3531 data = elf_sec_data(obj, scn); 3532 if (!data) 3533 return -LIBBPF_ERRNO__FORMAT; 3534 3535 pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", 3536 idx, name, (unsigned long)data->d_size, 3537 (int)sh->sh_link, (unsigned long)sh->sh_flags, 3538 (int)sh->sh_type); 3539 3540 if (strcmp(name, "license") == 0) { 3541 err = bpf_object__init_license(obj, data->d_buf, data->d_size); 3542 if (err) 3543 return err; 3544 } else if (strcmp(name, "version") == 0) { 3545 err = bpf_object__init_kversion(obj, data->d_buf, data->d_size); 3546 if (err) 3547 return err; 3548 } else if (strcmp(name, "maps") == 0) { 3549 pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n"); 3550 return -ENOTSUP; 3551 } else if (strcmp(name, MAPS_ELF_SEC) == 0) { 3552 obj->efile.btf_maps_shndx = idx; 3553 } else if (strcmp(name, BTF_ELF_SEC) == 0) { 3554 if (sh->sh_type != SHT_PROGBITS) 3555 return -LIBBPF_ERRNO__FORMAT; 3556 btf_data = data; 3557 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { 3558 if (sh->sh_type != SHT_PROGBITS) 3559 return -LIBBPF_ERRNO__FORMAT; 3560 btf_ext_data = data; 3561 } else if (sh->sh_type == SHT_SYMTAB) { 3562 /* already processed during the first pass above */ 3563 } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) { 3564 if (sh->sh_flags & SHF_EXECINSTR) { 3565 if (strcmp(name, ".text") == 0) 3566 obj->efile.text_shndx = idx; 3567 err = bpf_object__add_programs(obj, data, name, idx); 3568 if (err) 3569 return err; 3570 } else if (strcmp(name, DATA_SEC) == 0 || 3571 str_has_pfx(name, DATA_SEC ".")) { 3572 sec_desc->sec_type = SEC_DATA; 3573 sec_desc->shdr = sh; 3574 sec_desc->data = data; 3575 } else if (strcmp(name, RODATA_SEC) == 0 || 3576 str_has_pfx(name, RODATA_SEC ".")) { 3577 sec_desc->sec_type = SEC_RODATA; 3578 sec_desc->shdr = sh; 3579 sec_desc->data = data; 3580 } else if (strcmp(name, STRUCT_OPS_SEC) == 0) { 3581 
obj->efile.st_ops_data = data; 3582 obj->efile.st_ops_shndx = idx; 3583 } else if (strcmp(name, STRUCT_OPS_LINK_SEC) == 0) { 3584 obj->efile.st_ops_link_data = data; 3585 obj->efile.st_ops_link_shndx = idx; 3586 } else { 3587 pr_info("elf: skipping unrecognized data section(%d) %s\n", 3588 idx, name); 3589 } 3590 } else if (sh->sh_type == SHT_REL) { 3591 int targ_sec_idx = sh->sh_info; /* points to other section */ 3592 3593 if (sh->sh_entsize != sizeof(Elf64_Rel) || 3594 targ_sec_idx >= obj->efile.sec_cnt) 3595 return -LIBBPF_ERRNO__FORMAT; 3596 3597 /* Only do relo for section with exec instructions */ 3598 if (!section_have_execinstr(obj, targ_sec_idx) && 3599 strcmp(name, ".rel" STRUCT_OPS_SEC) && 3600 strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) && 3601 strcmp(name, ".rel" MAPS_ELF_SEC)) { 3602 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", 3603 idx, name, targ_sec_idx, 3604 elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>"); 3605 continue; 3606 } 3607 3608 sec_desc->sec_type = SEC_RELO; 3609 sec_desc->shdr = sh; 3610 sec_desc->data = data; 3611 } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 || 3612 str_has_pfx(name, BSS_SEC "."))) { 3613 sec_desc->sec_type = SEC_BSS; 3614 sec_desc->shdr = sh; 3615 sec_desc->data = data; 3616 } else { 3617 pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, 3618 (size_t)sh->sh_size); 3619 } 3620 } 3621 3622 if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) { 3623 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path); 3624 return -LIBBPF_ERRNO__FORMAT; 3625 } 3626 3627 /* sort BPF programs by section name and in-section instruction offset 3628 * for faster search 3629 */ 3630 if (obj->nr_programs) 3631 qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); 3632 3633 return bpf_object__init_btf(obj, btf_data, btf_ext_data); 3634 } 3635 3636 static bool sym_is_extern(const Elf64_Sym *sym) 3637 { 3638 int bind = ELF64_ST_BIND(sym->st_info); 3639 /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */ 3640 return sym->st_shndx == SHN_UNDEF && 3641 (bind == STB_GLOBAL || bind == STB_WEAK) && 3642 ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE; 3643 } 3644 3645 static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx) 3646 { 3647 int bind = ELF64_ST_BIND(sym->st_info); 3648 int type = ELF64_ST_TYPE(sym->st_info); 3649 3650 /* in .text section */ 3651 if (sym->st_shndx != text_shndx) 3652 return false; 3653 3654 /* local function */ 3655 if (bind == STB_LOCAL && type == STT_SECTION) 3656 return true; 3657 3658 /* global function */ 3659 return bind == STB_GLOBAL && type == STT_FUNC; 3660 } 3661 3662 static int find_extern_btf_id(const struct btf *btf, const char *ext_name) 3663 { 3664 const struct btf_type *t; 3665 const char *tname; 3666 int i, n; 3667 3668 if (!btf) 3669 return -ESRCH; 3670 3671 n = btf__type_cnt(btf); 3672 for (i = 1; i < n; i++) { 3673 t = btf__type_by_id(btf, i); 3674 3675 if (!btf_is_var(t) && !btf_is_func(t)) 3676 continue; 3677 3678 tname = btf__name_by_offset(btf, t->name_off); 3679 if (strcmp(tname, ext_name)) 3680 continue; 3681 3682 if (btf_is_var(t) && 3683 btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN) 3684 return -EINVAL; 3685 3686 if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN) 3687 return -EINVAL; 3688 3689 return i; 3690 } 3691 3692 return -ENOENT; 3693 } 3694 3695 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) { 3696 const struct btf_var_secinfo *vs; 3697 const 
struct btf_type *t; 3698 int i, j, n; 3699 3700 if (!btf) 3701 return -ESRCH; 3702 3703 n = btf__type_cnt(btf); 3704 for (i = 1; i < n; i++) { 3705 t = btf__type_by_id(btf, i); 3706 3707 if (!btf_is_datasec(t)) 3708 continue; 3709 3710 vs = btf_var_secinfos(t); 3711 for (j = 0; j < btf_vlen(t); j++, vs++) { 3712 if (vs->type == ext_btf_id) 3713 return i; 3714 } 3715 } 3716 3717 return -ENOENT; 3718 } 3719 3720 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id, 3721 bool *is_signed) 3722 { 3723 const struct btf_type *t; 3724 const char *name; 3725 3726 t = skip_mods_and_typedefs(btf, id, NULL); 3727 name = btf__name_by_offset(btf, t->name_off); 3728 3729 if (is_signed) 3730 *is_signed = false; 3731 switch (btf_kind(t)) { 3732 case BTF_KIND_INT: { 3733 int enc = btf_int_encoding(t); 3734 3735 if (enc & BTF_INT_BOOL) 3736 return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN; 3737 if (is_signed) 3738 *is_signed = enc & BTF_INT_SIGNED; 3739 if (t->size == 1) 3740 return KCFG_CHAR; 3741 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1))) 3742 return KCFG_UNKNOWN; 3743 return KCFG_INT; 3744 } 3745 case BTF_KIND_ENUM: 3746 if (t->size != 4) 3747 return KCFG_UNKNOWN; 3748 if (strcmp(name, "libbpf_tristate")) 3749 return KCFG_UNKNOWN; 3750 return KCFG_TRISTATE; 3751 case BTF_KIND_ENUM64: 3752 if (strcmp(name, "libbpf_tristate")) 3753 return KCFG_UNKNOWN; 3754 return KCFG_TRISTATE; 3755 case BTF_KIND_ARRAY: 3756 if (btf_array(t)->nelems == 0) 3757 return KCFG_UNKNOWN; 3758 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR) 3759 return KCFG_UNKNOWN; 3760 return KCFG_CHAR_ARR; 3761 default: 3762 return KCFG_UNKNOWN; 3763 } 3764 } 3765 3766 static int cmp_externs(const void *_a, const void *_b) 3767 { 3768 const struct extern_desc *a = _a; 3769 const struct extern_desc *b = _b; 3770 3771 if (a->type != b->type) 3772 return a->type < b->type ? -1 : 1; 3773 3774 if (a->type == EXT_KCFG) { 3775 /* descending order by alignment requirements */ 3776 if (a->kcfg.align != b->kcfg.align) 3777 return a->kcfg.align > b->kcfg.align ? -1 : 1; 3778 /* ascending order by size, within same alignment class */ 3779 if (a->kcfg.sz != b->kcfg.sz) 3780 return a->kcfg.sz < b->kcfg.sz ? -1 : 1; 3781 } 3782 3783 /* resolve ties by name */ 3784 return strcmp(a->name, b->name); 3785 } 3786 3787 static int find_int_btf_id(const struct btf *btf) 3788 { 3789 const struct btf_type *t; 3790 int i, n; 3791 3792 n = btf__type_cnt(btf); 3793 for (i = 1; i < n; i++) { 3794 t = btf__type_by_id(btf, i); 3795 3796 if (btf_is_int(t) && btf_int_bits(t) == 32) 3797 return i; 3798 } 3799 3800 return 0; 3801 } 3802 3803 static int add_dummy_ksym_var(struct btf *btf) 3804 { 3805 int i, int_btf_id, sec_btf_id, dummy_var_btf_id; 3806 const struct btf_var_secinfo *vs; 3807 const struct btf_type *sec; 3808 3809 if (!btf) 3810 return 0; 3811 3812 sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC, 3813 BTF_KIND_DATASEC); 3814 if (sec_btf_id < 0) 3815 return 0; 3816 3817 sec = btf__type_by_id(btf, sec_btf_id); 3818 vs = btf_var_secinfos(sec); 3819 for (i = 0; i < btf_vlen(sec); i++, vs++) { 3820 const struct btf_type *vt; 3821 3822 vt = btf__type_by_id(btf, vs->type); 3823 if (btf_is_func(vt)) 3824 break; 3825 } 3826 3827 /* No func in ksyms sec. No need to add dummy var. 
*/ 3828 if (i == btf_vlen(sec)) 3829 return 0; 3830 3831 int_btf_id = find_int_btf_id(btf); 3832 dummy_var_btf_id = btf__add_var(btf, 3833 "dummy_ksym", 3834 BTF_VAR_GLOBAL_ALLOCATED, 3835 int_btf_id); 3836 if (dummy_var_btf_id < 0) 3837 pr_warn("cannot create a dummy_ksym var\n"); 3838 3839 return dummy_var_btf_id; 3840 } 3841 3842 static int bpf_object__collect_externs(struct bpf_object *obj) 3843 { 3844 struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL; 3845 const struct btf_type *t; 3846 struct extern_desc *ext; 3847 int i, n, off, dummy_var_btf_id; 3848 const char *ext_name, *sec_name; 3849 size_t ext_essent_len; 3850 Elf_Scn *scn; 3851 Elf64_Shdr *sh; 3852 3853 if (!obj->efile.symbols) 3854 return 0; 3855 3856 scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx); 3857 sh = elf_sec_hdr(obj, scn); 3858 if (!sh || sh->sh_entsize != sizeof(Elf64_Sym)) 3859 return -LIBBPF_ERRNO__FORMAT; 3860 3861 dummy_var_btf_id = add_dummy_ksym_var(obj->btf); 3862 if (dummy_var_btf_id < 0) 3863 return dummy_var_btf_id; 3864 3865 n = sh->sh_size / sh->sh_entsize; 3866 pr_debug("looking for externs among %d symbols...\n", n); 3867 3868 for (i = 0; i < n; i++) { 3869 Elf64_Sym *sym = elf_sym_by_idx(obj, i); 3870 3871 if (!sym) 3872 return -LIBBPF_ERRNO__FORMAT; 3873 if (!sym_is_extern(sym)) 3874 continue; 3875 ext_name = elf_sym_str(obj, sym->st_name); 3876 if (!ext_name || !ext_name[0]) 3877 continue; 3878 3879 ext = obj->externs; 3880 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext)); 3881 if (!ext) 3882 return -ENOMEM; 3883 obj->externs = ext; 3884 ext = &ext[obj->nr_extern]; 3885 memset(ext, 0, sizeof(*ext)); 3886 obj->nr_extern++; 3887 3888 ext->btf_id = find_extern_btf_id(obj->btf, ext_name); 3889 if (ext->btf_id <= 0) { 3890 pr_warn("failed to find BTF for extern '%s': %d\n", 3891 ext_name, ext->btf_id); 3892 return ext->btf_id; 3893 } 3894 t = btf__type_by_id(obj->btf, ext->btf_id); 3895 ext->name = btf__name_by_offset(obj->btf, t->name_off); 3896 ext->sym_idx = i; 3897 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; 3898 3899 ext_essent_len = bpf_core_essential_name_len(ext->name); 3900 ext->essent_name = NULL; 3901 if (ext_essent_len != strlen(ext->name)) { 3902 ext->essent_name = strndup(ext->name, ext_essent_len); 3903 if (!ext->essent_name) 3904 return -ENOMEM; 3905 } 3906 3907 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); 3908 if (ext->sec_btf_id <= 0) { 3909 pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n", 3910 ext_name, ext->btf_id, ext->sec_btf_id); 3911 return ext->sec_btf_id; 3912 } 3913 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id); 3914 sec_name = btf__name_by_offset(obj->btf, sec->name_off); 3915 3916 if (strcmp(sec_name, KCONFIG_SEC) == 0) { 3917 if (btf_is_func(t)) { 3918 pr_warn("extern function %s is unsupported under %s section\n", 3919 ext->name, KCONFIG_SEC); 3920 return -ENOTSUP; 3921 } 3922 kcfg_sec = sec; 3923 ext->type = EXT_KCFG; 3924 ext->kcfg.sz = btf__resolve_size(obj->btf, t->type); 3925 if (ext->kcfg.sz <= 0) { 3926 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n", 3927 ext_name, ext->kcfg.sz); 3928 return ext->kcfg.sz; 3929 } 3930 ext->kcfg.align = btf__align_of(obj->btf, t->type); 3931 if (ext->kcfg.align <= 0) { 3932 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n", 3933 ext_name, ext->kcfg.align); 3934 return -EINVAL; 3935 } 3936 ext->kcfg.type = find_kcfg_type(obj->btf, t->type, 3937 &ext->kcfg.is_signed); 3938 if (ext->kcfg.type == KCFG_UNKNOWN) { 3939 
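				/* Per find_kcfg_type() above, supported kcfg extern types are:
				 * bool, char, power-of-two sized integers up to 8 bytes,
				 * enum libbpf_tristate, and char arrays.
				 */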
pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name); 3940 return -ENOTSUP; 3941 } 3942 } else if (strcmp(sec_name, KSYMS_SEC) == 0) { 3943 ksym_sec = sec; 3944 ext->type = EXT_KSYM; 3945 skip_mods_and_typedefs(obj->btf, t->type, 3946 &ext->ksym.type_id); 3947 } else { 3948 pr_warn("unrecognized extern section '%s'\n", sec_name); 3949 return -ENOTSUP; 3950 } 3951 } 3952 pr_debug("collected %d externs total\n", obj->nr_extern); 3953 3954 if (!obj->nr_extern) 3955 return 0; 3956 3957 /* sort externs by type, for kcfg ones also by (align, size, name) */ 3958 qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs); 3959 3960 /* for .ksyms section, we need to turn all externs into allocated 3961 * variables in BTF to pass kernel verification; we do this by 3962 * pretending that each extern is a 8-byte variable 3963 */ 3964 if (ksym_sec) { 3965 /* find existing 4-byte integer type in BTF to use for fake 3966 * extern variables in DATASEC 3967 */ 3968 int int_btf_id = find_int_btf_id(obj->btf); 3969 /* For extern function, a dummy_var added earlier 3970 * will be used to replace the vs->type and 3971 * its name string will be used to refill 3972 * the missing param's name. 3973 */ 3974 const struct btf_type *dummy_var; 3975 3976 dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id); 3977 for (i = 0; i < obj->nr_extern; i++) { 3978 ext = &obj->externs[i]; 3979 if (ext->type != EXT_KSYM) 3980 continue; 3981 pr_debug("extern (ksym) #%d: symbol %d, name %s\n", 3982 i, ext->sym_idx, ext->name); 3983 } 3984 3985 sec = ksym_sec; 3986 n = btf_vlen(sec); 3987 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) { 3988 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; 3989 struct btf_type *vt; 3990 3991 vt = (void *)btf__type_by_id(obj->btf, vs->type); 3992 ext_name = btf__name_by_offset(obj->btf, vt->name_off); 3993 ext = find_extern_by_name(obj, ext_name); 3994 if (!ext) { 3995 pr_warn("failed to find extern definition for BTF %s '%s'\n", 3996 btf_kind_str(vt), ext_name); 3997 return -ESRCH; 3998 } 3999 if (btf_is_func(vt)) { 4000 const struct btf_type *func_proto; 4001 struct btf_param *param; 4002 int j; 4003 4004 func_proto = btf__type_by_id(obj->btf, 4005 vt->type); 4006 param = btf_params(func_proto); 4007 /* Reuse the dummy_var string if the 4008 * func proto does not have param name. 
4009 */ 4010 for (j = 0; j < btf_vlen(func_proto); j++) 4011 if (param[j].type && !param[j].name_off) 4012 param[j].name_off = 4013 dummy_var->name_off; 4014 vs->type = dummy_var_btf_id; 4015 vt->info &= ~0xffff; 4016 vt->info |= BTF_FUNC_GLOBAL; 4017 } else { 4018 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED; 4019 vt->type = int_btf_id; 4020 } 4021 vs->offset = off; 4022 vs->size = sizeof(int); 4023 } 4024 sec->size = off; 4025 } 4026 4027 if (kcfg_sec) { 4028 sec = kcfg_sec; 4029 /* for kcfg externs calculate their offsets within a .kconfig map */ 4030 off = 0; 4031 for (i = 0; i < obj->nr_extern; i++) { 4032 ext = &obj->externs[i]; 4033 if (ext->type != EXT_KCFG) 4034 continue; 4035 4036 ext->kcfg.data_off = roundup(off, ext->kcfg.align); 4037 off = ext->kcfg.data_off + ext->kcfg.sz; 4038 pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n", 4039 i, ext->sym_idx, ext->kcfg.data_off, ext->name); 4040 } 4041 sec->size = off; 4042 n = btf_vlen(sec); 4043 for (i = 0; i < n; i++) { 4044 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; 4045 4046 t = btf__type_by_id(obj->btf, vs->type); 4047 ext_name = btf__name_by_offset(obj->btf, t->name_off); 4048 ext = find_extern_by_name(obj, ext_name); 4049 if (!ext) { 4050 pr_warn("failed to find extern definition for BTF var '%s'\n", 4051 ext_name); 4052 return -ESRCH; 4053 } 4054 btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; 4055 vs->offset = ext->kcfg.data_off; 4056 } 4057 } 4058 return 0; 4059 } 4060 4061 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog) 4062 { 4063 return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1; 4064 } 4065 4066 struct bpf_program * 4067 bpf_object__find_program_by_name(const struct bpf_object *obj, 4068 const char *name) 4069 { 4070 struct bpf_program *prog; 4071 4072 bpf_object__for_each_program(prog, obj) { 4073 if (prog_is_subprog(obj, prog)) 4074 continue; 4075 if (!strcmp(prog->name, name)) 4076 return prog; 4077 } 4078 return errno = ENOENT, NULL; 4079 } 4080 4081 static bool bpf_object__shndx_is_data(const struct bpf_object *obj, 4082 int shndx) 4083 { 4084 switch (obj->efile.secs[shndx].sec_type) { 4085 case SEC_BSS: 4086 case SEC_DATA: 4087 case SEC_RODATA: 4088 return true; 4089 default: 4090 return false; 4091 } 4092 } 4093 4094 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, 4095 int shndx) 4096 { 4097 return shndx == obj->efile.btf_maps_shndx; 4098 } 4099 4100 static enum libbpf_map_type 4101 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) 4102 { 4103 if (shndx == obj->efile.symbols_shndx) 4104 return LIBBPF_MAP_KCONFIG; 4105 4106 switch (obj->efile.secs[shndx].sec_type) { 4107 case SEC_BSS: 4108 return LIBBPF_MAP_BSS; 4109 case SEC_DATA: 4110 return LIBBPF_MAP_DATA; 4111 case SEC_RODATA: 4112 return LIBBPF_MAP_RODATA; 4113 default: 4114 return LIBBPF_MAP_UNSPEC; 4115 } 4116 } 4117 4118 static int bpf_program__record_reloc(struct bpf_program *prog, 4119 struct reloc_desc *reloc_desc, 4120 __u32 insn_idx, const char *sym_name, 4121 const Elf64_Sym *sym, const Elf64_Rel *rel) 4122 { 4123 struct bpf_insn *insn = &prog->insns[insn_idx]; 4124 size_t map_idx, nr_maps = prog->obj->nr_maps; 4125 struct bpf_object *obj = prog->obj; 4126 __u32 shdr_idx = sym->st_shndx; 4127 enum libbpf_map_type type; 4128 const char *sym_sec_name; 4129 struct bpf_map *map; 4130 4131 if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) { 4132 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n", 
4133 prog->name, sym_name, insn_idx, insn->code); 4134 return -LIBBPF_ERRNO__RELOC; 4135 } 4136 4137 if (sym_is_extern(sym)) { 4138 int sym_idx = ELF64_R_SYM(rel->r_info); 4139 int i, n = obj->nr_extern; 4140 struct extern_desc *ext; 4141 4142 for (i = 0; i < n; i++) { 4143 ext = &obj->externs[i]; 4144 if (ext->sym_idx == sym_idx) 4145 break; 4146 } 4147 if (i >= n) { 4148 pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n", 4149 prog->name, sym_name, sym_idx); 4150 return -LIBBPF_ERRNO__RELOC; 4151 } 4152 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n", 4153 prog->name, i, ext->name, ext->sym_idx, insn_idx); 4154 if (insn->code == (BPF_JMP | BPF_CALL)) 4155 reloc_desc->type = RELO_EXTERN_CALL; 4156 else 4157 reloc_desc->type = RELO_EXTERN_LD64; 4158 reloc_desc->insn_idx = insn_idx; 4159 reloc_desc->ext_idx = i; 4160 return 0; 4161 } 4162 4163 /* sub-program call relocation */ 4164 if (is_call_insn(insn)) { 4165 if (insn->src_reg != BPF_PSEUDO_CALL) { 4166 pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name); 4167 return -LIBBPF_ERRNO__RELOC; 4168 } 4169 /* text_shndx can be 0, if no default "main" program exists */ 4170 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) { 4171 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); 4172 pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n", 4173 prog->name, sym_name, sym_sec_name); 4174 return -LIBBPF_ERRNO__RELOC; 4175 } 4176 if (sym->st_value % BPF_INSN_SZ) { 4177 pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n", 4178 prog->name, sym_name, (size_t)sym->st_value); 4179 return -LIBBPF_ERRNO__RELOC; 4180 } 4181 reloc_desc->type = RELO_CALL; 4182 reloc_desc->insn_idx = insn_idx; 4183 reloc_desc->sym_off = sym->st_value; 4184 return 0; 4185 } 4186 4187 if (!shdr_idx || shdr_idx >= SHN_LORESERVE) { 4188 pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n", 4189 prog->name, sym_name, shdr_idx); 4190 return -LIBBPF_ERRNO__RELOC; 4191 } 4192 4193 /* loading subprog addresses */ 4194 if (sym_is_subprog(sym, obj->efile.text_shndx)) { 4195 /* global_func: sym->st_value = offset in the section, insn->imm = 0. 4196 * local_func: sym->st_value = 0, insn->imm = offset in the section. 
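		 * E.g. (offsets illustrative): a subprog starting at byte offset 16
		 * in .text shows up as st_value == 16, imm == 0 when global, and as
		 * st_value == 0, imm == 16 when static.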
4197 */ 4198 if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) { 4199 pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n", 4200 prog->name, sym_name, (size_t)sym->st_value, insn->imm); 4201 return -LIBBPF_ERRNO__RELOC; 4202 } 4203 4204 reloc_desc->type = RELO_SUBPROG_ADDR; 4205 reloc_desc->insn_idx = insn_idx; 4206 reloc_desc->sym_off = sym->st_value; 4207 return 0; 4208 } 4209 4210 type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); 4211 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); 4212 4213 /* generic map reference relocation */ 4214 if (type == LIBBPF_MAP_UNSPEC) { 4215 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { 4216 pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n", 4217 prog->name, sym_name, sym_sec_name); 4218 return -LIBBPF_ERRNO__RELOC; 4219 } 4220 for (map_idx = 0; map_idx < nr_maps; map_idx++) { 4221 map = &obj->maps[map_idx]; 4222 if (map->libbpf_type != type || 4223 map->sec_idx != sym->st_shndx || 4224 map->sec_offset != sym->st_value) 4225 continue; 4226 pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n", 4227 prog->name, map_idx, map->name, map->sec_idx, 4228 map->sec_offset, insn_idx); 4229 break; 4230 } 4231 if (map_idx >= nr_maps) { 4232 pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n", 4233 prog->name, sym_sec_name, (size_t)sym->st_value); 4234 return -LIBBPF_ERRNO__RELOC; 4235 } 4236 reloc_desc->type = RELO_LD64; 4237 reloc_desc->insn_idx = insn_idx; 4238 reloc_desc->map_idx = map_idx; 4239 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */ 4240 return 0; 4241 } 4242 4243 /* global data map relocation */ 4244 if (!bpf_object__shndx_is_data(obj, shdr_idx)) { 4245 pr_warn("prog '%s': bad data relo against section '%s'\n", 4246 prog->name, sym_sec_name); 4247 return -LIBBPF_ERRNO__RELOC; 4248 } 4249 for (map_idx = 0; map_idx < nr_maps; map_idx++) { 4250 map = &obj->maps[map_idx]; 4251 if (map->libbpf_type != type || map->sec_idx != sym->st_shndx) 4252 continue; 4253 pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n", 4254 prog->name, map_idx, map->name, map->sec_idx, 4255 map->sec_offset, insn_idx); 4256 break; 4257 } 4258 if (map_idx >= nr_maps) { 4259 pr_warn("prog '%s': data relo failed to find map for section '%s'\n", 4260 prog->name, sym_sec_name); 4261 return -LIBBPF_ERRNO__RELOC; 4262 } 4263 4264 reloc_desc->type = RELO_DATA; 4265 reloc_desc->insn_idx = insn_idx; 4266 reloc_desc->map_idx = map_idx; 4267 reloc_desc->sym_off = sym->st_value; 4268 return 0; 4269 } 4270 4271 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx) 4272 { 4273 return insn_idx >= prog->sec_insn_off && 4274 insn_idx < prog->sec_insn_off + prog->sec_insn_cnt; 4275 } 4276 4277 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj, 4278 size_t sec_idx, size_t insn_idx) 4279 { 4280 int l = 0, r = obj->nr_programs - 1, m; 4281 struct bpf_program *prog; 4282 4283 if (!obj->nr_programs) 4284 return NULL; 4285 4286 while (l < r) { 4287 m = l + (r - l + 1) / 2; 4288 prog = &obj->programs[m]; 4289 4290 if (prog->sec_idx < sec_idx || 4291 (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx)) 4292 l = m; 4293 else 4294 r = m - 1; 4295 } 4296 /* matching program could be at index l, but it still might be the 4297 * wrong one, so we need to double check conditions for the last time 4298 */ 4299 prog = &obj->programs[l]; 4300 if (prog->sec_idx == sec_idx && 
prog_contains_insn(prog, insn_idx)) 4301 return prog; 4302 return NULL; 4303 } 4304 4305 static int 4306 bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) 4307 { 4308 const char *relo_sec_name, *sec_name; 4309 size_t sec_idx = shdr->sh_info, sym_idx; 4310 struct bpf_program *prog; 4311 struct reloc_desc *relos; 4312 int err, i, nrels; 4313 const char *sym_name; 4314 __u32 insn_idx; 4315 Elf_Scn *scn; 4316 Elf_Data *scn_data; 4317 Elf64_Sym *sym; 4318 Elf64_Rel *rel; 4319 4320 if (sec_idx >= obj->efile.sec_cnt) 4321 return -EINVAL; 4322 4323 scn = elf_sec_by_idx(obj, sec_idx); 4324 scn_data = elf_sec_data(obj, scn); 4325 if (!scn_data) 4326 return -LIBBPF_ERRNO__FORMAT; 4327 4328 relo_sec_name = elf_sec_str(obj, shdr->sh_name); 4329 sec_name = elf_sec_name(obj, scn); 4330 if (!relo_sec_name || !sec_name) 4331 return -EINVAL; 4332 4333 pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n", 4334 relo_sec_name, sec_idx, sec_name); 4335 nrels = shdr->sh_size / shdr->sh_entsize; 4336 4337 for (i = 0; i < nrels; i++) { 4338 rel = elf_rel_by_idx(data, i); 4339 if (!rel) { 4340 pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i); 4341 return -LIBBPF_ERRNO__FORMAT; 4342 } 4343 4344 sym_idx = ELF64_R_SYM(rel->r_info); 4345 sym = elf_sym_by_idx(obj, sym_idx); 4346 if (!sym) { 4347 pr_warn("sec '%s': symbol #%zu not found for relo #%d\n", 4348 relo_sec_name, sym_idx, i); 4349 return -LIBBPF_ERRNO__FORMAT; 4350 } 4351 4352 if (sym->st_shndx >= obj->efile.sec_cnt) { 4353 pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n", 4354 relo_sec_name, sym_idx, (size_t)sym->st_shndx, i); 4355 return -LIBBPF_ERRNO__FORMAT; 4356 } 4357 4358 if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) { 4359 pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n", 4360 relo_sec_name, (size_t)rel->r_offset, i); 4361 return -LIBBPF_ERRNO__FORMAT; 4362 } 4363 4364 insn_idx = rel->r_offset / BPF_INSN_SZ; 4365 /* relocations against static functions are recorded as 4366 * relocations against the section that contains a function; 4367 * in such case, symbol will be STT_SECTION and sym.st_name 4368 * will point to empty string (0), so fetch section name 4369 * instead 4370 */ 4371 if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0) 4372 sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx)); 4373 else 4374 sym_name = elf_sym_str(obj, sym->st_name); 4375 sym_name = sym_name ?: "<?"; 4376 4377 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n", 4378 relo_sec_name, i, insn_idx, sym_name); 4379 4380 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); 4381 if (!prog) { 4382 pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n", 4383 relo_sec_name, i, sec_name, insn_idx); 4384 continue; 4385 } 4386 4387 relos = libbpf_reallocarray(prog->reloc_desc, 4388 prog->nr_reloc + 1, sizeof(*relos)); 4389 if (!relos) 4390 return -ENOMEM; 4391 prog->reloc_desc = relos; 4392 4393 /* adjust insn_idx to local BPF program frame of reference */ 4394 insn_idx -= prog->sec_insn_off; 4395 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc], 4396 insn_idx, sym_name, sym, rel); 4397 if (err) 4398 return err; 4399 4400 prog->nr_reloc++; 4401 } 4402 return 0; 4403 } 4404 4405 static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map) 4406 { 4407 int id; 4408 4409 if (!obj->btf) 4410 return -ENOENT; 4411 
4412 /* if it's BTF-defined map, we don't need to search for type IDs. 4413 * For struct_ops map, it does not need btf_key_type_id and 4414 * btf_value_type_id. 4415 */ 4416 if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map)) 4417 return 0; 4418 4419 /* 4420 * LLVM annotates global data differently in BTF, that is, 4421 * only as '.data', '.bss' or '.rodata'. 4422 */ 4423 if (!bpf_map__is_internal(map)) 4424 return -ENOENT; 4425 4426 id = btf__find_by_name(obj->btf, map->real_name); 4427 if (id < 0) 4428 return id; 4429 4430 map->btf_key_type_id = 0; 4431 map->btf_value_type_id = id; 4432 return 0; 4433 } 4434 4435 static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) 4436 { 4437 char file[PATH_MAX], buff[4096]; 4438 FILE *fp; 4439 __u32 val; 4440 int err; 4441 4442 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); 4443 memset(info, 0, sizeof(*info)); 4444 4445 fp = fopen(file, "re"); 4446 if (!fp) { 4447 err = -errno; 4448 pr_warn("failed to open %s: %d. No procfs support?\n", file, 4449 err); 4450 return err; 4451 } 4452 4453 while (fgets(buff, sizeof(buff), fp)) { 4454 if (sscanf(buff, "map_type:\t%u", &val) == 1) 4455 info->type = val; 4456 else if (sscanf(buff, "key_size:\t%u", &val) == 1) 4457 info->key_size = val; 4458 else if (sscanf(buff, "value_size:\t%u", &val) == 1) 4459 info->value_size = val; 4460 else if (sscanf(buff, "max_entries:\t%u", &val) == 1) 4461 info->max_entries = val; 4462 else if (sscanf(buff, "map_flags:\t%i", &val) == 1) 4463 info->map_flags = val; 4464 } 4465 4466 fclose(fp); 4467 4468 return 0; 4469 } 4470 4471 bool bpf_map__autocreate(const struct bpf_map *map) 4472 { 4473 return map->autocreate; 4474 } 4475 4476 int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) 4477 { 4478 if (map->obj->loaded) 4479 return libbpf_err(-EBUSY); 4480 4481 map->autocreate = autocreate; 4482 return 0; 4483 } 4484 4485 int bpf_map__reuse_fd(struct bpf_map *map, int fd) 4486 { 4487 struct bpf_map_info info; 4488 __u32 len = sizeof(info), name_len; 4489 int new_fd, err; 4490 char *new_name; 4491 4492 memset(&info, 0, len); 4493 err = bpf_map_get_info_by_fd(fd, &info, &len); 4494 if (err && errno == EINVAL) 4495 err = bpf_get_map_info_from_fdinfo(fd, &info); 4496 if (err) 4497 return libbpf_err(err); 4498 4499 name_len = strlen(info.name); 4500 if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0) 4501 new_name = strdup(map->name); 4502 else 4503 new_name = strdup(info.name); 4504 4505 if (!new_name) 4506 return libbpf_err(-errno); 4507 4508 /* 4509 * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set. 4510 * This is similar to what we do in ensure_good_fd(), but without 4511 * closing original FD. 
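	 * (F_DUPFD_CLOEXEC duplicates to the lowest free descriptor >= 3 and
	 * sets the close-on-exec flag atomically, so fds 0-2 are never taken.)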
4512 */ 4513 new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); 4514 if (new_fd < 0) { 4515 err = -errno; 4516 goto err_free_new_name; 4517 } 4518 4519 err = reuse_fd(map->fd, new_fd); 4520 if (err) 4521 goto err_free_new_name; 4522 4523 free(map->name); 4524 4525 map->name = new_name; 4526 map->def.type = info.type; 4527 map->def.key_size = info.key_size; 4528 map->def.value_size = info.value_size; 4529 map->def.max_entries = info.max_entries; 4530 map->def.map_flags = info.map_flags; 4531 map->btf_key_type_id = info.btf_key_type_id; 4532 map->btf_value_type_id = info.btf_value_type_id; 4533 map->reused = true; 4534 map->map_extra = info.map_extra; 4535 4536 return 0; 4537 4538 err_free_new_name: 4539 free(new_name); 4540 return libbpf_err(err); 4541 } 4542 4543 __u32 bpf_map__max_entries(const struct bpf_map *map) 4544 { 4545 return map->def.max_entries; 4546 } 4547 4548 struct bpf_map *bpf_map__inner_map(struct bpf_map *map) 4549 { 4550 if (!bpf_map_type__is_map_in_map(map->def.type)) 4551 return errno = EINVAL, NULL; 4552 4553 return map->inner_map; 4554 } 4555 4556 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) 4557 { 4558 if (map->obj->loaded) 4559 return libbpf_err(-EBUSY); 4560 4561 map->def.max_entries = max_entries; 4562 4563 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ 4564 if (map_is_ringbuf(map)) 4565 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); 4566 4567 return 0; 4568 } 4569 4570 static int bpf_object_prepare_token(struct bpf_object *obj) 4571 { 4572 const char *bpffs_path; 4573 int bpffs_fd = -1, token_fd, err; 4574 bool mandatory; 4575 enum libbpf_print_level level; 4576 4577 /* token is explicitly prevented */ 4578 if (obj->token_path && obj->token_path[0] == '\0') { 4579 pr_debug("object '%s': token is prevented, skipping...\n", obj->name); 4580 return 0; 4581 } 4582 4583 mandatory = obj->token_path != NULL; 4584 level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG; 4585 4586 bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH; 4587 bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); 4588 if (bpffs_fd < 0) { 4589 err = -errno; 4590 __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n", 4591 obj->name, err, bpffs_path, 4592 mandatory ? "" : ", skipping optional step..."); 4593 return mandatory ? err : 0; 4594 } 4595 4596 token_fd = bpf_token_create(bpffs_fd, 0); 4597 close(bpffs_fd); 4598 if (token_fd < 0) { 4599 if (!mandatory && token_fd == -ENOENT) { 4600 pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n", 4601 obj->name, bpffs_path); 4602 return 0; 4603 } 4604 __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n", 4605 obj->name, token_fd, bpffs_path, 4606 mandatory ? "" : ", skipping optional step..."); 4607 return mandatory ? token_fd : 0; 4608 } 4609 4610 obj->feat_cache = calloc(1, sizeof(*obj->feat_cache)); 4611 if (!obj->feat_cache) { 4612 close(token_fd); 4613 return -ENOMEM; 4614 } 4615 4616 obj->token_fd = token_fd; 4617 obj->feat_cache->token_fd = token_fd; 4618 4619 return 0; 4620 } 4621 4622 static int 4623 bpf_object__probe_loading(struct bpf_object *obj) 4624 { 4625 char *cp, errmsg[STRERR_BUFSIZE]; 4626 struct bpf_insn insns[] = { 4627 BPF_MOV64_IMM(BPF_REG_0, 0), 4628 BPF_EXIT_INSN(), 4629 }; 4630 int ret, insn_cnt = ARRAY_SIZE(insns); 4631 LIBBPF_OPTS(bpf_prog_load_opts, opts, 4632 .token_fd = obj->token_fd, 4633 .prog_flags = obj->token_fd ? 
BPF_F_TOKEN_FD : 0, 4634 ); 4635 4636 if (obj->gen_loader) 4637 return 0; 4638 4639 ret = bump_rlimit_memlock(); 4640 if (ret) 4641 pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); 4642 4643 /* make sure basic loading works */ 4644 ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); 4645 if (ret < 0) 4646 ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); 4647 if (ret < 0) { 4648 ret = errno; 4649 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); 4650 pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF " 4651 "program. Make sure your kernel supports BPF " 4652 "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is " 4653 "set to big enough value.\n", __func__, cp, ret); 4654 return -ret; 4655 } 4656 close(ret); 4657 4658 return 0; 4659 } 4660 4661 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) 4662 { 4663 if (obj && obj->gen_loader) 4664 /* To generate loader program assume the latest kernel 4665 * to avoid doing extra prog_load, map_create syscalls. 4666 */ 4667 return true; 4668 4669 if (obj->token_fd) 4670 return feat_supported(obj->feat_cache, feat_id); 4671 4672 return feat_supported(NULL, feat_id); 4673 } 4674 4675 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) 4676 { 4677 struct bpf_map_info map_info; 4678 char msg[STRERR_BUFSIZE]; 4679 __u32 map_info_len = sizeof(map_info); 4680 int err; 4681 4682 memset(&map_info, 0, map_info_len); 4683 err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len); 4684 if (err && errno == EINVAL) 4685 err = bpf_get_map_info_from_fdinfo(map_fd, &map_info); 4686 if (err) { 4687 pr_warn("failed to get map info for map FD %d: %s\n", map_fd, 4688 libbpf_strerror_r(errno, msg, sizeof(msg))); 4689 return false; 4690 } 4691 4692 return (map_info.type == map->def.type && 4693 map_info.key_size == map->def.key_size && 4694 map_info.value_size == map->def.value_size && 4695 map_info.max_entries == map->def.max_entries && 4696 map_info.map_flags == map->def.map_flags && 4697 map_info.map_extra == map->map_extra); 4698 } 4699 4700 static int 4701 bpf_object__reuse_map(struct bpf_map *map) 4702 { 4703 char *cp, errmsg[STRERR_BUFSIZE]; 4704 int err, pin_fd; 4705 4706 pin_fd = bpf_obj_get(map->pin_path); 4707 if (pin_fd < 0) { 4708 err = -errno; 4709 if (err == -ENOENT) { 4710 pr_debug("found no pinned map to reuse at '%s'\n", 4711 map->pin_path); 4712 return 0; 4713 } 4714 4715 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); 4716 pr_warn("couldn't retrieve pinned map '%s': %s\n", 4717 map->pin_path, cp); 4718 return err; 4719 } 4720 4721 if (!map_is_reuse_compat(map, pin_fd)) { 4722 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n", 4723 map->pin_path); 4724 close(pin_fd); 4725 return -EINVAL; 4726 } 4727 4728 err = bpf_map__reuse_fd(map, pin_fd); 4729 close(pin_fd); 4730 if (err) 4731 return err; 4732 4733 map->pinned = true; 4734 pr_debug("reused pinned map at '%s'\n", map->pin_path); 4735 4736 return 0; 4737 } 4738 4739 static int 4740 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) 4741 { 4742 enum libbpf_map_type map_type = map->libbpf_type; 4743 char *cp, errmsg[STRERR_BUFSIZE]; 4744 int err, zero = 0; 4745 4746 if (obj->gen_loader) { 4747 bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps, 4748 map->mmaped, map->def.value_size); 4749 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) 4750 
bpf_gen__map_freeze(obj->gen_loader, map - obj->maps); 4751 return 0; 4752 } 4753 err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); 4754 if (err) { 4755 err = -errno; 4756 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 4757 pr_warn("Error setting initial map(%s) contents: %s\n", 4758 map->name, cp); 4759 return err; 4760 } 4761 4762 /* Freeze .rodata and .kconfig map as read-only from syscall side. */ 4763 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) { 4764 err = bpf_map_freeze(map->fd); 4765 if (err) { 4766 err = -errno; 4767 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 4768 pr_warn("Error freezing map(%s) as read-only: %s\n", 4769 map->name, cp); 4770 return err; 4771 } 4772 } 4773 return 0; 4774 } 4775 4776 static void bpf_map__destroy(struct bpf_map *map); 4777 4778 static bool map_is_created(const struct bpf_map *map) 4779 { 4780 return map->obj->loaded || map->reused; 4781 } 4782 4783 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) 4784 { 4785 LIBBPF_OPTS(bpf_map_create_opts, create_attr); 4786 struct bpf_map_def *def = &map->def; 4787 const char *map_name = NULL; 4788 int err = 0, map_fd; 4789 4790 if (kernel_supports(obj, FEAT_PROG_NAME)) 4791 map_name = map->name; 4792 create_attr.map_ifindex = map->map_ifindex; 4793 create_attr.map_flags = def->map_flags; 4794 create_attr.numa_node = map->numa_node; 4795 create_attr.map_extra = map->map_extra; 4796 create_attr.token_fd = obj->token_fd; 4797 if (obj->token_fd) 4798 create_attr.map_flags |= BPF_F_TOKEN_FD; 4799 4800 if (bpf_map__is_struct_ops(map)) { 4801 create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; 4802 if (map->mod_btf_fd >= 0) { 4803 create_attr.value_type_btf_obj_fd = map->mod_btf_fd; 4804 create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD; 4805 } 4806 } 4807 4808 if (obj->btf && btf__fd(obj->btf) >= 0) { 4809 create_attr.btf_fd = btf__fd(obj->btf); 4810 create_attr.btf_key_type_id = map->btf_key_type_id; 4811 create_attr.btf_value_type_id = map->btf_value_type_id; 4812 } 4813 4814 if (bpf_map_type__is_map_in_map(def->type)) { 4815 if (map->inner_map) { 4816 err = map_set_def_max_entries(map->inner_map); 4817 if (err) 4818 return err; 4819 err = bpf_object__create_map(obj, map->inner_map, true); 4820 if (err) { 4821 pr_warn("map '%s': failed to create inner map: %d\n", 4822 map->name, err); 4823 return err; 4824 } 4825 map->inner_map_fd = map->inner_map->fd; 4826 } 4827 if (map->inner_map_fd >= 0) 4828 create_attr.inner_map_fd = map->inner_map_fd; 4829 } 4830 4831 switch (def->type) { 4832 case BPF_MAP_TYPE_PERF_EVENT_ARRAY: 4833 case BPF_MAP_TYPE_CGROUP_ARRAY: 4834 case BPF_MAP_TYPE_STACK_TRACE: 4835 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 4836 case BPF_MAP_TYPE_HASH_OF_MAPS: 4837 case BPF_MAP_TYPE_DEVMAP: 4838 case BPF_MAP_TYPE_DEVMAP_HASH: 4839 case BPF_MAP_TYPE_CPUMAP: 4840 case BPF_MAP_TYPE_XSKMAP: 4841 case BPF_MAP_TYPE_SOCKMAP: 4842 case BPF_MAP_TYPE_SOCKHASH: 4843 case BPF_MAP_TYPE_QUEUE: 4844 case BPF_MAP_TYPE_STACK: 4845 create_attr.btf_fd = 0; 4846 create_attr.btf_key_type_id = 0; 4847 create_attr.btf_value_type_id = 0; 4848 map->btf_key_type_id = 0; 4849 map->btf_value_type_id = 0; 4850 default: 4851 break; 4852 } 4853 4854 if (obj->gen_loader) { 4855 bpf_gen__map_create(obj->gen_loader, def->type, map_name, 4856 def->key_size, def->value_size, def->max_entries, 4857 &create_attr, is_inner ? 
							   -1 : map - obj->maps);
		/* We keep pretending we have valid FD to pass various fd >= 0
		 * checks by just keeping original placeholder FDs in place.
		 * See bpf_object__add_map() comment.
		 * This placeholder fd will not be used with any syscall and
		 * will be reset to -1 eventually.
		 */
		map_fd = map->fd;
	} else {
		map_fd = bpf_map_create(def->type, map_name,
					def->key_size, def->value_size,
					def->max_entries, &create_attr);
	}
	if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) {
		char *cp, errmsg[STRERR_BUFSIZE];

		err = -errno;
		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
		pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
			map->name, cp, err);
		create_attr.btf_fd = 0;
		create_attr.btf_key_type_id = 0;
		create_attr.btf_value_type_id = 0;
		map->btf_key_type_id = 0;
		map->btf_value_type_id = 0;
		map_fd = bpf_map_create(def->type, map_name,
					def->key_size, def->value_size,
					def->max_entries, &create_attr);
	}

	if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
		if (obj->gen_loader)
			map->inner_map->fd = -1;
		bpf_map__destroy(map->inner_map);
		zfree(&map->inner_map);
	}

	if (map_fd < 0)
		return map_fd;

	/* obj->gen_loader case, prevent reuse_fd() from closing map_fd */
	if (map->fd == map_fd)
		return 0;

	/* Keep placeholder FD value but now point it to the BPF map object.
	 * This way everything that relied on this map's FD (e.g., relocated
	 * ldimm64 instructions) will stay valid and won't need adjustments.
	 * map->fd stays valid but now points to what map_fd points to.
	 */
	return reuse_fd(map->fd, map_fd);
}

static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
{
	const struct bpf_map *targ_map;
	unsigned int i;
	int fd, err = 0;

	for (i = 0; i < map->init_slots_sz; i++) {
		if (!map->init_slots[i])
			continue;

		targ_map = map->init_slots[i];
		fd = targ_map->fd;

		if (obj->gen_loader) {
			bpf_gen__populate_outer_map(obj->gen_loader,
						    map - obj->maps, i,
						    targ_map - obj->maps);
		} else {
			err = bpf_map_update_elem(map->fd, &i, &fd, 0);
		}
		if (err) {
			err = -errno;
			pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
				map->name, i, targ_map->name, fd, err);
			return err;
		}
		pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
			 map->name, i, targ_map->name, fd);
	}

	zfree(&map->init_slots);
	map->init_slots_sz = 0;

	return 0;
}

static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
{
	const struct bpf_program *targ_prog;
	unsigned int i;
	int fd, err;

	if (obj->gen_loader)
		return -ENOTSUP;

	for (i = 0; i < map->init_slots_sz; i++) {
		if (!map->init_slots[i])
			continue;

		targ_prog = map->init_slots[i];
		fd = bpf_program__fd(targ_prog);

		err = bpf_map_update_elem(map->fd, &i, &fd, 0);
		if (err) {
			err = -errno;
			pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n",
				map->name, i, targ_prog->name, fd, err);
			return err;
		}
		pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
			 map->name, i, targ_prog->name, fd);
	}

	zfree(&map->init_slots);
map->init_slots_sz = 0; 4974 4975 return 0; 4976 } 4977 4978 static int bpf_object_init_prog_arrays(struct bpf_object *obj) 4979 { 4980 struct bpf_map *map; 4981 int i, err; 4982 4983 for (i = 0; i < obj->nr_maps; i++) { 4984 map = &obj->maps[i]; 4985 4986 if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY) 4987 continue; 4988 4989 err = init_prog_array_slots(obj, map); 4990 if (err < 0) 4991 return err; 4992 } 4993 return 0; 4994 } 4995 4996 static int map_set_def_max_entries(struct bpf_map *map) 4997 { 4998 if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) { 4999 int nr_cpus; 5000 5001 nr_cpus = libbpf_num_possible_cpus(); 5002 if (nr_cpus < 0) { 5003 pr_warn("map '%s': failed to determine number of system CPUs: %d\n", 5004 map->name, nr_cpus); 5005 return nr_cpus; 5006 } 5007 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); 5008 map->def.max_entries = nr_cpus; 5009 } 5010 5011 return 0; 5012 } 5013 5014 static int 5015 bpf_object__create_maps(struct bpf_object *obj) 5016 { 5017 struct bpf_map *map; 5018 char *cp, errmsg[STRERR_BUFSIZE]; 5019 unsigned int i, j; 5020 int err; 5021 bool retried; 5022 5023 for (i = 0; i < obj->nr_maps; i++) { 5024 map = &obj->maps[i]; 5025 5026 /* To support old kernels, we skip creating global data maps 5027 * (.rodata, .data, .kconfig, etc); later on, during program 5028 * loading, if we detect that at least one of the to-be-loaded 5029 * programs is referencing any global data map, we'll error 5030 * out with program name and relocation index logged. 5031 * This approach allows to accommodate Clang emitting 5032 * unnecessary .rodata.str1.1 sections for string literals, 5033 * but also it allows to have CO-RE applications that use 5034 * global variables in some of BPF programs, but not others. 5035 * If those global variable-using programs are not loaded at 5036 * runtime due to bpf_program__set_autoload(prog, false), 5037 * bpf_object loading will succeed just fine even on old 5038 * kernels. 
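		 * For example, if the only user of .rodata is a program disabled
		 * with bpf_program__set_autoload(prog, false), the skipped .rodata
		 * map is never referenced at load time and loading succeeds.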
5039 */ 5040 if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA)) 5041 map->autocreate = false; 5042 5043 if (!map->autocreate) { 5044 pr_debug("map '%s': skipped auto-creating...\n", map->name); 5045 continue; 5046 } 5047 5048 err = map_set_def_max_entries(map); 5049 if (err) 5050 goto err_out; 5051 5052 retried = false; 5053 retry: 5054 if (map->pin_path) { 5055 err = bpf_object__reuse_map(map); 5056 if (err) { 5057 pr_warn("map '%s': error reusing pinned map\n", 5058 map->name); 5059 goto err_out; 5060 } 5061 if (retried && map->fd < 0) { 5062 pr_warn("map '%s': cannot find pinned map\n", 5063 map->name); 5064 err = -ENOENT; 5065 goto err_out; 5066 } 5067 } 5068 5069 if (map->reused) { 5070 pr_debug("map '%s': skipping creation (preset fd=%d)\n", 5071 map->name, map->fd); 5072 } else { 5073 err = bpf_object__create_map(obj, map, false); 5074 if (err) 5075 goto err_out; 5076 5077 pr_debug("map '%s': created successfully, fd=%d\n", 5078 map->name, map->fd); 5079 5080 if (bpf_map__is_internal(map)) { 5081 err = bpf_object__populate_internal_map(obj, map); 5082 if (err < 0) 5083 goto err_out; 5084 } 5085 5086 if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { 5087 err = init_map_in_map_slots(obj, map); 5088 if (err < 0) 5089 goto err_out; 5090 } 5091 } 5092 5093 if (map->pin_path && !map->pinned) { 5094 err = bpf_map__pin(map, NULL); 5095 if (err) { 5096 if (!retried && err == -EEXIST) { 5097 retried = true; 5098 goto retry; 5099 } 5100 pr_warn("map '%s': failed to auto-pin at '%s': %d\n", 5101 map->name, map->pin_path, err); 5102 goto err_out; 5103 } 5104 } 5105 } 5106 5107 return 0; 5108 5109 err_out: 5110 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 5111 pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err); 5112 pr_perm_msg(err); 5113 for (j = 0; j < i; j++) 5114 zclose(obj->maps[j].fd); 5115 return err; 5116 } 5117 5118 static bool bpf_core_is_flavor_sep(const char *s) 5119 { 5120 /* check X___Y name pattern, where X and Y are not underscores */ 5121 return s[0] != '_' && /* X */ 5122 s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */ 5123 s[4] != '_'; /* Y */ 5124 } 5125 5126 /* Given 'some_struct_name___with_flavor' return the length of a name prefix 5127 * before last triple underscore. Struct name part after last triple 5128 * underscore is ignored by BPF CO-RE relocation during relocation matching. 
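 * E.g., for "task_struct___old" this returns 11, the length of "task_struct",
 * so the flavored local type matches plain "task_struct" in target BTF.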
5129 */ 5130 size_t bpf_core_essential_name_len(const char *name) 5131 { 5132 size_t n = strlen(name); 5133 int i; 5134 5135 for (i = n - 5; i >= 0; i--) { 5136 if (bpf_core_is_flavor_sep(name + i)) 5137 return i + 1; 5138 } 5139 return n; 5140 } 5141 5142 void bpf_core_free_cands(struct bpf_core_cand_list *cands) 5143 { 5144 if (!cands) 5145 return; 5146 5147 free(cands->cands); 5148 free(cands); 5149 } 5150 5151 int bpf_core_add_cands(struct bpf_core_cand *local_cand, 5152 size_t local_essent_len, 5153 const struct btf *targ_btf, 5154 const char *targ_btf_name, 5155 int targ_start_id, 5156 struct bpf_core_cand_list *cands) 5157 { 5158 struct bpf_core_cand *new_cands, *cand; 5159 const struct btf_type *t, *local_t; 5160 const char *targ_name, *local_name; 5161 size_t targ_essent_len; 5162 int n, i; 5163 5164 local_t = btf__type_by_id(local_cand->btf, local_cand->id); 5165 local_name = btf__str_by_offset(local_cand->btf, local_t->name_off); 5166 5167 n = btf__type_cnt(targ_btf); 5168 for (i = targ_start_id; i < n; i++) { 5169 t = btf__type_by_id(targ_btf, i); 5170 if (!btf_kind_core_compat(t, local_t)) 5171 continue; 5172 5173 targ_name = btf__name_by_offset(targ_btf, t->name_off); 5174 if (str_is_empty(targ_name)) 5175 continue; 5176 5177 targ_essent_len = bpf_core_essential_name_len(targ_name); 5178 if (targ_essent_len != local_essent_len) 5179 continue; 5180 5181 if (strncmp(local_name, targ_name, local_essent_len) != 0) 5182 continue; 5183 5184 pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n", 5185 local_cand->id, btf_kind_str(local_t), 5186 local_name, i, btf_kind_str(t), targ_name, 5187 targ_btf_name); 5188 new_cands = libbpf_reallocarray(cands->cands, cands->len + 1, 5189 sizeof(*cands->cands)); 5190 if (!new_cands) 5191 return -ENOMEM; 5192 5193 cand = &new_cands[cands->len]; 5194 cand->btf = targ_btf; 5195 cand->id = i; 5196 5197 cands->cands = new_cands; 5198 cands->len++; 5199 } 5200 return 0; 5201 } 5202 5203 static int load_module_btfs(struct bpf_object *obj) 5204 { 5205 struct bpf_btf_info info; 5206 struct module_btf *mod_btf; 5207 struct btf *btf; 5208 char name[64]; 5209 __u32 id = 0, len; 5210 int err, fd; 5211 5212 if (obj->btf_modules_loaded) 5213 return 0; 5214 5215 if (obj->gen_loader) 5216 return 0; 5217 5218 /* don't do this again, even if we find no module BTFs */ 5219 obj->btf_modules_loaded = true; 5220 5221 /* kernel too old to support module BTFs */ 5222 if (!kernel_supports(obj, FEAT_MODULE_BTF)) 5223 return 0; 5224 5225 while (true) { 5226 err = bpf_btf_get_next_id(id, &id); 5227 if (err && errno == ENOENT) 5228 return 0; 5229 if (err && errno == EPERM) { 5230 pr_debug("skipping module BTFs loading, missing privileges\n"); 5231 return 0; 5232 } 5233 if (err) { 5234 err = -errno; 5235 pr_warn("failed to iterate BTF objects: %d\n", err); 5236 return err; 5237 } 5238 5239 fd = bpf_btf_get_fd_by_id(id); 5240 if (fd < 0) { 5241 if (errno == ENOENT) 5242 continue; /* expected race: BTF was unloaded */ 5243 err = -errno; 5244 pr_warn("failed to get BTF object #%d FD: %d\n", id, err); 5245 return err; 5246 } 5247 5248 len = sizeof(info); 5249 memset(&info, 0, sizeof(info)); 5250 info.name = ptr_to_u64(name); 5251 info.name_len = sizeof(name); 5252 5253 err = bpf_btf_get_info_by_fd(fd, &info, &len); 5254 if (err) { 5255 err = -errno; 5256 pr_warn("failed to get BTF object #%d info: %d\n", id, err); 5257 goto err_out; 5258 } 5259 5260 /* ignore non-module BTFs */ 5261 if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) { 5262 
			close(fd);
			continue;
		}

		btf = btf_get_from_fd(fd, obj->btf_vmlinux);
		err = libbpf_get_error(btf);
		if (err) {
			pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
				name, id, err);
			goto err_out;
		}

		err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
					sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
		if (err)
			goto err_out;

		mod_btf = &obj->btf_modules[obj->btf_module_cnt++];

		mod_btf->btf = btf;
		mod_btf->id = id;
		mod_btf->fd = fd;
		mod_btf->name = strdup(name);
		if (!mod_btf->name) {
			err = -ENOMEM;
			goto err_out;
		}
		continue;

err_out:
		close(fd);
		return err;
	}

	return 0;
}

static struct bpf_core_cand_list *
bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
{
	struct bpf_core_cand local_cand = {};
	struct bpf_core_cand_list *cands;
	const struct btf *main_btf;
	const struct btf_type *local_t;
	const char *local_name;
	size_t local_essent_len;
	int err, i;

	local_cand.btf = local_btf;
	local_cand.id = local_type_id;
	local_t = btf__type_by_id(local_btf, local_type_id);
	if (!local_t)
		return ERR_PTR(-EINVAL);

	local_name = btf__name_by_offset(local_btf, local_t->name_off);
	if (str_is_empty(local_name))
		return ERR_PTR(-EINVAL);
	local_essent_len = bpf_core_essential_name_len(local_name);

	cands = calloc(1, sizeof(*cands));
	if (!cands)
		return ERR_PTR(-ENOMEM);

	/* Attempt to find target candidates in vmlinux BTF first */
	main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
	err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
	if (err)
		goto err_out;

	/* if vmlinux BTF has any candidate, don't go for module BTFs */
	if (cands->len)
		return cands;

	/* if vmlinux BTF was overridden, don't attempt to load module BTFs */
	if (obj->btf_vmlinux_override)
		return cands;

	/* now look through module BTFs, trying to still find candidates */
	err = load_module_btfs(obj);
	if (err)
		goto err_out;

	for (i = 0; i < obj->btf_module_cnt; i++) {
		err = bpf_core_add_cands(&local_cand, local_essent_len,
					 obj->btf_modules[i].btf,
					 obj->btf_modules[i].name,
					 btf__type_cnt(obj->btf_vmlinux),
					 cands);
		if (err)
			goto err_out;
	}

	return cands;
err_out:
	bpf_core_free_cands(cands);
	return ERR_PTR(err);
}

/* Check local and target types for compatibility. This check is used for
 * type-based CO-RE relocations and follows slightly different rules than
 * field-based relocations. This function assumes that root types were already
 * checked for name match. Beyond that initial root-level name check, names
 * are completely ignored.
Compatibility rules are as follows: 5365 * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but 5366 * kind should match for local and target types (i.e., STRUCT is not 5367 * compatible with UNION); 5368 * - for ENUMs, the size is ignored; 5369 * - for INT, size and signedness are ignored; 5370 * - for ARRAY, dimensionality is ignored, element types are checked for 5371 * compatibility recursively; 5372 * - CONST/VOLATILE/RESTRICT modifiers are ignored; 5373 * - TYPEDEFs/PTRs are compatible if types they pointing to are compatible; 5374 * - FUNC_PROTOs are compatible if they have compatible signature: same 5375 * number of input args and compatible return and argument types. 5376 * These rules are not set in stone and probably will be adjusted as we get 5377 * more experience with using BPF CO-RE relocations. 5378 */ 5379 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, 5380 const struct btf *targ_btf, __u32 targ_id) 5381 { 5382 return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32); 5383 } 5384 5385 int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, 5386 const struct btf *targ_btf, __u32 targ_id) 5387 { 5388 return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32); 5389 } 5390 5391 static size_t bpf_core_hash_fn(const long key, void *ctx) 5392 { 5393 return key; 5394 } 5395 5396 static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx) 5397 { 5398 return k1 == k2; 5399 } 5400 5401 static int record_relo_core(struct bpf_program *prog, 5402 const struct bpf_core_relo *core_relo, int insn_idx) 5403 { 5404 struct reloc_desc *relos, *relo; 5405 5406 relos = libbpf_reallocarray(prog->reloc_desc, 5407 prog->nr_reloc + 1, sizeof(*relos)); 5408 if (!relos) 5409 return -ENOMEM; 5410 relo = &relos[prog->nr_reloc]; 5411 relo->type = RELO_CORE; 5412 relo->insn_idx = insn_idx; 5413 relo->core_relo = core_relo; 5414 prog->reloc_desc = relos; 5415 prog->nr_reloc++; 5416 return 0; 5417 } 5418 5419 static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx) 5420 { 5421 struct reloc_desc *relo; 5422 int i; 5423 5424 for (i = 0; i < prog->nr_reloc; i++) { 5425 relo = &prog->reloc_desc[i]; 5426 if (relo->type != RELO_CORE || relo->insn_idx != insn_idx) 5427 continue; 5428 5429 return relo->core_relo; 5430 } 5431 5432 return NULL; 5433 } 5434 5435 static int bpf_core_resolve_relo(struct bpf_program *prog, 5436 const struct bpf_core_relo *relo, 5437 int relo_idx, 5438 const struct btf *local_btf, 5439 struct hashmap *cand_cache, 5440 struct bpf_core_relo_res *targ_res) 5441 { 5442 struct bpf_core_spec specs_scratch[3] = {}; 5443 struct bpf_core_cand_list *cands = NULL; 5444 const char *prog_name = prog->name; 5445 const struct btf_type *local_type; 5446 const char *local_name; 5447 __u32 local_id = relo->type_id; 5448 int err; 5449 5450 local_type = btf__type_by_id(local_btf, local_id); 5451 if (!local_type) 5452 return -EINVAL; 5453 5454 local_name = btf__name_by_offset(local_btf, local_type->name_off); 5455 if (!local_name) 5456 return -EINVAL; 5457 5458 if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && 5459 !hashmap__find(cand_cache, local_id, &cands)) { 5460 cands = bpf_core_find_cands(prog->obj, local_btf, local_id); 5461 if (IS_ERR(cands)) { 5462 pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n", 5463 prog_name, relo_idx, local_id, btf_kind_str(local_type), 5464 local_name, PTR_ERR(cands)); 5465 return PTR_ERR(cands); 5466 } 
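		/* Cache candidates by local root type ID so that subsequent
		 * relocations against the same type reuse this list instead of
		 * re-scanning vmlinux/module BTFs.
		 */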
5467 err = hashmap__set(cand_cache, local_id, cands, NULL, NULL); 5468 if (err) { 5469 bpf_core_free_cands(cands); 5470 return err; 5471 } 5472 } 5473 5474 return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch, 5475 targ_res); 5476 } 5477 5478 static int 5479 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) 5480 { 5481 const struct btf_ext_info_sec *sec; 5482 struct bpf_core_relo_res targ_res; 5483 const struct bpf_core_relo *rec; 5484 const struct btf_ext_info *seg; 5485 struct hashmap_entry *entry; 5486 struct hashmap *cand_cache = NULL; 5487 struct bpf_program *prog; 5488 struct bpf_insn *insn; 5489 const char *sec_name; 5490 int i, err = 0, insn_idx, sec_idx, sec_num; 5491 5492 if (obj->btf_ext->core_relo_info.len == 0) 5493 return 0; 5494 5495 if (targ_btf_path) { 5496 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL); 5497 err = libbpf_get_error(obj->btf_vmlinux_override); 5498 if (err) { 5499 pr_warn("failed to parse target BTF: %d\n", err); 5500 return err; 5501 } 5502 } 5503 5504 cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL); 5505 if (IS_ERR(cand_cache)) { 5506 err = PTR_ERR(cand_cache); 5507 goto out; 5508 } 5509 5510 seg = &obj->btf_ext->core_relo_info; 5511 sec_num = 0; 5512 for_each_btf_ext_sec(seg, sec) { 5513 sec_idx = seg->sec_idxs[sec_num]; 5514 sec_num++; 5515 5516 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); 5517 if (str_is_empty(sec_name)) { 5518 err = -EINVAL; 5519 goto out; 5520 } 5521 5522 pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info); 5523 5524 for_each_btf_ext_rec(seg, sec, i, rec) { 5525 if (rec->insn_off % BPF_INSN_SZ) 5526 return -EINVAL; 5527 insn_idx = rec->insn_off / BPF_INSN_SZ; 5528 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); 5529 if (!prog) { 5530 /* When __weak subprog is "overridden" by another instance 5531 * of the subprog from a different object file, linker still 5532 * appends all the .BTF.ext info that used to belong to that 5533 * eliminated subprogram. 5534 * This is similar to what x86-64 linker does for relocations. 5535 * So just ignore such relocations just like we ignore 5536 * subprog instructions when discovering subprograms. 
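				 * (Such leftover records reference instruction offsets
				 * that no bpf_program in this object covers, which is
				 * why the find_prog_by_sec_insn() lookup above came
				 * back empty.)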
5537 */ 5538 pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n", 5539 sec_name, i, insn_idx); 5540 continue; 5541 } 5542 /* no need to apply CO-RE relocation if the program is 5543 * not going to be loaded 5544 */ 5545 if (!prog->autoload) 5546 continue; 5547 5548 /* adjust insn_idx from section frame of reference to the local 5549 * program's frame of reference; (sub-)program code is not yet 5550 * relocated, so it's enough to just subtract in-section offset 5551 */ 5552 insn_idx = insn_idx - prog->sec_insn_off; 5553 if (insn_idx >= prog->insns_cnt) 5554 return -EINVAL; 5555 insn = &prog->insns[insn_idx]; 5556 5557 err = record_relo_core(prog, rec, insn_idx); 5558 if (err) { 5559 pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n", 5560 prog->name, i, err); 5561 goto out; 5562 } 5563 5564 if (prog->obj->gen_loader) 5565 continue; 5566 5567 err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res); 5568 if (err) { 5569 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", 5570 prog->name, i, err); 5571 goto out; 5572 } 5573 5574 err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res); 5575 if (err) { 5576 pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n", 5577 prog->name, i, insn_idx, err); 5578 goto out; 5579 } 5580 } 5581 } 5582 5583 out: 5584 /* obj->btf_vmlinux and module BTFs are freed after object load */ 5585 btf__free(obj->btf_vmlinux_override); 5586 obj->btf_vmlinux_override = NULL; 5587 5588 if (!IS_ERR_OR_NULL(cand_cache)) { 5589 hashmap__for_each_entry(cand_cache, entry, i) { 5590 bpf_core_free_cands(entry->pvalue); 5591 } 5592 hashmap__free(cand_cache); 5593 } 5594 return err; 5595 } 5596 5597 /* base map load ldimm64 special constant, used also for log fixup logic */ 5598 #define POISON_LDIMM64_MAP_BASE 2001000000 5599 #define POISON_LDIMM64_MAP_PFX "200100" 5600 5601 static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx, 5602 int insn_idx, struct bpf_insn *insn, 5603 int map_idx, const struct bpf_map *map) 5604 { 5605 int i; 5606 5607 pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n", 5608 prog->name, relo_idx, insn_idx, map_idx, map->name); 5609 5610 /* we turn single ldimm64 into two identical invalid calls */ 5611 for (i = 0; i < 2; i++) { 5612 insn->code = BPF_JMP | BPF_CALL; 5613 insn->dst_reg = 0; 5614 insn->src_reg = 0; 5615 insn->off = 0; 5616 /* if this instruction is reachable (not a dead code), 5617 * verifier will complain with something like: 5618 * invalid func unknown#2001000123 5619 * where lower 123 is map index into obj->maps[] array 5620 */ 5621 insn->imm = POISON_LDIMM64_MAP_BASE + map_idx; 5622 5623 insn++; 5624 } 5625 } 5626 5627 /* unresolved kfunc call special constant, used also for log fixup logic */ 5628 #define POISON_CALL_KFUNC_BASE 2002000000 5629 #define POISON_CALL_KFUNC_PFX "2002" 5630 5631 static void poison_kfunc_call(struct bpf_program *prog, int relo_idx, 5632 int insn_idx, struct bpf_insn *insn, 5633 int ext_idx, const struct extern_desc *ext) 5634 { 5635 pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n", 5636 prog->name, relo_idx, insn_idx, ext->name); 5637 5638 /* we turn kfunc call into invalid helper call with identifiable constant */ 5639 insn->code = BPF_JMP | BPF_CALL; 5640 insn->dst_reg = 0; 5641 insn->src_reg = 0; 5642 insn->off = 0; 5643 /* if this instruction is reachable (not a dead code), 5644 * verifier will complain with something like: 5645 
* invalid func unknown#2001000123 5646 * where lower 123 is extern index into obj->externs[] array 5647 */ 5648 insn->imm = POISON_CALL_KFUNC_BASE + ext_idx; 5649 } 5650 5651 /* Relocate data references within program code: 5652 * - map references; 5653 * - global variable references; 5654 * - extern references. 5655 */ 5656 static int 5657 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) 5658 { 5659 int i; 5660 5661 for (i = 0; i < prog->nr_reloc; i++) { 5662 struct reloc_desc *relo = &prog->reloc_desc[i]; 5663 struct bpf_insn *insn = &prog->insns[relo->insn_idx]; 5664 const struct bpf_map *map; 5665 struct extern_desc *ext; 5666 5667 switch (relo->type) { 5668 case RELO_LD64: 5669 map = &obj->maps[relo->map_idx]; 5670 if (obj->gen_loader) { 5671 insn[0].src_reg = BPF_PSEUDO_MAP_IDX; 5672 insn[0].imm = relo->map_idx; 5673 } else if (map->autocreate) { 5674 insn[0].src_reg = BPF_PSEUDO_MAP_FD; 5675 insn[0].imm = map->fd; 5676 } else { 5677 poison_map_ldimm64(prog, i, relo->insn_idx, insn, 5678 relo->map_idx, map); 5679 } 5680 break; 5681 case RELO_DATA: 5682 map = &obj->maps[relo->map_idx]; 5683 insn[1].imm = insn[0].imm + relo->sym_off; 5684 if (obj->gen_loader) { 5685 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; 5686 insn[0].imm = relo->map_idx; 5687 } else if (map->autocreate) { 5688 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; 5689 insn[0].imm = map->fd; 5690 } else { 5691 poison_map_ldimm64(prog, i, relo->insn_idx, insn, 5692 relo->map_idx, map); 5693 } 5694 break; 5695 case RELO_EXTERN_LD64: 5696 ext = &obj->externs[relo->ext_idx]; 5697 if (ext->type == EXT_KCFG) { 5698 if (obj->gen_loader) { 5699 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; 5700 insn[0].imm = obj->kconfig_map_idx; 5701 } else { 5702 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; 5703 insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; 5704 } 5705 insn[1].imm = ext->kcfg.data_off; 5706 } else /* EXT_KSYM */ { 5707 if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */ 5708 insn[0].src_reg = BPF_PSEUDO_BTF_ID; 5709 insn[0].imm = ext->ksym.kernel_btf_id; 5710 insn[1].imm = ext->ksym.kernel_btf_obj_fd; 5711 } else { /* typeless ksyms or unresolved typed ksyms */ 5712 insn[0].imm = (__u32)ext->ksym.addr; 5713 insn[1].imm = ext->ksym.addr >> 32; 5714 } 5715 } 5716 break; 5717 case RELO_EXTERN_CALL: 5718 ext = &obj->externs[relo->ext_idx]; 5719 insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; 5720 if (ext->is_set) { 5721 insn[0].imm = ext->ksym.kernel_btf_id; 5722 insn[0].off = ext->ksym.btf_fd_idx; 5723 } else { /* unresolved weak kfunc call */ 5724 poison_kfunc_call(prog, i, relo->insn_idx, insn, 5725 relo->ext_idx, ext); 5726 } 5727 break; 5728 case RELO_SUBPROG_ADDR: 5729 if (insn[0].src_reg != BPF_PSEUDO_FUNC) { 5730 pr_warn("prog '%s': relo #%d: bad insn\n", 5731 prog->name, i); 5732 return -EINVAL; 5733 } 5734 /* handled already */ 5735 break; 5736 case RELO_CALL: 5737 /* handled already */ 5738 break; 5739 case RELO_CORE: 5740 /* will be handled by bpf_program_record_relos() */ 5741 break; 5742 default: 5743 pr_warn("prog '%s': relo #%d: bad relo type %d\n", 5744 prog->name, i, relo->type); 5745 return -EINVAL; 5746 } 5747 } 5748 5749 return 0; 5750 } 5751 5752 static int adjust_prog_btf_ext_info(const struct bpf_object *obj, 5753 const struct bpf_program *prog, 5754 const struct btf_ext_info *ext_info, 5755 void **prog_info, __u32 *prog_rec_cnt, 5756 __u32 *prog_rec_sz) 5757 { 5758 void *copy_start = NULL, *copy_end = NULL; 5759 void *rec, *rec_end, *new_prog_info; 5760 const struct btf_ext_info_sec *sec; 
5761 size_t old_sz, new_sz; 5762 int i, sec_num, sec_idx, off_adj; 5763 5764 sec_num = 0; 5765 for_each_btf_ext_sec(ext_info, sec) { 5766 sec_idx = ext_info->sec_idxs[sec_num]; 5767 sec_num++; 5768 if (prog->sec_idx != sec_idx) 5769 continue; 5770 5771 for_each_btf_ext_rec(ext_info, sec, i, rec) { 5772 __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ; 5773 5774 if (insn_off < prog->sec_insn_off) 5775 continue; 5776 if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt) 5777 break; 5778 5779 if (!copy_start) 5780 copy_start = rec; 5781 copy_end = rec + ext_info->rec_size; 5782 } 5783 5784 if (!copy_start) 5785 return -ENOENT; 5786 5787 /* append func/line info of a given (sub-)program to the main 5788 * program func/line info 5789 */ 5790 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size; 5791 new_sz = old_sz + (copy_end - copy_start); 5792 new_prog_info = realloc(*prog_info, new_sz); 5793 if (!new_prog_info) 5794 return -ENOMEM; 5795 *prog_info = new_prog_info; 5796 *prog_rec_cnt = new_sz / ext_info->rec_size; 5797 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start); 5798 5799 /* Kernel instruction offsets are in units of 8-byte 5800 * instructions, while .BTF.ext instruction offsets generated 5801 * by Clang are in units of bytes. So convert Clang offsets 5802 * into kernel offsets and adjust offset according to program 5803 * relocated position. 5804 */ 5805 off_adj = prog->sub_insn_off - prog->sec_insn_off; 5806 rec = new_prog_info + old_sz; 5807 rec_end = new_prog_info + new_sz; 5808 for (; rec < rec_end; rec += ext_info->rec_size) { 5809 __u32 *insn_off = rec; 5810 5811 *insn_off = *insn_off / BPF_INSN_SZ + off_adj; 5812 } 5813 *prog_rec_sz = ext_info->rec_size; 5814 return 0; 5815 } 5816 5817 return -ENOENT; 5818 } 5819 5820 static int 5821 reloc_prog_func_and_line_info(const struct bpf_object *obj, 5822 struct bpf_program *main_prog, 5823 const struct bpf_program *prog) 5824 { 5825 int err; 5826 5827 /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't 5828 * support func/line info 5829 */ 5830 if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC)) 5831 return 0; 5832 5833 /* only attempt func info relocation if main program's func_info 5834 * relocation was successful 5835 */ 5836 if (main_prog != prog && !main_prog->func_info) 5837 goto line_info; 5838 5839 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info, 5840 &main_prog->func_info, 5841 &main_prog->func_info_cnt, 5842 &main_prog->func_info_rec_size); 5843 if (err) { 5844 if (err != -ENOENT) { 5845 pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n", 5846 prog->name, err); 5847 return err; 5848 } 5849 if (main_prog->func_info) { 5850 /* 5851 * Some info has already been found but has problem 5852 * in the last btf_ext reloc. Must have to error out. 5853 */ 5854 pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name); 5855 return err; 5856 } 5857 /* Have problem loading the very first info. Ignore the rest. 
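 * (A note on the error value: adjust_prog_btf_ext_info() returns -ENOENT
 * either when .BTF.ext has no func info section matching this program's
 * ELF section, or when no records fall within this program's instruction
 * range; for the main program itself that is treated as non-fatal and we
 * simply skip .BTF.ext func info altogether.)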
*/ 5858 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n", 5859 prog->name); 5860 } 5861 5862 line_info: 5863 /* don't relocate line info if main program's relocation failed */ 5864 if (main_prog != prog && !main_prog->line_info) 5865 return 0; 5866 5867 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info, 5868 &main_prog->line_info, 5869 &main_prog->line_info_cnt, 5870 &main_prog->line_info_rec_size); 5871 if (err) { 5872 if (err != -ENOENT) { 5873 pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n", 5874 prog->name, err); 5875 return err; 5876 } 5877 if (main_prog->line_info) { 5878 /* 5879 * Some info has already been found but has problem 5880 * in the last btf_ext reloc. Must have to error out. 5881 */ 5882 pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name); 5883 return err; 5884 } 5885 /* Have problem loading the very first info. Ignore the rest. */ 5886 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n", 5887 prog->name); 5888 } 5889 return 0; 5890 } 5891 5892 static int cmp_relo_by_insn_idx(const void *key, const void *elem) 5893 { 5894 size_t insn_idx = *(const size_t *)key; 5895 const struct reloc_desc *relo = elem; 5896 5897 if (insn_idx == relo->insn_idx) 5898 return 0; 5899 return insn_idx < relo->insn_idx ? -1 : 1; 5900 } 5901 5902 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx) 5903 { 5904 if (!prog->nr_reloc) 5905 return NULL; 5906 return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc, 5907 sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx); 5908 } 5909 5910 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog) 5911 { 5912 int new_cnt = main_prog->nr_reloc + subprog->nr_reloc; 5913 struct reloc_desc *relos; 5914 int i; 5915 5916 if (main_prog == subprog) 5917 return 0; 5918 relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos)); 5919 /* if new count is zero, reallocarray can return a valid NULL result; 5920 * in this case the previous pointer will be freed, so we *have to* 5921 * reassign old pointer to the new value (even if it's NULL) 5922 */ 5923 if (!relos && new_cnt) 5924 return -ENOMEM; 5925 if (subprog->nr_reloc) 5926 memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, 5927 sizeof(*relos) * subprog->nr_reloc); 5928 5929 for (i = main_prog->nr_reloc; i < new_cnt; i++) 5930 relos[i].insn_idx += subprog->sub_insn_off; 5931 /* After insn_idx adjustment the 'relos' array is still sorted 5932 * by insn_idx and doesn't break bsearch. 
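 * (This holds because every appended entry is shifted by the same
 * sub_insn_off, which equals main_prog's instruction count at the time
 * the subprog was appended, so all the new insn_idx values land past any
 * index already recorded for main_prog.)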
5933 */ 5934 main_prog->reloc_desc = relos; 5935 main_prog->nr_reloc = new_cnt; 5936 return 0; 5937 } 5938 5939 static int 5940 bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog, 5941 struct bpf_program *subprog) 5942 { 5943 struct bpf_insn *insns; 5944 size_t new_cnt; 5945 int err; 5946 5947 subprog->sub_insn_off = main_prog->insns_cnt; 5948 5949 new_cnt = main_prog->insns_cnt + subprog->insns_cnt; 5950 insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); 5951 if (!insns) { 5952 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); 5953 return -ENOMEM; 5954 } 5955 main_prog->insns = insns; 5956 main_prog->insns_cnt = new_cnt; 5957 5958 memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, 5959 subprog->insns_cnt * sizeof(*insns)); 5960 5961 pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", 5962 main_prog->name, subprog->insns_cnt, subprog->name); 5963 5964 /* The subprog insns are now appended. Append its relos too. */ 5965 err = append_subprog_relos(main_prog, subprog); 5966 if (err) 5967 return err; 5968 return 0; 5969 } 5970 5971 static int 5972 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, 5973 struct bpf_program *prog) 5974 { 5975 size_t sub_insn_idx, insn_idx; 5976 struct bpf_program *subprog; 5977 struct reloc_desc *relo; 5978 struct bpf_insn *insn; 5979 int err; 5980 5981 err = reloc_prog_func_and_line_info(obj, main_prog, prog); 5982 if (err) 5983 return err; 5984 5985 for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) { 5986 insn = &main_prog->insns[prog->sub_insn_off + insn_idx]; 5987 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn)) 5988 continue; 5989 5990 relo = find_prog_insn_relo(prog, insn_idx); 5991 if (relo && relo->type == RELO_EXTERN_CALL) 5992 /* kfunc relocations will be handled later 5993 * in bpf_object__relocate_data() 5994 */ 5995 continue; 5996 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) { 5997 pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n", 5998 prog->name, insn_idx, relo->type); 5999 return -LIBBPF_ERRNO__RELOC; 6000 } 6001 if (relo) { 6002 /* sub-program instruction index is a combination of 6003 * an offset of a symbol pointed to by relocation and 6004 * call instruction's imm field; for global functions, 6005 * call always has imm = -1, but for static functions 6006 * relocation is against STT_SECTION and insn->imm 6007 * points to a start of a static function 6008 * 6009 * for subprog addr relocation, the relo->sym_off + insn->imm is 6010 * the byte offset in the corresponding section. 6011 */ 6012 if (relo->type == RELO_CALL) 6013 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1; 6014 else 6015 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ; 6016 } else if (insn_is_pseudo_func(insn)) { 6017 /* 6018 * RELO_SUBPROG_ADDR relo is always emitted even if both 6019 * functions are in the same section, so it shouldn't reach here. 
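 * For context: insn_is_pseudo_func() matches ld_imm64 instructions with
 * src_reg == BPF_PSEUDO_FUNC, i.e. code that takes the address of a
 * subprog (for example, to pass a callback to bpf_for_each_map_elem()),
 * and such address-of-subprog uses are expected to always come with a
 * RELO_SUBPROG_ADDR relocation.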
6020 */ 6021 pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n", 6022 prog->name, insn_idx); 6023 return -LIBBPF_ERRNO__RELOC; 6024 } else { 6025 /* if subprogram call is to a static function within 6026 * the same ELF section, there won't be any relocation 6027 * emitted, but it also means there is no additional 6028 * offset necessary, insns->imm is relative to 6029 * instruction's original position within the section 6030 */ 6031 sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1; 6032 } 6033 6034 /* we enforce that sub-programs should be in .text section */ 6035 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx); 6036 if (!subprog) { 6037 pr_warn("prog '%s': no .text section found yet sub-program call exists\n", 6038 prog->name); 6039 return -LIBBPF_ERRNO__RELOC; 6040 } 6041 6042 /* if it's the first call instruction calling into this 6043 * subprogram (meaning this subprog hasn't been processed 6044 * yet) within the context of current main program: 6045 * - append it at the end of main program's instruction block; 6046 * - process it recursively, while current program is put on hold; 6047 * - if that subprogram calls some other not yet processed 6048 * subprogram, same thing will happen recursively until 6049 * there are no more unprocessed subprograms left to append 6050 * and relocate. 6051 */ 6052 if (subprog->sub_insn_off == 0) { 6053 err = bpf_object__append_subprog_code(obj, main_prog, subprog); 6054 if (err) 6055 return err; 6056 err = bpf_object__reloc_code(obj, main_prog, subprog); 6057 if (err) 6058 return err; 6059 } 6060 6061 /* main_prog->insns memory could have been re-allocated, so 6062 * calculate pointer again 6063 */ 6064 insn = &main_prog->insns[prog->sub_insn_off + insn_idx]; 6065 /* calculate correct instruction position within current main 6066 * prog; each main prog can have a different set of 6067 * subprograms appended (potentially in different order as 6068 * well), so position of any subprog can be different for 6069 * different main programs 6070 */ 6071 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1; 6072 6073 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n", 6074 prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off); 6075 } 6076 6077 return 0; 6078 } 6079 6080 /* 6081 * Relocate sub-program calls. 6082 * 6083 * The algorithm operates as follows. Each entry-point BPF program (referred to as 6084 * the main prog) is processed separately. Each subprog (a non-entry function 6085 * that can be called from either entry progs or other subprogs) gets its 6086 * sub_insn_off reset to zero. This serves as an indicator that this subprogram 6087 * hasn't yet been appended and relocated within the current main prog. Once it's 6088 * relocated, sub_insn_off will point at the position within the current main prog 6089 * where the given subprog was appended. This will further be used to relocate all 6090 * the call instructions jumping into this subprog. 6091 * 6092 * We start with the main program and process all call instructions. If the call 6093 * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off 6094 * is zero), subprog instructions are appended at the end of the main program's 6095 * instruction array. Then the main program is "put on hold" while we recursively 6096 * process the newly appended subprogram.
If that subprogram calls into another 6097 * subprogram that hasn't been appended, the new subprogram is again appended to 6098 * the *main* prog's instructions (subprog's instructions are always left 6099 * untouched, as they need to be in unmodified state for subsequent main progs, 6100 * and subprog instructions are only ever sent to the kernel as part of a main prog) and 6101 * the process continues recursively. Once all the subprogs called from a main 6102 * prog or any of its subprogs are appended (and relocated), all their 6103 * positions within the finalized instructions array are known, so it's easy to 6104 * rewrite call instructions with correct relative offsets, corresponding to the 6105 * desired target subprog. 6106 * 6107 * It's important to realize that some subprogs might not be called from a given 6108 * main prog or any of its called/used subprogs. Those will keep their 6109 * subprog->sub_insn_off as zero at all times and won't be appended to the current 6110 * main prog and won't be relocated within the context of the current main prog. 6111 * They might still be used from other main progs later. 6112 * 6113 * Visually this process can be shown as below. Suppose we have two main 6114 * programs mainA and mainB, and the BPF object contains three subprogs: subA, 6115 * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and 6116 * subC both call subB: 6117 * 6118 * +--------+ +-------+ 6119 * | v v | 6120 * +--+---+ +--+-+-+ +---+--+ 6121 * | subA | | subB | | subC | 6122 * +--+---+ +------+ +---+--+ 6123 * ^ ^ 6124 * | | 6125 * +---+-------+ +------+----+ 6126 * | mainA | | mainB | 6127 * +-----------+ +-----------+ 6128 * 6129 * We'll start relocating mainA, find subA, append it and start 6130 * processing subA recursively: 6131 * 6132 * +-----------+------+ 6133 * | mainA | subA | 6134 * +-----------+------+ 6135 * 6136 * At this point we notice that subB is used from subA, so we append it and 6137 * relocate it (there are no further subcalls from subB): 6138 * 6139 * +-----------+------+------+ 6140 * | mainA | subA | subB | 6141 * +-----------+------+------+ 6142 * 6143 * At this point, we relocate subA calls, then go one level up and finish with 6144 * relocating mainA calls. mainA is done. 6145 * 6146 * For mainB the process is similar but results in a different order. We start with 6147 * mainB and skip subA and subB, as mainB never calls them (at least 6148 * directly), but we see subC is needed, so we append it and start processing it: 6149 * 6150 * +-----------+------+ 6151 * | mainB | subC | 6152 * +-----------+------+ 6153 * Now we see subC needs subB, so we go back to it, append and relocate it: 6154 * 6155 * +-----------+------+------+ 6156 * | mainB | subC | subB | 6157 * +-----------+------+------+ 6158 * 6159 * At this point we unwind recursion, relocate calls in subC, then in mainB.
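 *
 * To make the imm fixup in bpf_object__reloc_code() concrete (illustrative
 * numbers only): if mainA is 10 instructions long, subA gets appended at
 * sub_insn_off = 10. A call to subA sitting at insn #3 of mainA (main
 * progs keep sub_insn_off == 0) is then patched as
 *
 *   insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1
 *             = 10 - (0 + 3) - 1 = 6;
 *
 * i.e. the call target is 6 instructions past the instruction following
 * the call, which is the relative form the kernel expects for BPF-to-BPF
 * calls.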
6160 */ 6161 static int 6162 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) 6163 { 6164 struct bpf_program *subprog; 6165 int i, err; 6166 6167 /* mark all subprogs as not relocated (yet) within the context of 6168 * current main program 6169 */ 6170 for (i = 0; i < obj->nr_programs; i++) { 6171 subprog = &obj->programs[i]; 6172 if (!prog_is_subprog(obj, subprog)) 6173 continue; 6174 6175 subprog->sub_insn_off = 0; 6176 } 6177 6178 err = bpf_object__reloc_code(obj, prog, prog); 6179 if (err) 6180 return err; 6181 6182 return 0; 6183 } 6184 6185 static void 6186 bpf_object__free_relocs(struct bpf_object *obj) 6187 { 6188 struct bpf_program *prog; 6189 int i; 6190 6191 /* free up relocation descriptors */ 6192 for (i = 0; i < obj->nr_programs; i++) { 6193 prog = &obj->programs[i]; 6194 zfree(&prog->reloc_desc); 6195 prog->nr_reloc = 0; 6196 } 6197 } 6198 6199 static int cmp_relocs(const void *_a, const void *_b) 6200 { 6201 const struct reloc_desc *a = _a; 6202 const struct reloc_desc *b = _b; 6203 6204 if (a->insn_idx != b->insn_idx) 6205 return a->insn_idx < b->insn_idx ? -1 : 1; 6206 6207 /* no two relocations should have the same insn_idx, but ... */ 6208 if (a->type != b->type) 6209 return a->type < b->type ? -1 : 1; 6210 6211 return 0; 6212 } 6213 6214 static void bpf_object__sort_relos(struct bpf_object *obj) 6215 { 6216 int i; 6217 6218 for (i = 0; i < obj->nr_programs; i++) { 6219 struct bpf_program *p = &obj->programs[i]; 6220 6221 if (!p->nr_reloc) 6222 continue; 6223 6224 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); 6225 } 6226 } 6227 6228 static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog) 6229 { 6230 const char *str = "exception_callback:"; 6231 size_t pfx_len = strlen(str); 6232 int i, j, n; 6233 6234 if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG)) 6235 return 0; 6236 6237 n = btf__type_cnt(obj->btf); 6238 for (i = 1; i < n; i++) { 6239 const char *name; 6240 struct btf_type *t; 6241 6242 t = btf_type_by_id(obj->btf, i); 6243 if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1) 6244 continue; 6245 6246 name = btf__str_by_offset(obj->btf, t->name_off); 6247 if (strncmp(name, str, pfx_len) != 0) 6248 continue; 6249 6250 t = btf_type_by_id(obj->btf, t->type); 6251 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) { 6252 pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n", 6253 prog->name); 6254 return -EINVAL; 6255 } 6256 if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0) 6257 continue; 6258 /* Multiple callbacks are specified for the same prog, 6259 * the verifier will eventually return an error for this 6260 * case, hence simply skip appending a subprog. 6261 */ 6262 if (prog->exception_cb_idx >= 0) { 6263 prog->exception_cb_idx = -1; 6264 break; 6265 } 6266 6267 name += pfx_len; 6268 if (str_is_empty(name)) { 6269 pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n", 6270 prog->name); 6271 return -EINVAL; 6272 } 6273 6274 for (j = 0; j < obj->nr_programs; j++) { 6275 struct bpf_program *subprog = &obj->programs[j]; 6276 6277 if (!prog_is_subprog(obj, subprog)) 6278 continue; 6279 if (strcmp(name, subprog->name) != 0) 6280 continue; 6281 /* Enforce non-hidden, as from verifier point of 6282 * view it expects global functions, whereas the 6283 * mark_btf_static fixes up linkage as static. 
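 * For reference, the "exception_callback:<name>" decl tag scanned for
 * above is typically produced by annotating the main program itself,
 * roughly like this (illustrative sketch only; helper macro spellings
 * vary):
 *
 *   __attribute__((btf_decl_tag("exception_callback:my_cb")))
 *   SEC("tc") int prog(struct __sk_buff *ctx) { ... }
 *
 * with 'my_cb' being a global (non-static, non-hidden) subprog defined
 * in the same object.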
6284 */ 6285 if (!subprog->sym_global || subprog->mark_btf_static) { 6286 pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n", 6287 prog->name, subprog->name); 6288 return -EINVAL; 6289 } 6290 /* Let's see if we already saw a static exception callback with the same name */ 6291 if (prog->exception_cb_idx >= 0) { 6292 pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n", 6293 prog->name, subprog->name); 6294 return -EINVAL; 6295 } 6296 prog->exception_cb_idx = j; 6297 break; 6298 } 6299 6300 if (prog->exception_cb_idx >= 0) 6301 continue; 6302 6303 pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name); 6304 return -ENOENT; 6305 } 6306 6307 return 0; 6308 } 6309 6310 static struct { 6311 enum bpf_prog_type prog_type; 6312 const char *ctx_name; 6313 } global_ctx_map[] = { 6314 { BPF_PROG_TYPE_CGROUP_DEVICE, "bpf_cgroup_dev_ctx" }, 6315 { BPF_PROG_TYPE_CGROUP_SKB, "__sk_buff" }, 6316 { BPF_PROG_TYPE_CGROUP_SOCK, "bpf_sock" }, 6317 { BPF_PROG_TYPE_CGROUP_SOCK_ADDR, "bpf_sock_addr" }, 6318 { BPF_PROG_TYPE_CGROUP_SOCKOPT, "bpf_sockopt" }, 6319 { BPF_PROG_TYPE_CGROUP_SYSCTL, "bpf_sysctl" }, 6320 { BPF_PROG_TYPE_FLOW_DISSECTOR, "__sk_buff" }, 6321 { BPF_PROG_TYPE_KPROBE, "bpf_user_pt_regs_t" }, 6322 { BPF_PROG_TYPE_LWT_IN, "__sk_buff" }, 6323 { BPF_PROG_TYPE_LWT_OUT, "__sk_buff" }, 6324 { BPF_PROG_TYPE_LWT_SEG6LOCAL, "__sk_buff" }, 6325 { BPF_PROG_TYPE_LWT_XMIT, "__sk_buff" }, 6326 { BPF_PROG_TYPE_NETFILTER, "bpf_nf_ctx" }, 6327 { BPF_PROG_TYPE_PERF_EVENT, "bpf_perf_event_data" }, 6328 { BPF_PROG_TYPE_RAW_TRACEPOINT, "bpf_raw_tracepoint_args" }, 6329 { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" }, 6330 { BPF_PROG_TYPE_SCHED_ACT, "__sk_buff" }, 6331 { BPF_PROG_TYPE_SCHED_CLS, "__sk_buff" }, 6332 { BPF_PROG_TYPE_SK_LOOKUP, "bpf_sk_lookup" }, 6333 { BPF_PROG_TYPE_SK_MSG, "sk_msg_md" }, 6334 { BPF_PROG_TYPE_SK_REUSEPORT, "sk_reuseport_md" }, 6335 { BPF_PROG_TYPE_SK_SKB, "__sk_buff" }, 6336 { BPF_PROG_TYPE_SOCK_OPS, "bpf_sock_ops" }, 6337 { BPF_PROG_TYPE_SOCKET_FILTER, "__sk_buff" }, 6338 { BPF_PROG_TYPE_XDP, "xdp_md" }, 6339 /* all other program types don't have "named" context structs */ 6340 }; 6341 6342 static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog, 6343 const char *subprog_name, int arg_idx, 6344 int arg_type_id, const char *ctx_name) 6345 { 6346 const struct btf_type *t; 6347 const char *tname; 6348 6349 /* check if existing parameter already matches verifier expectations */ 6350 t = skip_mods_and_typedefs(btf, arg_type_id, NULL); 6351 if (!btf_is_ptr(t)) 6352 goto out_warn; 6353 6354 /* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe 6355 * and perf_event programs, so check this case early on and forget 6356 * about it for subsequent checks 6357 */ 6358 while (btf_is_mod(t)) 6359 t = btf__type_by_id(btf, t->type); 6360 if (btf_is_typedef(t) && 6361 (prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) { 6362 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>"; 6363 if (strcmp(tname, "bpf_user_pt_regs_t") == 0) 6364 return false; /* canonical type for kprobe/perf_event */ 6365 } 6366 6367 /* now we can ignore typedefs moving forward */ 6368 t = skip_mods_and_typedefs(btf, t->type, NULL); 6369 6370 /* if it's `void *`, definitely fix up BTF info */ 6371 if (btf_is_void(t)) 6372 return true; 6373 6374 /* if it's already proper canonical type, no need to fix up */ 6375 tname = btf__str_by_offset(btf, 
t->name_off) ?: "<anon>"; 6376 if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0) 6377 return false; 6378 6379 /* special cases */ 6380 switch (prog->type) { 6381 case BPF_PROG_TYPE_KPROBE: 6382 case BPF_PROG_TYPE_PERF_EVENT: 6383 /* `struct pt_regs *` is expected, but we need to fix up */ 6384 if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) 6385 return true; 6386 break; 6387 case BPF_PROG_TYPE_RAW_TRACEPOINT: 6388 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 6389 /* allow u64* as ctx */ 6390 if (btf_is_int(t) && t->size == 8) 6391 return true; 6392 break; 6393 default: 6394 break; 6395 } 6396 6397 out_warn: 6398 pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n", 6399 prog->name, subprog_name, arg_idx, ctx_name); 6400 return false; 6401 } 6402 6403 static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog) 6404 { 6405 int fn_id, fn_proto_id, ret_type_id, orig_proto_id; 6406 int i, err, arg_cnt, fn_name_off, linkage; 6407 struct btf_type *fn_t, *fn_proto_t, *t; 6408 struct btf_param *p; 6409 6410 /* caller already validated FUNC -> FUNC_PROTO validity */ 6411 fn_t = btf_type_by_id(btf, orig_fn_id); 6412 fn_proto_t = btf_type_by_id(btf, fn_t->type); 6413 6414 /* Note that each btf__add_xxx() operation invalidates 6415 * all btf_type and string pointers, so we need to be 6416 * very careful when cloning BTF types. BTF type 6417 * pointers have to be always refetched. And to avoid 6418 * problems with invalidated string pointers, we 6419 * add empty strings initially, then just fix up 6420 * name_off offsets in place. Offsets are stable for 6421 * existing strings, so that works out. 6422 */ 6423 fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */ 6424 linkage = btf_func_linkage(fn_t); 6425 orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */ 6426 ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */ 6427 arg_cnt = btf_vlen(fn_proto_t); 6428 6429 /* clone FUNC_PROTO and its params */ 6430 fn_proto_id = btf__add_func_proto(btf, ret_type_id); 6431 if (fn_proto_id < 0) 6432 return -EINVAL; 6433 6434 for (i = 0; i < arg_cnt; i++) { 6435 int name_off; 6436 6437 /* copy original parameter data */ 6438 t = btf_type_by_id(btf, orig_proto_id); 6439 p = &btf_params(t)[i]; 6440 name_off = p->name_off; 6441 6442 err = btf__add_func_param(btf, "", p->type); 6443 if (err) 6444 return err; 6445 6446 fn_proto_t = btf_type_by_id(btf, fn_proto_id); 6447 p = &btf_params(fn_proto_t)[i]; 6448 p->name_off = name_off; /* use remembered str offset */ 6449 } 6450 6451 /* clone FUNC now, btf__add_func() enforces non-empty name, so use 6452 * entry program's name as a placeholder, which we replace immediately 6453 * with original name_off 6454 */ 6455 fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id); 6456 if (fn_id < 0) 6457 return -EINVAL; 6458 6459 fn_t = btf_type_by_id(btf, fn_id); 6460 fn_t->name_off = fn_name_off; /* reuse original string */ 6461 6462 return fn_id; 6463 } 6464 6465 static int probe_kern_arg_ctx_tag(void) 6466 { 6467 /* To minimize merge conflicts with BPF token series that refactors 6468 * feature detection code a lot, we don't integrate 6469 * probe_kern_arg_ctx_tag() into kernel_supports() feature-detection 6470 * framework yet, doing our own caching internally. 6471 * This will be cleaned up a bit later when bpf/bpf-next trees settle. 
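 *
 * Conceptually, the raw BTF and instructions constructed below are
 * roughly equivalent to loading this kprobe program (a sketch, not the
 * literal source; __arg_ctx stands for
 * __attribute__((btf_decl_tag("arg:ctx")))):
 *
 *   __noinline int b(void *ctx __arg_ctx) { return bpf_get_func_ip(ctx); }
 *   SEC("kprobe") int a(void *ctx) { return b(ctx); }
 *
 * bpf_get_func_ip() requires a PTR_TO_CTX argument, so the test load
 * succeeds only if the kernel honors the "arg:ctx" tag on b's parameter.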
6472 */ 6473 static int cached_result = -1; 6474 static const char strs[] = "\0a\0b\0arg:ctx\0"; 6475 const __u32 types[] = { 6476 /* [1] INT */ 6477 BTF_TYPE_INT_ENC(1 /* "a" */, BTF_INT_SIGNED, 0, 32, 4), 6478 /* [2] PTR -> VOID */ 6479 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), 6480 /* [3] FUNC_PROTO `int(void *a)` */ 6481 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1), 6482 BTF_PARAM_ENC(1 /* "a" */, 2), 6483 /* [4] FUNC 'a' -> FUNC_PROTO (main prog) */ 6484 BTF_TYPE_ENC(1 /* "a" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 3), 6485 /* [5] FUNC_PROTO `int(void *b __arg_ctx)` */ 6486 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 1), 6487 BTF_PARAM_ENC(3 /* "b" */, 2), 6488 /* [6] FUNC 'b' -> FUNC_PROTO (subprog) */ 6489 BTF_TYPE_ENC(3 /* "b" */, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 5), 6490 /* [7] DECL_TAG 'arg:ctx' -> func 'b' arg 'b' */ 6491 BTF_TYPE_DECL_TAG_ENC(5 /* "arg:ctx" */, 6, 0), 6492 }; 6493 const struct bpf_insn insns[] = { 6494 /* main prog */ 6495 BPF_CALL_REL(+1), 6496 BPF_EXIT_INSN(), 6497 /* global subprog */ 6498 BPF_EMIT_CALL(BPF_FUNC_get_func_ip), /* needs PTR_TO_CTX */ 6499 BPF_EXIT_INSN(), 6500 }; 6501 const struct bpf_func_info_min func_infos[] = { 6502 { 0, 4 }, /* main prog -> FUNC 'a' */ 6503 { 2, 6 }, /* subprog -> FUNC 'b' */ 6504 }; 6505 LIBBPF_OPTS(bpf_prog_load_opts, opts); 6506 int prog_fd, btf_fd, insn_cnt = ARRAY_SIZE(insns); 6507 6508 if (cached_result >= 0) 6509 return cached_result; 6510 6511 btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), 0); 6512 if (btf_fd < 0) 6513 return 0; 6514 6515 opts.prog_btf_fd = btf_fd; 6516 opts.func_info = &func_infos; 6517 opts.func_info_cnt = ARRAY_SIZE(func_infos); 6518 opts.func_info_rec_size = sizeof(func_infos[0]); 6519 6520 prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, "det_arg_ctx", 6521 "GPL", insns, insn_cnt, &opts); 6522 close(btf_fd); 6523 6524 cached_result = probe_fd(prog_fd); 6525 return cached_result; 6526 } 6527 6528 /* Check if main program or global subprog's function prototype has `arg:ctx` 6529 * argument tags, and, if necessary, substitute correct type to match what BPF 6530 * verifier would expect, taking into account specific program type. This 6531 * allows to support __arg_ctx tag transparently on old kernels that don't yet 6532 * have a native support for it in the verifier, making user's life much 6533 * easier. 
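 *
 * For example, given a global subprog written as (user-side sketch, not
 * code from this file):
 *
 *   __noinline int handle(void *ctx __arg_ctx) { ... }
 *   SEC("xdp") int prog(struct xdp_md *ctx) { return handle(ctx); }
 *
 * on kernels without native "arg:ctx" support the code below clones
 * handle's FUNC/FUNC_PROTO BTF and rewrites the tagged argument's type
 * into `struct xdp_md *` (the struct name looked up in global_ctx_map),
 * which older verifiers already recognize as PTR_TO_CTX.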
6534 */ 6535 static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog) 6536 { 6537 const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name; 6538 struct bpf_func_info_min *func_rec; 6539 struct btf_type *fn_t, *fn_proto_t; 6540 struct btf *btf = obj->btf; 6541 const struct btf_type *t; 6542 struct btf_param *p; 6543 int ptr_id = 0, struct_id, tag_id, orig_fn_id; 6544 int i, n, arg_idx, arg_cnt, err, rec_idx; 6545 int *orig_ids; 6546 6547 /* no .BTF.ext, no problem */ 6548 if (!obj->btf_ext || !prog->func_info) 6549 return 0; 6550 6551 /* don't do any fix ups if kernel natively supports __arg_ctx */ 6552 if (probe_kern_arg_ctx_tag() > 0) 6553 return 0; 6554 6555 /* some BPF program types just don't have named context structs, so 6556 * this fallback mechanism doesn't work for them 6557 */ 6558 for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) { 6559 if (global_ctx_map[i].prog_type != prog->type) 6560 continue; 6561 ctx_name = global_ctx_map[i].ctx_name; 6562 break; 6563 } 6564 if (!ctx_name) 6565 return 0; 6566 6567 /* remember original func BTF IDs to detect if we already cloned them */ 6568 orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids)); 6569 if (!orig_ids) 6570 return -ENOMEM; 6571 for (i = 0; i < prog->func_info_cnt; i++) { 6572 func_rec = prog->func_info + prog->func_info_rec_size * i; 6573 orig_ids[i] = func_rec->type_id; 6574 } 6575 6576 /* go through each DECL_TAG with "arg:ctx" and see if it points to one 6577 * of our subprogs; if yes and subprog is global and needs adjustment, 6578 * clone and adjust FUNC -> FUNC_PROTO combo 6579 */ 6580 for (i = 1, n = btf__type_cnt(btf); i < n; i++) { 6581 /* only DECL_TAG with "arg:ctx" value are interesting */ 6582 t = btf__type_by_id(btf, i); 6583 if (!btf_is_decl_tag(t)) 6584 continue; 6585 if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0) 6586 continue; 6587 6588 /* only global funcs need adjustment, if at all */ 6589 orig_fn_id = t->type; 6590 fn_t = btf_type_by_id(btf, orig_fn_id); 6591 if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL) 6592 continue; 6593 6594 /* sanity check FUNC -> FUNC_PROTO chain, just in case */ 6595 fn_proto_t = btf_type_by_id(btf, fn_t->type); 6596 if (!fn_proto_t || !btf_is_func_proto(fn_proto_t)) 6597 continue; 6598 6599 /* find corresponding func_info record */ 6600 func_rec = NULL; 6601 for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) { 6602 if (orig_ids[rec_idx] == t->type) { 6603 func_rec = prog->func_info + prog->func_info_rec_size * rec_idx; 6604 break; 6605 } 6606 } 6607 /* current main program doesn't call into this subprog */ 6608 if (!func_rec) 6609 continue; 6610 6611 /* some more sanity checking of DECL_TAG */ 6612 arg_cnt = btf_vlen(fn_proto_t); 6613 arg_idx = btf_decl_tag(t)->component_idx; 6614 if (arg_idx < 0 || arg_idx >= arg_cnt) 6615 continue; 6616 6617 /* check if we should fix up argument type */ 6618 p = &btf_params(fn_proto_t)[arg_idx]; 6619 fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>"; 6620 if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name)) 6621 continue; 6622 6623 /* clone fn/fn_proto, unless we already did it for another arg */ 6624 if (func_rec->type_id == orig_fn_id) { 6625 int fn_id; 6626 6627 fn_id = clone_func_btf_info(btf, orig_fn_id, prog); 6628 if (fn_id < 0) { 6629 err = fn_id; 6630 goto err_out; 6631 } 6632 6633 /* point func_info record to a cloned FUNC type */ 6634 func_rec->type_id = fn_id; 6635 } 6636 6637 /* create PTR -> STRUCT type chain 
to mark PTR_TO_CTX argument; 6638 * we do it just once per main BPF program, as all global 6639 * funcs share the same program type, so need only PTR -> 6640 * STRUCT type chain 6641 */ 6642 if (ptr_id == 0) { 6643 struct_id = btf__add_struct(btf, ctx_name, 0); 6644 ptr_id = btf__add_ptr(btf, struct_id); 6645 if (ptr_id < 0 || struct_id < 0) { 6646 err = -EINVAL; 6647 goto err_out; 6648 } 6649 } 6650 6651 /* for completeness, clone DECL_TAG and point it to cloned param */ 6652 tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx); 6653 if (tag_id < 0) { 6654 err = -EINVAL; 6655 goto err_out; 6656 } 6657 6658 /* all the BTF manipulations invalidated pointers, refetch them */ 6659 fn_t = btf_type_by_id(btf, func_rec->type_id); 6660 fn_proto_t = btf_type_by_id(btf, fn_t->type); 6661 6662 /* fix up type ID pointed to by param */ 6663 p = &btf_params(fn_proto_t)[arg_idx]; 6664 p->type = ptr_id; 6665 } 6666 6667 free(orig_ids); 6668 return 0; 6669 err_out: 6670 free(orig_ids); 6671 return err; 6672 } 6673 6674 static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) 6675 { 6676 struct bpf_program *prog; 6677 size_t i, j; 6678 int err; 6679 6680 if (obj->btf_ext) { 6681 err = bpf_object__relocate_core(obj, targ_btf_path); 6682 if (err) { 6683 pr_warn("failed to perform CO-RE relocations: %d\n", 6684 err); 6685 return err; 6686 } 6687 bpf_object__sort_relos(obj); 6688 } 6689 6690 /* Before relocating calls pre-process relocations and mark 6691 * few ld_imm64 instructions that points to subprogs. 6692 * Otherwise bpf_object__reloc_code() later would have to consider 6693 * all ld_imm64 insns as relocation candidates. That would 6694 * reduce relocation speed, since amount of find_prog_insn_relo() 6695 * would increase and most of them will fail to find a relo. 6696 */ 6697 for (i = 0; i < obj->nr_programs; i++) { 6698 prog = &obj->programs[i]; 6699 for (j = 0; j < prog->nr_reloc; j++) { 6700 struct reloc_desc *relo = &prog->reloc_desc[j]; 6701 struct bpf_insn *insn = &prog->insns[relo->insn_idx]; 6702 6703 /* mark the insn, so it's recognized by insn_is_pseudo_func() */ 6704 if (relo->type == RELO_SUBPROG_ADDR) 6705 insn[0].src_reg = BPF_PSEUDO_FUNC; 6706 } 6707 } 6708 6709 /* relocate subprogram calls and append used subprograms to main 6710 * programs; each copy of subprogram code needs to be relocated 6711 * differently for each main program, because its code location might 6712 * have changed. 6713 * Append subprog relos to main programs to allow data relos to be 6714 * processed after text is completely relocated. 6715 */ 6716 for (i = 0; i < obj->nr_programs; i++) { 6717 prog = &obj->programs[i]; 6718 /* sub-program's sub-calls are relocated within the context of 6719 * its main program only 6720 */ 6721 if (prog_is_subprog(obj, prog)) 6722 continue; 6723 if (!prog->autoload) 6724 continue; 6725 6726 err = bpf_object__relocate_calls(obj, prog); 6727 if (err) { 6728 pr_warn("prog '%s': failed to relocate calls: %d\n", 6729 prog->name, err); 6730 return err; 6731 } 6732 6733 err = bpf_prog_assign_exc_cb(obj, prog); 6734 if (err) 6735 return err; 6736 /* Now, also append exception callback if it has not been done already. */ 6737 if (prog->exception_cb_idx >= 0) { 6738 struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx]; 6739 6740 /* Calling exception callback directly is disallowed, which the 6741 * verifier will reject later. 
In case it was processed already, 6742 * we can skip this step, otherwise for all other valid cases we 6743 * have to append exception callback now. 6744 */ 6745 if (subprog->sub_insn_off == 0) { 6746 err = bpf_object__append_subprog_code(obj, prog, subprog); 6747 if (err) 6748 return err; 6749 err = bpf_object__reloc_code(obj, prog, subprog); 6750 if (err) 6751 return err; 6752 } 6753 } 6754 } 6755 for (i = 0; i < obj->nr_programs; i++) { 6756 prog = &obj->programs[i]; 6757 if (prog_is_subprog(obj, prog)) 6758 continue; 6759 if (!prog->autoload) 6760 continue; 6761 6762 /* Process data relos for main programs */ 6763 err = bpf_object__relocate_data(obj, prog); 6764 if (err) { 6765 pr_warn("prog '%s': failed to relocate data references: %d\n", 6766 prog->name, err); 6767 return err; 6768 } 6769 6770 /* Fix up .BTF.ext information, if necessary */ 6771 err = bpf_program_fixup_func_info(obj, prog); 6772 if (err) { 6773 pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %d\n", 6774 prog->name, err); 6775 return err; 6776 } 6777 } 6778 6779 return 0; 6780 } 6781 6782 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, 6783 Elf64_Shdr *shdr, Elf_Data *data); 6784 6785 static int bpf_object__collect_map_relos(struct bpf_object *obj, 6786 Elf64_Shdr *shdr, Elf_Data *data) 6787 { 6788 const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *); 6789 int i, j, nrels, new_sz; 6790 const struct btf_var_secinfo *vi = NULL; 6791 const struct btf_type *sec, *var, *def; 6792 struct bpf_map *map = NULL, *targ_map = NULL; 6793 struct bpf_program *targ_prog = NULL; 6794 bool is_prog_array, is_map_in_map; 6795 const struct btf_member *member; 6796 const char *name, *mname, *type; 6797 unsigned int moff; 6798 Elf64_Sym *sym; 6799 Elf64_Rel *rel; 6800 void *tmp; 6801 6802 if (!obj->efile.btf_maps_sec_btf_id || !obj->btf) 6803 return -EINVAL; 6804 sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id); 6805 if (!sec) 6806 return -EINVAL; 6807 6808 nrels = shdr->sh_size / shdr->sh_entsize; 6809 for (i = 0; i < nrels; i++) { 6810 rel = elf_rel_by_idx(data, i); 6811 if (!rel) { 6812 pr_warn(".maps relo #%d: failed to get ELF relo\n", i); 6813 return -LIBBPF_ERRNO__FORMAT; 6814 } 6815 6816 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); 6817 if (!sym) { 6818 pr_warn(".maps relo #%d: symbol %zx not found\n", 6819 i, (size_t)ELF64_R_SYM(rel->r_info)); 6820 return -LIBBPF_ERRNO__FORMAT; 6821 } 6822 name = elf_sym_str(obj, sym->st_name) ?: "<?>"; 6823 6824 pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n", 6825 i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value, 6826 (size_t)rel->r_offset, sym->st_name, name); 6827 6828 for (j = 0; j < obj->nr_maps; j++) { 6829 map = &obj->maps[j]; 6830 if (map->sec_idx != obj->efile.btf_maps_shndx) 6831 continue; 6832 6833 vi = btf_var_secinfos(sec) + map->btf_var_idx; 6834 if (vi->offset <= rel->r_offset && 6835 rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size) 6836 break; 6837 } 6838 if (j == obj->nr_maps) { 6839 pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n", 6840 i, name, (size_t)rel->r_offset); 6841 return -EINVAL; 6842 } 6843 6844 is_map_in_map = bpf_map_type__is_map_in_map(map->def.type); 6845 is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY; 6846 type = is_map_in_map ? 
"map" : "prog"; 6847 if (is_map_in_map) { 6848 if (sym->st_shndx != obj->efile.btf_maps_shndx) { 6849 pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", 6850 i, name); 6851 return -LIBBPF_ERRNO__RELOC; 6852 } 6853 if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS && 6854 map->def.key_size != sizeof(int)) { 6855 pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n", 6856 i, map->name, sizeof(int)); 6857 return -EINVAL; 6858 } 6859 targ_map = bpf_object__find_map_by_name(obj, name); 6860 if (!targ_map) { 6861 pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n", 6862 i, name); 6863 return -ESRCH; 6864 } 6865 } else if (is_prog_array) { 6866 targ_prog = bpf_object__find_program_by_name(obj, name); 6867 if (!targ_prog) { 6868 pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n", 6869 i, name); 6870 return -ESRCH; 6871 } 6872 if (targ_prog->sec_idx != sym->st_shndx || 6873 targ_prog->sec_insn_off * 8 != sym->st_value || 6874 prog_is_subprog(obj, targ_prog)) { 6875 pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n", 6876 i, name); 6877 return -LIBBPF_ERRNO__RELOC; 6878 } 6879 } else { 6880 return -EINVAL; 6881 } 6882 6883 var = btf__type_by_id(obj->btf, vi->type); 6884 def = skip_mods_and_typedefs(obj->btf, var->type, NULL); 6885 if (btf_vlen(def) == 0) 6886 return -EINVAL; 6887 member = btf_members(def) + btf_vlen(def) - 1; 6888 mname = btf__name_by_offset(obj->btf, member->name_off); 6889 if (strcmp(mname, "values")) 6890 return -EINVAL; 6891 6892 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8; 6893 if (rel->r_offset - vi->offset < moff) 6894 return -EINVAL; 6895 6896 moff = rel->r_offset - vi->offset - moff; 6897 /* here we use BPF pointer size, which is always 64 bit, as we 6898 * are parsing ELF that was built for BPF target 6899 */ 6900 if (moff % bpf_ptr_sz) 6901 return -EINVAL; 6902 moff /= bpf_ptr_sz; 6903 if (moff >= map->init_slots_sz) { 6904 new_sz = moff + 1; 6905 tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz); 6906 if (!tmp) 6907 return -ENOMEM; 6908 map->init_slots = tmp; 6909 memset(map->init_slots + map->init_slots_sz, 0, 6910 (new_sz - map->init_slots_sz) * host_ptr_sz); 6911 map->init_slots_sz = new_sz; 6912 } 6913 map->init_slots[moff] = is_map_in_map ? 
(void *)targ_map : (void *)targ_prog; 6914 6915 pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n", 6916 i, map->name, moff, type, name); 6917 } 6918 6919 return 0; 6920 } 6921 6922 static int bpf_object__collect_relos(struct bpf_object *obj) 6923 { 6924 int i, err; 6925 6926 for (i = 0; i < obj->efile.sec_cnt; i++) { 6927 struct elf_sec_desc *sec_desc = &obj->efile.secs[i]; 6928 Elf64_Shdr *shdr; 6929 Elf_Data *data; 6930 int idx; 6931 6932 if (sec_desc->sec_type != SEC_RELO) 6933 continue; 6934 6935 shdr = sec_desc->shdr; 6936 data = sec_desc->data; 6937 idx = shdr->sh_info; 6938 6939 if (shdr->sh_type != SHT_REL) { 6940 pr_warn("internal error at %d\n", __LINE__); 6941 return -LIBBPF_ERRNO__INTERNAL; 6942 } 6943 6944 if (idx == obj->efile.st_ops_shndx || idx == obj->efile.st_ops_link_shndx) 6945 err = bpf_object__collect_st_ops_relos(obj, shdr, data); 6946 else if (idx == obj->efile.btf_maps_shndx) 6947 err = bpf_object__collect_map_relos(obj, shdr, data); 6948 else 6949 err = bpf_object__collect_prog_relos(obj, shdr, data); 6950 if (err) 6951 return err; 6952 } 6953 6954 bpf_object__sort_relos(obj); 6955 return 0; 6956 } 6957 6958 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id) 6959 { 6960 if (BPF_CLASS(insn->code) == BPF_JMP && 6961 BPF_OP(insn->code) == BPF_CALL && 6962 BPF_SRC(insn->code) == BPF_K && 6963 insn->src_reg == 0 && 6964 insn->dst_reg == 0) { 6965 *func_id = insn->imm; 6966 return true; 6967 } 6968 return false; 6969 } 6970 6971 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog) 6972 { 6973 struct bpf_insn *insn = prog->insns; 6974 enum bpf_func_id func_id; 6975 int i; 6976 6977 if (obj->gen_loader) 6978 return 0; 6979 6980 for (i = 0; i < prog->insns_cnt; i++, insn++) { 6981 if (!insn_is_helper_call(insn, &func_id)) 6982 continue; 6983 6984 /* on kernels that don't yet support 6985 * bpf_probe_read_{kernel,user}[_str] helpers, fall back 6986 * to bpf_probe_read() which works well for old kernels 6987 */ 6988 switch (func_id) { 6989 case BPF_FUNC_probe_read_kernel: 6990 case BPF_FUNC_probe_read_user: 6991 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN)) 6992 insn->imm = BPF_FUNC_probe_read; 6993 break; 6994 case BPF_FUNC_probe_read_kernel_str: 6995 case BPF_FUNC_probe_read_user_str: 6996 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN)) 6997 insn->imm = BPF_FUNC_probe_read_str; 6998 break; 6999 default: 7000 break; 7001 } 7002 } 7003 return 0; 7004 } 7005 7006 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, 7007 int *btf_obj_fd, int *btf_type_id); 7008 7009 /* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */ 7010 static int libbpf_prepare_prog_load(struct bpf_program *prog, 7011 struct bpf_prog_load_opts *opts, long cookie) 7012 { 7013 enum sec_def_flags def = cookie; 7014 7015 /* old kernels might not support specifying expected_attach_type */ 7016 if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE)) 7017 opts->expected_attach_type = 0; 7018 7019 if (def & SEC_SLEEPABLE) 7020 opts->prog_flags |= BPF_F_SLEEPABLE; 7021 7022 if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) 7023 opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; 7024 7025 /* special check for usdt to use uprobe_multi link */ 7026 if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) 7027 prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI; 7028 7029 if ((def & SEC_ATTACH_BTF) && 
!prog->attach_btf_id) { 7030 int btf_obj_fd = 0, btf_type_id = 0, err; 7031 const char *attach_name; 7032 7033 attach_name = strchr(prog->sec_name, '/'); 7034 if (!attach_name) { 7035 /* if BPF program is annotated with just SEC("fentry") 7036 * (or similar) without declaratively specifying 7037 * target, then it is expected that target will be 7038 * specified with bpf_program__set_attach_target() at 7039 * runtime before BPF object load step. If not, then 7040 * there is nothing to load into the kernel as BPF 7041 * verifier won't be able to validate BPF program 7042 * correctness anyways. 7043 */ 7044 pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n", 7045 prog->name); 7046 return -EINVAL; 7047 } 7048 attach_name++; /* skip over / */ 7049 7050 err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id); 7051 if (err) 7052 return err; 7053 7054 /* cache resolved BTF FD and BTF type ID in the prog */ 7055 prog->attach_btf_obj_fd = btf_obj_fd; 7056 prog->attach_btf_id = btf_type_id; 7057 7058 /* but by now libbpf common logic is not utilizing 7059 * prog->atach_btf_obj_fd/prog->attach_btf_id anymore because 7060 * this callback is called after opts were populated by 7061 * libbpf, so this callback has to update opts explicitly here 7062 */ 7063 opts->attach_btf_obj_fd = btf_obj_fd; 7064 opts->attach_btf_id = btf_type_id; 7065 } 7066 return 0; 7067 } 7068 7069 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz); 7070 7071 static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog, 7072 struct bpf_insn *insns, int insns_cnt, 7073 const char *license, __u32 kern_version, int *prog_fd) 7074 { 7075 LIBBPF_OPTS(bpf_prog_load_opts, load_attr); 7076 const char *prog_name = NULL; 7077 char *cp, errmsg[STRERR_BUFSIZE]; 7078 size_t log_buf_size = 0; 7079 char *log_buf = NULL, *tmp; 7080 int btf_fd, ret, err; 7081 bool own_log_buf = true; 7082 __u32 log_level = prog->log_level; 7083 7084 if (prog->type == BPF_PROG_TYPE_UNSPEC) { 7085 /* 7086 * The program type must be set. Most likely we couldn't find a proper 7087 * section definition at load time, and thus we didn't infer the type. 
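 * For example, SEC("xdp") lets libbpf infer BPF_PROG_TYPE_XDP, while a
 * program placed in some custom section, say SEC("my_custom_sec")
 * (hypothetical name), has no inferable type and requires an explicit
 * bpf_program__set_type() (and, if needed,
 * bpf_program__set_expected_attach_type()) call before load.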
7088 */ 7089 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n", 7090 prog->name, prog->sec_name); 7091 return -EINVAL; 7092 } 7093 7094 if (!insns || !insns_cnt) 7095 return -EINVAL; 7096 7097 if (kernel_supports(obj, FEAT_PROG_NAME)) 7098 prog_name = prog->name; 7099 load_attr.attach_prog_fd = prog->attach_prog_fd; 7100 load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; 7101 load_attr.attach_btf_id = prog->attach_btf_id; 7102 load_attr.kern_version = kern_version; 7103 load_attr.prog_ifindex = prog->prog_ifindex; 7104 7105 /* specify func_info/line_info only if kernel supports them */ 7106 btf_fd = btf__fd(obj->btf); 7107 if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { 7108 load_attr.prog_btf_fd = btf_fd; 7109 load_attr.func_info = prog->func_info; 7110 load_attr.func_info_rec_size = prog->func_info_rec_size; 7111 load_attr.func_info_cnt = prog->func_info_cnt; 7112 load_attr.line_info = prog->line_info; 7113 load_attr.line_info_rec_size = prog->line_info_rec_size; 7114 load_attr.line_info_cnt = prog->line_info_cnt; 7115 } 7116 load_attr.log_level = log_level; 7117 load_attr.prog_flags = prog->prog_flags; 7118 load_attr.fd_array = obj->fd_array; 7119 7120 load_attr.token_fd = obj->token_fd; 7121 if (obj->token_fd) 7122 load_attr.prog_flags |= BPF_F_TOKEN_FD; 7123 7124 /* adjust load_attr if sec_def provides custom preload callback */ 7125 if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) { 7126 err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie); 7127 if (err < 0) { 7128 pr_warn("prog '%s': failed to prepare load attributes: %d\n", 7129 prog->name, err); 7130 return err; 7131 } 7132 insns = prog->insns; 7133 insns_cnt = prog->insns_cnt; 7134 } 7135 7136 /* allow prog_prepare_load_fn to change expected_attach_type */ 7137 load_attr.expected_attach_type = prog->expected_attach_type; 7138 7139 if (obj->gen_loader) { 7140 bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name, 7141 license, insns, insns_cnt, &load_attr, 7142 prog - obj->programs); 7143 *prog_fd = -1; 7144 return 0; 7145 } 7146 7147 retry_load: 7148 /* if log_level is zero, we don't request logs initially even if 7149 * custom log_buf is specified; if the program load fails, then we'll 7150 * bump log_level to 1 and use either custom log_buf or we'll allocate 7151 * our own and retry the load to get details on what failed 7152 */ 7153 if (log_level) { 7154 if (prog->log_buf) { 7155 log_buf = prog->log_buf; 7156 log_buf_size = prog->log_size; 7157 own_log_buf = false; 7158 } else if (obj->log_buf) { 7159 log_buf = obj->log_buf; 7160 log_buf_size = obj->log_size; 7161 own_log_buf = false; 7162 } else { 7163 log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2); 7164 tmp = realloc(log_buf, log_buf_size); 7165 if (!tmp) { 7166 ret = -ENOMEM; 7167 goto out; 7168 } 7169 log_buf = tmp; 7170 log_buf[0] = '\0'; 7171 own_log_buf = true; 7172 } 7173 } 7174 7175 load_attr.log_buf = log_buf; 7176 load_attr.log_size = log_buf_size; 7177 load_attr.log_level = log_level; 7178 7179 ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr); 7180 if (ret >= 0) { 7181 if (log_level && own_log_buf) { 7182 pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", 7183 prog->name, log_buf); 7184 } 7185 7186 if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) { 7187 struct bpf_map *map; 7188 int i; 7189 7190 for (i = 0; i < obj->nr_maps; i++) { 7191 map = &prog->obj->maps[i]; 7192 if (map->libbpf_type != 
LIBBPF_MAP_RODATA) 7193 continue; 7194 7195 if (bpf_prog_bind_map(ret, map->fd, NULL)) { 7196 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); 7197 pr_warn("prog '%s': failed to bind map '%s': %s\n", 7198 prog->name, map->real_name, cp); 7199 /* Don't fail hard if can't bind rodata. */ 7200 } 7201 } 7202 } 7203 7204 *prog_fd = ret; 7205 ret = 0; 7206 goto out; 7207 } 7208 7209 if (log_level == 0) { 7210 log_level = 1; 7211 goto retry_load; 7212 } 7213 /* On ENOSPC, increase log buffer size and retry, unless custom 7214 * log_buf is specified. 7215 * Be careful to not overflow u32, though. Kernel's log buf size limit 7216 * isn't part of UAPI so it can always be bumped to full 4GB. So don't 7217 * multiply by 2 unless we are sure we'll fit within 32 bits. 7218 * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2). 7219 */ 7220 if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2) 7221 goto retry_load; 7222 7223 ret = -errno; 7224 7225 /* post-process verifier log to improve error descriptions */ 7226 fixup_verifier_log(prog, log_buf, log_buf_size); 7227 7228 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); 7229 pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); 7230 pr_perm_msg(ret); 7231 7232 if (own_log_buf && log_buf && log_buf[0] != '\0') { 7233 pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", 7234 prog->name, log_buf); 7235 } 7236 7237 out: 7238 if (own_log_buf) 7239 free(log_buf); 7240 return ret; 7241 } 7242 7243 static char *find_prev_line(char *buf, char *cur) 7244 { 7245 char *p; 7246 7247 if (cur == buf) /* end of a log buf */ 7248 return NULL; 7249 7250 p = cur - 1; 7251 while (p - 1 >= buf && *(p - 1) != '\n') 7252 p--; 7253 7254 return p; 7255 } 7256 7257 static void patch_log(char *buf, size_t buf_sz, size_t log_sz, 7258 char *orig, size_t orig_sz, const char *patch) 7259 { 7260 /* size of the remaining log content to the right from the to-be-replaced part */ 7261 size_t rem_sz = (buf + log_sz) - (orig + orig_sz); 7262 size_t patch_sz = strlen(patch); 7263 7264 if (patch_sz != orig_sz) { 7265 /* If patch line(s) are longer than original piece of verifier log, 7266 * shift log contents by (patch_sz - orig_sz) bytes to the right 7267 * starting from after to-be-replaced part of the log. 7268 * 7269 * If patch line(s) are shorter than original piece of verifier log, 7270 * shift log contents by (orig_sz - patch_sz) bytes to the left 7271 * starting from after to-be-replaced part of the log 7272 * 7273 * We need to be careful about not overflowing available 7274 * buf_sz capacity. If that's the case, we'll truncate the end 7275 * of the original log, as necessary. 
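 *
 * A small worked example with made-up sizes: with buf_sz = 16 and
 * log_sz = 12, replacing a 3-byte piece starting at offset 4 with a
 * 6-byte patch shifts the trailing 5 bytes right by 3, growing the log
 * to 15 bytes, which still fits. Had the growth exceeded the
 * buf_sz - log_sz headroom, the tail (and, in the extreme case, the
 * patch itself) would have been truncated instead.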
7276 */ 7277 if (patch_sz > orig_sz) { 7278 if (orig + patch_sz >= buf + buf_sz) { 7279 /* patch is big enough to cover remaining space completely */ 7280 patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1; 7281 rem_sz = 0; 7282 } else if (patch_sz - orig_sz > buf_sz - log_sz) { 7283 /* patch causes part of remaining log to be truncated */ 7284 rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz); 7285 } 7286 } 7287 /* shift remaining log to the right by calculated amount */ 7288 memmove(orig + patch_sz, orig + orig_sz, rem_sz); 7289 } 7290 7291 memcpy(orig, patch, patch_sz); 7292 } 7293 7294 static void fixup_log_failed_core_relo(struct bpf_program *prog, 7295 char *buf, size_t buf_sz, size_t log_sz, 7296 char *line1, char *line2, char *line3) 7297 { 7298 /* Expected log for failed and not properly guarded CO-RE relocation: 7299 * line1 -> 123: (85) call unknown#195896080 7300 * line2 -> invalid func unknown#195896080 7301 * line3 -> <anything else or end of buffer> 7302 * 7303 * "123" is the index of the instruction that was poisoned. We extract 7304 * instruction index to find corresponding CO-RE relocation and 7305 * replace this part of the log with more relevant information about 7306 * failed CO-RE relocation. 7307 */ 7308 const struct bpf_core_relo *relo; 7309 struct bpf_core_spec spec; 7310 char patch[512], spec_buf[256]; 7311 int insn_idx, err, spec_len; 7312 7313 if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1) 7314 return; 7315 7316 relo = find_relo_core(prog, insn_idx); 7317 if (!relo) 7318 return; 7319 7320 err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec); 7321 if (err) 7322 return; 7323 7324 spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); 7325 snprintf(patch, sizeof(patch), 7326 "%d: <invalid CO-RE relocation>\n" 7327 "failed to resolve CO-RE relocation %s%s\n", 7328 insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : ""); 7329 7330 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 7331 } 7332 7333 static void fixup_log_missing_map_load(struct bpf_program *prog, 7334 char *buf, size_t buf_sz, size_t log_sz, 7335 char *line1, char *line2, char *line3) 7336 { 7337 /* Expected log for failed and not properly guarded map reference: 7338 * line1 -> 123: (85) call unknown#2001000345 7339 * line2 -> invalid func unknown#2001000345 7340 * line3 -> <anything else or end of buffer> 7341 * 7342 * "123" is the index of the instruction that was poisoned. 7343 * "345" in "2001000345" is a map index in obj->maps to fetch map name. 
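 *
 * After patching, that part of the log reads roughly:
 *
 *   123: <invalid BPF map reference>
 *   BPF map 'some_map' is referenced but wasn't created
 *
 * (with 'some_map' standing in for the actual map name), which is much
 * more actionable than the raw "invalid func unknown#..." message.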
7344 */ 7345 struct bpf_object *obj = prog->obj; 7346 const struct bpf_map *map; 7347 int insn_idx, map_idx; 7348 char patch[128]; 7349 7350 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2) 7351 return; 7352 7353 map_idx -= POISON_LDIMM64_MAP_BASE; 7354 if (map_idx < 0 || map_idx >= obj->nr_maps) 7355 return; 7356 map = &obj->maps[map_idx]; 7357 7358 snprintf(patch, sizeof(patch), 7359 "%d: <invalid BPF map reference>\n" 7360 "BPF map '%s' is referenced but wasn't created\n", 7361 insn_idx, map->name); 7362 7363 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 7364 } 7365 7366 static void fixup_log_missing_kfunc_call(struct bpf_program *prog, 7367 char *buf, size_t buf_sz, size_t log_sz, 7368 char *line1, char *line2, char *line3) 7369 { 7370 /* Expected log for failed and not properly guarded kfunc call: 7371 * line1 -> 123: (85) call unknown#2002000345 7372 * line2 -> invalid func unknown#2002000345 7373 * line3 -> <anything else or end of buffer> 7374 * 7375 * "123" is the index of the instruction that was poisoned. 7376 * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name. 7377 */ 7378 struct bpf_object *obj = prog->obj; 7379 const struct extern_desc *ext; 7380 int insn_idx, ext_idx; 7381 char patch[128]; 7382 7383 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2) 7384 return; 7385 7386 ext_idx -= POISON_CALL_KFUNC_BASE; 7387 if (ext_idx < 0 || ext_idx >= obj->nr_extern) 7388 return; 7389 ext = &obj->externs[ext_idx]; 7390 7391 snprintf(patch, sizeof(patch), 7392 "%d: <invalid kfunc call>\n" 7393 "kfunc '%s' is referenced but wasn't resolved\n", 7394 insn_idx, ext->name); 7395 7396 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 7397 } 7398 7399 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz) 7400 { 7401 /* look for familiar error patterns in last N lines of the log */ 7402 const size_t max_last_line_cnt = 10; 7403 char *prev_line, *cur_line, *next_line; 7404 size_t log_sz; 7405 int i; 7406 7407 if (!buf) 7408 return; 7409 7410 log_sz = strlen(buf) + 1; 7411 next_line = buf + log_sz - 1; 7412 7413 for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) { 7414 cur_line = find_prev_line(buf, next_line); 7415 if (!cur_line) 7416 return; 7417 7418 if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) { 7419 prev_line = find_prev_line(buf, cur_line); 7420 if (!prev_line) 7421 continue; 7422 7423 /* failed CO-RE relocation case */ 7424 fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz, 7425 prev_line, cur_line, next_line); 7426 return; 7427 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) { 7428 prev_line = find_prev_line(buf, cur_line); 7429 if (!prev_line) 7430 continue; 7431 7432 /* reference to uncreated BPF map */ 7433 fixup_log_missing_map_load(prog, buf, buf_sz, log_sz, 7434 prev_line, cur_line, next_line); 7435 return; 7436 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) { 7437 prev_line = find_prev_line(buf, cur_line); 7438 if (!prev_line) 7439 continue; 7440 7441 /* reference to unresolved kfunc */ 7442 fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz, 7443 prev_line, cur_line, next_line); 7444 return; 7445 } 7446 } 7447 } 7448 7449 static int bpf_program_record_relos(struct bpf_program *prog) 7450 { 7451 struct bpf_object *obj = prog->obj; 7452 int i; 7453 7454 for (i = 0; i < prog->nr_reloc; i++) { 7455 struct reloc_desc *relo = &prog->reloc_desc[i]; 7456 
struct extern_desc *ext = &obj->externs[relo->ext_idx]; 7457 int kind; 7458 7459 switch (relo->type) { 7460 case RELO_EXTERN_LD64: 7461 if (ext->type != EXT_KSYM) 7462 continue; 7463 kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ? 7464 BTF_KIND_VAR : BTF_KIND_FUNC; 7465 bpf_gen__record_extern(obj->gen_loader, ext->name, 7466 ext->is_weak, !ext->ksym.type_id, 7467 true, kind, relo->insn_idx); 7468 break; 7469 case RELO_EXTERN_CALL: 7470 bpf_gen__record_extern(obj->gen_loader, ext->name, 7471 ext->is_weak, false, false, BTF_KIND_FUNC, 7472 relo->insn_idx); 7473 break; 7474 case RELO_CORE: { 7475 struct bpf_core_relo cr = { 7476 .insn_off = relo->insn_idx * 8, 7477 .type_id = relo->core_relo->type_id, 7478 .access_str_off = relo->core_relo->access_str_off, 7479 .kind = relo->core_relo->kind, 7480 }; 7481 7482 bpf_gen__record_relo_core(obj->gen_loader, &cr); 7483 break; 7484 } 7485 default: 7486 continue; 7487 } 7488 } 7489 return 0; 7490 } 7491 7492 static int 7493 bpf_object__load_progs(struct bpf_object *obj, int log_level) 7494 { 7495 struct bpf_program *prog; 7496 size_t i; 7497 int err; 7498 7499 for (i = 0; i < obj->nr_programs; i++) { 7500 prog = &obj->programs[i]; 7501 err = bpf_object__sanitize_prog(obj, prog); 7502 if (err) 7503 return err; 7504 } 7505 7506 for (i = 0; i < obj->nr_programs; i++) { 7507 prog = &obj->programs[i]; 7508 if (prog_is_subprog(obj, prog)) 7509 continue; 7510 if (!prog->autoload) { 7511 pr_debug("prog '%s': skipped loading\n", prog->name); 7512 continue; 7513 } 7514 prog->log_level |= log_level; 7515 7516 if (obj->gen_loader) 7517 bpf_program_record_relos(prog); 7518 7519 err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt, 7520 obj->license, obj->kern_version, &prog->fd); 7521 if (err) { 7522 pr_warn("prog '%s': failed to load: %d\n", prog->name, err); 7523 return err; 7524 } 7525 } 7526 7527 bpf_object__free_relocs(obj); 7528 return 0; 7529 } 7530 7531 static const struct bpf_sec_def *find_sec_def(const char *sec_name); 7532 7533 static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts) 7534 { 7535 struct bpf_program *prog; 7536 int err; 7537 7538 bpf_object__for_each_program(prog, obj) { 7539 prog->sec_def = find_sec_def(prog->sec_name); 7540 if (!prog->sec_def) { 7541 /* couldn't guess, but user might manually specify */ 7542 pr_debug("prog '%s': unrecognized ELF section name '%s'\n", 7543 prog->name, prog->sec_name); 7544 continue; 7545 } 7546 7547 prog->type = prog->sec_def->prog_type; 7548 prog->expected_attach_type = prog->sec_def->expected_attach_type; 7549 7550 /* sec_def can have custom callback which should be called 7551 * after bpf_program is initialized to adjust its properties 7552 */ 7553 if (prog->sec_def->prog_setup_fn) { 7554 err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie); 7555 if (err < 0) { 7556 pr_warn("prog '%s': failed to initialize: %d\n", 7557 prog->name, err); 7558 return err; 7559 } 7560 } 7561 } 7562 7563 return 0; 7564 } 7565 7566 static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, 7567 const struct bpf_object_open_opts *opts) 7568 { 7569 const char *obj_name, *kconfig, *btf_tmp_path, *token_path; 7570 struct bpf_object *obj; 7571 char tmp_name[64]; 7572 int err; 7573 char *log_buf; 7574 size_t log_size; 7575 __u32 log_level; 7576 7577 if (elf_version(EV_CURRENT) == EV_NONE) { 7578 pr_warn("failed to init libelf for %s\n", 7579 path ? 
: "(mem buf)"); 7580 return ERR_PTR(-LIBBPF_ERRNO__LIBELF); 7581 } 7582 7583 if (!OPTS_VALID(opts, bpf_object_open_opts)) 7584 return ERR_PTR(-EINVAL); 7585 7586 obj_name = OPTS_GET(opts, object_name, NULL); 7587 if (obj_buf) { 7588 if (!obj_name) { 7589 snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", 7590 (unsigned long)obj_buf, 7591 (unsigned long)obj_buf_sz); 7592 obj_name = tmp_name; 7593 } 7594 path = obj_name; 7595 pr_debug("loading object '%s' from buffer\n", obj_name); 7596 } 7597 7598 log_buf = OPTS_GET(opts, kernel_log_buf, NULL); 7599 log_size = OPTS_GET(opts, kernel_log_size, 0); 7600 log_level = OPTS_GET(opts, kernel_log_level, 0); 7601 if (log_size > UINT_MAX) 7602 return ERR_PTR(-EINVAL); 7603 if (log_size && !log_buf) 7604 return ERR_PTR(-EINVAL); 7605 7606 token_path = OPTS_GET(opts, bpf_token_path, NULL); 7607 /* if user didn't specify bpf_token_path explicitly, check if 7608 * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path 7609 * option 7610 */ 7611 if (!token_path) 7612 token_path = getenv("LIBBPF_BPF_TOKEN_PATH"); 7613 if (token_path && strlen(token_path) >= PATH_MAX) 7614 return ERR_PTR(-ENAMETOOLONG); 7615 7616 obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); 7617 if (IS_ERR(obj)) 7618 return obj; 7619 7620 obj->log_buf = log_buf; 7621 obj->log_size = log_size; 7622 obj->log_level = log_level; 7623 7624 if (token_path) { 7625 obj->token_path = strdup(token_path); 7626 if (!obj->token_path) { 7627 err = -ENOMEM; 7628 goto out; 7629 } 7630 } 7631 7632 btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); 7633 if (btf_tmp_path) { 7634 if (strlen(btf_tmp_path) >= PATH_MAX) { 7635 err = -ENAMETOOLONG; 7636 goto out; 7637 } 7638 obj->btf_custom_path = strdup(btf_tmp_path); 7639 if (!obj->btf_custom_path) { 7640 err = -ENOMEM; 7641 goto out; 7642 } 7643 } 7644 7645 kconfig = OPTS_GET(opts, kconfig, NULL); 7646 if (kconfig) { 7647 obj->kconfig = strdup(kconfig); 7648 if (!obj->kconfig) { 7649 err = -ENOMEM; 7650 goto out; 7651 } 7652 } 7653 7654 err = bpf_object__elf_init(obj); 7655 err = err ? : bpf_object__check_endianness(obj); 7656 err = err ? : bpf_object__elf_collect(obj); 7657 err = err ? : bpf_object__collect_externs(obj); 7658 err = err ? : bpf_object_fixup_btf(obj); 7659 err = err ? : bpf_object__init_maps(obj, opts); 7660 err = err ? : bpf_object_init_progs(obj, opts); 7661 err = err ? 
: bpf_object__collect_relos(obj); 7662 if (err) 7663 goto out; 7664 7665 bpf_object__elf_finish(obj); 7666 7667 return obj; 7668 out: 7669 bpf_object__close(obj); 7670 return ERR_PTR(err); 7671 } 7672 7673 struct bpf_object * 7674 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) 7675 { 7676 if (!path) 7677 return libbpf_err_ptr(-EINVAL); 7678 7679 pr_debug("loading %s\n", path); 7680 7681 return libbpf_ptr(bpf_object_open(path, NULL, 0, opts)); 7682 } 7683 7684 struct bpf_object *bpf_object__open(const char *path) 7685 { 7686 return bpf_object__open_file(path, NULL); 7687 } 7688 7689 struct bpf_object * 7690 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, 7691 const struct bpf_object_open_opts *opts) 7692 { 7693 if (!obj_buf || obj_buf_sz == 0) 7694 return libbpf_err_ptr(-EINVAL); 7695 7696 return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts)); 7697 } 7698 7699 static int bpf_object_unload(struct bpf_object *obj) 7700 { 7701 size_t i; 7702 7703 if (!obj) 7704 return libbpf_err(-EINVAL); 7705 7706 for (i = 0; i < obj->nr_maps; i++) { 7707 zclose(obj->maps[i].fd); 7708 if (obj->maps[i].st_ops) 7709 zfree(&obj->maps[i].st_ops->kern_vdata); 7710 } 7711 7712 for (i = 0; i < obj->nr_programs; i++) 7713 bpf_program__unload(&obj->programs[i]); 7714 7715 return 0; 7716 } 7717 7718 static int bpf_object__sanitize_maps(struct bpf_object *obj) 7719 { 7720 struct bpf_map *m; 7721 7722 bpf_object__for_each_map(m, obj) { 7723 if (!bpf_map__is_internal(m)) 7724 continue; 7725 if (!kernel_supports(obj, FEAT_ARRAY_MMAP)) 7726 m->def.map_flags &= ~BPF_F_MMAPABLE; 7727 } 7728 7729 return 0; 7730 } 7731 7732 int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx) 7733 { 7734 char sym_type, sym_name[500]; 7735 unsigned long long sym_addr; 7736 int ret, err = 0; 7737 FILE *f; 7738 7739 f = fopen("/proc/kallsyms", "re"); 7740 if (!f) { 7741 err = -errno; 7742 pr_warn("failed to open /proc/kallsyms: %d\n", err); 7743 return err; 7744 } 7745 7746 while (true) { 7747 ret = fscanf(f, "%llx %c %499s%*[^\n]\n", 7748 &sym_addr, &sym_type, sym_name); 7749 if (ret == EOF && feof(f)) 7750 break; 7751 if (ret != 3) { 7752 pr_warn("failed to read kallsyms entry: %d\n", ret); 7753 err = -EINVAL; 7754 break; 7755 } 7756 7757 err = cb(sym_addr, sym_type, sym_name, ctx); 7758 if (err) 7759 break; 7760 } 7761 7762 fclose(f); 7763 return err; 7764 } 7765 7766 static int kallsyms_cb(unsigned long long sym_addr, char sym_type, 7767 const char *sym_name, void *ctx) 7768 { 7769 struct bpf_object *obj = ctx; 7770 const struct btf_type *t; 7771 struct extern_desc *ext; 7772 7773 ext = find_extern_by_name(obj, sym_name); 7774 if (!ext || ext->type != EXT_KSYM) 7775 return 0; 7776 7777 t = btf__type_by_id(obj->btf, ext->btf_id); 7778 if (!btf_is_var(t)) 7779 return 0; 7780 7781 if (ext->is_set && ext->ksym.addr != sym_addr) { 7782 pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n", 7783 sym_name, ext->ksym.addr, sym_addr); 7784 return -EINVAL; 7785 } 7786 if (!ext->is_set) { 7787 ext->is_set = true; 7788 ext->ksym.addr = sym_addr; 7789 pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr); 7790 } 7791 return 0; 7792 } 7793 7794 static int bpf_object__read_kallsyms_file(struct bpf_object *obj) 7795 { 7796 return libbpf_kallsyms_parse(kallsyms_cb, obj); 7797 } 7798 7799 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, 7800 __u16 kind, struct btf **res_btf, 7801 struct module_btf **res_mod_btf) 7802 { 7803 struct 
module_btf *mod_btf; 7804 struct btf *btf; 7805 int i, id, err; 7806 7807 btf = obj->btf_vmlinux; 7808 mod_btf = NULL; 7809 id = btf__find_by_name_kind(btf, ksym_name, kind); 7810 7811 if (id == -ENOENT) { 7812 err = load_module_btfs(obj); 7813 if (err) 7814 return err; 7815 7816 for (i = 0; i < obj->btf_module_cnt; i++) { 7817 /* we assume module_btf's BTF FD is always >0 */ 7818 mod_btf = &obj->btf_modules[i]; 7819 btf = mod_btf->btf; 7820 id = btf__find_by_name_kind_own(btf, ksym_name, kind); 7821 if (id != -ENOENT) 7822 break; 7823 } 7824 } 7825 if (id <= 0) 7826 return -ESRCH; 7827 7828 *res_btf = btf; 7829 *res_mod_btf = mod_btf; 7830 return id; 7831 } 7832 7833 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, 7834 struct extern_desc *ext) 7835 { 7836 const struct btf_type *targ_var, *targ_type; 7837 __u32 targ_type_id, local_type_id; 7838 struct module_btf *mod_btf = NULL; 7839 const char *targ_var_name; 7840 struct btf *btf = NULL; 7841 int id, err; 7842 7843 id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf); 7844 if (id < 0) { 7845 if (id == -ESRCH && ext->is_weak) 7846 return 0; 7847 pr_warn("extern (var ksym) '%s': not found in kernel BTF\n", 7848 ext->name); 7849 return id; 7850 } 7851 7852 /* find local type_id */ 7853 local_type_id = ext->ksym.type_id; 7854 7855 /* find target type_id */ 7856 targ_var = btf__type_by_id(btf, id); 7857 targ_var_name = btf__name_by_offset(btf, targ_var->name_off); 7858 targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id); 7859 7860 err = bpf_core_types_are_compat(obj->btf, local_type_id, 7861 btf, targ_type_id); 7862 if (err <= 0) { 7863 const struct btf_type *local_type; 7864 const char *targ_name, *local_name; 7865 7866 local_type = btf__type_by_id(obj->btf, local_type_id); 7867 local_name = btf__name_by_offset(obj->btf, local_type->name_off); 7868 targ_name = btf__name_by_offset(btf, targ_type->name_off); 7869 7870 pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n", 7871 ext->name, local_type_id, 7872 btf_kind_str(local_type), local_name, targ_type_id, 7873 btf_kind_str(targ_type), targ_name); 7874 return -EINVAL; 7875 } 7876 7877 ext->is_set = true; 7878 ext->ksym.kernel_btf_obj_fd = mod_btf ? 
mod_btf->fd : 0; 7879 ext->ksym.kernel_btf_id = id; 7880 pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n", 7881 ext->name, id, btf_kind_str(targ_var), targ_var_name); 7882 7883 return 0; 7884 } 7885 7886 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, 7887 struct extern_desc *ext) 7888 { 7889 int local_func_proto_id, kfunc_proto_id, kfunc_id; 7890 struct module_btf *mod_btf = NULL; 7891 const struct btf_type *kern_func; 7892 struct btf *kern_btf = NULL; 7893 int ret; 7894 7895 local_func_proto_id = ext->ksym.type_id; 7896 7897 kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf, 7898 &mod_btf); 7899 if (kfunc_id < 0) { 7900 if (kfunc_id == -ESRCH && ext->is_weak) 7901 return 0; 7902 pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n", 7903 ext->name); 7904 return kfunc_id; 7905 } 7906 7907 kern_func = btf__type_by_id(kern_btf, kfunc_id); 7908 kfunc_proto_id = kern_func->type; 7909 7910 ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id, 7911 kern_btf, kfunc_proto_id); 7912 if (ret <= 0) { 7913 if (ext->is_weak) 7914 return 0; 7915 7916 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n", 7917 ext->name, local_func_proto_id, 7918 mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id); 7919 return -EINVAL; 7920 } 7921 7922 /* set index for module BTF fd in fd_array, if unset */ 7923 if (mod_btf && !mod_btf->fd_array_idx) { 7924 /* insn->off is s16 */ 7925 if (obj->fd_array_cnt == INT16_MAX) { 7926 pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n", 7927 ext->name, mod_btf->fd_array_idx); 7928 return -E2BIG; 7929 } 7930 /* Cannot use index 0 for module BTF fd */ 7931 if (!obj->fd_array_cnt) 7932 obj->fd_array_cnt = 1; 7933 7934 ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int), 7935 obj->fd_array_cnt + 1); 7936 if (ret) 7937 return ret; 7938 mod_btf->fd_array_idx = obj->fd_array_cnt; 7939 /* we assume module BTF FD is always >0 */ 7940 obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd; 7941 } 7942 7943 ext->is_set = true; 7944 ext->ksym.kernel_btf_id = kfunc_id; 7945 ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0; 7946 /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data() 7947 * populates FD into ld_imm64 insn when it's used to point to kfunc. 7948 * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call. 7949 * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64. 7950 */ 7951 ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0; 7952 pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n", 7953 ext->name, mod_btf ? 
mod_btf->name : "vmlinux", kfunc_id); 7954 7955 return 0; 7956 } 7957 7958 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) 7959 { 7960 const struct btf_type *t; 7961 struct extern_desc *ext; 7962 int i, err; 7963 7964 for (i = 0; i < obj->nr_extern; i++) { 7965 ext = &obj->externs[i]; 7966 if (ext->type != EXT_KSYM || !ext->ksym.type_id) 7967 continue; 7968 7969 if (obj->gen_loader) { 7970 ext->is_set = true; 7971 ext->ksym.kernel_btf_obj_fd = 0; 7972 ext->ksym.kernel_btf_id = 0; 7973 continue; 7974 } 7975 t = btf__type_by_id(obj->btf, ext->btf_id); 7976 if (btf_is_var(t)) 7977 err = bpf_object__resolve_ksym_var_btf_id(obj, ext); 7978 else 7979 err = bpf_object__resolve_ksym_func_btf_id(obj, ext); 7980 if (err) 7981 return err; 7982 } 7983 return 0; 7984 } 7985 7986 static int bpf_object__resolve_externs(struct bpf_object *obj, 7987 const char *extra_kconfig) 7988 { 7989 bool need_config = false, need_kallsyms = false; 7990 bool need_vmlinux_btf = false; 7991 struct extern_desc *ext; 7992 void *kcfg_data = NULL; 7993 int err, i; 7994 7995 if (obj->nr_extern == 0) 7996 return 0; 7997 7998 if (obj->kconfig_map_idx >= 0) 7999 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped; 8000 8001 for (i = 0; i < obj->nr_extern; i++) { 8002 ext = &obj->externs[i]; 8003 8004 if (ext->type == EXT_KSYM) { 8005 if (ext->ksym.type_id) 8006 need_vmlinux_btf = true; 8007 else 8008 need_kallsyms = true; 8009 continue; 8010 } else if (ext->type == EXT_KCFG) { 8011 void *ext_ptr = kcfg_data + ext->kcfg.data_off; 8012 __u64 value = 0; 8013 8014 /* Kconfig externs need actual /proc/config.gz */ 8015 if (str_has_pfx(ext->name, "CONFIG_")) { 8016 need_config = true; 8017 continue; 8018 } 8019 8020 /* Virtual kcfg externs are customly handled by libbpf */ 8021 if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { 8022 value = get_kernel_version(); 8023 if (!value) { 8024 pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name); 8025 return -EINVAL; 8026 } 8027 } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) { 8028 value = kernel_supports(obj, FEAT_BPF_COOKIE); 8029 } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) { 8030 value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER); 8031 } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) { 8032 /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed 8033 * __kconfig externs, where LINUX_ ones are virtual and filled out 8034 * customly by libbpf (their values don't come from Kconfig). 8035 * If LINUX_xxx variable is not recognized by libbpf, but is marked 8036 * __weak, it defaults to zero value, just like for CONFIG_xxx 8037 * externs. 
8038 */ 8039 pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name); 8040 return -EINVAL; 8041 } 8042 8043 err = set_kcfg_value_num(ext, ext_ptr, value); 8044 if (err) 8045 return err; 8046 pr_debug("extern (kcfg) '%s': set to 0x%llx\n", 8047 ext->name, (long long)value); 8048 } else { 8049 pr_warn("extern '%s': unrecognized extern kind\n", ext->name); 8050 return -EINVAL; 8051 } 8052 } 8053 if (need_config && extra_kconfig) { 8054 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data); 8055 if (err) 8056 return -EINVAL; 8057 need_config = false; 8058 for (i = 0; i < obj->nr_extern; i++) { 8059 ext = &obj->externs[i]; 8060 if (ext->type == EXT_KCFG && !ext->is_set) { 8061 need_config = true; 8062 break; 8063 } 8064 } 8065 } 8066 if (need_config) { 8067 err = bpf_object__read_kconfig_file(obj, kcfg_data); 8068 if (err) 8069 return -EINVAL; 8070 } 8071 if (need_kallsyms) { 8072 err = bpf_object__read_kallsyms_file(obj); 8073 if (err) 8074 return -EINVAL; 8075 } 8076 if (need_vmlinux_btf) { 8077 err = bpf_object__resolve_ksyms_btf_id(obj); 8078 if (err) 8079 return -EINVAL; 8080 } 8081 for (i = 0; i < obj->nr_extern; i++) { 8082 ext = &obj->externs[i]; 8083 8084 if (!ext->is_set && !ext->is_weak) { 8085 pr_warn("extern '%s' (strong): not resolved\n", ext->name); 8086 return -ESRCH; 8087 } else if (!ext->is_set) { 8088 pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n", 8089 ext->name); 8090 } 8091 } 8092 8093 return 0; 8094 } 8095 8096 static void bpf_map_prepare_vdata(const struct bpf_map *map) 8097 { 8098 struct bpf_struct_ops *st_ops; 8099 __u32 i; 8100 8101 st_ops = map->st_ops; 8102 for (i = 0; i < btf_vlen(st_ops->type); i++) { 8103 struct bpf_program *prog = st_ops->progs[i]; 8104 void *kern_data; 8105 int prog_fd; 8106 8107 if (!prog) 8108 continue; 8109 8110 prog_fd = bpf_program__fd(prog); 8111 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; 8112 *(unsigned long *)kern_data = prog_fd; 8113 } 8114 } 8115 8116 static int bpf_object_prepare_struct_ops(struct bpf_object *obj) 8117 { 8118 int i; 8119 8120 for (i = 0; i < obj->nr_maps; i++) 8121 if (bpf_map__is_struct_ops(&obj->maps[i])) 8122 bpf_map_prepare_vdata(&obj->maps[i]); 8123 8124 return 0; 8125 } 8126 8127 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) 8128 { 8129 int err, i; 8130 8131 if (!obj) 8132 return libbpf_err(-EINVAL); 8133 8134 if (obj->loaded) { 8135 pr_warn("object '%s': load can't be attempted twice\n", obj->name); 8136 return libbpf_err(-EINVAL); 8137 } 8138 8139 if (obj->gen_loader) 8140 bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); 8141 8142 err = bpf_object_prepare_token(obj); 8143 err = err ? : bpf_object__probe_loading(obj); 8144 err = err ? : bpf_object__load_vmlinux_btf(obj, false); 8145 err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); 8146 err = err ? : bpf_object__sanitize_maps(obj); 8147 err = err ? : bpf_object__init_kern_struct_ops_maps(obj); 8148 err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); 8149 err = err ? : bpf_object__sanitize_and_load_btf(obj); 8150 err = err ? : bpf_object__create_maps(obj); 8151 err = err ? : bpf_object__load_progs(obj, extra_log_level); 8152 err = err ? : bpf_object_init_prog_arrays(obj); 8153 err = err ? 
: bpf_object_prepare_struct_ops(obj); 8154 8155 if (obj->gen_loader) { 8156 /* reset FDs */ 8157 if (obj->btf) 8158 btf__set_fd(obj->btf, -1); 8159 if (!err) 8160 err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); 8161 } 8162 8163 /* clean up fd_array */ 8164 zfree(&obj->fd_array); 8165 8166 /* clean up module BTFs */ 8167 for (i = 0; i < obj->btf_module_cnt; i++) { 8168 close(obj->btf_modules[i].fd); 8169 btf__free(obj->btf_modules[i].btf); 8170 free(obj->btf_modules[i].name); 8171 } 8172 free(obj->btf_modules); 8173 8174 /* clean up vmlinux BTF */ 8175 btf__free(obj->btf_vmlinux); 8176 obj->btf_vmlinux = NULL; 8177 8178 obj->loaded = true; /* doesn't matter if successfully or not */ 8179 8180 if (err) 8181 goto out; 8182 8183 return 0; 8184 out: 8185 /* unpin any maps that were auto-pinned during load */ 8186 for (i = 0; i < obj->nr_maps; i++) 8187 if (obj->maps[i].pinned && !obj->maps[i].reused) 8188 bpf_map__unpin(&obj->maps[i], NULL); 8189 8190 bpf_object_unload(obj); 8191 pr_warn("failed to load object '%s'\n", obj->path); 8192 return libbpf_err(err); 8193 } 8194 8195 int bpf_object__load(struct bpf_object *obj) 8196 { 8197 return bpf_object_load(obj, 0, NULL); 8198 } 8199 8200 static int make_parent_dir(const char *path) 8201 { 8202 char *cp, errmsg[STRERR_BUFSIZE]; 8203 char *dname, *dir; 8204 int err = 0; 8205 8206 dname = strdup(path); 8207 if (dname == NULL) 8208 return -ENOMEM; 8209 8210 dir = dirname(dname); 8211 if (mkdir(dir, 0700) && errno != EEXIST) 8212 err = -errno; 8213 8214 free(dname); 8215 if (err) { 8216 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); 8217 pr_warn("failed to mkdir %s: %s\n", path, cp); 8218 } 8219 return err; 8220 } 8221 8222 static int check_path(const char *path) 8223 { 8224 char *cp, errmsg[STRERR_BUFSIZE]; 8225 struct statfs st_fs; 8226 char *dname, *dir; 8227 int err = 0; 8228 8229 if (path == NULL) 8230 return -EINVAL; 8231 8232 dname = strdup(path); 8233 if (dname == NULL) 8234 return -ENOMEM; 8235 8236 dir = dirname(dname); 8237 if (statfs(dir, &st_fs)) { 8238 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); 8239 pr_warn("failed to statfs %s: %s\n", dir, cp); 8240 err = -errno; 8241 } 8242 free(dname); 8243 8244 if (!err && st_fs.f_type != BPF_FS_MAGIC) { 8245 pr_warn("specified path %s is not on BPF FS\n", path); 8246 err = -EINVAL; 8247 } 8248 8249 return err; 8250 } 8251 8252 int bpf_program__pin(struct bpf_program *prog, const char *path) 8253 { 8254 char *cp, errmsg[STRERR_BUFSIZE]; 8255 int err; 8256 8257 if (prog->fd < 0) { 8258 pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name); 8259 return libbpf_err(-EINVAL); 8260 } 8261 8262 err = make_parent_dir(path); 8263 if (err) 8264 return libbpf_err(err); 8265 8266 err = check_path(path); 8267 if (err) 8268 return libbpf_err(err); 8269 8270 if (bpf_obj_pin(prog->fd, path)) { 8271 err = -errno; 8272 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); 8273 pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp); 8274 return libbpf_err(err); 8275 } 8276 8277 pr_debug("prog '%s': pinned at '%s'\n", prog->name, path); 8278 return 0; 8279 } 8280 8281 int bpf_program__unpin(struct bpf_program *prog, const char *path) 8282 { 8283 int err; 8284 8285 if (prog->fd < 0) { 8286 pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name); 8287 return libbpf_err(-EINVAL); 8288 } 8289 8290 err = check_path(path); 8291 if (err) 8292 return libbpf_err(err); 8293 8294 err = unlink(path); 8295 if (err) 8296 return 
libbpf_err(-errno); 8297 8298 pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path); 8299 return 0; 8300 } 8301 8302 int bpf_map__pin(struct bpf_map *map, const char *path) 8303 { 8304 char *cp, errmsg[STRERR_BUFSIZE]; 8305 int err; 8306 8307 if (map == NULL) { 8308 pr_warn("invalid map pointer\n"); 8309 return libbpf_err(-EINVAL); 8310 } 8311 8312 if (map->pin_path) { 8313 if (path && strcmp(path, map->pin_path)) { 8314 pr_warn("map '%s' already has pin path '%s' different from '%s'\n", 8315 bpf_map__name(map), map->pin_path, path); 8316 return libbpf_err(-EINVAL); 8317 } else if (map->pinned) { 8318 pr_debug("map '%s' already pinned at '%s'; not re-pinning\n", 8319 bpf_map__name(map), map->pin_path); 8320 return 0; 8321 } 8322 } else { 8323 if (!path) { 8324 pr_warn("missing a path to pin map '%s' at\n", 8325 bpf_map__name(map)); 8326 return libbpf_err(-EINVAL); 8327 } else if (map->pinned) { 8328 pr_warn("map '%s' already pinned\n", bpf_map__name(map)); 8329 return libbpf_err(-EEXIST); 8330 } 8331 8332 map->pin_path = strdup(path); 8333 if (!map->pin_path) { 8334 err = -errno; 8335 goto out_err; 8336 } 8337 } 8338 8339 err = make_parent_dir(map->pin_path); 8340 if (err) 8341 return libbpf_err(err); 8342 8343 err = check_path(map->pin_path); 8344 if (err) 8345 return libbpf_err(err); 8346 8347 if (bpf_obj_pin(map->fd, map->pin_path)) { 8348 err = -errno; 8349 goto out_err; 8350 } 8351 8352 map->pinned = true; 8353 pr_debug("pinned map '%s'\n", map->pin_path); 8354 8355 return 0; 8356 8357 out_err: 8358 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); 8359 pr_warn("failed to pin map: %s\n", cp); 8360 return libbpf_err(err); 8361 } 8362 8363 int bpf_map__unpin(struct bpf_map *map, const char *path) 8364 { 8365 int err; 8366 8367 if (map == NULL) { 8368 pr_warn("invalid map pointer\n"); 8369 return libbpf_err(-EINVAL); 8370 } 8371 8372 if (map->pin_path) { 8373 if (path && strcmp(path, map->pin_path)) { 8374 pr_warn("map '%s' already has pin path '%s' different from '%s'\n", 8375 bpf_map__name(map), map->pin_path, path); 8376 return libbpf_err(-EINVAL); 8377 } 8378 path = map->pin_path; 8379 } else if (!path) { 8380 pr_warn("no path to unpin map '%s' from\n", 8381 bpf_map__name(map)); 8382 return libbpf_err(-EINVAL); 8383 } 8384 8385 err = check_path(path); 8386 if (err) 8387 return libbpf_err(err); 8388 8389 err = unlink(path); 8390 if (err != 0) 8391 return libbpf_err(-errno); 8392 8393 map->pinned = false; 8394 pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path); 8395 8396 return 0; 8397 } 8398 8399 int bpf_map__set_pin_path(struct bpf_map *map, const char *path) 8400 { 8401 char *new = NULL; 8402 8403 if (path) { 8404 new = strdup(path); 8405 if (!new) 8406 return libbpf_err(-errno); 8407 } 8408 8409 free(map->pin_path); 8410 map->pin_path = new; 8411 return 0; 8412 } 8413 8414 __alias(bpf_map__pin_path) 8415 const char *bpf_map__get_pin_path(const struct bpf_map *map); 8416 8417 const char *bpf_map__pin_path(const struct bpf_map *map) 8418 { 8419 return map->pin_path; 8420 } 8421 8422 bool bpf_map__is_pinned(const struct bpf_map *map) 8423 { 8424 return map->pinned; 8425 } 8426 8427 static void sanitize_pin_path(char *s) 8428 { 8429 /* bpffs disallows periods in path names */ 8430 while (*s) { 8431 if (*s == '.') 8432 *s = '_'; 8433 s++; 8434 } 8435 } 8436 8437 int bpf_object__pin_maps(struct bpf_object *obj, const char *path) 8438 { 8439 struct bpf_map *map; 8440 int err; 8441 8442 if (!obj) 8443 return libbpf_err(-ENOENT); 8444 8445 if (!obj->loaded) { 
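/* maps are only created during bpf_object__load(), so an unloaded object has no map FDs to pin yet */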
8446 pr_warn("object not yet loaded; load it first\n"); 8447 return libbpf_err(-ENOENT); 8448 } 8449 8450 bpf_object__for_each_map(map, obj) { 8451 char *pin_path = NULL; 8452 char buf[PATH_MAX]; 8453 8454 if (!map->autocreate) 8455 continue; 8456 8457 if (path) { 8458 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); 8459 if (err) 8460 goto err_unpin_maps; 8461 sanitize_pin_path(buf); 8462 pin_path = buf; 8463 } else if (!map->pin_path) { 8464 continue; 8465 } 8466 8467 err = bpf_map__pin(map, pin_path); 8468 if (err) 8469 goto err_unpin_maps; 8470 } 8471 8472 return 0; 8473 8474 err_unpin_maps: 8475 while ((map = bpf_object__prev_map(obj, map))) { 8476 if (!map->pin_path) 8477 continue; 8478 8479 bpf_map__unpin(map, NULL); 8480 } 8481 8482 return libbpf_err(err); 8483 } 8484 8485 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) 8486 { 8487 struct bpf_map *map; 8488 int err; 8489 8490 if (!obj) 8491 return libbpf_err(-ENOENT); 8492 8493 bpf_object__for_each_map(map, obj) { 8494 char *pin_path = NULL; 8495 char buf[PATH_MAX]; 8496 8497 if (path) { 8498 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); 8499 if (err) 8500 return libbpf_err(err); 8501 sanitize_pin_path(buf); 8502 pin_path = buf; 8503 } else if (!map->pin_path) { 8504 continue; 8505 } 8506 8507 err = bpf_map__unpin(map, pin_path); 8508 if (err) 8509 return libbpf_err(err); 8510 } 8511 8512 return 0; 8513 } 8514 8515 int bpf_object__pin_programs(struct bpf_object *obj, const char *path) 8516 { 8517 struct bpf_program *prog; 8518 char buf[PATH_MAX]; 8519 int err; 8520 8521 if (!obj) 8522 return libbpf_err(-ENOENT); 8523 8524 if (!obj->loaded) { 8525 pr_warn("object not yet loaded; load it first\n"); 8526 return libbpf_err(-ENOENT); 8527 } 8528 8529 bpf_object__for_each_program(prog, obj) { 8530 err = pathname_concat(buf, sizeof(buf), path, prog->name); 8531 if (err) 8532 goto err_unpin_programs; 8533 8534 err = bpf_program__pin(prog, buf); 8535 if (err) 8536 goto err_unpin_programs; 8537 } 8538 8539 return 0; 8540 8541 err_unpin_programs: 8542 while ((prog = bpf_object__prev_program(obj, prog))) { 8543 if (pathname_concat(buf, sizeof(buf), path, prog->name)) 8544 continue; 8545 8546 bpf_program__unpin(prog, buf); 8547 } 8548 8549 return libbpf_err(err); 8550 } 8551 8552 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) 8553 { 8554 struct bpf_program *prog; 8555 int err; 8556 8557 if (!obj) 8558 return libbpf_err(-ENOENT); 8559 8560 bpf_object__for_each_program(prog, obj) { 8561 char buf[PATH_MAX]; 8562 8563 err = pathname_concat(buf, sizeof(buf), path, prog->name); 8564 if (err) 8565 return libbpf_err(err); 8566 8567 err = bpf_program__unpin(prog, buf); 8568 if (err) 8569 return libbpf_err(err); 8570 } 8571 8572 return 0; 8573 } 8574 8575 int bpf_object__pin(struct bpf_object *obj, const char *path) 8576 { 8577 int err; 8578 8579 err = bpf_object__pin_maps(obj, path); 8580 if (err) 8581 return libbpf_err(err); 8582 8583 err = bpf_object__pin_programs(obj, path); 8584 if (err) { 8585 bpf_object__unpin_maps(obj, path); 8586 return libbpf_err(err); 8587 } 8588 8589 return 0; 8590 } 8591 8592 int bpf_object__unpin(struct bpf_object *obj, const char *path) 8593 { 8594 int err; 8595 8596 err = bpf_object__unpin_programs(obj, path); 8597 if (err) 8598 return libbpf_err(err); 8599 8600 err = bpf_object__unpin_maps(obj, path); 8601 if (err) 8602 return libbpf_err(err); 8603 8604 return 0; 8605 } 8606 8607 static void bpf_map__destroy(struct bpf_map *map) 8608 { 
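/* frees everything owned by the map: the nested inner map definition, init slots, the mmap-ed data region of internal maps, struct_ops bookkeeping, name strings, the pin path, and the map FD */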
8609 if (map->inner_map) { 8610 bpf_map__destroy(map->inner_map); 8611 zfree(&map->inner_map); 8612 } 8613 8614 zfree(&map->init_slots); 8615 map->init_slots_sz = 0; 8616 8617 if (map->mmaped) { 8618 size_t mmap_sz; 8619 8620 mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); 8621 munmap(map->mmaped, mmap_sz); 8622 map->mmaped = NULL; 8623 } 8624 8625 if (map->st_ops) { 8626 zfree(&map->st_ops->data); 8627 zfree(&map->st_ops->progs); 8628 zfree(&map->st_ops->kern_func_off); 8629 zfree(&map->st_ops); 8630 } 8631 8632 zfree(&map->name); 8633 zfree(&map->real_name); 8634 zfree(&map->pin_path); 8635 8636 if (map->fd >= 0) 8637 zclose(map->fd); 8638 } 8639 8640 void bpf_object__close(struct bpf_object *obj) 8641 { 8642 size_t i; 8643 8644 if (IS_ERR_OR_NULL(obj)) 8645 return; 8646 8647 usdt_manager_free(obj->usdt_man); 8648 obj->usdt_man = NULL; 8649 8650 bpf_gen__free(obj->gen_loader); 8651 bpf_object__elf_finish(obj); 8652 bpf_object_unload(obj); 8653 btf__free(obj->btf); 8654 btf__free(obj->btf_vmlinux); 8655 btf_ext__free(obj->btf_ext); 8656 8657 for (i = 0; i < obj->nr_maps; i++) 8658 bpf_map__destroy(&obj->maps[i]); 8659 8660 zfree(&obj->btf_custom_path); 8661 zfree(&obj->kconfig); 8662 8663 for (i = 0; i < obj->nr_extern; i++) 8664 zfree(&obj->externs[i].essent_name); 8665 8666 zfree(&obj->externs); 8667 obj->nr_extern = 0; 8668 8669 zfree(&obj->maps); 8670 obj->nr_maps = 0; 8671 8672 if (obj->programs && obj->nr_programs) { 8673 for (i = 0; i < obj->nr_programs; i++) 8674 bpf_program__exit(&obj->programs[i]); 8675 } 8676 zfree(&obj->programs); 8677 8678 zfree(&obj->feat_cache); 8679 zfree(&obj->token_path); 8680 if (obj->token_fd > 0) 8681 close(obj->token_fd); 8682 8683 free(obj); 8684 } 8685 8686 const char *bpf_object__name(const struct bpf_object *obj) 8687 { 8688 return obj ? obj->name : libbpf_err_ptr(-EINVAL); 8689 } 8690 8691 unsigned int bpf_object__kversion(const struct bpf_object *obj) 8692 { 8693 return obj ? obj->kern_version : 0; 8694 } 8695 8696 struct btf *bpf_object__btf(const struct bpf_object *obj) 8697 { 8698 return obj ? obj->btf : NULL; 8699 } 8700 8701 int bpf_object__btf_fd(const struct bpf_object *obj) 8702 { 8703 return obj->btf ? btf__fd(obj->btf) : -1; 8704 } 8705 8706 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version) 8707 { 8708 if (obj->loaded) 8709 return libbpf_err(-EINVAL); 8710 8711 obj->kern_version = kern_version; 8712 8713 return 0; 8714 } 8715 8716 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) 8717 { 8718 struct bpf_gen *gen; 8719 8720 if (!opts) 8721 return -EFAULT; 8722 if (!OPTS_VALID(opts, gen_loader_opts)) 8723 return -EINVAL; 8724 gen = calloc(sizeof(*gen), 1); 8725 if (!gen) 8726 return -ENOMEM; 8727 gen->opts = opts; 8728 obj->gen_loader = gen; 8729 return 0; 8730 } 8731 8732 static struct bpf_program * 8733 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, 8734 bool forward) 8735 { 8736 size_t nr_programs = obj->nr_programs; 8737 ssize_t idx; 8738 8739 if (!nr_programs) 8740 return NULL; 8741 8742 if (!p) 8743 /* Iter from the beginning */ 8744 return forward ? &obj->programs[0] : 8745 &obj->programs[nr_programs - 1]; 8746 8747 if (p->obj != obj) { 8748 pr_warn("error: program handler doesn't match object\n"); 8749 return errno = EINVAL, NULL; 8750 } 8751 8752 idx = (p - obj->programs) + (forward ? 
1 : -1); 8753 if (idx >= obj->nr_programs || idx < 0) 8754 return NULL; 8755 return &obj->programs[idx]; 8756 } 8757 8758 struct bpf_program * 8759 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) 8760 { 8761 struct bpf_program *prog = prev; 8762 8763 do { 8764 prog = __bpf_program__iter(prog, obj, true); 8765 } while (prog && prog_is_subprog(obj, prog)); 8766 8767 return prog; 8768 } 8769 8770 struct bpf_program * 8771 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next) 8772 { 8773 struct bpf_program *prog = next; 8774 8775 do { 8776 prog = __bpf_program__iter(prog, obj, false); 8777 } while (prog && prog_is_subprog(obj, prog)); 8778 8779 return prog; 8780 } 8781 8782 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) 8783 { 8784 prog->prog_ifindex = ifindex; 8785 } 8786 8787 const char *bpf_program__name(const struct bpf_program *prog) 8788 { 8789 return prog->name; 8790 } 8791 8792 const char *bpf_program__section_name(const struct bpf_program *prog) 8793 { 8794 return prog->sec_name; 8795 } 8796 8797 bool bpf_program__autoload(const struct bpf_program *prog) 8798 { 8799 return prog->autoload; 8800 } 8801 8802 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) 8803 { 8804 if (prog->obj->loaded) 8805 return libbpf_err(-EINVAL); 8806 8807 prog->autoload = autoload; 8808 return 0; 8809 } 8810 8811 bool bpf_program__autoattach(const struct bpf_program *prog) 8812 { 8813 return prog->autoattach; 8814 } 8815 8816 void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach) 8817 { 8818 prog->autoattach = autoattach; 8819 } 8820 8821 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog) 8822 { 8823 return prog->insns; 8824 } 8825 8826 size_t bpf_program__insn_cnt(const struct bpf_program *prog) 8827 { 8828 return prog->insns_cnt; 8829 } 8830 8831 int bpf_program__set_insns(struct bpf_program *prog, 8832 struct bpf_insn *new_insns, size_t new_insn_cnt) 8833 { 8834 struct bpf_insn *insns; 8835 8836 if (prog->obj->loaded) 8837 return -EBUSY; 8838 8839 insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns)); 8840 /* NULL is a valid return from reallocarray if the new count is zero */ 8841 if (!insns && new_insn_cnt) { 8842 pr_warn("prog '%s': failed to realloc prog code\n", prog->name); 8843 return -ENOMEM; 8844 } 8845 memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns)); 8846 8847 prog->insns = insns; 8848 prog->insns_cnt = new_insn_cnt; 8849 return 0; 8850 } 8851 8852 int bpf_program__fd(const struct bpf_program *prog) 8853 { 8854 if (!prog) 8855 return libbpf_err(-EINVAL); 8856 8857 if (prog->fd < 0) 8858 return libbpf_err(-ENOENT); 8859 8860 return prog->fd; 8861 } 8862 8863 __alias(bpf_program__type) 8864 enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); 8865 8866 enum bpf_prog_type bpf_program__type(const struct bpf_program *prog) 8867 { 8868 return prog->type; 8869 } 8870 8871 static size_t custom_sec_def_cnt; 8872 static struct bpf_sec_def *custom_sec_defs; 8873 static struct bpf_sec_def custom_fallback_def; 8874 static bool has_custom_fallback_def; 8875 static int last_custom_sec_def_handler_id; 8876 8877 int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) 8878 { 8879 if (prog->obj->loaded) 8880 return libbpf_err(-EBUSY); 8881 8882 /* if type is not changed, do nothing */ 8883 if (prog->type == type) 8884 return 0; 8885 8886 prog->type = type; 8887 8888 /* If a program type was changed, we 
need to reset associated SEC() 8889 * handler, as it will be invalid now. The only exception is a generic 8890 * fallback handler, which by definition is program type-agnostic and 8891 * is a catch-all custom handler, optionally set by the application, 8892 * so should be able to handle any type of BPF program. 8893 */ 8894 if (prog->sec_def != &custom_fallback_def) 8895 prog->sec_def = NULL; 8896 return 0; 8897 } 8898 8899 __alias(bpf_program__expected_attach_type) 8900 enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); 8901 8902 enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog) 8903 { 8904 return prog->expected_attach_type; 8905 } 8906 8907 int bpf_program__set_expected_attach_type(struct bpf_program *prog, 8908 enum bpf_attach_type type) 8909 { 8910 if (prog->obj->loaded) 8911 return libbpf_err(-EBUSY); 8912 8913 prog->expected_attach_type = type; 8914 return 0; 8915 } 8916 8917 __u32 bpf_program__flags(const struct bpf_program *prog) 8918 { 8919 return prog->prog_flags; 8920 } 8921 8922 int bpf_program__set_flags(struct bpf_program *prog, __u32 flags) 8923 { 8924 if (prog->obj->loaded) 8925 return libbpf_err(-EBUSY); 8926 8927 prog->prog_flags = flags; 8928 return 0; 8929 } 8930 8931 __u32 bpf_program__log_level(const struct bpf_program *prog) 8932 { 8933 return prog->log_level; 8934 } 8935 8936 int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level) 8937 { 8938 if (prog->obj->loaded) 8939 return libbpf_err(-EBUSY); 8940 8941 prog->log_level = log_level; 8942 return 0; 8943 } 8944 8945 const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size) 8946 { 8947 *log_size = prog->log_size; 8948 return prog->log_buf; 8949 } 8950 8951 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size) 8952 { 8953 if (log_size && !log_buf) 8954 return -EINVAL; 8955 if (prog->log_size > UINT_MAX) 8956 return -EINVAL; 8957 if (prog->obj->loaded) 8958 return -EBUSY; 8959 8960 prog->log_buf = log_buf; 8961 prog->log_size = log_size; 8962 return 0; 8963 } 8964 8965 #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) 
{ \ 8966 .sec = (char *)sec_pfx, \ 8967 .prog_type = BPF_PROG_TYPE_##ptype, \ 8968 .expected_attach_type = atype, \ 8969 .cookie = (long)(flags), \ 8970 .prog_prepare_load_fn = libbpf_prepare_prog_load, \ 8971 __VA_ARGS__ \ 8972 } 8973 8974 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8975 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8976 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8977 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8978 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8979 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8980 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8981 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8982 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8983 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8984 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8985 8986 static const struct bpf_sec_def section_defs[] = { 8987 SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE), 8988 SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE), 8989 SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE), 8990 SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), 8991 SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), 8992 SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), 8993 SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), 8994 SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), 8995 SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), 8996 SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 8997 SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 8998 SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), 8999 SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), 9000 SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), 9001 SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), 9002 SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), 9003 SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), 9004 SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt), 9005 SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt), 9006 SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */ 9007 SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */ 9008 SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), 9009 SEC_DEF("tcx/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), 9010 SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ 9011 SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ 9012 SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */ 9013 SEC_DEF("netkit/primary", SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE), 9014 SEC_DEF("netkit/peer", SCHED_CLS, 
BPF_NETKIT_PEER, SEC_NONE), 9015 SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp), 9016 SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp), 9017 SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), 9018 SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), 9019 SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), 9020 SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), 9021 SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), 9022 SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), 9023 SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), 9024 SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), 9025 SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), 9026 SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), 9027 SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), 9028 SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace), 9029 SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), 9030 SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), 9031 SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF), 9032 SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), 9033 SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), 9034 SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), 9035 SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), 9036 SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), 9037 SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS), 9038 SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), 9039 SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS), 9040 SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT), 9041 SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE), 9042 SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE), 9043 SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE), 9044 SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE), 9045 SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE), 9046 SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT), 9047 SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT), 9048 SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT), 9049 SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE), 9050 SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT), 9051 SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT), 9052 SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT), 9053 SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT), 9054 SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT), 9055 SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE), 9056 SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE), 9057 SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE), 9058 SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT), 9059 SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE), 9060 SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE), 9061 SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, 
SEC_ATTACHABLE), 9062 SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE), 9063 SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE), 9064 SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE), 9065 SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE), 9066 SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE), 9067 SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE), 9068 SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE), 9069 SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE), 9070 SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE), 9071 SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE), 9072 SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE), 9073 SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE), 9074 SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE), 9075 SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE), 9076 SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE), 9077 SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE), 9078 SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE), 9079 SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE), 9080 SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE), 9081 SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT), 9082 SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE), 9083 SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE), 9084 SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), 9085 SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE), 9086 }; 9087 9088 int libbpf_register_prog_handler(const char *sec, 9089 enum bpf_prog_type prog_type, 9090 enum bpf_attach_type exp_attach_type, 9091 const struct libbpf_prog_handler_opts *opts) 9092 { 9093 struct bpf_sec_def *sec_def; 9094 9095 if (!OPTS_VALID(opts, libbpf_prog_handler_opts)) 9096 return libbpf_err(-EINVAL); 9097 9098 if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */ 9099 return libbpf_err(-E2BIG); 9100 9101 if (sec) { 9102 sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1, 9103 sizeof(*sec_def)); 9104 if (!sec_def) 9105 return libbpf_err(-ENOMEM); 9106 9107 custom_sec_defs = sec_def; 9108 sec_def = &custom_sec_defs[custom_sec_def_cnt]; 9109 } else { 9110 if (has_custom_fallback_def) 9111 return libbpf_err(-EBUSY); 9112 9113 sec_def = &custom_fallback_def; 9114 } 9115 9116 sec_def->sec = sec ? 
strdup(sec) : NULL; 9117 if (sec && !sec_def->sec) 9118 return libbpf_err(-ENOMEM); 9119 9120 sec_def->prog_type = prog_type; 9121 sec_def->expected_attach_type = exp_attach_type; 9122 sec_def->cookie = OPTS_GET(opts, cookie, 0); 9123 9124 sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL); 9125 sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL); 9126 sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL); 9127 9128 sec_def->handler_id = ++last_custom_sec_def_handler_id; 9129 9130 if (sec) 9131 custom_sec_def_cnt++; 9132 else 9133 has_custom_fallback_def = true; 9134 9135 return sec_def->handler_id; 9136 } 9137 9138 int libbpf_unregister_prog_handler(int handler_id) 9139 { 9140 struct bpf_sec_def *sec_defs; 9141 int i; 9142 9143 if (handler_id <= 0) 9144 return libbpf_err(-EINVAL); 9145 9146 if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) { 9147 memset(&custom_fallback_def, 0, sizeof(custom_fallback_def)); 9148 has_custom_fallback_def = false; 9149 return 0; 9150 } 9151 9152 for (i = 0; i < custom_sec_def_cnt; i++) { 9153 if (custom_sec_defs[i].handler_id == handler_id) 9154 break; 9155 } 9156 9157 if (i == custom_sec_def_cnt) 9158 return libbpf_err(-ENOENT); 9159 9160 free(custom_sec_defs[i].sec); 9161 for (i = i + 1; i < custom_sec_def_cnt; i++) 9162 custom_sec_defs[i - 1] = custom_sec_defs[i]; 9163 custom_sec_def_cnt--; 9164 9165 /* try to shrink the array, but it's ok if we couldn't */ 9166 sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs)); 9167 /* if new count is zero, reallocarray can return a valid NULL result; 9168 * in this case the previous pointer will be freed, so we *have to* 9169 * reassign old pointer to the new value (even if it's NULL) 9170 */ 9171 if (sec_defs || custom_sec_def_cnt == 0) 9172 custom_sec_defs = sec_defs; 9173 9174 return 0; 9175 } 9176 9177 static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name) 9178 { 9179 size_t len = strlen(sec_def->sec); 9180 9181 /* "type/" always has to have proper SEC("type/extras") form */ 9182 if (sec_def->sec[len - 1] == '/') { 9183 if (str_has_pfx(sec_name, sec_def->sec)) 9184 return true; 9185 return false; 9186 } 9187 9188 /* "type+" means it can be either exact SEC("type") or 9189 * well-formed SEC("type/extras") with proper '/' separator 9190 */ 9191 if (sec_def->sec[len - 1] == '+') { 9192 len--; 9193 /* not even a prefix */ 9194 if (strncmp(sec_name, sec_def->sec, len) != 0) 9195 return false; 9196 /* exact match or has '/' separator */ 9197 if (sec_name[len] == '\0' || sec_name[len] == '/') 9198 return true; 9199 return false; 9200 } 9201 9202 return strcmp(sec_name, sec_def->sec) == 0; 9203 } 9204 9205 static const struct bpf_sec_def *find_sec_def(const char *sec_name) 9206 { 9207 const struct bpf_sec_def *sec_def; 9208 int i, n; 9209 9210 n = custom_sec_def_cnt; 9211 for (i = 0; i < n; i++) { 9212 sec_def = &custom_sec_defs[i]; 9213 if (sec_def_matches(sec_def, sec_name)) 9214 return sec_def; 9215 } 9216 9217 n = ARRAY_SIZE(section_defs); 9218 for (i = 0; i < n; i++) { 9219 sec_def = &section_defs[i]; 9220 if (sec_def_matches(sec_def, sec_name)) 9221 return sec_def; 9222 } 9223 9224 if (has_custom_fallback_def) 9225 return &custom_fallback_def; 9226 9227 return NULL; 9228 } 9229 9230 #define MAX_TYPE_NAME_SIZE 32 9231 9232 static char *libbpf_get_type_names(bool attach_type) 9233 { 9234 int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE; 9235 char *buf; 9236 9237 buf = malloc(len);
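/* len is a rough upper bound (MAX_TYPE_NAME_SIZE bytes per section definition); the loop below still checks remaining space before each strcat() */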
9238 if (!buf) 9239 return NULL; 9240 9241 buf[0] = '\0'; 9242 /* Forge string buf with all available names */ 9243 for (i = 0; i < ARRAY_SIZE(section_defs); i++) { 9244 const struct bpf_sec_def *sec_def = &section_defs[i]; 9245 9246 if (attach_type) { 9247 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) 9248 continue; 9249 9250 if (!(sec_def->cookie & SEC_ATTACHABLE)) 9251 continue; 9252 } 9253 9254 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) { 9255 free(buf); 9256 return NULL; 9257 } 9258 strcat(buf, " "); 9259 strcat(buf, section_defs[i].sec); 9260 } 9261 9262 return buf; 9263 } 9264 9265 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, 9266 enum bpf_attach_type *expected_attach_type) 9267 { 9268 const struct bpf_sec_def *sec_def; 9269 char *type_names; 9270 9271 if (!name) 9272 return libbpf_err(-EINVAL); 9273 9274 sec_def = find_sec_def(name); 9275 if (sec_def) { 9276 *prog_type = sec_def->prog_type; 9277 *expected_attach_type = sec_def->expected_attach_type; 9278 return 0; 9279 } 9280 9281 pr_debug("failed to guess program type from ELF section '%s'\n", name); 9282 type_names = libbpf_get_type_names(false); 9283 if (type_names != NULL) { 9284 pr_debug("supported section(type) names are:%s\n", type_names); 9285 free(type_names); 9286 } 9287 9288 return libbpf_err(-ESRCH); 9289 } 9290 9291 const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t) 9292 { 9293 if (t < 0 || t >= ARRAY_SIZE(attach_type_name)) 9294 return NULL; 9295 9296 return attach_type_name[t]; 9297 } 9298 9299 const char *libbpf_bpf_link_type_str(enum bpf_link_type t) 9300 { 9301 if (t < 0 || t >= ARRAY_SIZE(link_type_name)) 9302 return NULL; 9303 9304 return link_type_name[t]; 9305 } 9306 9307 const char *libbpf_bpf_map_type_str(enum bpf_map_type t) 9308 { 9309 if (t < 0 || t >= ARRAY_SIZE(map_type_name)) 9310 return NULL; 9311 9312 return map_type_name[t]; 9313 } 9314 9315 const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t) 9316 { 9317 if (t < 0 || t >= ARRAY_SIZE(prog_type_name)) 9318 return NULL; 9319 9320 return prog_type_name[t]; 9321 } 9322 9323 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, 9324 int sec_idx, 9325 size_t offset) 9326 { 9327 struct bpf_map *map; 9328 size_t i; 9329 9330 for (i = 0; i < obj->nr_maps; i++) { 9331 map = &obj->maps[i]; 9332 if (!bpf_map__is_struct_ops(map)) 9333 continue; 9334 if (map->sec_idx == sec_idx && 9335 map->sec_offset <= offset && 9336 offset - map->sec_offset < map->def.value_size) 9337 return map; 9338 } 9339 9340 return NULL; 9341 } 9342 9343 /* Collect the reloc from ELF and populate the st_ops->progs[] */ 9344 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, 9345 Elf64_Shdr *shdr, Elf_Data *data) 9346 { 9347 const struct btf_member *member; 9348 struct bpf_struct_ops *st_ops; 9349 struct bpf_program *prog; 9350 unsigned int shdr_idx; 9351 const struct btf *btf; 9352 struct bpf_map *map; 9353 unsigned int moff, insn_idx; 9354 const char *name; 9355 __u32 member_idx; 9356 Elf64_Sym *sym; 9357 Elf64_Rel *rel; 9358 int i, nrels; 9359 9360 btf = obj->btf; 9361 nrels = shdr->sh_size / shdr->sh_entsize; 9362 for (i = 0; i < nrels; i++) { 9363 rel = elf_rel_by_idx(data, i); 9364 if (!rel) { 9365 pr_warn("struct_ops reloc: failed to get %d reloc\n", i); 9366 return -LIBBPF_ERRNO__FORMAT; 9367 } 9368 9369 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); 9370 if (!sym) { 9371 pr_warn("struct_ops reloc: symbol %zx not found\n", 9372
(size_t)ELF64_R_SYM(rel->r_info)); 9373 return -LIBBPF_ERRNO__FORMAT; 9374 } 9375 9376 name = elf_sym_str(obj, sym->st_name) ?: "<?>"; 9377 map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset); 9378 if (!map) { 9379 pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n", 9380 (size_t)rel->r_offset); 9381 return -EINVAL; 9382 } 9383 9384 moff = rel->r_offset - map->sec_offset; 9385 shdr_idx = sym->st_shndx; 9386 st_ops = map->st_ops; 9387 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", 9388 map->name, 9389 (long long)(rel->r_info >> 32), 9390 (long long)sym->st_value, 9391 shdr_idx, (size_t)rel->r_offset, 9392 map->sec_offset, sym->st_name, name); 9393 9394 if (shdr_idx >= SHN_LORESERVE) { 9395 pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n", 9396 map->name, (size_t)rel->r_offset, shdr_idx); 9397 return -LIBBPF_ERRNO__RELOC; 9398 } 9399 if (sym->st_value % BPF_INSN_SZ) { 9400 pr_warn("struct_ops reloc %s: invalid target program offset %llu\n", 9401 map->name, (unsigned long long)sym->st_value); 9402 return -LIBBPF_ERRNO__FORMAT; 9403 } 9404 insn_idx = sym->st_value / BPF_INSN_SZ; 9405 9406 member = find_member_by_offset(st_ops->type, moff * 8); 9407 if (!member) { 9408 pr_warn("struct_ops reloc %s: cannot find member at moff %u\n", 9409 map->name, moff); 9410 return -EINVAL; 9411 } 9412 member_idx = member - btf_members(st_ops->type); 9413 name = btf__name_by_offset(btf, member->name_off); 9414 9415 if (!resolve_func_ptr(btf, member->type, NULL)) { 9416 pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n", 9417 map->name, name); 9418 return -EINVAL; 9419 } 9420 9421 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx); 9422 if (!prog) { 9423 pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n", 9424 map->name, shdr_idx, name); 9425 return -EINVAL; 9426 } 9427 9428 /* prevent the use of BPF prog with invalid type */ 9429 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) { 9430 pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n", 9431 map->name, prog->name); 9432 return -EINVAL; 9433 } 9434 9435 /* if we haven't yet processed this BPF program, record proper 9436 * attach_btf_id and member_idx 9437 */ 9438 if (!prog->attach_btf_id) { 9439 prog->attach_btf_id = st_ops->type_id; 9440 prog->expected_attach_type = member_idx; 9441 } 9442 9443 /* struct_ops BPF prog can be re-used between multiple 9444 * .struct_ops & .struct_ops.link as long as it's the 9445 * same struct_ops struct definition and the same 9446 * function pointer field 9447 */ 9448 if (prog->attach_btf_id != st_ops->type_id || 9449 prog->expected_attach_type != member_idx) { 9450 pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", 9451 map->name, prog->name, prog->sec_name, prog->type, 9452 prog->attach_btf_id, prog->expected_attach_type, name); 9453 return -EINVAL; 9454 } 9455 9456 st_ops->progs[member_idx] = prog; 9457 } 9458 9459 return 0; 9460 } 9461 9462 #define BTF_TRACE_PREFIX "btf_trace_" 9463 #define BTF_LSM_PREFIX "bpf_lsm_" 9464 #define BTF_ITER_PREFIX "bpf_iter_" 9465 #define BTF_MAX_NAME_SIZE 128 9466 9467 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, 9468 const char **prefix, int *kind) 9469 { 9470 switch (attach_type) { 9471 case BPF_TRACE_RAW_TP: 9472 *prefix = BTF_TRACE_PREFIX; 9473 *kind = 
BTF_KIND_TYPEDEF; 9474 break; 9475 case BPF_LSM_MAC: 9476 case BPF_LSM_CGROUP: 9477 *prefix = BTF_LSM_PREFIX; 9478 *kind = BTF_KIND_FUNC; 9479 break; 9480 case BPF_TRACE_ITER: 9481 *prefix = BTF_ITER_PREFIX; 9482 *kind = BTF_KIND_FUNC; 9483 break; 9484 default: 9485 *prefix = ""; 9486 *kind = BTF_KIND_FUNC; 9487 } 9488 } 9489 9490 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, 9491 const char *name, __u32 kind) 9492 { 9493 char btf_type_name[BTF_MAX_NAME_SIZE]; 9494 int ret; 9495 9496 ret = snprintf(btf_type_name, sizeof(btf_type_name), 9497 "%s%s", prefix, name); 9498 /* snprintf returns the number of characters written excluding the 9499 * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it 9500 * indicates truncation. 9501 */ 9502 if (ret < 0 || ret >= sizeof(btf_type_name)) 9503 return -ENAMETOOLONG; 9504 return btf__find_by_name_kind(btf, btf_type_name, kind); 9505 } 9506 9507 static inline int find_attach_btf_id(struct btf *btf, const char *name, 9508 enum bpf_attach_type attach_type) 9509 { 9510 const char *prefix; 9511 int kind; 9512 9513 btf_get_kernel_prefix_kind(attach_type, &prefix, &kind); 9514 return find_btf_by_prefix_kind(btf, prefix, name, kind); 9515 } 9516 9517 int libbpf_find_vmlinux_btf_id(const char *name, 9518 enum bpf_attach_type attach_type) 9519 { 9520 struct btf *btf; 9521 int err; 9522 9523 btf = btf__load_vmlinux_btf(); 9524 err = libbpf_get_error(btf); 9525 if (err) { 9526 pr_warn("vmlinux BTF is not found\n"); 9527 return libbpf_err(err); 9528 } 9529 9530 err = find_attach_btf_id(btf, name, attach_type); 9531 if (err <= 0) 9532 pr_warn("%s is not found in vmlinux BTF\n", name); 9533 9534 btf__free(btf); 9535 return libbpf_err(err); 9536 } 9537 9538 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) 9539 { 9540 struct bpf_prog_info info; 9541 __u32 info_len = sizeof(info); 9542 struct btf *btf; 9543 int err; 9544 9545 memset(&info, 0, info_len); 9546 err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len); 9547 if (err) { 9548 pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %d\n", 9549 attach_prog_fd, err); 9550 return err; 9551 } 9552 9553 err = -EINVAL; 9554 if (!info.btf_id) { 9555 pr_warn("The target program doesn't have BTF\n"); 9556 goto out; 9557 } 9558 btf = btf__load_from_kernel_by_id(info.btf_id); 9559 err = libbpf_get_error(btf); 9560 if (err) { 9561 pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err); 9562 goto out; 9563 } 9564 err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); 9565 btf__free(btf); 9566 if (err <= 0) { 9567 pr_warn("%s is not found in prog's BTF\n", name); 9568 goto out; 9569 } 9570 out: 9571 return err; 9572 } 9573 9574 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, 9575 enum bpf_attach_type attach_type, 9576 int *btf_obj_fd, int *btf_type_id) 9577 { 9578 int ret, i; 9579 9580 ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type); 9581 if (ret > 0) { 9582 *btf_obj_fd = 0; /* vmlinux BTF */ 9583 *btf_type_id = ret; 9584 return 0; 9585 } 9586 if (ret != -ENOENT) 9587 return ret; 9588 9589 ret = load_module_btfs(obj); 9590 if (ret) 9591 return ret; 9592 9593 for (i = 0; i < obj->btf_module_cnt; i++) { 9594 const struct module_btf *mod = &obj->btf_modules[i]; 9595 9596 ret = find_attach_btf_id(mod->btf, attach_name, attach_type); 9597 if (ret > 0) { 9598 *btf_obj_fd = mod->fd; 9599 *btf_type_id = ret; 9600 return 0; 9601 } 9602 if (ret == -ENOENT) 9603 continue; 9604 9605 return ret; 9606 } 
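	/* Neither vmlinux BTF nor any loaded kernel module BTF contains the
	 * requested attach target, so report it as missing.
	 */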
9607 9608 return -ESRCH; 9609 } 9610 9611 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, 9612 int *btf_obj_fd, int *btf_type_id) 9613 { 9614 enum bpf_attach_type attach_type = prog->expected_attach_type; 9615 __u32 attach_prog_fd = prog->attach_prog_fd; 9616 int err = 0; 9617 9618 /* BPF program's BTF ID */ 9619 if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) { 9620 if (!attach_prog_fd) { 9621 pr_warn("prog '%s': attach program FD is not set\n", prog->name); 9622 return -EINVAL; 9623 } 9624 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd); 9625 if (err < 0) { 9626 pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n", 9627 prog->name, attach_prog_fd, attach_name, err); 9628 return err; 9629 } 9630 *btf_obj_fd = 0; 9631 *btf_type_id = err; 9632 return 0; 9633 } 9634 9635 /* kernel/module BTF ID */ 9636 if (prog->obj->gen_loader) { 9637 bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type); 9638 *btf_obj_fd = 0; 9639 *btf_type_id = 1; 9640 } else { 9641 err = find_kernel_btf_id(prog->obj, attach_name, 9642 attach_type, btf_obj_fd, 9643 btf_type_id); 9644 } 9645 if (err) { 9646 pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n", 9647 prog->name, attach_name, err); 9648 return err; 9649 } 9650 return 0; 9651 } 9652 9653 int libbpf_attach_type_by_name(const char *name, 9654 enum bpf_attach_type *attach_type) 9655 { 9656 char *type_names; 9657 const struct bpf_sec_def *sec_def; 9658 9659 if (!name) 9660 return libbpf_err(-EINVAL); 9661 9662 sec_def = find_sec_def(name); 9663 if (!sec_def) { 9664 pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); 9665 type_names = libbpf_get_type_names(true); 9666 if (type_names != NULL) { 9667 pr_debug("attachable section(type) names are:%s\n", type_names); 9668 free(type_names); 9669 } 9670 9671 return libbpf_err(-EINVAL); 9672 } 9673 9674 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) 9675 return libbpf_err(-EINVAL); 9676 if (!(sec_def->cookie & SEC_ATTACHABLE)) 9677 return libbpf_err(-EINVAL); 9678 9679 *attach_type = sec_def->expected_attach_type; 9680 return 0; 9681 } 9682 9683 int bpf_map__fd(const struct bpf_map *map) 9684 { 9685 if (!map) 9686 return libbpf_err(-EINVAL); 9687 if (!map_is_created(map)) 9688 return -1; 9689 return map->fd; 9690 } 9691 9692 static bool map_uses_real_name(const struct bpf_map *map) 9693 { 9694 /* Since libbpf started to support custom .data.* and .rodata.* maps, 9695 * their user-visible name differs from kernel-visible name. Users see 9696 * such map's corresponding ELF section name as a map name. 9697 * This check distinguishes .data/.rodata from .data.* and .rodata.* 9698 * maps to know which name has to be returned to the user. 
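	 * For example, a global variable placed in SEC(".data.cfg") ends up in
	 * its own map whose bpf_map__name() is ".data.cfg", while plain .data
	 * and .rodata keep returning the kernel-visible map name.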
9699 */ 9700 if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0) 9701 return true; 9702 if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0) 9703 return true; 9704 return false; 9705 } 9706 9707 const char *bpf_map__name(const struct bpf_map *map) 9708 { 9709 if (!map) 9710 return NULL; 9711 9712 if (map_uses_real_name(map)) 9713 return map->real_name; 9714 9715 return map->name; 9716 } 9717 9718 enum bpf_map_type bpf_map__type(const struct bpf_map *map) 9719 { 9720 return map->def.type; 9721 } 9722 9723 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type) 9724 { 9725 if (map_is_created(map)) 9726 return libbpf_err(-EBUSY); 9727 map->def.type = type; 9728 return 0; 9729 } 9730 9731 __u32 bpf_map__map_flags(const struct bpf_map *map) 9732 { 9733 return map->def.map_flags; 9734 } 9735 9736 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) 9737 { 9738 if (map_is_created(map)) 9739 return libbpf_err(-EBUSY); 9740 map->def.map_flags = flags; 9741 return 0; 9742 } 9743 9744 __u64 bpf_map__map_extra(const struct bpf_map *map) 9745 { 9746 return map->map_extra; 9747 } 9748 9749 int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra) 9750 { 9751 if (map_is_created(map)) 9752 return libbpf_err(-EBUSY); 9753 map->map_extra = map_extra; 9754 return 0; 9755 } 9756 9757 __u32 bpf_map__numa_node(const struct bpf_map *map) 9758 { 9759 return map->numa_node; 9760 } 9761 9762 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node) 9763 { 9764 if (map_is_created(map)) 9765 return libbpf_err(-EBUSY); 9766 map->numa_node = numa_node; 9767 return 0; 9768 } 9769 9770 __u32 bpf_map__key_size(const struct bpf_map *map) 9771 { 9772 return map->def.key_size; 9773 } 9774 9775 int bpf_map__set_key_size(struct bpf_map *map, __u32 size) 9776 { 9777 if (map_is_created(map)) 9778 return libbpf_err(-EBUSY); 9779 map->def.key_size = size; 9780 return 0; 9781 } 9782 9783 __u32 bpf_map__value_size(const struct bpf_map *map) 9784 { 9785 return map->def.value_size; 9786 } 9787 9788 static int map_btf_datasec_resize(struct bpf_map *map, __u32 size) 9789 { 9790 struct btf *btf; 9791 struct btf_type *datasec_type, *var_type; 9792 struct btf_var_secinfo *var; 9793 const struct btf_type *array_type; 9794 const struct btf_array *array; 9795 int vlen, element_sz, new_array_id; 9796 __u32 nr_elements; 9797 9798 /* check btf existence */ 9799 btf = bpf_object__btf(map->obj); 9800 if (!btf) 9801 return -ENOENT; 9802 9803 /* verify map is datasec */ 9804 datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map)); 9805 if (!btf_is_datasec(datasec_type)) { 9806 pr_warn("map '%s': cannot be resized, map value type is not a datasec\n", 9807 bpf_map__name(map)); 9808 return -EINVAL; 9809 } 9810 9811 /* verify datasec has at least one var */ 9812 vlen = btf_vlen(datasec_type); 9813 if (vlen == 0) { 9814 pr_warn("map '%s': cannot be resized, map value datasec is empty\n", 9815 bpf_map__name(map)); 9816 return -EINVAL; 9817 } 9818 9819 /* verify last var in the datasec is an array */ 9820 var = &btf_var_secinfos(datasec_type)[vlen - 1]; 9821 var_type = btf_type_by_id(btf, var->type); 9822 array_type = skip_mods_and_typedefs(btf, var_type->type, NULL); 9823 if (!btf_is_array(array_type)) { 9824 pr_warn("map '%s': cannot be resized, last var must be an array\n", 9825 bpf_map__name(map)); 9826 return -EINVAL; 9827 } 9828 9829 /* verify request size aligns with array */ 9830 array = btf_array(array_type); 9831 element_sz = 
btf__resolve_size(btf, array->type); 9832 if (element_sz <= 0 || (size - var->offset) % element_sz != 0) { 9833 pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n", 9834 bpf_map__name(map), element_sz, size); 9835 return -EINVAL; 9836 } 9837 9838 /* create a new array based on the existing array, but with new length */ 9839 nr_elements = (size - var->offset) / element_sz; 9840 new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements); 9841 if (new_array_id < 0) 9842 return new_array_id; 9843 9844 /* adding a new btf type invalidates existing pointers to btf objects, 9845 * so refresh pointers before proceeding 9846 */ 9847 datasec_type = btf_type_by_id(btf, map->btf_value_type_id); 9848 var = &btf_var_secinfos(datasec_type)[vlen - 1]; 9849 var_type = btf_type_by_id(btf, var->type); 9850 9851 /* finally update btf info */ 9852 datasec_type->size = size; 9853 var->size = size - var->offset; 9854 var_type->type = new_array_id; 9855 9856 return 0; 9857 } 9858 9859 int bpf_map__set_value_size(struct bpf_map *map, __u32 size) 9860 { 9861 if (map->obj->loaded || map->reused) 9862 return libbpf_err(-EBUSY); 9863 9864 if (map->mmaped) { 9865 int err; 9866 size_t mmap_old_sz, mmap_new_sz; 9867 9868 mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); 9869 mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries); 9870 err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz); 9871 if (err) { 9872 pr_warn("map '%s': failed to resize memory-mapped region: %d\n", 9873 bpf_map__name(map), err); 9874 return err; 9875 } 9876 err = map_btf_datasec_resize(map, size); 9877 if (err && err != -ENOENT) { 9878 pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n", 9879 bpf_map__name(map), err); 9880 map->btf_value_type_id = 0; 9881 map->btf_key_type_id = 0; 9882 } 9883 } 9884 9885 map->def.value_size = size; 9886 return 0; 9887 } 9888 9889 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map) 9890 { 9891 return map ? map->btf_key_type_id : 0; 9892 } 9893 9894 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map) 9895 { 9896 return map ? 
map->btf_value_type_id : 0; 9897 } 9898 9899 int bpf_map__set_initial_value(struct bpf_map *map, 9900 const void *data, size_t size) 9901 { 9902 if (map->obj->loaded || map->reused) 9903 return libbpf_err(-EBUSY); 9904 9905 if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG || 9906 size != map->def.value_size) 9907 return libbpf_err(-EINVAL); 9908 9909 memcpy(map->mmaped, data, size); 9910 return 0; 9911 } 9912 9913 void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) 9914 { 9915 if (!map->mmaped) 9916 return NULL; 9917 *psize = map->def.value_size; 9918 return map->mmaped; 9919 } 9920 9921 bool bpf_map__is_internal(const struct bpf_map *map) 9922 { 9923 return map->libbpf_type != LIBBPF_MAP_UNSPEC; 9924 } 9925 9926 __u32 bpf_map__ifindex(const struct bpf_map *map) 9927 { 9928 return map->map_ifindex; 9929 } 9930 9931 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) 9932 { 9933 if (map_is_created(map)) 9934 return libbpf_err(-EBUSY); 9935 map->map_ifindex = ifindex; 9936 return 0; 9937 } 9938 9939 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) 9940 { 9941 if (!bpf_map_type__is_map_in_map(map->def.type)) { 9942 pr_warn("error: unsupported map type\n"); 9943 return libbpf_err(-EINVAL); 9944 } 9945 if (map->inner_map_fd != -1) { 9946 pr_warn("error: inner_map_fd already specified\n"); 9947 return libbpf_err(-EINVAL); 9948 } 9949 if (map->inner_map) { 9950 bpf_map__destroy(map->inner_map); 9951 zfree(&map->inner_map); 9952 } 9953 map->inner_map_fd = fd; 9954 return 0; 9955 } 9956 9957 static struct bpf_map * 9958 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) 9959 { 9960 ssize_t idx; 9961 struct bpf_map *s, *e; 9962 9963 if (!obj || !obj->maps) 9964 return errno = EINVAL, NULL; 9965 9966 s = obj->maps; 9967 e = obj->maps + obj->nr_maps; 9968 9969 if ((m < s) || (m >= e)) { 9970 pr_warn("error in %s: map handler doesn't belong to object\n", 9971 __func__); 9972 return errno = EINVAL, NULL; 9973 } 9974 9975 idx = (m - obj->maps) + i; 9976 if (idx >= obj->nr_maps || idx < 0) 9977 return NULL; 9978 return &obj->maps[idx]; 9979 } 9980 9981 struct bpf_map * 9982 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) 9983 { 9984 if (prev == NULL) 9985 return obj->maps; 9986 9987 return __bpf_map__iter(prev, obj, 1); 9988 } 9989 9990 struct bpf_map * 9991 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next) 9992 { 9993 if (next == NULL) { 9994 if (!obj->nr_maps) 9995 return NULL; 9996 return obj->maps + obj->nr_maps - 1; 9997 } 9998 9999 return __bpf_map__iter(next, obj, -1); 10000 } 10001 10002 struct bpf_map * 10003 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name) 10004 { 10005 struct bpf_map *pos; 10006 10007 bpf_object__for_each_map(pos, obj) { 10008 /* if it's a special internal map name (which always starts 10009 * with dot) then check if that special name matches the 10010 * real map name (ELF section name) 10011 */ 10012 if (name[0] == '.') { 10013 if (pos->real_name && strcmp(pos->real_name, name) == 0) 10014 return pos; 10015 continue; 10016 } 10017 /* otherwise map name has to be an exact match */ 10018 if (map_uses_real_name(pos)) { 10019 if (strcmp(pos->real_name, name) == 0) 10020 return pos; 10021 continue; 10022 } 10023 if (strcmp(pos->name, name) == 0) 10024 return pos; 10025 } 10026 return errno = ENOENT, NULL; 10027 } 10028 10029 int 10030 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name) 10031 { 10032 
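	/* Convenience wrapper: look the map up by its user-visible name and
	 * return its FD; bpf_map__fd() turns a failed (NULL) lookup into a
	 * negative error.
	 */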
return bpf_map__fd(bpf_object__find_map_by_name(obj, name)); 10033 } 10034 10035 static int validate_map_op(const struct bpf_map *map, size_t key_sz, 10036 size_t value_sz, bool check_value_sz) 10037 { 10038 if (!map_is_created(map)) /* map is not yet created */ 10039 return -ENOENT; 10040 10041 if (map->def.key_size != key_sz) { 10042 pr_warn("map '%s': unexpected key size %zu provided, expected %u\n", 10043 map->name, key_sz, map->def.key_size); 10044 return -EINVAL; 10045 } 10046 10047 if (!check_value_sz) 10048 return 0; 10049 10050 switch (map->def.type) { 10051 case BPF_MAP_TYPE_PERCPU_ARRAY: 10052 case BPF_MAP_TYPE_PERCPU_HASH: 10053 case BPF_MAP_TYPE_LRU_PERCPU_HASH: 10054 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: { 10055 int num_cpu = libbpf_num_possible_cpus(); 10056 size_t elem_sz = roundup(map->def.value_size, 8); 10057 10058 if (value_sz != num_cpu * elem_sz) { 10059 pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n", 10060 map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz); 10061 return -EINVAL; 10062 } 10063 break; 10064 } 10065 default: 10066 if (map->def.value_size != value_sz) { 10067 pr_warn("map '%s': unexpected value size %zu provided, expected %u\n", 10068 map->name, value_sz, map->def.value_size); 10069 return -EINVAL; 10070 } 10071 break; 10072 } 10073 return 0; 10074 } 10075 10076 int bpf_map__lookup_elem(const struct bpf_map *map, 10077 const void *key, size_t key_sz, 10078 void *value, size_t value_sz, __u64 flags) 10079 { 10080 int err; 10081 10082 err = validate_map_op(map, key_sz, value_sz, true); 10083 if (err) 10084 return libbpf_err(err); 10085 10086 return bpf_map_lookup_elem_flags(map->fd, key, value, flags); 10087 } 10088 10089 int bpf_map__update_elem(const struct bpf_map *map, 10090 const void *key, size_t key_sz, 10091 const void *value, size_t value_sz, __u64 flags) 10092 { 10093 int err; 10094 10095 err = validate_map_op(map, key_sz, value_sz, true); 10096 if (err) 10097 return libbpf_err(err); 10098 10099 return bpf_map_update_elem(map->fd, key, value, flags); 10100 } 10101 10102 int bpf_map__delete_elem(const struct bpf_map *map, 10103 const void *key, size_t key_sz, __u64 flags) 10104 { 10105 int err; 10106 10107 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); 10108 if (err) 10109 return libbpf_err(err); 10110 10111 return bpf_map_delete_elem_flags(map->fd, key, flags); 10112 } 10113 10114 int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, 10115 const void *key, size_t key_sz, 10116 void *value, size_t value_sz, __u64 flags) 10117 { 10118 int err; 10119 10120 err = validate_map_op(map, key_sz, value_sz, true); 10121 if (err) 10122 return libbpf_err(err); 10123 10124 return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags); 10125 } 10126 10127 int bpf_map__get_next_key(const struct bpf_map *map, 10128 const void *cur_key, void *next_key, size_t key_sz) 10129 { 10130 int err; 10131 10132 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); 10133 if (err) 10134 return libbpf_err(err); 10135 10136 return bpf_map_get_next_key(map->fd, cur_key, next_key); 10137 } 10138 10139 long libbpf_get_error(const void *ptr) 10140 { 10141 if (!IS_ERR_OR_NULL(ptr)) 10142 return 0; 10143 10144 if (IS_ERR(ptr)) 10145 errno = -PTR_ERR(ptr); 10146 10147 /* If ptr == NULL, then errno should be already set by the failing 10148 * API, because libbpf never returns NULL on success and it now always 10149 * sets errno on error. 
So no extra errno handling for ptr == NULL 10150 * case. 10151 */ 10152 return -errno; 10153 } 10154 10155 /* Replace link's underlying BPF program with the new one */ 10156 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) 10157 { 10158 int ret; 10159 10160 ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL); 10161 return libbpf_err_errno(ret); 10162 } 10163 10164 /* Release "ownership" of underlying BPF resource (typically, BPF program 10165 * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected 10166 * link, when destructed through bpf_link__destroy() call won't attempt to 10167 * detach/unregisted that BPF resource. This is useful in situations where, 10168 * say, attached BPF program has to outlive userspace program that attached it 10169 * in the system. Depending on type of BPF program, though, there might be 10170 * additional steps (like pinning BPF program in BPF FS) necessary to ensure 10171 * exit of userspace program doesn't trigger automatic detachment and clean up 10172 * inside the kernel. 10173 */ 10174 void bpf_link__disconnect(struct bpf_link *link) 10175 { 10176 link->disconnected = true; 10177 } 10178 10179 int bpf_link__destroy(struct bpf_link *link) 10180 { 10181 int err = 0; 10182 10183 if (IS_ERR_OR_NULL(link)) 10184 return 0; 10185 10186 if (!link->disconnected && link->detach) 10187 err = link->detach(link); 10188 if (link->pin_path) 10189 free(link->pin_path); 10190 if (link->dealloc) 10191 link->dealloc(link); 10192 else 10193 free(link); 10194 10195 return libbpf_err(err); 10196 } 10197 10198 int bpf_link__fd(const struct bpf_link *link) 10199 { 10200 return link->fd; 10201 } 10202 10203 const char *bpf_link__pin_path(const struct bpf_link *link) 10204 { 10205 return link->pin_path; 10206 } 10207 10208 static int bpf_link__detach_fd(struct bpf_link *link) 10209 { 10210 return libbpf_err_errno(close(link->fd)); 10211 } 10212 10213 struct bpf_link *bpf_link__open(const char *path) 10214 { 10215 struct bpf_link *link; 10216 int fd; 10217 10218 fd = bpf_obj_get(path); 10219 if (fd < 0) { 10220 fd = -errno; 10221 pr_warn("failed to open link at %s: %d\n", path, fd); 10222 return libbpf_err_ptr(fd); 10223 } 10224 10225 link = calloc(1, sizeof(*link)); 10226 if (!link) { 10227 close(fd); 10228 return libbpf_err_ptr(-ENOMEM); 10229 } 10230 link->detach = &bpf_link__detach_fd; 10231 link->fd = fd; 10232 10233 link->pin_path = strdup(path); 10234 if (!link->pin_path) { 10235 bpf_link__destroy(link); 10236 return libbpf_err_ptr(-ENOMEM); 10237 } 10238 10239 return link; 10240 } 10241 10242 int bpf_link__detach(struct bpf_link *link) 10243 { 10244 return bpf_link_detach(link->fd) ? 
-errno : 0; 10245 } 10246 10247 int bpf_link__pin(struct bpf_link *link, const char *path) 10248 { 10249 int err; 10250 10251 if (link->pin_path) 10252 return libbpf_err(-EBUSY); 10253 err = make_parent_dir(path); 10254 if (err) 10255 return libbpf_err(err); 10256 err = check_path(path); 10257 if (err) 10258 return libbpf_err(err); 10259 10260 link->pin_path = strdup(path); 10261 if (!link->pin_path) 10262 return libbpf_err(-ENOMEM); 10263 10264 if (bpf_obj_pin(link->fd, link->pin_path)) { 10265 err = -errno; 10266 zfree(&link->pin_path); 10267 return libbpf_err(err); 10268 } 10269 10270 pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path); 10271 return 0; 10272 } 10273 10274 int bpf_link__unpin(struct bpf_link *link) 10275 { 10276 int err; 10277 10278 if (!link->pin_path) 10279 return libbpf_err(-EINVAL); 10280 10281 err = unlink(link->pin_path); 10282 if (err != 0) 10283 return -errno; 10284 10285 pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path); 10286 zfree(&link->pin_path); 10287 return 0; 10288 } 10289 10290 struct bpf_link_perf { 10291 struct bpf_link link; 10292 int perf_event_fd; 10293 /* legacy kprobe support: keep track of probe identifier and type */ 10294 char *legacy_probe_name; 10295 bool legacy_is_kprobe; 10296 bool legacy_is_retprobe; 10297 }; 10298 10299 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe); 10300 static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe); 10301 10302 static int bpf_link_perf_detach(struct bpf_link *link) 10303 { 10304 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 10305 int err = 0; 10306 10307 if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0) 10308 err = -errno; 10309 10310 if (perf_link->perf_event_fd != link->fd) 10311 close(perf_link->perf_event_fd); 10312 close(link->fd); 10313 10314 /* legacy uprobe/kprobe needs to be removed after perf event fd closure */ 10315 if (perf_link->legacy_probe_name) { 10316 if (perf_link->legacy_is_kprobe) { 10317 err = remove_kprobe_event_legacy(perf_link->legacy_probe_name, 10318 perf_link->legacy_is_retprobe); 10319 } else { 10320 err = remove_uprobe_event_legacy(perf_link->legacy_probe_name, 10321 perf_link->legacy_is_retprobe); 10322 } 10323 } 10324 10325 return err; 10326 } 10327 10328 static void bpf_link_perf_dealloc(struct bpf_link *link) 10329 { 10330 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 10331 10332 free(perf_link->legacy_probe_name); 10333 free(perf_link); 10334 } 10335 10336 struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, 10337 const struct bpf_perf_event_opts *opts) 10338 { 10339 char errmsg[STRERR_BUFSIZE]; 10340 struct bpf_link_perf *link; 10341 int prog_fd, link_fd = -1, err; 10342 bool force_ioctl_attach; 10343 10344 if (!OPTS_VALID(opts, bpf_perf_event_opts)) 10345 return libbpf_err_ptr(-EINVAL); 10346 10347 if (pfd < 0) { 10348 pr_warn("prog '%s': invalid perf event FD %d\n", 10349 prog->name, pfd); 10350 return libbpf_err_ptr(-EINVAL); 10351 } 10352 prog_fd = bpf_program__fd(prog); 10353 if (prog_fd < 0) { 10354 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n", 10355 prog->name); 10356 return libbpf_err_ptr(-EINVAL); 10357 } 10358 10359 link = calloc(1, sizeof(*link)); 10360 if (!link) 10361 return libbpf_err_ptr(-ENOMEM); 10362 link->link.detach = &bpf_link_perf_detach; 10363 link->link.dealloc = &bpf_link_perf_dealloc; 10364 link->perf_event_fd 
= pfd; 10365 10366 force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false); 10367 if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) { 10368 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts, 10369 .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0)); 10370 10371 link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts); 10372 if (link_fd < 0) { 10373 err = -errno; 10374 pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n", 10375 prog->name, pfd, 10376 err, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10377 goto err_out; 10378 } 10379 link->link.fd = link_fd; 10380 } else { 10381 if (OPTS_GET(opts, bpf_cookie, 0)) { 10382 pr_warn("prog '%s': user context value is not supported\n", prog->name); 10383 err = -EOPNOTSUPP; 10384 goto err_out; 10385 } 10386 10387 if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { 10388 err = -errno; 10389 pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n", 10390 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10391 if (err == -EPROTO) 10392 pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", 10393 prog->name, pfd); 10394 goto err_out; 10395 } 10396 link->link.fd = pfd; 10397 } 10398 if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { 10399 err = -errno; 10400 pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", 10401 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10402 goto err_out; 10403 } 10404 10405 return &link->link; 10406 err_out: 10407 if (link_fd >= 0) 10408 close(link_fd); 10409 free(link); 10410 return libbpf_err_ptr(err); 10411 } 10412 10413 struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd) 10414 { 10415 return bpf_program__attach_perf_event_opts(prog, pfd, NULL); 10416 } 10417 10418 /* 10419 * this function is expected to parse integer in the range of [0, 2^31-1] from 10420 * given file using scanf format string fmt. If actual parsed value is 10421 * negative, the result might be indistinguishable from error 10422 */ 10423 static int parse_uint_from_file(const char *file, const char *fmt) 10424 { 10425 char buf[STRERR_BUFSIZE]; 10426 int err, ret; 10427 FILE *f; 10428 10429 f = fopen(file, "re"); 10430 if (!f) { 10431 err = -errno; 10432 pr_debug("failed to open '%s': %s\n", file, 10433 libbpf_strerror_r(err, buf, sizeof(buf))); 10434 return err; 10435 } 10436 err = fscanf(f, fmt, &ret); 10437 if (err != 1) { 10438 err = err == EOF ? 
-EIO : -errno; 10439 pr_debug("failed to parse '%s': %s\n", file, 10440 libbpf_strerror_r(err, buf, sizeof(buf))); 10441 fclose(f); 10442 return err; 10443 } 10444 fclose(f); 10445 return ret; 10446 } 10447 10448 static int determine_kprobe_perf_type(void) 10449 { 10450 const char *file = "/sys/bus/event_source/devices/kprobe/type"; 10451 10452 return parse_uint_from_file(file, "%d\n"); 10453 } 10454 10455 static int determine_uprobe_perf_type(void) 10456 { 10457 const char *file = "/sys/bus/event_source/devices/uprobe/type"; 10458 10459 return parse_uint_from_file(file, "%d\n"); 10460 } 10461 10462 static int determine_kprobe_retprobe_bit(void) 10463 { 10464 const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe"; 10465 10466 return parse_uint_from_file(file, "config:%d\n"); 10467 } 10468 10469 static int determine_uprobe_retprobe_bit(void) 10470 { 10471 const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe"; 10472 10473 return parse_uint_from_file(file, "config:%d\n"); 10474 } 10475 10476 #define PERF_UPROBE_REF_CTR_OFFSET_BITS 32 10477 #define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32 10478 10479 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, 10480 uint64_t offset, int pid, size_t ref_ctr_off) 10481 { 10482 const size_t attr_sz = sizeof(struct perf_event_attr); 10483 struct perf_event_attr attr; 10484 char errmsg[STRERR_BUFSIZE]; 10485 int type, pfd; 10486 10487 if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) 10488 return -EINVAL; 10489 10490 memset(&attr, 0, attr_sz); 10491 10492 type = uprobe ? determine_uprobe_perf_type() 10493 : determine_kprobe_perf_type(); 10494 if (type < 0) { 10495 pr_warn("failed to determine %s perf type: %s\n", 10496 uprobe ? "uprobe" : "kprobe", 10497 libbpf_strerror_r(type, errmsg, sizeof(errmsg))); 10498 return type; 10499 } 10500 if (retprobe) { 10501 int bit = uprobe ? determine_uprobe_retprobe_bit() 10502 : determine_kprobe_retprobe_bit(); 10503 10504 if (bit < 0) { 10505 pr_warn("failed to determine %s retprobe bit: %s\n", 10506 uprobe ? "uprobe" : "kprobe", 10507 libbpf_strerror_r(bit, errmsg, sizeof(errmsg))); 10508 return bit; 10509 } 10510 attr.config |= 1 << bit; 10511 } 10512 attr.size = attr_sz; 10513 attr.type = type; 10514 attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT; 10515 attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */ 10516 attr.config2 = offset; /* kprobe_addr or probe_offset */ 10517 10518 /* pid filter is meaningful only for uprobes */ 10519 pfd = syscall(__NR_perf_event_open, &attr, 10520 pid < 0 ? -1 : pid /* pid */, 10521 pid == -1 ? 0 : -1 /* cpu */, 10522 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 10523 return pfd >= 0 ? pfd : -errno; 10524 } 10525 10526 static int append_to_file(const char *file, const char *fmt, ...) 
10527 { 10528 int fd, n, err = 0; 10529 va_list ap; 10530 char buf[1024]; 10531 10532 va_start(ap, fmt); 10533 n = vsnprintf(buf, sizeof(buf), fmt, ap); 10534 va_end(ap); 10535 10536 if (n < 0 || n >= sizeof(buf)) 10537 return -EINVAL; 10538 10539 fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0); 10540 if (fd < 0) 10541 return -errno; 10542 10543 if (write(fd, buf, n) < 0) 10544 err = -errno; 10545 10546 close(fd); 10547 return err; 10548 } 10549 10550 #define DEBUGFS "/sys/kernel/debug/tracing" 10551 #define TRACEFS "/sys/kernel/tracing" 10552 10553 static bool use_debugfs(void) 10554 { 10555 static int has_debugfs = -1; 10556 10557 if (has_debugfs < 0) 10558 has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0; 10559 10560 return has_debugfs == 1; 10561 } 10562 10563 static const char *tracefs_path(void) 10564 { 10565 return use_debugfs() ? DEBUGFS : TRACEFS; 10566 } 10567 10568 static const char *tracefs_kprobe_events(void) 10569 { 10570 return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events"; 10571 } 10572 10573 static const char *tracefs_uprobe_events(void) 10574 { 10575 return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events"; 10576 } 10577 10578 static const char *tracefs_available_filter_functions(void) 10579 { 10580 return use_debugfs() ? DEBUGFS"/available_filter_functions" 10581 : TRACEFS"/available_filter_functions"; 10582 } 10583 10584 static const char *tracefs_available_filter_functions_addrs(void) 10585 { 10586 return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs" 10587 : TRACEFS"/available_filter_functions_addrs"; 10588 } 10589 10590 static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, 10591 const char *kfunc_name, size_t offset) 10592 { 10593 static int index = 0; 10594 int i; 10595 10596 snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, 10597 __sync_fetch_and_add(&index, 1)); 10598 10599 /* sanitize binary_path in the probe name */ 10600 for (i = 0; buf[i]; i++) { 10601 if (!isalnum(buf[i])) 10602 buf[i] = '_'; 10603 } 10604 } 10605 10606 static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, 10607 const char *kfunc_name, size_t offset) 10608 { 10609 return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx", 10610 retprobe ? 'r' : 'p', 10611 retprobe ? "kretprobes" : "kprobes", 10612 probe_name, kfunc_name, offset); 10613 } 10614 10615 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe) 10616 { 10617 return append_to_file(tracefs_kprobe_events(), "-:%s/%s", 10618 retprobe ? "kretprobes" : "kprobes", probe_name); 10619 } 10620 10621 static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe) 10622 { 10623 char file[256]; 10624 10625 snprintf(file, sizeof(file), "%s/events/%s/%s/id", 10626 tracefs_path(), retprobe ? 
"kretprobes" : "kprobes", probe_name); 10627 10628 return parse_uint_from_file(file, "%d\n"); 10629 } 10630 10631 static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, 10632 const char *kfunc_name, size_t offset, int pid) 10633 { 10634 const size_t attr_sz = sizeof(struct perf_event_attr); 10635 struct perf_event_attr attr; 10636 char errmsg[STRERR_BUFSIZE]; 10637 int type, pfd, err; 10638 10639 err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset); 10640 if (err < 0) { 10641 pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n", 10642 kfunc_name, offset, 10643 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10644 return err; 10645 } 10646 type = determine_kprobe_perf_type_legacy(probe_name, retprobe); 10647 if (type < 0) { 10648 err = type; 10649 pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n", 10650 kfunc_name, offset, 10651 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10652 goto err_clean_legacy; 10653 } 10654 10655 memset(&attr, 0, attr_sz); 10656 attr.size = attr_sz; 10657 attr.config = type; 10658 attr.type = PERF_TYPE_TRACEPOINT; 10659 10660 pfd = syscall(__NR_perf_event_open, &attr, 10661 pid < 0 ? -1 : pid, /* pid */ 10662 pid == -1 ? 0 : -1, /* cpu */ 10663 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 10664 if (pfd < 0) { 10665 err = -errno; 10666 pr_warn("legacy kprobe perf_event_open() failed: %s\n", 10667 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10668 goto err_clean_legacy; 10669 } 10670 return pfd; 10671 10672 err_clean_legacy: 10673 /* Clear the newly added legacy kprobe_event */ 10674 remove_kprobe_event_legacy(probe_name, retprobe); 10675 return err; 10676 } 10677 10678 static const char *arch_specific_syscall_pfx(void) 10679 { 10680 #if defined(__x86_64__) 10681 return "x64"; 10682 #elif defined(__i386__) 10683 return "ia32"; 10684 #elif defined(__s390x__) 10685 return "s390x"; 10686 #elif defined(__s390__) 10687 return "s390"; 10688 #elif defined(__arm__) 10689 return "arm"; 10690 #elif defined(__aarch64__) 10691 return "arm64"; 10692 #elif defined(__mips__) 10693 return "mips"; 10694 #elif defined(__riscv) 10695 return "riscv"; 10696 #elif defined(__powerpc__) 10697 return "powerpc"; 10698 #elif defined(__powerpc64__) 10699 return "powerpc64"; 10700 #else 10701 return NULL; 10702 #endif 10703 } 10704 10705 int probe_kern_syscall_wrapper(int token_fd) 10706 { 10707 char syscall_name[64]; 10708 const char *ksys_pfx; 10709 10710 ksys_pfx = arch_specific_syscall_pfx(); 10711 if (!ksys_pfx) 10712 return 0; 10713 10714 snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); 10715 10716 if (determine_kprobe_perf_type() >= 0) { 10717 int pfd; 10718 10719 pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0); 10720 if (pfd >= 0) 10721 close(pfd); 10722 10723 return pfd >= 0 ? 
1 : 0; 10724 } else { /* legacy mode */ 10725 char probe_name[128]; 10726 10727 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); 10728 if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) 10729 return 0; 10730 10731 (void)remove_kprobe_event_legacy(probe_name, false); 10732 return 1; 10733 } 10734 } 10735 10736 struct bpf_link * 10737 bpf_program__attach_kprobe_opts(const struct bpf_program *prog, 10738 const char *func_name, 10739 const struct bpf_kprobe_opts *opts) 10740 { 10741 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); 10742 enum probe_attach_mode attach_mode; 10743 char errmsg[STRERR_BUFSIZE]; 10744 char *legacy_probe = NULL; 10745 struct bpf_link *link; 10746 size_t offset; 10747 bool retprobe, legacy; 10748 int pfd, err; 10749 10750 if (!OPTS_VALID(opts, bpf_kprobe_opts)) 10751 return libbpf_err_ptr(-EINVAL); 10752 10753 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT); 10754 retprobe = OPTS_GET(opts, retprobe, false); 10755 offset = OPTS_GET(opts, offset, 0); 10756 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 10757 10758 legacy = determine_kprobe_perf_type() < 0; 10759 switch (attach_mode) { 10760 case PROBE_ATTACH_MODE_LEGACY: 10761 legacy = true; 10762 pe_opts.force_ioctl_attach = true; 10763 break; 10764 case PROBE_ATTACH_MODE_PERF: 10765 if (legacy) 10766 return libbpf_err_ptr(-ENOTSUP); 10767 pe_opts.force_ioctl_attach = true; 10768 break; 10769 case PROBE_ATTACH_MODE_LINK: 10770 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK)) 10771 return libbpf_err_ptr(-ENOTSUP); 10772 break; 10773 case PROBE_ATTACH_MODE_DEFAULT: 10774 break; 10775 default: 10776 return libbpf_err_ptr(-EINVAL); 10777 } 10778 10779 if (!legacy) { 10780 pfd = perf_event_open_probe(false /* uprobe */, retprobe, 10781 func_name, offset, 10782 -1 /* pid */, 0 /* ref_ctr_off */); 10783 } else { 10784 char probe_name[256]; 10785 10786 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), 10787 func_name, offset); 10788 10789 legacy_probe = strdup(probe_name); 10790 if (!legacy_probe) 10791 return libbpf_err_ptr(-ENOMEM); 10792 10793 pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name, 10794 offset, -1 /* pid */); 10795 } 10796 if (pfd < 0) { 10797 err = -errno; 10798 pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n", 10799 prog->name, retprobe ? "kretprobe" : "kprobe", 10800 func_name, offset, 10801 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10802 goto err_out; 10803 } 10804 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); 10805 err = libbpf_get_error(link); 10806 if (err) { 10807 close(pfd); 10808 pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n", 10809 prog->name, retprobe ? 
"kretprobe" : "kprobe", 10810 func_name, offset, 10811 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 10812 goto err_clean_legacy; 10813 } 10814 if (legacy) { 10815 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 10816 10817 perf_link->legacy_probe_name = legacy_probe; 10818 perf_link->legacy_is_kprobe = true; 10819 perf_link->legacy_is_retprobe = retprobe; 10820 } 10821 10822 return link; 10823 10824 err_clean_legacy: 10825 if (legacy) 10826 remove_kprobe_event_legacy(legacy_probe, retprobe); 10827 err_out: 10828 free(legacy_probe); 10829 return libbpf_err_ptr(err); 10830 } 10831 10832 struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog, 10833 bool retprobe, 10834 const char *func_name) 10835 { 10836 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts, 10837 .retprobe = retprobe, 10838 ); 10839 10840 return bpf_program__attach_kprobe_opts(prog, func_name, &opts); 10841 } 10842 10843 struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, 10844 const char *syscall_name, 10845 const struct bpf_ksyscall_opts *opts) 10846 { 10847 LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); 10848 char func_name[128]; 10849 10850 if (!OPTS_VALID(opts, bpf_ksyscall_opts)) 10851 return libbpf_err_ptr(-EINVAL); 10852 10853 if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) { 10854 /* arch_specific_syscall_pfx() should never return NULL here 10855 * because it is guarded by kernel_supports(). However, since 10856 * compiler does not know that we have an explicit conditional 10857 * as well. 10858 */ 10859 snprintf(func_name, sizeof(func_name), "__%s_sys_%s", 10860 arch_specific_syscall_pfx() ? : "", syscall_name); 10861 } else { 10862 snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); 10863 } 10864 10865 kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false); 10866 kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 10867 10868 return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts); 10869 } 10870 10871 /* Adapted from perf/util/string.c */ 10872 bool glob_match(const char *str, const char *pat) 10873 { 10874 while (*str && *pat && *pat != '*') { 10875 if (*pat == '?') { /* Matches any single character */ 10876 str++; 10877 pat++; 10878 continue; 10879 } 10880 if (*str != *pat) 10881 return false; 10882 str++; 10883 pat++; 10884 } 10885 /* Check wild card */ 10886 if (*pat == '*') { 10887 while (*pat == '*') 10888 pat++; 10889 if (!*pat) /* Tail wild card matches all */ 10890 return true; 10891 while (*str) 10892 if (glob_match(str++, pat)) 10893 return true; 10894 } 10895 return !*str && !*pat; 10896 } 10897 10898 struct kprobe_multi_resolve { 10899 const char *pattern; 10900 unsigned long *addrs; 10901 size_t cap; 10902 size_t cnt; 10903 }; 10904 10905 struct avail_kallsyms_data { 10906 char **syms; 10907 size_t cnt; 10908 struct kprobe_multi_resolve *res; 10909 }; 10910 10911 static int avail_func_cmp(const void *a, const void *b) 10912 { 10913 return strcmp(*(const char **)a, *(const char **)b); 10914 } 10915 10916 static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type, 10917 const char *sym_name, void *ctx) 10918 { 10919 struct avail_kallsyms_data *data = ctx; 10920 struct kprobe_multi_resolve *res = data->res; 10921 int err; 10922 10923 if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) 10924 return 0; 10925 10926 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1); 10927 if (err) 10928 return err; 10929 10930 
res->addrs[res->cnt++] = (unsigned long)sym_addr; 10931 return 0; 10932 } 10933 10934 static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res) 10935 { 10936 const char *available_functions_file = tracefs_available_filter_functions(); 10937 struct avail_kallsyms_data data; 10938 char sym_name[500]; 10939 FILE *f; 10940 int err = 0, ret, i; 10941 char **syms = NULL; 10942 size_t cap = 0, cnt = 0; 10943 10944 f = fopen(available_functions_file, "re"); 10945 if (!f) { 10946 err = -errno; 10947 pr_warn("failed to open %s: %d\n", available_functions_file, err); 10948 return err; 10949 } 10950 10951 while (true) { 10952 char *name; 10953 10954 ret = fscanf(f, "%499s%*[^\n]\n", sym_name); 10955 if (ret == EOF && feof(f)) 10956 break; 10957 10958 if (ret != 1) { 10959 pr_warn("failed to parse available_filter_functions entry: %d\n", ret); 10960 err = -EINVAL; 10961 goto cleanup; 10962 } 10963 10964 if (!glob_match(sym_name, res->pattern)) 10965 continue; 10966 10967 err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1); 10968 if (err) 10969 goto cleanup; 10970 10971 name = strdup(sym_name); 10972 if (!name) { 10973 err = -errno; 10974 goto cleanup; 10975 } 10976 10977 syms[cnt++] = name; 10978 } 10979 10980 /* no entries found, bail out */ 10981 if (cnt == 0) { 10982 err = -ENOENT; 10983 goto cleanup; 10984 } 10985 10986 /* sort available functions */ 10987 qsort(syms, cnt, sizeof(*syms), avail_func_cmp); 10988 10989 data.syms = syms; 10990 data.res = res; 10991 data.cnt = cnt; 10992 libbpf_kallsyms_parse(avail_kallsyms_cb, &data); 10993 10994 if (res->cnt == 0) 10995 err = -ENOENT; 10996 10997 cleanup: 10998 for (i = 0; i < cnt; i++) 10999 free((char *)syms[i]); 11000 free(syms); 11001 11002 fclose(f); 11003 return err; 11004 } 11005 11006 static bool has_available_filter_functions_addrs(void) 11007 { 11008 return access(tracefs_available_filter_functions_addrs(), R_OK) != -1; 11009 } 11010 11011 static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res) 11012 { 11013 const char *available_path = tracefs_available_filter_functions_addrs(); 11014 char sym_name[500]; 11015 FILE *f; 11016 int ret, err = 0; 11017 unsigned long long sym_addr; 11018 11019 f = fopen(available_path, "re"); 11020 if (!f) { 11021 err = -errno; 11022 pr_warn("failed to open %s: %d\n", available_path, err); 11023 return err; 11024 } 11025 11026 while (true) { 11027 ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name); 11028 if (ret == EOF && feof(f)) 11029 break; 11030 11031 if (ret != 2) { 11032 pr_warn("failed to parse available_filter_functions_addrs entry: %d\n", 11033 ret); 11034 err = -EINVAL; 11035 goto cleanup; 11036 } 11037 11038 if (!glob_match(sym_name, res->pattern)) 11039 continue; 11040 11041 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, 11042 sizeof(*res->addrs), res->cnt + 1); 11043 if (err) 11044 goto cleanup; 11045 11046 res->addrs[res->cnt++] = (unsigned long)sym_addr; 11047 } 11048 11049 if (res->cnt == 0) 11050 err = -ENOENT; 11051 11052 cleanup: 11053 fclose(f); 11054 return err; 11055 } 11056 11057 struct bpf_link * 11058 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, 11059 const char *pattern, 11060 const struct bpf_kprobe_multi_opts *opts) 11061 { 11062 LIBBPF_OPTS(bpf_link_create_opts, lopts); 11063 struct kprobe_multi_resolve res = { 11064 .pattern = pattern, 11065 }; 11066 struct bpf_link *link = NULL; 11067 char errmsg[STRERR_BUFSIZE]; 11068 const unsigned long *addrs; 11069 int err, link_fd, prog_fd; 
11070 const __u64 *cookies; 11071 const char **syms; 11072 bool retprobe; 11073 size_t cnt; 11074 11075 if (!OPTS_VALID(opts, bpf_kprobe_multi_opts)) 11076 return libbpf_err_ptr(-EINVAL); 11077 11078 syms = OPTS_GET(opts, syms, false); 11079 addrs = OPTS_GET(opts, addrs, false); 11080 cnt = OPTS_GET(opts, cnt, false); 11081 cookies = OPTS_GET(opts, cookies, false); 11082 11083 if (!pattern && !addrs && !syms) 11084 return libbpf_err_ptr(-EINVAL); 11085 if (pattern && (addrs || syms || cookies || cnt)) 11086 return libbpf_err_ptr(-EINVAL); 11087 if (!pattern && !cnt) 11088 return libbpf_err_ptr(-EINVAL); 11089 if (addrs && syms) 11090 return libbpf_err_ptr(-EINVAL); 11091 11092 if (pattern) { 11093 if (has_available_filter_functions_addrs()) 11094 err = libbpf_available_kprobes_parse(&res); 11095 else 11096 err = libbpf_available_kallsyms_parse(&res); 11097 if (err) 11098 goto error; 11099 addrs = res.addrs; 11100 cnt = res.cnt; 11101 } 11102 11103 retprobe = OPTS_GET(opts, retprobe, false); 11104 11105 lopts.kprobe_multi.syms = syms; 11106 lopts.kprobe_multi.addrs = addrs; 11107 lopts.kprobe_multi.cookies = cookies; 11108 lopts.kprobe_multi.cnt = cnt; 11109 lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0; 11110 11111 link = calloc(1, sizeof(*link)); 11112 if (!link) { 11113 err = -ENOMEM; 11114 goto error; 11115 } 11116 link->detach = &bpf_link__detach_fd; 11117 11118 prog_fd = bpf_program__fd(prog); 11119 link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts); 11120 if (link_fd < 0) { 11121 err = -errno; 11122 pr_warn("prog '%s': failed to attach: %s\n", 11123 prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11124 goto error; 11125 } 11126 link->fd = link_fd; 11127 free(res.addrs); 11128 return link; 11129 11130 error: 11131 free(link); 11132 free(res.addrs); 11133 return libbpf_err_ptr(err); 11134 } 11135 11136 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11137 { 11138 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); 11139 unsigned long offset = 0; 11140 const char *func_name; 11141 char *func; 11142 int n; 11143 11144 *link = NULL; 11145 11146 /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */ 11147 if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0) 11148 return 0; 11149 11150 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/"); 11151 if (opts.retprobe) 11152 func_name = prog->sec_name + sizeof("kretprobe/") - 1; 11153 else 11154 func_name = prog->sec_name + sizeof("kprobe/") - 1; 11155 11156 n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); 11157 if (n < 1) { 11158 pr_warn("kprobe name is invalid: %s\n", func_name); 11159 return -EINVAL; 11160 } 11161 if (opts.retprobe && offset != 0) { 11162 free(func); 11163 pr_warn("kretprobes do not support offset specification\n"); 11164 return -EINVAL; 11165 } 11166 11167 opts.offset = offset; 11168 *link = bpf_program__attach_kprobe_opts(prog, func, &opts); 11169 free(func); 11170 return libbpf_get_error(*link); 11171 } 11172 11173 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11174 { 11175 LIBBPF_OPTS(bpf_ksyscall_opts, opts); 11176 const char *syscall_name; 11177 11178 *link = NULL; 11179 11180 /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */ 11181 if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0) 11182 return 0; 11183 11184 opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/"); 
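	/* strip the "ksyscall/" or "kretsyscall/" prefix to get the bare syscall name */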
11185 if (opts.retprobe) 11186 syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1; 11187 else 11188 syscall_name = prog->sec_name + sizeof("ksyscall/") - 1; 11189 11190 *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts); 11191 return *link ? 0 : -errno; 11192 } 11193 11194 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11195 { 11196 LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); 11197 const char *spec; 11198 char *pattern; 11199 int n; 11200 11201 *link = NULL; 11202 11203 /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */ 11204 if (strcmp(prog->sec_name, "kprobe.multi") == 0 || 11205 strcmp(prog->sec_name, "kretprobe.multi") == 0) 11206 return 0; 11207 11208 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/"); 11209 if (opts.retprobe) 11210 spec = prog->sec_name + sizeof("kretprobe.multi/") - 1; 11211 else 11212 spec = prog->sec_name + sizeof("kprobe.multi/") - 1; 11213 11214 n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern); 11215 if (n < 1) { 11216 pr_warn("kprobe multi pattern is invalid: %s\n", pattern); 11217 return -EINVAL; 11218 } 11219 11220 *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts); 11221 free(pattern); 11222 return libbpf_get_error(*link); 11223 } 11224 11225 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11226 { 11227 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; 11228 LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); 11229 int n, ret = -EINVAL; 11230 11231 *link = NULL; 11232 11233 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", 11234 &probe_type, &binary_path, &func_name); 11235 switch (n) { 11236 case 1: 11237 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ 11238 ret = 0; 11239 break; 11240 case 3: 11241 opts.retprobe = strcmp(probe_type, "uretprobe.multi") == 0; 11242 *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts); 11243 ret = libbpf_get_error(*link); 11244 break; 11245 default: 11246 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, 11247 prog->sec_name); 11248 break; 11249 } 11250 free(probe_type); 11251 free(binary_path); 11252 free(func_name); 11253 return ret; 11254 } 11255 11256 static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, 11257 const char *binary_path, uint64_t offset) 11258 { 11259 int i; 11260 11261 snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset); 11262 11263 /* sanitize binary_path in the probe name */ 11264 for (i = 0; buf[i]; i++) { 11265 if (!isalnum(buf[i])) 11266 buf[i] = '_'; 11267 } 11268 } 11269 11270 static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, 11271 const char *binary_path, size_t offset) 11272 { 11273 return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx", 11274 retprobe ? 'r' : 'p', 11275 retprobe ? "uretprobes" : "uprobes", 11276 probe_name, binary_path, offset); 11277 } 11278 11279 static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe) 11280 { 11281 return append_to_file(tracefs_uprobe_events(), "-:%s/%s", 11282 retprobe ? "uretprobes" : "uprobes", probe_name); 11283 } 11284 11285 static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe) 11286 { 11287 char file[512]; 11288 11289 snprintf(file, sizeof(file), "%s/events/%s/%s/id", 11290 tracefs_path(), retprobe ? 
"uretprobes" : "uprobes", probe_name); 11291 11292 return parse_uint_from_file(file, "%d\n"); 11293 } 11294 11295 static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, 11296 const char *binary_path, size_t offset, int pid) 11297 { 11298 const size_t attr_sz = sizeof(struct perf_event_attr); 11299 struct perf_event_attr attr; 11300 int type, pfd, err; 11301 11302 err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset); 11303 if (err < 0) { 11304 pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n", 11305 binary_path, (size_t)offset, err); 11306 return err; 11307 } 11308 type = determine_uprobe_perf_type_legacy(probe_name, retprobe); 11309 if (type < 0) { 11310 err = type; 11311 pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n", 11312 binary_path, offset, err); 11313 goto err_clean_legacy; 11314 } 11315 11316 memset(&attr, 0, attr_sz); 11317 attr.size = attr_sz; 11318 attr.config = type; 11319 attr.type = PERF_TYPE_TRACEPOINT; 11320 11321 pfd = syscall(__NR_perf_event_open, &attr, 11322 pid < 0 ? -1 : pid, /* pid */ 11323 pid == -1 ? 0 : -1, /* cpu */ 11324 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 11325 if (pfd < 0) { 11326 err = -errno; 11327 pr_warn("legacy uprobe perf_event_open() failed: %d\n", err); 11328 goto err_clean_legacy; 11329 } 11330 return pfd; 11331 11332 err_clean_legacy: 11333 /* Clear the newly added legacy uprobe_event */ 11334 remove_uprobe_event_legacy(probe_name, retprobe); 11335 return err; 11336 } 11337 11338 /* Find offset of function name in archive specified by path. Currently 11339 * supported are .zip files that do not compress their contents, as used on 11340 * Android in the form of APKs, for example. "file_name" is the name of the ELF 11341 * file inside the archive. "func_name" matches symbol name or name@@LIB for 11342 * library functions. 
11343 * 11344 * An overview of the APK format specifically provided here: 11345 * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents 11346 */ 11347 static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name, 11348 const char *func_name) 11349 { 11350 struct zip_archive *archive; 11351 struct zip_entry entry; 11352 long ret; 11353 Elf *elf; 11354 11355 archive = zip_archive_open(archive_path); 11356 if (IS_ERR(archive)) { 11357 ret = PTR_ERR(archive); 11358 pr_warn("zip: failed to open %s: %ld\n", archive_path, ret); 11359 return ret; 11360 } 11361 11362 ret = zip_archive_find_entry(archive, file_name, &entry); 11363 if (ret) { 11364 pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name, 11365 archive_path, ret); 11366 goto out; 11367 } 11368 pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path, 11369 (unsigned long)entry.data_offset); 11370 11371 if (entry.compression) { 11372 pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name, 11373 archive_path); 11374 ret = -LIBBPF_ERRNO__FORMAT; 11375 goto out; 11376 } 11377 11378 elf = elf_memory((void *)entry.data, entry.data_length); 11379 if (!elf) { 11380 pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path, 11381 elf_errmsg(-1)); 11382 ret = -LIBBPF_ERRNO__LIBELF; 11383 goto out; 11384 } 11385 11386 ret = elf_find_func_offset(elf, file_name, func_name); 11387 if (ret > 0) { 11388 pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n", 11389 func_name, file_name, archive_path, entry.data_offset, ret, 11390 ret + entry.data_offset); 11391 ret += entry.data_offset; 11392 } 11393 elf_end(elf); 11394 11395 out: 11396 zip_archive_close(archive); 11397 return ret; 11398 } 11399 11400 static const char *arch_specific_lib_paths(void) 11401 { 11402 /* 11403 * Based on https://packages.debian.org/sid/libc6. 11404 * 11405 * Assume that the traced program is built for the same architecture 11406 * as libbpf, which should cover the vast majority of cases. 11407 */ 11408 #if defined(__x86_64__) 11409 return "/lib/x86_64-linux-gnu"; 11410 #elif defined(__i386__) 11411 return "/lib/i386-linux-gnu"; 11412 #elif defined(__s390x__) 11413 return "/lib/s390x-linux-gnu"; 11414 #elif defined(__s390__) 11415 return "/lib/s390-linux-gnu"; 11416 #elif defined(__arm__) && defined(__SOFTFP__) 11417 return "/lib/arm-linux-gnueabi"; 11418 #elif defined(__arm__) && !defined(__SOFTFP__) 11419 return "/lib/arm-linux-gnueabihf"; 11420 #elif defined(__aarch64__) 11421 return "/lib/aarch64-linux-gnu"; 11422 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64 11423 return "/lib/mips64el-linux-gnuabi64"; 11424 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32 11425 return "/lib/mipsel-linux-gnu"; 11426 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 11427 return "/lib/powerpc64le-linux-gnu"; 11428 #elif defined(__sparc__) && defined(__arch64__) 11429 return "/lib/sparc64-linux-gnu"; 11430 #elif defined(__riscv) && __riscv_xlen == 64 11431 return "/lib/riscv64-linux-gnu"; 11432 #else 11433 return NULL; 11434 #endif 11435 } 11436 11437 /* Get full path to program/shared library. 
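 *
 * Illustrative behaviour of the helper below: a bare library name such as
 * "libc.so.6" is searched in LD_LIBRARY_PATH, then in /usr/lib64:/usr/lib,
 * then in the architecture-specific directory returned above, while a bare
 * binary name such as "sleep" is searched in PATH and then in
 * /usr/bin:/usr/sbin. Callers only invoke it for names without a '/'.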
*/ 11438 static int resolve_full_path(const char *file, char *result, size_t result_sz) 11439 { 11440 const char *search_paths[3] = {}; 11441 int i, perm; 11442 11443 if (str_has_sfx(file, ".so") || strstr(file, ".so.")) { 11444 search_paths[0] = getenv("LD_LIBRARY_PATH"); 11445 search_paths[1] = "/usr/lib64:/usr/lib"; 11446 search_paths[2] = arch_specific_lib_paths(); 11447 perm = R_OK; 11448 } else { 11449 search_paths[0] = getenv("PATH"); 11450 search_paths[1] = "/usr/bin:/usr/sbin"; 11451 perm = R_OK | X_OK; 11452 } 11453 11454 for (i = 0; i < ARRAY_SIZE(search_paths); i++) { 11455 const char *s; 11456 11457 if (!search_paths[i]) 11458 continue; 11459 for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) { 11460 char *next_path; 11461 int seg_len; 11462 11463 if (s[0] == ':') 11464 s++; 11465 next_path = strchr(s, ':'); 11466 seg_len = next_path ? next_path - s : strlen(s); 11467 if (!seg_len) 11468 continue; 11469 snprintf(result, result_sz, "%.*s/%s", seg_len, s, file); 11470 /* ensure it has required permissions */ 11471 if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0) 11472 continue; 11473 pr_debug("resolved '%s' to '%s'\n", file, result); 11474 return 0; 11475 } 11476 } 11477 return -ENOENT; 11478 } 11479 11480 struct bpf_link * 11481 bpf_program__attach_uprobe_multi(const struct bpf_program *prog, 11482 pid_t pid, 11483 const char *path, 11484 const char *func_pattern, 11485 const struct bpf_uprobe_multi_opts *opts) 11486 { 11487 const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL; 11488 LIBBPF_OPTS(bpf_link_create_opts, lopts); 11489 unsigned long *resolved_offsets = NULL; 11490 int err = 0, link_fd, prog_fd; 11491 struct bpf_link *link = NULL; 11492 char errmsg[STRERR_BUFSIZE]; 11493 char full_path[PATH_MAX]; 11494 const __u64 *cookies; 11495 const char **syms; 11496 size_t cnt; 11497 11498 if (!OPTS_VALID(opts, bpf_uprobe_multi_opts)) 11499 return libbpf_err_ptr(-EINVAL); 11500 11501 syms = OPTS_GET(opts, syms, NULL); 11502 offsets = OPTS_GET(opts, offsets, NULL); 11503 ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL); 11504 cookies = OPTS_GET(opts, cookies, NULL); 11505 cnt = OPTS_GET(opts, cnt, 0); 11506 11507 /* 11508 * User can specify 2 mutually exclusive set of inputs: 11509 * 11510 * 1) use only path/func_pattern/pid arguments 11511 * 11512 * 2) use path/pid with allowed combinations of: 11513 * syms/offsets/ref_ctr_offsets/cookies/cnt 11514 * 11515 * - syms and offsets are mutually exclusive 11516 * - ref_ctr_offsets and cookies are optional 11517 * 11518 * Any other usage results in error. 
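 *
 * A few illustrative combinations: path + func_pattern (e.g. "malloc*")
 * alone is valid; path + syms = {"malloc", "free"} with cnt = 2, optionally
 * plus cookies and/or ref_ctr_offsets, is valid; specifying both syms and
 * offsets, or combining func_pattern with any of the array inputs above, is
 * rejected with -EINVAL.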
11519 */ 11520 11521 if (!path) 11522 return libbpf_err_ptr(-EINVAL); 11523 if (!func_pattern && cnt == 0) 11524 return libbpf_err_ptr(-EINVAL); 11525 11526 if (func_pattern) { 11527 if (syms || offsets || ref_ctr_offsets || cookies || cnt) 11528 return libbpf_err_ptr(-EINVAL); 11529 } else { 11530 if (!!syms == !!offsets) 11531 return libbpf_err_ptr(-EINVAL); 11532 } 11533 11534 if (func_pattern) { 11535 if (!strchr(path, '/')) { 11536 err = resolve_full_path(path, full_path, sizeof(full_path)); 11537 if (err) { 11538 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", 11539 prog->name, path, err); 11540 return libbpf_err_ptr(err); 11541 } 11542 path = full_path; 11543 } 11544 11545 err = elf_resolve_pattern_offsets(path, func_pattern, 11546 &resolved_offsets, &cnt); 11547 if (err < 0) 11548 return libbpf_err_ptr(err); 11549 offsets = resolved_offsets; 11550 } else if (syms) { 11551 err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC); 11552 if (err < 0) 11553 return libbpf_err_ptr(err); 11554 offsets = resolved_offsets; 11555 } 11556 11557 lopts.uprobe_multi.path = path; 11558 lopts.uprobe_multi.offsets = offsets; 11559 lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets; 11560 lopts.uprobe_multi.cookies = cookies; 11561 lopts.uprobe_multi.cnt = cnt; 11562 lopts.uprobe_multi.flags = OPTS_GET(opts, retprobe, false) ? BPF_F_UPROBE_MULTI_RETURN : 0; 11563 11564 if (pid == 0) 11565 pid = getpid(); 11566 if (pid > 0) 11567 lopts.uprobe_multi.pid = pid; 11568 11569 link = calloc(1, sizeof(*link)); 11570 if (!link) { 11571 err = -ENOMEM; 11572 goto error; 11573 } 11574 link->detach = &bpf_link__detach_fd; 11575 11576 prog_fd = bpf_program__fd(prog); 11577 link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &lopts); 11578 if (link_fd < 0) { 11579 err = -errno; 11580 pr_warn("prog '%s': failed to attach multi-uprobe: %s\n", 11581 prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11582 goto error; 11583 } 11584 link->fd = link_fd; 11585 free(resolved_offsets); 11586 return link; 11587 11588 error: 11589 free(resolved_offsets); 11590 free(link); 11591 return libbpf_err_ptr(err); 11592 } 11593 11594 LIBBPF_API struct bpf_link * 11595 bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, 11596 const char *binary_path, size_t func_offset, 11597 const struct bpf_uprobe_opts *opts) 11598 { 11599 const char *archive_path = NULL, *archive_sep = NULL; 11600 char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; 11601 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); 11602 enum probe_attach_mode attach_mode; 11603 char full_path[PATH_MAX]; 11604 struct bpf_link *link; 11605 size_t ref_ctr_off; 11606 int pfd, err; 11607 bool retprobe, legacy; 11608 const char *func_name; 11609 11610 if (!OPTS_VALID(opts, bpf_uprobe_opts)) 11611 return libbpf_err_ptr(-EINVAL); 11612 11613 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT); 11614 retprobe = OPTS_GET(opts, retprobe, false); 11615 ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); 11616 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 11617 11618 if (!binary_path) 11619 return libbpf_err_ptr(-EINVAL); 11620 11621 /* Check if "binary_path" refers to an archive. 
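 * For example (illustrative path only):
 * "/data/app/foo.apk!/lib/arm64-v8a/libfoo.so" is split at the "!/"
 * separator into the archive path "/data/app/foo.apk" and the path of the
 * ELF member inside it, "lib/arm64-v8a/libfoo.so".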
*/ 11622 archive_sep = strstr(binary_path, "!/"); 11623 if (archive_sep) { 11624 full_path[0] = '\0'; 11625 libbpf_strlcpy(full_path, binary_path, 11626 min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1))); 11627 archive_path = full_path; 11628 binary_path = archive_sep + 2; 11629 } else if (!strchr(binary_path, '/')) { 11630 err = resolve_full_path(binary_path, full_path, sizeof(full_path)); 11631 if (err) { 11632 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", 11633 prog->name, binary_path, err); 11634 return libbpf_err_ptr(err); 11635 } 11636 binary_path = full_path; 11637 } 11638 func_name = OPTS_GET(opts, func_name, NULL); 11639 if (func_name) { 11640 long sym_off; 11641 11642 if (archive_path) { 11643 sym_off = elf_find_func_offset_from_archive(archive_path, binary_path, 11644 func_name); 11645 binary_path = archive_path; 11646 } else { 11647 sym_off = elf_find_func_offset_from_file(binary_path, func_name); 11648 } 11649 if (sym_off < 0) 11650 return libbpf_err_ptr(sym_off); 11651 func_offset += sym_off; 11652 } 11653 11654 legacy = determine_uprobe_perf_type() < 0; 11655 switch (attach_mode) { 11656 case PROBE_ATTACH_MODE_LEGACY: 11657 legacy = true; 11658 pe_opts.force_ioctl_attach = true; 11659 break; 11660 case PROBE_ATTACH_MODE_PERF: 11661 if (legacy) 11662 return libbpf_err_ptr(-ENOTSUP); 11663 pe_opts.force_ioctl_attach = true; 11664 break; 11665 case PROBE_ATTACH_MODE_LINK: 11666 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK)) 11667 return libbpf_err_ptr(-ENOTSUP); 11668 break; 11669 case PROBE_ATTACH_MODE_DEFAULT: 11670 break; 11671 default: 11672 return libbpf_err_ptr(-EINVAL); 11673 } 11674 11675 if (!legacy) { 11676 pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, 11677 func_offset, pid, ref_ctr_off); 11678 } else { 11679 char probe_name[PATH_MAX + 64]; 11680 11681 if (ref_ctr_off) 11682 return libbpf_err_ptr(-EINVAL); 11683 11684 gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name), 11685 binary_path, func_offset); 11686 11687 legacy_probe = strdup(probe_name); 11688 if (!legacy_probe) 11689 return libbpf_err_ptr(-ENOMEM); 11690 11691 pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe, 11692 binary_path, func_offset, pid); 11693 } 11694 if (pfd < 0) { 11695 err = -errno; 11696 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", 11697 prog->name, retprobe ? "uretprobe" : "uprobe", 11698 binary_path, func_offset, 11699 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11700 goto err_out; 11701 } 11702 11703 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); 11704 err = libbpf_get_error(link); 11705 if (err) { 11706 close(pfd); 11707 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n", 11708 prog->name, retprobe ? 
"uretprobe" : "uprobe", 11709 binary_path, func_offset, 11710 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11711 goto err_clean_legacy; 11712 } 11713 if (legacy) { 11714 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 11715 11716 perf_link->legacy_probe_name = legacy_probe; 11717 perf_link->legacy_is_kprobe = false; 11718 perf_link->legacy_is_retprobe = retprobe; 11719 } 11720 return link; 11721 11722 err_clean_legacy: 11723 if (legacy) 11724 remove_uprobe_event_legacy(legacy_probe, retprobe); 11725 err_out: 11726 free(legacy_probe); 11727 return libbpf_err_ptr(err); 11728 } 11729 11730 /* Format of u[ret]probe section definition supporting auto-attach: 11731 * u[ret]probe/binary:function[+offset] 11732 * 11733 * binary can be an absolute/relative path or a filename; the latter is resolved to a 11734 * full binary path via bpf_program__attach_uprobe_opts. 11735 * 11736 * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be 11737 * specified (and auto-attach is not possible) or the above format is specified for 11738 * auto-attach. 11739 */ 11740 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11741 { 11742 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); 11743 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off; 11744 int n, c, ret = -EINVAL; 11745 long offset = 0; 11746 11747 *link = NULL; 11748 11749 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", 11750 &probe_type, &binary_path, &func_name); 11751 switch (n) { 11752 case 1: 11753 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ 11754 ret = 0; 11755 break; 11756 case 2: 11757 pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n", 11758 prog->name, prog->sec_name); 11759 break; 11760 case 3: 11761 /* check if user specifies `+offset`, if yes, this should be 11762 * the last part of the string, make sure sscanf read to EOL 11763 */ 11764 func_off = strrchr(func_name, '+'); 11765 if (func_off) { 11766 n = sscanf(func_off, "+%li%n", &offset, &c); 11767 if (n == 1 && *(func_off + c) == '\0') 11768 func_off[0] = '\0'; 11769 else 11770 offset = 0; 11771 } 11772 opts.retprobe = strcmp(probe_type, "uretprobe") == 0 || 11773 strcmp(probe_type, "uretprobe.s") == 0; 11774 if (opts.retprobe && offset != 0) { 11775 pr_warn("prog '%s': uretprobes do not support offset specification\n", 11776 prog->name); 11777 break; 11778 } 11779 opts.func_name = func_name; 11780 *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts); 11781 ret = libbpf_get_error(*link); 11782 break; 11783 default: 11784 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, 11785 prog->sec_name); 11786 break; 11787 } 11788 free(probe_type); 11789 free(binary_path); 11790 free(func_name); 11791 11792 return ret; 11793 } 11794 11795 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, 11796 bool retprobe, pid_t pid, 11797 const char *binary_path, 11798 size_t func_offset) 11799 { 11800 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe); 11801 11802 return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts); 11803 } 11804 11805 struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, 11806 pid_t pid, const char *binary_path, 11807 const char *usdt_provider, const char *usdt_name, 11808 const struct bpf_usdt_opts *opts) 11809 { 11810 char resolved_path[512]; 11811 struct 
bpf_object *obj = prog->obj; 11812 struct bpf_link *link; 11813 __u64 usdt_cookie; 11814 int err; 11815 11816 if (!OPTS_VALID(opts, bpf_uprobe_opts)) 11817 return libbpf_err_ptr(-EINVAL); 11818 11819 if (bpf_program__fd(prog) < 0) { 11820 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n", 11821 prog->name); 11822 return libbpf_err_ptr(-EINVAL); 11823 } 11824 11825 if (!binary_path) 11826 return libbpf_err_ptr(-EINVAL); 11827 11828 if (!strchr(binary_path, '/')) { 11829 err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path)); 11830 if (err) { 11831 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", 11832 prog->name, binary_path, err); 11833 return libbpf_err_ptr(err); 11834 } 11835 binary_path = resolved_path; 11836 } 11837 11838 /* USDT manager is instantiated lazily on first USDT attach. It will 11839 * be destroyed together with BPF object in bpf_object__close(). 11840 */ 11841 if (IS_ERR(obj->usdt_man)) 11842 return libbpf_ptr(obj->usdt_man); 11843 if (!obj->usdt_man) { 11844 obj->usdt_man = usdt_manager_new(obj); 11845 if (IS_ERR(obj->usdt_man)) 11846 return libbpf_ptr(obj->usdt_man); 11847 } 11848 11849 usdt_cookie = OPTS_GET(opts, usdt_cookie, 0); 11850 link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path, 11851 usdt_provider, usdt_name, usdt_cookie); 11852 err = libbpf_get_error(link); 11853 if (err) 11854 return libbpf_err_ptr(err); 11855 return link; 11856 } 11857 11858 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11859 { 11860 char *path = NULL, *provider = NULL, *name = NULL; 11861 const char *sec_name; 11862 int n, err; 11863 11864 sec_name = bpf_program__section_name(prog); 11865 if (strcmp(sec_name, "usdt") == 0) { 11866 /* no auto-attach for just SEC("usdt") */ 11867 *link = NULL; 11868 return 0; 11869 } 11870 11871 n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name); 11872 if (n != 3) { 11873 pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n", 11874 sec_name); 11875 err = -EINVAL; 11876 } else { 11877 *link = bpf_program__attach_usdt(prog, -1 /* any process */, path, 11878 provider, name, NULL); 11879 err = libbpf_get_error(*link); 11880 } 11881 free(path); 11882 free(provider); 11883 free(name); 11884 return err; 11885 } 11886 11887 static int determine_tracepoint_id(const char *tp_category, 11888 const char *tp_name) 11889 { 11890 char file[PATH_MAX]; 11891 int ret; 11892 11893 ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id", 11894 tracefs_path(), tp_category, tp_name); 11895 if (ret < 0) 11896 return -errno; 11897 if (ret >= sizeof(file)) { 11898 pr_debug("tracepoint %s/%s path is too long\n", 11899 tp_category, tp_name); 11900 return -E2BIG; 11901 } 11902 return parse_uint_from_file(file, "%d\n"); 11903 } 11904 11905 static int perf_event_open_tracepoint(const char *tp_category, 11906 const char *tp_name) 11907 { 11908 const size_t attr_sz = sizeof(struct perf_event_attr); 11909 struct perf_event_attr attr; 11910 char errmsg[STRERR_BUFSIZE]; 11911 int tp_id, pfd, err; 11912 11913 tp_id = determine_tracepoint_id(tp_category, tp_name); 11914 if (tp_id < 0) { 11915 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n", 11916 tp_category, tp_name, 11917 libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg))); 11918 return tp_id; 11919 } 11920 11921 memset(&attr, 0, attr_sz); 11922 attr.type = PERF_TYPE_TRACEPOINT; 11923 attr.size = attr_sz; 11924 attr.config = tp_id; 11925 11926 pfd = 
syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */, 11927 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 11928 if (pfd < 0) { 11929 err = -errno; 11930 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n", 11931 tp_category, tp_name, 11932 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11933 return err; 11934 } 11935 return pfd; 11936 } 11937 11938 struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, 11939 const char *tp_category, 11940 const char *tp_name, 11941 const struct bpf_tracepoint_opts *opts) 11942 { 11943 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); 11944 char errmsg[STRERR_BUFSIZE]; 11945 struct bpf_link *link; 11946 int pfd, err; 11947 11948 if (!OPTS_VALID(opts, bpf_tracepoint_opts)) 11949 return libbpf_err_ptr(-EINVAL); 11950 11951 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 11952 11953 pfd = perf_event_open_tracepoint(tp_category, tp_name); 11954 if (pfd < 0) { 11955 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n", 11956 prog->name, tp_category, tp_name, 11957 libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); 11958 return libbpf_err_ptr(pfd); 11959 } 11960 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); 11961 err = libbpf_get_error(link); 11962 if (err) { 11963 close(pfd); 11964 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n", 11965 prog->name, tp_category, tp_name, 11966 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11967 return libbpf_err_ptr(err); 11968 } 11969 return link; 11970 } 11971 11972 struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog, 11973 const char *tp_category, 11974 const char *tp_name) 11975 { 11976 return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL); 11977 } 11978 11979 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11980 { 11981 char *sec_name, *tp_cat, *tp_name; 11982 11983 *link = NULL; 11984 11985 /* no auto-attach for SEC("tp") or SEC("tracepoint") */ 11986 if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0) 11987 return 0; 11988 11989 sec_name = strdup(prog->sec_name); 11990 if (!sec_name) 11991 return -ENOMEM; 11992 11993 /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */ 11994 if (str_has_pfx(prog->sec_name, "tp/")) 11995 tp_cat = sec_name + sizeof("tp/") - 1; 11996 else 11997 tp_cat = sec_name + sizeof("tracepoint/") - 1; 11998 tp_name = strchr(tp_cat, '/'); 11999 if (!tp_name) { 12000 free(sec_name); 12001 return -EINVAL; 12002 } 12003 *tp_name = '\0'; 12004 tp_name++; 12005 12006 *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name); 12007 free(sec_name); 12008 return libbpf_get_error(*link); 12009 } 12010 12011 struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, 12012 const char *tp_name) 12013 { 12014 char errmsg[STRERR_BUFSIZE]; 12015 struct bpf_link *link; 12016 int prog_fd, pfd; 12017 12018 prog_fd = bpf_program__fd(prog); 12019 if (prog_fd < 0) { 12020 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12021 return libbpf_err_ptr(-EINVAL); 12022 } 12023 12024 link = calloc(1, sizeof(*link)); 12025 if (!link) 12026 return libbpf_err_ptr(-ENOMEM); 12027 link->detach = &bpf_link__detach_fd; 12028 12029 pfd = bpf_raw_tracepoint_open(tp_name, prog_fd); 12030 if (pfd < 0) { 12031 pfd = -errno; 12032 free(link); 12033 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n", 12034 prog->name, tp_name, 
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); 12035 return libbpf_err_ptr(pfd); 12036 } 12037 link->fd = pfd; 12038 return link; 12039 } 12040 12041 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12042 { 12043 static const char *const prefixes[] = { 12044 "raw_tp", 12045 "raw_tracepoint", 12046 "raw_tp.w", 12047 "raw_tracepoint.w", 12048 }; 12049 size_t i; 12050 const char *tp_name = NULL; 12051 12052 *link = NULL; 12053 12054 for (i = 0; i < ARRAY_SIZE(prefixes); i++) { 12055 size_t pfx_len; 12056 12057 if (!str_has_pfx(prog->sec_name, prefixes[i])) 12058 continue; 12059 12060 pfx_len = strlen(prefixes[i]); 12061 /* no auto-attach case of, e.g., SEC("raw_tp") */ 12062 if (prog->sec_name[pfx_len] == '\0') 12063 return 0; 12064 12065 if (prog->sec_name[pfx_len] != '/') 12066 continue; 12067 12068 tp_name = prog->sec_name + pfx_len + 1; 12069 break; 12070 } 12071 12072 if (!tp_name) { 12073 pr_warn("prog '%s': invalid section name '%s'\n", 12074 prog->name, prog->sec_name); 12075 return -EINVAL; 12076 } 12077 12078 *link = bpf_program__attach_raw_tracepoint(prog, tp_name); 12079 return libbpf_get_error(*link); 12080 } 12081 12082 /* Common logic for all BPF program types that attach to a btf_id */ 12083 static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog, 12084 const struct bpf_trace_opts *opts) 12085 { 12086 LIBBPF_OPTS(bpf_link_create_opts, link_opts); 12087 char errmsg[STRERR_BUFSIZE]; 12088 struct bpf_link *link; 12089 int prog_fd, pfd; 12090 12091 if (!OPTS_VALID(opts, bpf_trace_opts)) 12092 return libbpf_err_ptr(-EINVAL); 12093 12094 prog_fd = bpf_program__fd(prog); 12095 if (prog_fd < 0) { 12096 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12097 return libbpf_err_ptr(-EINVAL); 12098 } 12099 12100 link = calloc(1, sizeof(*link)); 12101 if (!link) 12102 return libbpf_err_ptr(-ENOMEM); 12103 link->detach = &bpf_link__detach_fd; 12104 12105 /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */ 12106 link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0); 12107 pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts); 12108 if (pfd < 0) { 12109 pfd = -errno; 12110 free(link); 12111 pr_warn("prog '%s': failed to attach: %s\n", 12112 prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); 12113 return libbpf_err_ptr(pfd); 12114 } 12115 link->fd = pfd; 12116 return link; 12117 } 12118 12119 struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog) 12120 { 12121 return bpf_program__attach_btf_id(prog, NULL); 12122 } 12123 12124 struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog, 12125 const struct bpf_trace_opts *opts) 12126 { 12127 return bpf_program__attach_btf_id(prog, opts); 12128 } 12129 12130 struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog) 12131 { 12132 return bpf_program__attach_btf_id(prog, NULL); 12133 } 12134 12135 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12136 { 12137 *link = bpf_program__attach_trace(prog); 12138 return libbpf_get_error(*link); 12139 } 12140 12141 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12142 { 12143 *link = bpf_program__attach_lsm(prog); 12144 return libbpf_get_error(*link); 12145 } 12146 12147 static struct bpf_link * 12148 bpf_program_attach_fd(const struct bpf_program *prog, 12149 int target_fd, const char *target_name, 12150 const 
struct bpf_link_create_opts *opts) 12151 { 12152 enum bpf_attach_type attach_type; 12153 char errmsg[STRERR_BUFSIZE]; 12154 struct bpf_link *link; 12155 int prog_fd, link_fd; 12156 12157 prog_fd = bpf_program__fd(prog); 12158 if (prog_fd < 0) { 12159 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12160 return libbpf_err_ptr(-EINVAL); 12161 } 12162 12163 link = calloc(1, sizeof(*link)); 12164 if (!link) 12165 return libbpf_err_ptr(-ENOMEM); 12166 link->detach = &bpf_link__detach_fd; 12167 12168 attach_type = bpf_program__expected_attach_type(prog); 12169 link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts); 12170 if (link_fd < 0) { 12171 link_fd = -errno; 12172 free(link); 12173 pr_warn("prog '%s': failed to attach to %s: %s\n", 12174 prog->name, target_name, 12175 libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); 12176 return libbpf_err_ptr(link_fd); 12177 } 12178 link->fd = link_fd; 12179 return link; 12180 } 12181 12182 struct bpf_link * 12183 bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd) 12184 { 12185 return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL); 12186 } 12187 12188 struct bpf_link * 12189 bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd) 12190 { 12191 return bpf_program_attach_fd(prog, netns_fd, "netns", NULL); 12192 } 12193 12194 struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex) 12195 { 12196 /* target_fd/target_ifindex use the same field in LINK_CREATE */ 12197 return bpf_program_attach_fd(prog, ifindex, "xdp", NULL); 12198 } 12199 12200 struct bpf_link * 12201 bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex, 12202 const struct bpf_tcx_opts *opts) 12203 { 12204 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); 12205 __u32 relative_id; 12206 int relative_fd; 12207 12208 if (!OPTS_VALID(opts, bpf_tcx_opts)) 12209 return libbpf_err_ptr(-EINVAL); 12210 12211 relative_id = OPTS_GET(opts, relative_id, 0); 12212 relative_fd = OPTS_GET(opts, relative_fd, 0); 12213 12214 /* validate we don't have unexpected combinations of non-zero fields */ 12215 if (!ifindex) { 12216 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n", 12217 prog->name); 12218 return libbpf_err_ptr(-EINVAL); 12219 } 12220 if (relative_fd && relative_id) { 12221 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", 12222 prog->name); 12223 return libbpf_err_ptr(-EINVAL); 12224 } 12225 12226 link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0); 12227 link_create_opts.tcx.relative_fd = relative_fd; 12228 link_create_opts.tcx.relative_id = relative_id; 12229 link_create_opts.flags = OPTS_GET(opts, flags, 0); 12230 12231 /* target_fd/target_ifindex use the same field in LINK_CREATE */ 12232 return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts); 12233 } 12234 12235 struct bpf_link * 12236 bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex, 12237 const struct bpf_netkit_opts *opts) 12238 { 12239 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); 12240 __u32 relative_id; 12241 int relative_fd; 12242 12243 if (!OPTS_VALID(opts, bpf_netkit_opts)) 12244 return libbpf_err_ptr(-EINVAL); 12245 12246 relative_id = OPTS_GET(opts, relative_id, 0); 12247 relative_fd = OPTS_GET(opts, relative_fd, 0); 12248 12249 /* validate we don't have unexpected combinations of non-zero fields */ 12250 if (!ifindex) { 12251 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n", 12252 
prog->name); 12253 return libbpf_err_ptr(-EINVAL); 12254 } 12255 if (relative_fd && relative_id) { 12256 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", 12257 prog->name); 12258 return libbpf_err_ptr(-EINVAL); 12259 } 12260 12261 link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0); 12262 link_create_opts.netkit.relative_fd = relative_fd; 12263 link_create_opts.netkit.relative_id = relative_id; 12264 link_create_opts.flags = OPTS_GET(opts, flags, 0); 12265 12266 return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts); 12267 } 12268 12269 struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, 12270 int target_fd, 12271 const char *attach_func_name) 12272 { 12273 int btf_id; 12274 12275 if (!!target_fd != !!attach_func_name) { 12276 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n", 12277 prog->name); 12278 return libbpf_err_ptr(-EINVAL); 12279 } 12280 12281 if (prog->type != BPF_PROG_TYPE_EXT) { 12282 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace", 12283 prog->name); 12284 return libbpf_err_ptr(-EINVAL); 12285 } 12286 12287 if (target_fd) { 12288 LIBBPF_OPTS(bpf_link_create_opts, target_opts); 12289 12290 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd); 12291 if (btf_id < 0) 12292 return libbpf_err_ptr(btf_id); 12293 12294 target_opts.target_btf_id = btf_id; 12295 12296 return bpf_program_attach_fd(prog, target_fd, "freplace", 12297 &target_opts); 12298 } else { 12299 /* no target, so use raw_tracepoint_open for compatibility 12300 * with old kernels 12301 */ 12302 return bpf_program__attach_trace(prog); 12303 } 12304 } 12305 12306 struct bpf_link * 12307 bpf_program__attach_iter(const struct bpf_program *prog, 12308 const struct bpf_iter_attach_opts *opts) 12309 { 12310 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); 12311 char errmsg[STRERR_BUFSIZE]; 12312 struct bpf_link *link; 12313 int prog_fd, link_fd; 12314 __u32 target_fd = 0; 12315 12316 if (!OPTS_VALID(opts, bpf_iter_attach_opts)) 12317 return libbpf_err_ptr(-EINVAL); 12318 12319 link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0); 12320 link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0); 12321 12322 prog_fd = bpf_program__fd(prog); 12323 if (prog_fd < 0) { 12324 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12325 return libbpf_err_ptr(-EINVAL); 12326 } 12327 12328 link = calloc(1, sizeof(*link)); 12329 if (!link) 12330 return libbpf_err_ptr(-ENOMEM); 12331 link->detach = &bpf_link__detach_fd; 12332 12333 link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER, 12334 &link_create_opts); 12335 if (link_fd < 0) { 12336 link_fd = -errno; 12337 free(link); 12338 pr_warn("prog '%s': failed to attach to iterator: %s\n", 12339 prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); 12340 return libbpf_err_ptr(link_fd); 12341 } 12342 link->fd = link_fd; 12343 return link; 12344 } 12345 12346 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12347 { 12348 *link = bpf_program__attach_iter(prog, NULL); 12349 return libbpf_get_error(*link); 12350 } 12351 12352 struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog, 12353 const struct bpf_netfilter_opts *opts) 12354 { 12355 LIBBPF_OPTS(bpf_link_create_opts, lopts); 12356 struct bpf_link *link; 12357 int prog_fd, link_fd; 12358 12359 if (!OPTS_VALID(opts, bpf_netfilter_opts)) 12360 return 
libbpf_err_ptr(-EINVAL); 12361 12362 prog_fd = bpf_program__fd(prog); 12363 if (prog_fd < 0) { 12364 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12365 return libbpf_err_ptr(-EINVAL); 12366 } 12367 12368 link = calloc(1, sizeof(*link)); 12369 if (!link) 12370 return libbpf_err_ptr(-ENOMEM); 12371 12372 link->detach = &bpf_link__detach_fd; 12373 12374 lopts.netfilter.pf = OPTS_GET(opts, pf, 0); 12375 lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0); 12376 lopts.netfilter.priority = OPTS_GET(opts, priority, 0); 12377 lopts.netfilter.flags = OPTS_GET(opts, flags, 0); 12378 12379 link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts); 12380 if (link_fd < 0) { 12381 char errmsg[STRERR_BUFSIZE]; 12382 12383 link_fd = -errno; 12384 free(link); 12385 pr_warn("prog '%s': failed to attach to netfilter: %s\n", 12386 prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); 12387 return libbpf_err_ptr(link_fd); 12388 } 12389 link->fd = link_fd; 12390 12391 return link; 12392 } 12393 12394 struct bpf_link *bpf_program__attach(const struct bpf_program *prog) 12395 { 12396 struct bpf_link *link = NULL; 12397 int err; 12398 12399 if (!prog->sec_def || !prog->sec_def->prog_attach_fn) 12400 return libbpf_err_ptr(-EOPNOTSUPP); 12401 12402 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link); 12403 if (err) 12404 return libbpf_err_ptr(err); 12405 12406 /* When calling bpf_program__attach() explicitly, auto-attach support 12407 * is expected to work, so NULL returned link is considered an error. 12408 * This is different for skeleton's attach, see comment in 12409 * bpf_object__attach_skeleton(). 12410 */ 12411 if (!link) 12412 return libbpf_err_ptr(-EOPNOTSUPP); 12413 12414 return link; 12415 } 12416 12417 struct bpf_link_struct_ops { 12418 struct bpf_link link; 12419 int map_fd; 12420 }; 12421 12422 static int bpf_link__detach_struct_ops(struct bpf_link *link) 12423 { 12424 struct bpf_link_struct_ops *st_link; 12425 __u32 zero = 0; 12426 12427 st_link = container_of(link, struct bpf_link_struct_ops, link); 12428 12429 if (st_link->map_fd < 0) 12430 /* w/o a real link */ 12431 return bpf_map_delete_elem(link->fd, &zero); 12432 12433 return close(link->fd); 12434 } 12435 12436 struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) 12437 { 12438 struct bpf_link_struct_ops *link; 12439 __u32 zero = 0; 12440 int err, fd; 12441 12442 if (!bpf_map__is_struct_ops(map) || map->fd == -1) 12443 return libbpf_err_ptr(-EINVAL); 12444 12445 link = calloc(1, sizeof(*link)); 12446 if (!link) 12447 return libbpf_err_ptr(-EINVAL); 12448 12449 /* kern_vdata should be prepared during the loading phase. */ 12450 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0); 12451 /* It can be EBUSY if the map has been used to create or 12452 * update a link before. We don't allow updating the value of 12453 * a struct_ops once it is set. That ensures that the value 12454 * never changed. So, it is safe to skip EBUSY. 
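	 *
	 * Illustrative scenario: attaching the same BPF_F_LINK struct_ops map
	 * a second time makes this bpf_map_update_elem() return -EBUSY because
	 * kern_vdata was already written; the error is tolerated and a second
	 * link can still be created from the already-populated map below.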
12455 */ 12456 if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) { 12457 free(link); 12458 return libbpf_err_ptr(err); 12459 } 12460 12461 link->link.detach = bpf_link__detach_struct_ops; 12462 12463 if (!(map->def.map_flags & BPF_F_LINK)) { 12464 /* w/o a real link */ 12465 link->link.fd = map->fd; 12466 link->map_fd = -1; 12467 return &link->link; 12468 } 12469 12470 fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL); 12471 if (fd < 0) { 12472 free(link); 12473 return libbpf_err_ptr(fd); 12474 } 12475 12476 link->link.fd = fd; 12477 link->map_fd = map->fd; 12478 12479 return &link->link; 12480 } 12481 12482 /* 12483 * Swap the backing struct_ops map of a link with a new struct_ops map. 12484 */ 12485 int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map) 12486 { 12487 struct bpf_link_struct_ops *st_ops_link; 12488 __u32 zero = 0; 12489 int err; 12490 12491 if (!bpf_map__is_struct_ops(map) || !map_is_created(map)) 12492 return -EINVAL; 12493 12494 st_ops_link = container_of(link, struct bpf_link_struct_ops, link); 12495 /* Ensure the type of a link is correct */ 12496 if (st_ops_link->map_fd < 0) 12497 return -EINVAL; 12498 12499 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0); 12500 /* It can be EBUSY if the map has been used to create or 12501 * update a link before. We don't allow updating the value of 12502 * a struct_ops once it is set. That ensures that the value 12503 * never changed. So, it is safe to skip EBUSY. 12504 */ 12505 if (err && err != -EBUSY) 12506 return err; 12507 12508 err = bpf_link_update(link->fd, map->fd, NULL); 12509 if (err < 0) 12510 return err; 12511 12512 st_ops_link->map_fd = map->fd; 12513 12514 return 0; 12515 } 12516 12517 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr, 12518 void *private_data); 12519 12520 static enum bpf_perf_event_ret 12521 perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, 12522 void **copy_mem, size_t *copy_size, 12523 bpf_perf_event_print_t fn, void *private_data) 12524 { 12525 struct perf_event_mmap_page *header = mmap_mem; 12526 __u64 data_head = ring_buffer_read_head(header); 12527 __u64 data_tail = header->data_tail; 12528 void *base = ((__u8 *)header) + page_size; 12529 int ret = LIBBPF_PERF_EVENT_CONT; 12530 struct perf_event_header *ehdr; 12531 size_t ehdr_size; 12532 12533 while (data_head != data_tail) { 12534 ehdr = base + (data_tail & (mmap_size - 1)); 12535 ehdr_size = ehdr->size; 12536 12537 if (((void *)ehdr) + ehdr_size > base + mmap_size) { 12538 void *copy_start = ehdr; 12539 size_t len_first = base + mmap_size - copy_start; 12540 size_t len_secnd = ehdr_size - len_first; 12541 12542 if (*copy_size < ehdr_size) { 12543 free(*copy_mem); 12544 *copy_mem = malloc(ehdr_size); 12545 if (!*copy_mem) { 12546 *copy_size = 0; 12547 ret = LIBBPF_PERF_EVENT_ERROR; 12548 break; 12549 } 12550 *copy_size = ehdr_size; 12551 } 12552 12553 memcpy(*copy_mem, copy_start, len_first); 12554 memcpy(*copy_mem + len_first, base, len_secnd); 12555 ehdr = *copy_mem; 12556 } 12557 12558 ret = fn(ehdr, private_data); 12559 data_tail += ehdr_size; 12560 if (ret != LIBBPF_PERF_EVENT_CONT) 12561 break; 12562 } 12563 12564 ring_buffer_write_tail(header, data_tail); 12565 return libbpf_err(ret); 12566 } 12567 12568 struct perf_buffer; 12569 12570 struct perf_buffer_params { 12571 struct perf_event_attr *attr; 12572 /* if event_cb is specified, it takes precedence */ 12573 perf_buffer_event_fn event_cb; 12574 /* sample_cb and
lost_cb are higher-level common-case callbacks */ 12575 perf_buffer_sample_fn sample_cb; 12576 perf_buffer_lost_fn lost_cb; 12577 void *ctx; 12578 int cpu_cnt; 12579 int *cpus; 12580 int *map_keys; 12581 }; 12582 12583 struct perf_cpu_buf { 12584 struct perf_buffer *pb; 12585 void *base; /* mmap()'ed memory */ 12586 void *buf; /* for reconstructing segmented data */ 12587 size_t buf_size; 12588 int fd; 12589 int cpu; 12590 int map_key; 12591 }; 12592 12593 struct perf_buffer { 12594 perf_buffer_event_fn event_cb; 12595 perf_buffer_sample_fn sample_cb; 12596 perf_buffer_lost_fn lost_cb; 12597 void *ctx; /* passed into callbacks */ 12598 12599 size_t page_size; 12600 size_t mmap_size; 12601 struct perf_cpu_buf **cpu_bufs; 12602 struct epoll_event *events; 12603 int cpu_cnt; /* number of allocated CPU buffers */ 12604 int epoll_fd; /* perf event FD */ 12605 int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */ 12606 }; 12607 12608 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb, 12609 struct perf_cpu_buf *cpu_buf) 12610 { 12611 if (!cpu_buf) 12612 return; 12613 if (cpu_buf->base && 12614 munmap(cpu_buf->base, pb->mmap_size + pb->page_size)) 12615 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu); 12616 if (cpu_buf->fd >= 0) { 12617 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0); 12618 close(cpu_buf->fd); 12619 } 12620 free(cpu_buf->buf); 12621 free(cpu_buf); 12622 } 12623 12624 void perf_buffer__free(struct perf_buffer *pb) 12625 { 12626 int i; 12627 12628 if (IS_ERR_OR_NULL(pb)) 12629 return; 12630 if (pb->cpu_bufs) { 12631 for (i = 0; i < pb->cpu_cnt; i++) { 12632 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i]; 12633 12634 if (!cpu_buf) 12635 continue; 12636 12637 bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key); 12638 perf_buffer__free_cpu_buf(pb, cpu_buf); 12639 } 12640 free(pb->cpu_bufs); 12641 } 12642 if (pb->epoll_fd >= 0) 12643 close(pb->epoll_fd); 12644 free(pb->events); 12645 free(pb); 12646 } 12647 12648 static struct perf_cpu_buf * 12649 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, 12650 int cpu, int map_key) 12651 { 12652 struct perf_cpu_buf *cpu_buf; 12653 char msg[STRERR_BUFSIZE]; 12654 int err; 12655 12656 cpu_buf = calloc(1, sizeof(*cpu_buf)); 12657 if (!cpu_buf) 12658 return ERR_PTR(-ENOMEM); 12659 12660 cpu_buf->pb = pb; 12661 cpu_buf->cpu = cpu; 12662 cpu_buf->map_key = map_key; 12663 12664 cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu, 12665 -1, PERF_FLAG_FD_CLOEXEC); 12666 if (cpu_buf->fd < 0) { 12667 err = -errno; 12668 pr_warn("failed to open perf buffer event on cpu #%d: %s\n", 12669 cpu, libbpf_strerror_r(err, msg, sizeof(msg))); 12670 goto error; 12671 } 12672 12673 cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size, 12674 PROT_READ | PROT_WRITE, MAP_SHARED, 12675 cpu_buf->fd, 0); 12676 if (cpu_buf->base == MAP_FAILED) { 12677 cpu_buf->base = NULL; 12678 err = -errno; 12679 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n", 12680 cpu, libbpf_strerror_r(err, msg, sizeof(msg))); 12681 goto error; 12682 } 12683 12684 if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) { 12685 err = -errno; 12686 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n", 12687 cpu, libbpf_strerror_r(err, msg, sizeof(msg))); 12688 goto error; 12689 } 12690 12691 return cpu_buf; 12692 12693 error: 12694 perf_buffer__free_cpu_buf(pb, cpu_buf); 12695 return (struct perf_cpu_buf *)ERR_PTR(err); 12696 } 12697 12698 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, 12699 
struct perf_buffer_params *p); 12700 12701 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, 12702 perf_buffer_sample_fn sample_cb, 12703 perf_buffer_lost_fn lost_cb, 12704 void *ctx, 12705 const struct perf_buffer_opts *opts) 12706 { 12707 const size_t attr_sz = sizeof(struct perf_event_attr); 12708 struct perf_buffer_params p = {}; 12709 struct perf_event_attr attr; 12710 __u32 sample_period; 12711 12712 if (!OPTS_VALID(opts, perf_buffer_opts)) 12713 return libbpf_err_ptr(-EINVAL); 12714 12715 sample_period = OPTS_GET(opts, sample_period, 1); 12716 if (!sample_period) 12717 sample_period = 1; 12718 12719 memset(&attr, 0, attr_sz); 12720 attr.size = attr_sz; 12721 attr.config = PERF_COUNT_SW_BPF_OUTPUT; 12722 attr.type = PERF_TYPE_SOFTWARE; 12723 attr.sample_type = PERF_SAMPLE_RAW; 12724 attr.sample_period = sample_period; 12725 attr.wakeup_events = sample_period; 12726 12727 p.attr = &attr; 12728 p.sample_cb = sample_cb; 12729 p.lost_cb = lost_cb; 12730 p.ctx = ctx; 12731 12732 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); 12733 } 12734 12735 struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt, 12736 struct perf_event_attr *attr, 12737 perf_buffer_event_fn event_cb, void *ctx, 12738 const struct perf_buffer_raw_opts *opts) 12739 { 12740 struct perf_buffer_params p = {}; 12741 12742 if (!attr) 12743 return libbpf_err_ptr(-EINVAL); 12744 12745 if (!OPTS_VALID(opts, perf_buffer_raw_opts)) 12746 return libbpf_err_ptr(-EINVAL); 12747 12748 p.attr = attr; 12749 p.event_cb = event_cb; 12750 p.ctx = ctx; 12751 p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0); 12752 p.cpus = OPTS_GET(opts, cpus, NULL); 12753 p.map_keys = OPTS_GET(opts, map_keys, NULL); 12754 12755 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); 12756 } 12757 12758 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, 12759 struct perf_buffer_params *p) 12760 { 12761 const char *online_cpus_file = "/sys/devices/system/cpu/online"; 12762 struct bpf_map_info map; 12763 char msg[STRERR_BUFSIZE]; 12764 struct perf_buffer *pb; 12765 bool *online = NULL; 12766 __u32 map_info_len; 12767 int err, i, j, n; 12768 12769 if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) { 12770 pr_warn("page count should be power of two, but is %zu\n", 12771 page_cnt); 12772 return ERR_PTR(-EINVAL); 12773 } 12774 12775 /* best-effort sanity checks */ 12776 memset(&map, 0, sizeof(map)); 12777 map_info_len = sizeof(map); 12778 err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len); 12779 if (err) { 12780 err = -errno; 12781 /* if BPF_OBJ_GET_INFO_BY_FD is supported, will return 12782 * -EBADFD, -EFAULT, or -E2BIG on real error 12783 */ 12784 if (err != -EINVAL) { 12785 pr_warn("failed to get map info for map FD %d: %s\n", 12786 map_fd, libbpf_strerror_r(err, msg, sizeof(msg))); 12787 return ERR_PTR(err); 12788 } 12789 pr_debug("failed to get map info for FD %d; API not supported? 
Ignoring...\n", 12790 map_fd); 12791 } else { 12792 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { 12793 pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", 12794 map.name); 12795 return ERR_PTR(-EINVAL); 12796 } 12797 } 12798 12799 pb = calloc(1, sizeof(*pb)); 12800 if (!pb) 12801 return ERR_PTR(-ENOMEM); 12802 12803 pb->event_cb = p->event_cb; 12804 pb->sample_cb = p->sample_cb; 12805 pb->lost_cb = p->lost_cb; 12806 pb->ctx = p->ctx; 12807 12808 pb->page_size = getpagesize(); 12809 pb->mmap_size = pb->page_size * page_cnt; 12810 pb->map_fd = map_fd; 12811 12812 pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); 12813 if (pb->epoll_fd < 0) { 12814 err = -errno; 12815 pr_warn("failed to create epoll instance: %s\n", 12816 libbpf_strerror_r(err, msg, sizeof(msg))); 12817 goto error; 12818 } 12819 12820 if (p->cpu_cnt > 0) { 12821 pb->cpu_cnt = p->cpu_cnt; 12822 } else { 12823 pb->cpu_cnt = libbpf_num_possible_cpus(); 12824 if (pb->cpu_cnt < 0) { 12825 err = pb->cpu_cnt; 12826 goto error; 12827 } 12828 if (map.max_entries && map.max_entries < pb->cpu_cnt) 12829 pb->cpu_cnt = map.max_entries; 12830 } 12831 12832 pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events)); 12833 if (!pb->events) { 12834 err = -ENOMEM; 12835 pr_warn("failed to allocate events: out of memory\n"); 12836 goto error; 12837 } 12838 pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs)); 12839 if (!pb->cpu_bufs) { 12840 err = -ENOMEM; 12841 pr_warn("failed to allocate buffers: out of memory\n"); 12842 goto error; 12843 } 12844 12845 err = parse_cpu_mask_file(online_cpus_file, &online, &n); 12846 if (err) { 12847 pr_warn("failed to get online CPU mask: %d\n", err); 12848 goto error; 12849 } 12850 12851 for (i = 0, j = 0; i < pb->cpu_cnt; i++) { 12852 struct perf_cpu_buf *cpu_buf; 12853 int cpu, map_key; 12854 12855 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i; 12856 map_key = p->cpu_cnt > 0 ? 
p->map_keys[i] : i; 12857 12858 /* in case user didn't explicitly request particular CPUs to 12859 * be attached to, skip offline/not present CPUs 12860 */ 12861 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu])) 12862 continue; 12863 12864 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key); 12865 if (IS_ERR(cpu_buf)) { 12866 err = PTR_ERR(cpu_buf); 12867 goto error; 12868 } 12869 12870 pb->cpu_bufs[j] = cpu_buf; 12871 12872 err = bpf_map_update_elem(pb->map_fd, &map_key, 12873 &cpu_buf->fd, 0); 12874 if (err) { 12875 err = -errno; 12876 pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n", 12877 cpu, map_key, cpu_buf->fd, 12878 libbpf_strerror_r(err, msg, sizeof(msg))); 12879 goto error; 12880 } 12881 12882 pb->events[j].events = EPOLLIN; 12883 pb->events[j].data.ptr = cpu_buf; 12884 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd, 12885 &pb->events[j]) < 0) { 12886 err = -errno; 12887 pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", 12888 cpu, cpu_buf->fd, 12889 libbpf_strerror_r(err, msg, sizeof(msg))); 12890 goto error; 12891 } 12892 j++; 12893 } 12894 pb->cpu_cnt = j; 12895 free(online); 12896 12897 return pb; 12898 12899 error: 12900 free(online); 12901 if (pb) 12902 perf_buffer__free(pb); 12903 return ERR_PTR(err); 12904 } 12905 12906 struct perf_sample_raw { 12907 struct perf_event_header header; 12908 uint32_t size; 12909 char data[]; 12910 }; 12911 12912 struct perf_sample_lost { 12913 struct perf_event_header header; 12914 uint64_t id; 12915 uint64_t lost; 12916 uint64_t sample_id; 12917 }; 12918 12919 static enum bpf_perf_event_ret 12920 perf_buffer__process_record(struct perf_event_header *e, void *ctx) 12921 { 12922 struct perf_cpu_buf *cpu_buf = ctx; 12923 struct perf_buffer *pb = cpu_buf->pb; 12924 void *data = e; 12925 12926 /* user wants full control over parsing perf event */ 12927 if (pb->event_cb) 12928 return pb->event_cb(pb->ctx, cpu_buf->cpu, e); 12929 12930 switch (e->type) { 12931 case PERF_RECORD_SAMPLE: { 12932 struct perf_sample_raw *s = data; 12933 12934 if (pb->sample_cb) 12935 pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size); 12936 break; 12937 } 12938 case PERF_RECORD_LOST: { 12939 struct perf_sample_lost *s = data; 12940 12941 if (pb->lost_cb) 12942 pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost); 12943 break; 12944 } 12945 default: 12946 pr_warn("unknown perf sample type %d\n", e->type); 12947 return LIBBPF_PERF_EVENT_ERROR; 12948 } 12949 return LIBBPF_PERF_EVENT_CONT; 12950 } 12951 12952 static int perf_buffer__process_records(struct perf_buffer *pb, 12953 struct perf_cpu_buf *cpu_buf) 12954 { 12955 enum bpf_perf_event_ret ret; 12956 12957 ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size, 12958 pb->page_size, &cpu_buf->buf, 12959 &cpu_buf->buf_size, 12960 perf_buffer__process_record, cpu_buf); 12961 if (ret != LIBBPF_PERF_EVENT_CONT) 12962 return ret; 12963 return 0; 12964 } 12965 12966 int perf_buffer__epoll_fd(const struct perf_buffer *pb) 12967 { 12968 return pb->epoll_fd; 12969 } 12970 12971 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) 12972 { 12973 int i, cnt, err; 12974 12975 cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms); 12976 if (cnt < 0) 12977 return -errno; 12978 12979 for (i = 0; i < cnt; i++) { 12980 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr; 12981 12982 err = perf_buffer__process_records(pb, cpu_buf); 12983 if (err) { 12984 pr_warn("error while processing records: %d\n", err); 12985 return libbpf_err(err); 12986 } 12987 } 12988 return cnt; 12989
} 12990 12991 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer 12992 * manager. 12993 */ 12994 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb) 12995 { 12996 return pb->cpu_cnt; 12997 } 12998 12999 /* 13000 * Return perf_event FD of a ring buffer in *buf_idx* slot of 13001 * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using 13002 * select()/poll()/epoll() Linux syscalls. 13003 */ 13004 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx) 13005 { 13006 struct perf_cpu_buf *cpu_buf; 13007 13008 if (buf_idx >= pb->cpu_cnt) 13009 return libbpf_err(-EINVAL); 13010 13011 cpu_buf = pb->cpu_bufs[buf_idx]; 13012 if (!cpu_buf) 13013 return libbpf_err(-ENOENT); 13014 13015 return cpu_buf->fd; 13016 } 13017 13018 int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size) 13019 { 13020 struct perf_cpu_buf *cpu_buf; 13021 13022 if (buf_idx >= pb->cpu_cnt) 13023 return libbpf_err(-EINVAL); 13024 13025 cpu_buf = pb->cpu_bufs[buf_idx]; 13026 if (!cpu_buf) 13027 return libbpf_err(-ENOENT); 13028 13029 *buf = cpu_buf->base; 13030 *buf_size = pb->mmap_size; 13031 return 0; 13032 } 13033 13034 /* 13035 * Consume data from perf ring buffer corresponding to slot *buf_idx* in 13036 * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to 13037 * consume, do nothing and return success. 13038 * Returns: 13039 * - 0 on success; 13040 * - <0 on failure. 13041 */ 13042 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx) 13043 { 13044 struct perf_cpu_buf *cpu_buf; 13045 13046 if (buf_idx >= pb->cpu_cnt) 13047 return libbpf_err(-EINVAL); 13048 13049 cpu_buf = pb->cpu_bufs[buf_idx]; 13050 if (!cpu_buf) 13051 return libbpf_err(-ENOENT); 13052 13053 return perf_buffer__process_records(pb, cpu_buf); 13054 } 13055 13056 int perf_buffer__consume(struct perf_buffer *pb) 13057 { 13058 int i, err; 13059 13060 for (i = 0; i < pb->cpu_cnt; i++) { 13061 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i]; 13062 13063 if (!cpu_buf) 13064 continue; 13065 13066 err = perf_buffer__process_records(pb, cpu_buf); 13067 if (err) { 13068 pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err); 13069 return libbpf_err(err); 13070 } 13071 } 13072 return 0; 13073 } 13074 13075 int bpf_program__set_attach_target(struct bpf_program *prog, 13076 int attach_prog_fd, 13077 const char *attach_func_name) 13078 { 13079 int btf_obj_fd = 0, btf_id = 0, err; 13080 13081 if (!prog || attach_prog_fd < 0) 13082 return libbpf_err(-EINVAL); 13083 13084 if (prog->obj->loaded) 13085 return libbpf_err(-EINVAL); 13086 13087 if (attach_prog_fd && !attach_func_name) { 13088 /* remember attach_prog_fd and let bpf_program__load() find 13089 * BTF ID during the program load 13090 */ 13091 prog->attach_prog_fd = attach_prog_fd; 13092 return 0; 13093 } 13094 13095 if (attach_prog_fd) { 13096 btf_id = libbpf_find_prog_btf_id(attach_func_name, 13097 attach_prog_fd); 13098 if (btf_id < 0) 13099 return libbpf_err(btf_id); 13100 } else { 13101 if (!attach_func_name) 13102 return libbpf_err(-EINVAL); 13103 13104 /* load btf_vmlinux, if not yet */ 13105 err = bpf_object__load_vmlinux_btf(prog->obj, true); 13106 if (err) 13107 return libbpf_err(err); 13108 err = find_kernel_btf_id(prog->obj, attach_func_name, 13109 prog->expected_attach_type, 13110 &btf_obj_fd, &btf_id); 13111 if (err) 13112 return libbpf_err(err); 13113 } 13114 13115 prog->attach_btf_id = btf_id; 13116 prog->attach_btf_obj_fd = btf_obj_fd; 13117 
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
{
	int err = 0, n, len, start, end = -1;
	bool *tmp;

	*mask = NULL;
	*mask_sz = 0;

	/* Each sub string separated by ',' has format \d+-\d+ or \d+ */
	while (*s) {
		if (*s == ',' || *s == '\n') {
			s++;
			continue;
		}
		n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
		if (n <= 0 || n > 2) {
			pr_warn("Failed to get CPU range %s: %d\n", s, n);
			err = -EINVAL;
			goto cleanup;
		} else if (n == 1) {
			end = start;
		}
		if (start < 0 || start > end) {
			pr_warn("Invalid CPU range [%d,%d] in %s\n",
				start, end, s);
			err = -EINVAL;
			goto cleanup;
		}
		tmp = realloc(*mask, end + 1);
		if (!tmp) {
			err = -ENOMEM;
			goto cleanup;
		}
		*mask = tmp;
		memset(tmp + *mask_sz, 0, start - *mask_sz);
		memset(tmp + start, 1, end - start + 1);
		*mask_sz = end + 1;
		s += len;
	}
	if (!*mask_sz) {
		pr_warn("Empty CPU range\n");
		return -EINVAL;
	}
	return 0;
cleanup:
	free(*mask);
	*mask = NULL;
	return err;
}

int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
{
	int fd, err = 0, len;
	char buf[128];

	fd = open(fcpu, O_RDONLY | O_CLOEXEC);
	if (fd < 0) {
		err = -errno;
		pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
		return err;
	}
	len = read(fd, buf, sizeof(buf));
	close(fd);
	if (len <= 0) {
		err = len ? -errno : -EINVAL;
		pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
		return err;
	}
	if (len >= sizeof(buf)) {
		pr_warn("CPU mask is too big in file %s\n", fcpu);
		return -E2BIG;
	}
	buf[len] = '\0';

	return parse_cpu_mask_str(buf, mask, mask_sz);
}

int libbpf_num_possible_cpus(void)
{
	static const char *fcpu = "/sys/devices/system/cpu/possible";
	static int cpus;
	int err, n, i, tmp_cpus;
	bool *mask;

	tmp_cpus = READ_ONCE(cpus);
	if (tmp_cpus > 0)
		return tmp_cpus;

	err = parse_cpu_mask_file(fcpu, &mask, &n);
	if (err)
		return libbpf_err(err);

	tmp_cpus = 0;
	for (i = 0; i < n; i++) {
		if (mask[i])
			tmp_cpus++;
	}
	free(mask);

	WRITE_ONCE(cpus, tmp_cpus);
	return tmp_cpus;
}
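
/* Example usage sketch (mirroring the libbpf.h documentation for this API):
 * the possible-CPU count is what per-CPU map values are sized by.
 * per_cpu_map_fd and key below are hypothetical placeholders.
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *	if (ncpus < 0)
 *		return ncpus;	// error handling
 *	long values[ncpus];
 *	bpf_map_lookup_elem(per_cpu_map_fd, &key, values);
 */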
static int populate_skeleton_maps(const struct bpf_object *obj,
				  struct bpf_map_skeleton *maps,
				  size_t map_cnt)
{
	int i;

	for (i = 0; i < map_cnt; i++) {
		struct bpf_map **map = maps[i].map;
		const char *name = maps[i].name;
		void **mmaped = maps[i].mmaped;

		*map = bpf_object__find_map_by_name(obj, name);
		if (!*map) {
			pr_warn("failed to find skeleton map '%s'\n", name);
			return -ESRCH;
		}

		/* externs shouldn't be pre-setup from user code */
		if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
			*mmaped = (*map)->mmaped;
	}
	return 0;
}

static int populate_skeleton_progs(const struct bpf_object *obj,
				   struct bpf_prog_skeleton *progs,
				   size_t prog_cnt)
{
	int i;

	for (i = 0; i < prog_cnt; i++) {
		struct bpf_program **prog = progs[i].prog;
		const char *name = progs[i].name;

		*prog = bpf_object__find_program_by_name(obj, name);
		if (!*prog) {
			pr_warn("failed to find skeleton program '%s'\n", name);
			return -ESRCH;
		}
	}
	return 0;
}

int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
			      const struct bpf_object_open_opts *opts)
{
	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
		.object_name = s->name,
	);
	struct bpf_object *obj;
	int err;

	/* Attempt to preserve opts->object_name, unless overridden by user
	 * explicitly. Overwriting object name for skeletons is discouraged,
	 * as it breaks global data maps, because they contain object name
	 * prefix as their own map name prefix. When skeleton is generated,
	 * bpftool is making an assumption that this name will stay the same.
	 */
	if (opts) {
		memcpy(&skel_opts, opts, sizeof(*opts));
		if (!opts->object_name)
			skel_opts.object_name = s->name;
	}

	obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
	err = libbpf_get_error(obj);
	if (err) {
		pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
			s->name, err);
		return libbpf_err(err);
	}

	*s->obj = obj;
	err = populate_skeleton_maps(obj, s->maps, s->map_cnt);
	if (err) {
		pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	err = populate_skeleton_progs(obj, s->progs, s->prog_cnt);
	if (err) {
		pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	return 0;
}

int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
{
	int err, len, var_idx, i;
	const char *var_name;
	const struct bpf_map *map;
	struct btf *btf;
	__u32 map_type_id;
	const struct btf_type *map_type, *var_type;
	const struct bpf_var_skeleton *var_skel;
	struct btf_var_secinfo *var;

	if (!s->obj)
		return libbpf_err(-EINVAL);

	btf = bpf_object__btf(s->obj);
	if (!btf) {
		pr_warn("subskeletons require BTF at runtime (object %s)\n",
			bpf_object__name(s->obj));
		return libbpf_err(-errno);
	}

	err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt);
	if (err) {
		pr_warn("failed to populate subskeleton maps: %d\n", err);
		return libbpf_err(err);
	}

	err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt);
	if (err) {
		pr_warn("failed to populate subskeleton progs: %d\n", err);
		return libbpf_err(err);
	}

	for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
		var_skel = &s->vars[var_idx];
		map = *var_skel->map;
		map_type_id = bpf_map__btf_value_type_id(map);
		map_type = btf__type_by_id(btf, map_type_id);

		if (!btf_is_datasec(map_type)) {
			pr_warn("type for map '%1$s' is not a datasec: %2$s\n",
				bpf_map__name(map),
				__btf_kind_str(btf_kind(map_type)));
			return libbpf_err(-EINVAL);
		}

		len = btf_vlen(map_type);
		var = btf_var_secinfos(map_type);
		for (i = 0; i < len; i++, var++) {
			var_type = btf__type_by_id(btf, var->type);
			var_name = btf__name_by_offset(btf, var_type->name_off);
			if (strcmp(var_name, var_skel->name) == 0) {
				*var_skel->addr = map->mmaped + var->offset;
				break;
			}
		}
	}
	return 0;
}

void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
{
	if (!s)
		return;
	free(s->maps);
	free(s->progs);
	free(s->vars);
	free(s);
}

int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
{
	int i, err;

	err = bpf_object__load(*s->obj);
	if (err) {
		pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	for (i = 0; i < s->map_cnt; i++) {
		struct bpf_map *map = *s->maps[i].map;
		size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
		int prot, map_fd = map->fd;
		void **mmaped = s->maps[i].mmaped;

		if (!mmaped)
			continue;

		if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
			*mmaped = NULL;
			continue;
		}

		if (map->def.map_flags & BPF_F_RDONLY_PROG)
			prot = PROT_READ;
		else
			prot = PROT_READ | PROT_WRITE;

		/* Remap anonymous mmap()-ed "map initialization image" as
		 * a BPF map-backed mmap()-ed memory, but preserving the same
		 * memory address. This will cause the kernel to change the
		 * process's page table to point to a different piece of
		 * kernel memory, but from the userspace point of view the
		 * memory address (and its contents, being identical at this
		 * point) will stay the same. This mapping will be released by
		 * bpf_object__close() as per normal clean up procedure, so we
		 * don't need to worry about it from skeleton's clean up
		 * perspective.
		 */
		*mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0);
		if (*mmaped == MAP_FAILED) {
			err = -errno;
			*mmaped = NULL;
			pr_warn("failed to re-mmap() map '%s': %d\n",
				bpf_map__name(map), err);
			return libbpf_err(err);
		}
	}

	return 0;
}

int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
{
	int i, err;

	for (i = 0; i < s->prog_cnt; i++) {
		struct bpf_program *prog = *s->progs[i].prog;
		struct bpf_link **link = s->progs[i].link;

		if (!prog->autoload || !prog->autoattach)
			continue;

		/* auto-attaching not supported for this program */
		if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
			continue;

		/* if user already set the link manually, don't attempt auto-attach */
		if (*link)
			continue;

		err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
		if (err) {
			pr_warn("prog '%s': failed to auto-attach: %d\n",
				bpf_program__name(prog), err);
			return libbpf_err(err);
		}

		/* It's possible that for some SEC() definitions auto-attach
		 * is supported in some cases (e.g., if definition completely
		 * specifies target information), but is not in other cases.
		 * SEC("uprobe") is one such case. If the user specified target
		 * binary and function name, such a BPF program can be
		 * auto-attached. But if not, it shouldn't cause the skeleton
		 * attach to fail; it should just be skipped.
		 * attach_fn signals such a case by returning 0 (no error) and
		 * setting link to NULL.
		 */
	}

	return 0;
}
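
/* Illustrative follow-up to the comment above (skeleton and symbol names are
 * hypothetical placeholders): a program whose SEC() definition doesn't fully
 * specify its target (e.g., a bare SEC("uprobe")) is skipped by
 * bpf_object__attach_skeleton() and its link stays NULL, so it can be
 * attached manually afterwards:
 *
 *	if (!skel->links.handle_uprobe) {
 *		skel->links.handle_uprobe =
 *			bpf_program__attach_uprobe(skel->progs.handle_uprobe,
 *						   false, -1, "/bin/bash", func_off);
 *		if (!skel->links.handle_uprobe)
 *			return -errno;
 *	}
 */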
void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
{
	int i;

	for (i = 0; i < s->prog_cnt; i++) {
		struct bpf_link **link = s->progs[i].link;

		bpf_link__destroy(*link);
		*link = NULL;
	}
}

void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
{
	if (!s)
		return;

	if (s->progs)
		bpf_object__detach_skeleton(s);
	if (s->obj)
		bpf_object__close(*s->obj);
	free(s->maps);
	free(s->progs);
	free(s);
}
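
/* Illustrative end-to-end lifecycle of the skeleton APIs above, as driven by
 * a "bpftool gen skeleton"-generated header (not part of libbpf itself). The
 * skeleton name "myprog_bpf" is a hypothetical placeholder.
 *
 *	struct myprog_bpf *skel;
 *	int err;
 *
 *	skel = myprog_bpf__open();		// bpf_object__open_skeleton()
 *	if (!skel)
 *		return -errno;
 *	// adjust global data, map sizes, autoload/autoattach flags here
 *	err = myprog_bpf__load(skel);		// bpf_object__load_skeleton()
 *	if (err)
 *		goto cleanup;
 *	err = myprog_bpf__attach(skel);		// bpf_object__attach_skeleton()
 *	if (err)
 *		goto cleanup;
 *	// ... run ...
 * cleanup:
 *	myprog_bpf__destroy(skel);		// bpf_object__destroy_skeleton()
 *	return err;
 */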