// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * Common eBPF ELF object loading operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
 * Copyright (C) 2015 Huawei Inc.
 * Copyright (C) 2017 Nicira, Inc.
 * Copyright (C) 2019 Isovalent, Inc.
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
#include <linux/bpf_perf_event.h>
#include <linux/ring_buffer.h>
#include <linux/unaligned.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>

#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"
#include "bpf_gen_internal.h"
#include "zip.h"

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC 0xcafe4a11
#endif

#define MAX_EVENT_NAME_LEN 64

#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"

#define BPF_INSN_SZ (sizeof(struct bpf_insn))

/* vsprintf() in __base_pr() uses nonliteral format string. It may break
 * compilation if user enables corresponding warning. Disable it explicitly.
 */
#pragma GCC diagnostic ignored "-Wformat-nonliteral"

#define __printf(a, b) __attribute__((format(printf, a, b)))

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
static int map_set_def_max_entries(struct bpf_map *map);

static const char * const attach_type_name[] = {
	[BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress",
	[BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress",
	[BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create",
	[BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release",
	[BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops",
	[BPF_CGROUP_DEVICE] = "cgroup_device",
	[BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind",
	[BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind",
	[BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect",
	[BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect",
	[BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect",
	[BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind",
	[BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind",
	[BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername",
	[BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername",
	[BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername",
	[BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname",
	[BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname",
	[BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname",
	[BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg",
	[BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg",
	[BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg",
	[BPF_CGROUP_SYSCTL] = "cgroup_sysctl",
	[BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg",
	[BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg",
	[BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg",
	[BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt",
	[BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt",
	[BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
	[BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
	[BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
	[BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
	[BPF_LIRC_MODE2] = "lirc_mode2",
	[BPF_FLOW_DISSECTOR] = "flow_dissector",
	[BPF_TRACE_RAW_TP] = "trace_raw_tp",
	[BPF_TRACE_FENTRY] = "trace_fentry",
	[BPF_TRACE_FEXIT] = "trace_fexit",
	[BPF_MODIFY_RETURN] = "modify_return",
	[BPF_LSM_MAC] = "lsm_mac",
	[BPF_LSM_CGROUP] = "lsm_cgroup",
	[BPF_SK_LOOKUP] = "sk_lookup",
	[BPF_TRACE_ITER] = "trace_iter",
	[BPF_XDP_DEVMAP] = "xdp_devmap",
	[BPF_XDP_CPUMAP] = "xdp_cpumap",
	[BPF_XDP] = "xdp",
	[BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select",
	[BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate",
	[BPF_PERF_EVENT] = "perf_event",
	[BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
	[BPF_STRUCT_OPS] = "struct_ops",
	[BPF_NETFILTER] = "netfilter",
	[BPF_TCX_INGRESS] = "tcx_ingress",
	[BPF_TCX_EGRESS] = "tcx_egress",
	[BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi",
	[BPF_NETKIT_PRIMARY] = "netkit_primary",
	[BPF_NETKIT_PEER] = "netkit_peer",
	[BPF_TRACE_KPROBE_SESSION] = "trace_kprobe_session",
	[BPF_TRACE_UPROBE_SESSION] = "trace_uprobe_session",
};

static const char * const link_type_name[] = {
	[BPF_LINK_TYPE_UNSPEC] = "unspec",
	[BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
	[BPF_LINK_TYPE_TRACING] = "tracing",
	[BPF_LINK_TYPE_CGROUP] = "cgroup",
	[BPF_LINK_TYPE_ITER] = "iter",
	[BPF_LINK_TYPE_NETNS] = "netns",
	[BPF_LINK_TYPE_XDP] = "xdp",
	[BPF_LINK_TYPE_PERF_EVENT] = "perf_event",
	[BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
	[BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops",
	[BPF_LINK_TYPE_NETFILTER] = "netfilter",
	[BPF_LINK_TYPE_TCX] = "tcx",
	[BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi",
	[BPF_LINK_TYPE_NETKIT] = "netkit",
	[BPF_LINK_TYPE_SOCKMAP] = "sockmap",
};

static const char * const map_type_name[] = {
	[BPF_MAP_TYPE_UNSPEC] = "unspec",
	[BPF_MAP_TYPE_HASH] = "hash",
	[BPF_MAP_TYPE_ARRAY] = "array",
	[BPF_MAP_TYPE_PROG_ARRAY] = "prog_array",
	[BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array",
	[BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash",
	[BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array",
	[BPF_MAP_TYPE_STACK_TRACE] = "stack_trace",
	[BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array",
	[BPF_MAP_TYPE_LRU_HASH] = "lru_hash",
	[BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash",
	[BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie",
	[BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
	[BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
	[BPF_MAP_TYPE_DEVMAP] = "devmap",
	[BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash",
	[BPF_MAP_TYPE_SOCKMAP] = "sockmap",
	[BPF_MAP_TYPE_CPUMAP] = "cpumap",
	[BPF_MAP_TYPE_XSKMAP] = "xskmap",
	[BPF_MAP_TYPE_SOCKHASH] = "sockhash",
	[BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
	[BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
	[BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage",
	[BPF_MAP_TYPE_QUEUE] = "queue",
	[BPF_MAP_TYPE_STACK] = "stack",
	[BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
	[BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
	[BPF_MAP_TYPE_RINGBUF] = "ringbuf",
	[BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
	[BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
	[BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
	[BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
	[BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage",
	[BPF_MAP_TYPE_ARENA] = "arena",
};

static const char * const prog_type_name[] = {
	[BPF_PROG_TYPE_UNSPEC] = "unspec",
	[BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
	[BPF_PROG_TYPE_KPROBE] = "kprobe",
	[BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
	[BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
	[BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
	[BPF_PROG_TYPE_XDP] = "xdp",
	[BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
	[BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
	[BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
	[BPF_PROG_TYPE_LWT_IN] = "lwt_in",
	[BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
	[BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
	[BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
	[BPF_PROG_TYPE_SK_SKB] = "sk_skb",
	[BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
	[BPF_PROG_TYPE_SK_MSG] = "sk_msg",
	[BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
	[BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
	[BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
	[BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
	[BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
	[BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
	[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
	[BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
	[BPF_PROG_TYPE_TRACING] = "tracing",
	[BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
	[BPF_PROG_TYPE_EXT] = "ext",
	[BPF_PROG_TYPE_LSM] = "lsm",
	[BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
	[BPF_PROG_TYPE_SYSCALL] = "syscall",
	[BPF_PROG_TYPE_NETFILTER] = "netfilter",
};

static int __base_pr(enum libbpf_print_level level, const char *format,
		     va_list args)
{
	const char *env_var = "LIBBPF_LOG_LEVEL";
	static enum libbpf_print_level min_level = LIBBPF_INFO;
	static bool initialized;

	if (!initialized) {
		char *verbosity;

		initialized = true;
		verbosity = getenv(env_var);
		if (verbosity) {
			if (strcasecmp(verbosity, "warn") == 0)
				min_level = LIBBPF_WARN;
			else if (strcasecmp(verbosity, "debug") == 0)
				min_level = LIBBPF_DEBUG;
			else if (strcasecmp(verbosity, "info") == 0)
				min_level = LIBBPF_INFO;
			else
				fprintf(stderr, "libbpf: unrecognized '%s' envvar value: '%s', should be one of 'warn', 'debug', or 'info'.\n",
					env_var, verbosity);
		}
	}

	/* if too verbose, skip logging */
	if (level > min_level)
		return 0;

	return vfprintf(stderr, format, args);
}

static libbpf_print_fn_t __libbpf_pr = __base_pr;

libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
{
	libbpf_print_fn_t old_print_fn;

	old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED);

	return old_print_fn;
}

__printf(2, 3)
void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
	va_list args;
	int old_errno;
	libbpf_print_fn_t print_fn;

	print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED);
	if (!print_fn)
		return;

	old_errno = errno;

	va_start(args, format);
	print_fn(level, format, args);
	va_end(args);

	errno = old_errno;
}

static void pr_perm_msg(int err)
{
	struct rlimit limit;
	char buf[100];

	if (err != -EPERM || geteuid() != 0)
		return;

	err = getrlimit(RLIMIT_MEMLOCK, &limit);
	if (err)
		return;

	if (limit.rlim_cur == RLIM_INFINITY)
		return;

	if (limit.rlim_cur < 1024)
		snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
	else if (limit.rlim_cur < 1024*1024)
		snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
	else
		snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));

	pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
		buf);
}

#define STRERR_BUFSIZE 128

/* Copied from tools/perf/util/util.h */
#ifndef zfree
# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
#endif

#ifndef zclose
# define zclose(fd) ({			\
	int ___err = 0;			\
	if ((fd) >= 0)			\
		___err = close((fd));	\
	fd = -1;			\
	___err; })
#endif

static inline __u64 ptr_to_u64(const void *ptr)
{
	return (__u64) (unsigned long) ptr;
}

int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
{
	/* as of v1.0 libbpf_set_strict_mode() is a no-op */
	return 0;
}

__u32 libbpf_major_version(void)
{
	return LIBBPF_MAJOR_VERSION;
}

__u32 libbpf_minor_version(void)
{
	return LIBBPF_MINOR_VERSION;
}

const char *libbpf_version_string(void)
{
#define __S(X) #X
#define _S(X) __S(X)
	return "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
#undef _S
#undef __S
}

enum reloc_type {
	RELO_LD64,
	RELO_CALL,
	RELO_DATA,
	RELO_EXTERN_LD64,
	RELO_EXTERN_CALL,
	RELO_SUBPROG_ADDR,
	RELO_CORE,
};

struct reloc_desc {
	enum reloc_type type;
	int insn_idx;
	union {
		const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
		struct {
			int map_idx;
			int sym_off;
			int ext_idx;
		};
	};
};

/* stored as sec_def->cookie for all libbpf-supported SEC()s */
enum sec_def_flags {
	SEC_NONE = 0,
	/* expected_attach_type is optional, if kernel doesn't support that */
	SEC_EXP_ATTACH_OPT = 1,
	/* legacy, only used by libbpf_get_type_names() and
	 * libbpf_attach_type_by_name(), not used by libbpf itself at all.
	 * This used to be associated with cgroup (and few other) BPF programs
	 * that were attachable through BPF_PROG_ATTACH command. Pretty
	 * meaningless nowadays, though.
	 */
	SEC_ATTACHABLE = 2,
	SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
	/* attachment target is specified through BTF ID in either kernel or
	 * other BPF program's BTF object
	 */
	SEC_ATTACH_BTF = 4,
	/* BPF program type allows sleeping/blocking in kernel */
	SEC_SLEEPABLE = 8,
	/* BPF program supports non-linear XDP buffers */
	SEC_XDP_FRAGS = 16,
	/* Set up proper attach type for usdt probes. */
	SEC_USDT = 32,
};

struct bpf_sec_def {
	char *sec;
	enum bpf_prog_type prog_type;
	enum bpf_attach_type expected_attach_type;
	long cookie;
	int handler_id;

	libbpf_prog_setup_fn_t prog_setup_fn;
	libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
	libbpf_prog_attach_fn_t prog_attach_fn;
};

/*
 * bpf_prog should be a better name but it has been used in
 * linux/filter.h.
 */
struct bpf_program {
	char *name;
	char *sec_name;
	size_t sec_idx;
	const struct bpf_sec_def *sec_def;
	/* this program's instruction offset (in number of instructions)
	 * within its containing ELF section
	 */
	size_t sec_insn_off;
	/* number of original instructions in ELF section belonging to this
	 * program, not taking into account subprogram instructions possibly
	 * appended later during relocation
	 */
	size_t sec_insn_cnt;
	/* Offset (in number of instructions) of the start of instructions
	 * belonging to this BPF program within its containing main BPF
	 * program. For the entry-point (main) BPF program, this is always
	 * zero. For a sub-program, this gets reset before each of the main
	 * BPF programs is processed and relocated, and is used to determine
	 * whether the sub-program was already appended to the main program,
	 * and if yes, at which instruction offset.
	 */
	size_t sub_insn_off;

	/* instructions that belong to BPF program; insns[0] is located at
	 * sec_insn_off instruction within its ELF section in ELF file, so
	 * when mapping ELF file instruction index to the local instruction,
	 * one needs to subtract sec_insn_off; and vice versa.
	 */
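	/* For example (hypothetical numbers): with sec_insn_off == 10, ELF
	 * section instruction #15 corresponds to local instruction
	 * insns[15 - 10], i.e. insns[5].
	 */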
	struct bpf_insn *insns;
	/* actual number of instructions in this BPF program's image; for
	 * entry-point BPF programs this includes the size of main program
	 * itself plus all the used sub-programs, appended at the end
	 */
	size_t insns_cnt;

	struct reloc_desc *reloc_desc;
	int nr_reloc;

	/* BPF verifier log settings */
	char *log_buf;
	size_t log_size;
	__u32 log_level;

	struct bpf_object *obj;

	int fd;
	bool autoload;
	bool autoattach;
	bool sym_global;
	bool mark_btf_static;
	enum bpf_prog_type type;
	enum bpf_attach_type expected_attach_type;
	int exception_cb_idx;

	int prog_ifindex;
	__u32 attach_btf_obj_fd;
	__u32 attach_btf_id;
	__u32 attach_prog_fd;

	void *func_info;
	__u32 func_info_rec_size;
	__u32 func_info_cnt;

	void *line_info;
	__u32 line_info_rec_size;
	__u32 line_info_cnt;
	__u32 prog_flags;
	__u8 hash[SHA256_DIGEST_LENGTH];
};

struct bpf_struct_ops {
	struct bpf_program **progs;
	__u32 *kern_func_off;
	/* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
	void *data;
	/* e.g. struct bpf_struct_ops_tcp_congestion_ops in
	 * btf_vmlinux's format.
	 * struct bpf_struct_ops_tcp_congestion_ops {
	 *	[... some other kernel fields ...]
	 *	struct tcp_congestion_ops data;
	 * }
	 * kern_vdata's size == sizeof(struct bpf_struct_ops_tcp_congestion_ops).
	 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
	 * from "data".
	 */
	void *kern_vdata;
	__u32 type_id;
};

#define DATA_SEC ".data"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
#define STRUCT_OPS_LINK_SEC ".struct_ops.link"
#define ARENA_SEC ".addr_space.1"

enum libbpf_map_type {
	LIBBPF_MAP_UNSPEC,
	LIBBPF_MAP_DATA,
	LIBBPF_MAP_BSS,
	LIBBPF_MAP_RODATA,
	LIBBPF_MAP_KCONFIG,
};

struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
	unsigned int map_flags;
};

struct bpf_map {
	struct bpf_object *obj;
	char *name;
	/* real_name is defined for special internal maps (.rodata*,
	 * .data*, .bss, .kconfig) and preserves their original ELF section
	 * name. This is important to be able to find corresponding BTF
	 * DATASEC information.
	 */
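	/* For example, for a hypothetical bpf_object named 'my_obj', the
	 * '.rodata' section becomes an internal map whose kernel-visible
	 * name is "my_obj.rodata", while real_name stays ".rodata" so that
	 * the matching BTF DATASEC can still be found (see
	 * internal_map_name() below for the exact naming/truncation rules).
	 */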
	char *real_name;
	int fd;
	int sec_idx;
	size_t sec_offset;
	int map_ifindex;
	int inner_map_fd;
	struct bpf_map_def def;
	__u32 numa_node;
	__u32 btf_var_idx;
	int mod_btf_fd;
	__u32 btf_key_type_id;
	__u32 btf_value_type_id;
	__u32 btf_vmlinux_value_type_id;
	enum libbpf_map_type libbpf_type;
	void *mmaped;
	struct bpf_struct_ops *st_ops;
	struct bpf_map *inner_map;
	void **init_slots;
	int init_slots_sz;
	char *pin_path;
	bool pinned;
	bool reused;
	bool autocreate;
	bool autoattach;
	__u64 map_extra;
	struct bpf_program *excl_prog;
};

enum extern_type {
	EXT_UNKNOWN,
	EXT_KCFG,
	EXT_KSYM,
};

enum kcfg_type {
	KCFG_UNKNOWN,
	KCFG_CHAR,
	KCFG_BOOL,
	KCFG_INT,
	KCFG_TRISTATE,
	KCFG_CHAR_ARR,
};

struct extern_desc {
	enum extern_type type;
	int sym_idx;
	int btf_id;
	int sec_btf_id;
	char *name;
	char *essent_name;
	bool is_set;
	bool is_weak;
	union {
		struct {
			enum kcfg_type type;
			int sz;
			int align;
			int data_off;
			bool is_signed;
		} kcfg;
		struct {
			unsigned long long addr;

			/* target btf_id of the corresponding kernel var. */
			int kernel_btf_obj_fd;
			int kernel_btf_id;

			/* local btf_id of the ksym extern's type. */
			__u32 type_id;
			/* BTF fd index to be patched in for insn->off, this is
			 * 0 for vmlinux BTF, index in obj->fd_array for module
			 * BTF
			 */
			__s16 btf_fd_idx;
		} ksym;
	};
};

struct module_btf {
	struct btf *btf;
	char *name;
	__u32 id;
	int fd;
	int fd_array_idx;
};

enum sec_type {
	SEC_UNUSED = 0,
	SEC_RELO,
	SEC_BSS,
	SEC_DATA,
	SEC_RODATA,
	SEC_ST_OPS,
};

struct elf_sec_desc {
	enum sec_type sec_type;
	Elf64_Shdr *shdr;
	Elf_Data *data;
};

struct elf_state {
	int fd;
	const void *obj_buf;
	size_t obj_buf_sz;
	Elf *elf;
	Elf64_Ehdr *ehdr;
	Elf_Data *symbols;
	Elf_Data *arena_data;
	size_t shstrndx; /* section index for section name strings */
	size_t strtabidx;
	struct elf_sec_desc *secs;
	size_t sec_cnt;
	int btf_maps_shndx;
	__u32 btf_maps_sec_btf_id;
	int text_shndx;
	int symbols_shndx;
	bool has_st_ops;
	int arena_data_shndx;
};

struct usdt_manager;

enum bpf_object_state {
	OBJ_OPEN,
	OBJ_PREPARED,
	OBJ_LOADED,
};

struct bpf_object {
	char name[BPF_OBJ_NAME_LEN];
	char license[64];
	__u32 kern_version;

	enum bpf_object_state state;
	struct bpf_program *programs;
	size_t nr_programs;
	struct bpf_map *maps;
	size_t nr_maps;
	size_t maps_cap;

	char *kconfig;
	struct extern_desc *externs;
	int nr_extern;
	int kconfig_map_idx;

	bool has_subcalls;
	bool has_rodata;

	struct bpf_gen *gen_loader;

	/* Information when doing ELF related work. Only valid if efile.elf is not NULL */
	struct elf_state efile;

	unsigned char byteorder;

	struct btf *btf;
	struct btf_ext *btf_ext;

	/* Parse and load BTF vmlinux if any of the programs in the object need
	 * it at load time.
	 */
	struct btf *btf_vmlinux;
	/* Path to the custom BTF to be used for BPF CO-RE relocations as an
	 * override for vmlinux BTF.
	 */
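	/* Example (illustrative, not part of libbpf): an application would
	 * typically set this through bpf_object_open_opts, e.g.:
	 *
	 *	LIBBPF_OPTS(bpf_object_open_opts, opts,
	 *		    .btf_custom_path = "/srv/btf/vmlinux-5.15.btf");
	 *	struct bpf_object *o = bpf_object__open_file("prog.bpf.o", &opts);
	 *
	 * The BTF path and object file name above are made up.
	 */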
	char *btf_custom_path;
	/* vmlinux BTF override for CO-RE relocations */
	struct btf *btf_vmlinux_override;
	/* Lazily initialized kernel module BTFs */
	struct module_btf *btf_modules;
	bool btf_modules_loaded;
	size_t btf_module_cnt;
	size_t btf_module_cap;

	/* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
	char *log_buf;
	size_t log_size;
	__u32 log_level;

	int *fd_array;
	size_t fd_array_cap;
	size_t fd_array_cnt;

	struct usdt_manager *usdt_man;

	int arena_map_idx;
	void *arena_data;
	size_t arena_data_sz;

	struct kern_feature_cache *feat_cache;
	char *token_path;
	int token_fd;

	char path[];
};

static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);

void bpf_program__unload(struct bpf_program *prog)
{
	if (!prog)
		return;

	zclose(prog->fd);

	zfree(&prog->func_info);
	zfree(&prog->line_info);
}

static void bpf_program__exit(struct bpf_program *prog)
{
	if (!prog)
		return;

	bpf_program__unload(prog);
	zfree(&prog->name);
	zfree(&prog->sec_name);
	zfree(&prog->insns);
	zfree(&prog->reloc_desc);

	prog->nr_reloc = 0;
	prog->insns_cnt = 0;
	prog->sec_idx = -1;
}

static bool insn_is_subprog_call(const struct bpf_insn *insn)
{
	return BPF_CLASS(insn->code) == BPF_JMP &&
	       BPF_OP(insn->code) == BPF_CALL &&
	       BPF_SRC(insn->code) == BPF_K &&
	       insn->src_reg == BPF_PSEUDO_CALL &&
	       insn->dst_reg == 0 &&
	       insn->off == 0;
}

static bool is_call_insn(const struct bpf_insn *insn)
{
	return insn->code == (BPF_JMP | BPF_CALL);
}

static bool insn_is_pseudo_func(struct bpf_insn *insn)
{
	return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}

static int
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
		      const char *name, size_t sec_idx, const char *sec_name,
		      size_t sec_off, void *insn_data, size_t insn_data_sz)
{
	if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
		pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
			sec_name, name, sec_off, insn_data_sz);
		return -EINVAL;
	}

	memset(prog, 0, sizeof(*prog));
	prog->obj = obj;

	prog->sec_idx = sec_idx;
	prog->sec_insn_off = sec_off / BPF_INSN_SZ;
	prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
	/* insns_cnt can later be increased by appending used subprograms */
	prog->insns_cnt = prog->sec_insn_cnt;

	prog->type = BPF_PROG_TYPE_UNSPEC;
	prog->fd = -1;
	prog->exception_cb_idx = -1;

	/* libbpf's convention for SEC("?abc...") is that it's just like
	 * SEC("abc...") but the corresponding bpf_program starts out with
	 * autoload set to false.
	 */
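	/* For example (hypothetical program), SEC("?kprobe/do_unlinkat") is
	 * parsed exactly like SEC("kprobe/do_unlinkat"), except the program
	 * is skipped at load time unless the application re-enables it with
	 * bpf_program__set_autoload(prog, true).
	 */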
	if (sec_name[0] == '?') {
		prog->autoload = false;
		/* from now on forget there was ? in section name */
		sec_name++;
	} else {
		prog->autoload = true;
	}

	prog->autoattach = true;

	/* inherit object's log_level */
	prog->log_level = obj->log_level;

	prog->sec_name = strdup(sec_name);
	if (!prog->sec_name)
		goto errout;

	prog->name = strdup(name);
	if (!prog->name)
		goto errout;

	prog->insns = malloc(insn_data_sz);
	if (!prog->insns)
		goto errout;
	memcpy(prog->insns, insn_data, insn_data_sz);

	return 0;
errout:
	pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
	bpf_program__exit(prog);
	return -ENOMEM;
}

static int
bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
			 const char *sec_name, int sec_idx)
{
	Elf_Data *symbols = obj->efile.symbols;
	struct bpf_program *prog, *progs;
	void *data = sec_data->d_buf;
	size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
	int nr_progs, err, i;
	const char *name;
	Elf64_Sym *sym;

	progs = obj->programs;
	nr_progs = obj->nr_programs;
	nr_syms = symbols->d_size / sizeof(Elf64_Sym);

	for (i = 0; i < nr_syms; i++) {
		sym = elf_sym_by_idx(obj, i);

		if (sym->st_shndx != sec_idx)
			continue;
		if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
			continue;

		prog_sz = sym->st_size;
		sec_off = sym->st_value;

		name = elf_sym_str(obj, sym->st_name);
		if (!name) {
			pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
				sec_name, sec_off);
			return -LIBBPF_ERRNO__FORMAT;
		}

		if (sec_off + prog_sz > sec_sz || sec_off + prog_sz < sec_off) {
			pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
				sec_name, sec_off);
			return -LIBBPF_ERRNO__FORMAT;
		}

		if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
			pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
			return -ENOTSUP;
		}

		pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
			 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);

		progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
		if (!progs) {
			/*
			 * In this case the original obj->programs
			 * is still valid, so no special treatment is
			 * needed in bpf_close_object().
			 */
			pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
				sec_name, name);
			return -ENOMEM;
		}
		obj->programs = progs;

		prog = &progs[nr_progs];

		err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
					    sec_off, data + sec_off, prog_sz);
		if (err)
			return err;

		if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL)
			prog->sym_global = true;

		/* if function is a global/weak symbol, but has restricted
		 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
		 * as static to enable more permissive BPF verification mode
		 * with more outside context available to BPF verifier
		 */
		if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
		    || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
			prog->mark_btf_static = true;

		nr_progs++;
		obj->nr_programs = nr_progs;
	}

	return 0;
}

static void bpf_object_bswap_progs(struct bpf_object *obj)
{
	struct bpf_program *prog = obj->programs;
	struct bpf_insn *insn;
	int p, i;

	for (p = 0; p < obj->nr_programs; p++, prog++) {
		insn = prog->insns;
		for (i = 0; i < prog->insns_cnt; i++, insn++)
			bpf_insn_bswap(insn);
	}
	pr_debug("converted %zu BPF programs to native byte order\n", obj->nr_programs);
}

static const struct btf_member *
find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
{
	struct btf_member *m;
	int i;

	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
		if (btf_member_bit_offset(t, i) == bit_offset)
			return m;
	}

	return NULL;
}

static const struct btf_member *
find_member_by_name(const struct btf *btf, const struct btf_type *t,
		    const char *name)
{
	struct btf_member *m;
	int i;

	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
		if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
			return m;
	}

	return NULL;
}

static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
			    __u16 kind, struct btf **res_btf,
			    struct module_btf **res_mod_btf);

#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
				   const char *name, __u32 kind);

static int
find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
			   struct module_btf **mod_btf,
			   const struct btf_type **type, __u32 *type_id,
			   const struct btf_type **vtype, __u32 *vtype_id,
			   const struct btf_member **data_member)
{
	const struct btf_type *kern_type, *kern_vtype;
	const struct btf_member *kern_data_member;
	struct btf *btf = NULL;
	__s32 kern_vtype_id, kern_type_id;
	char tname[192], stname[256];
	__u32 i;

	snprintf(tname, sizeof(tname), "%.*s",
		 (int)bpf_core_essential_name_len(tname_raw), tname_raw);

	snprintf(stname, sizeof(stname), "%s%s", STRUCT_OPS_VALUE_PREFIX, tname);

	/* Look for the corresponding "map_value" type that will be used
	 * in map_update(BPF_MAP_TYPE_STRUCT_OPS) first, figure out the btf
	 * and the mod_btf.
	 * For example, find "struct bpf_struct_ops_tcp_congestion_ops".
	 */
	kern_vtype_id = find_ksym_btf_id(obj, stname, BTF_KIND_STRUCT, &btf, mod_btf);
	if (kern_vtype_id < 0) {
		pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", stname);
		return kern_vtype_id;
	}
	kern_vtype = btf__type_by_id(btf, kern_vtype_id);

	kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
	if (kern_type_id < 0) {
		pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", tname);
		return kern_type_id;
	}
	kern_type = btf__type_by_id(btf, kern_type_id);

	/* Find "struct tcp_congestion_ops" from
	 * struct bpf_struct_ops_tcp_congestion_ops {
	 *	[ ... ]
	 *	struct tcp_congestion_ops data;
	 * }
	 */
	kern_data_member = btf_members(kern_vtype);
	for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
		if (kern_data_member->type == kern_type_id)
			break;
	}
	if (i == btf_vlen(kern_vtype)) {
		pr_warn("struct_ops init_kern: struct %s data is not found in struct %s\n",
			tname, stname);
		return -EINVAL;
	}

	*type = kern_type;
	*type_id = kern_type_id;
	*vtype = kern_vtype;
	*vtype_id = kern_vtype_id;
	*data_member = kern_data_member;

	return 0;
}

static bool bpf_map__is_struct_ops(const struct bpf_map *map)
{
	return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
}

static bool is_valid_st_ops_program(struct bpf_object *obj,
				    const struct bpf_program *prog)
{
	int i;

	for (i = 0; i < obj->nr_programs; i++) {
		if (&obj->programs[i] == prog)
			return prog->type == BPF_PROG_TYPE_STRUCT_OPS;
	}

	return false;
}

/* For each struct_ops program P, referenced from some struct_ops map M,
 * enable P.autoload if there is at least one M for which M.autocreate is true,
 * disable P.autoload if for all Ms M.autocreate is false.
 * Don't change P.autoload for programs that are not referenced from any maps.
 */
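/* Example (hypothetical maps and program): if prog P is referenced from map A
 * (autocreate == true) and map B (autocreate == false), P keeps autoload set,
 * because at least one referencing map will be created. If the application
 * also disables A with bpf_map__set_autocreate(A, false) before load, P's
 * autoload is flipped to false here as well.
 */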
static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj)
{
	struct bpf_program *prog, *slot_prog;
	struct bpf_map *map;
	int i, j, k, vlen;

	for (i = 0; i < obj->nr_programs; ++i) {
		int should_load = false;
		int use_cnt = 0;

		prog = &obj->programs[i];
		if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
			continue;

		for (j = 0; j < obj->nr_maps; ++j) {
			const struct btf_type *type;

			map = &obj->maps[j];
			if (!bpf_map__is_struct_ops(map))
				continue;

			type = btf__type_by_id(obj->btf, map->st_ops->type_id);
			vlen = btf_vlen(type);
			for (k = 0; k < vlen; ++k) {
				slot_prog = map->st_ops->progs[k];
				if (prog != slot_prog)
					continue;

				use_cnt++;
				if (map->autocreate)
					should_load = true;
			}
		}
		if (use_cnt)
			prog->autoload = should_load;
	}

	return 0;
}

/* Init the map's fields that depend on kern_btf */
static int bpf_map__init_kern_struct_ops(struct bpf_map *map)
{
	const struct btf_member *member, *kern_member, *kern_data_member;
	const struct btf_type *type, *kern_type, *kern_vtype;
	__u32 i, kern_type_id, kern_vtype_id, kern_data_off;
	struct bpf_object *obj = map->obj;
	const struct btf *btf = obj->btf;
	struct bpf_struct_ops *st_ops;
	const struct btf *kern_btf;
	struct module_btf *mod_btf = NULL;
	void *data, *kern_data;
	const char *tname;
	int err;

	st_ops = map->st_ops;
	type = btf__type_by_id(btf, st_ops->type_id);
	tname = btf__name_by_offset(btf, type->name_off);
	err = find_struct_ops_kern_types(obj, tname, &mod_btf,
					 &kern_type, &kern_type_id,
					 &kern_vtype, &kern_vtype_id,
					 &kern_data_member);
	if (err)
		return err;

	kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux;

	pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
		 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);

	map->mod_btf_fd = mod_btf ? mod_btf->fd : -1;
	map->def.value_size = kern_vtype->size;
	map->btf_vmlinux_value_type_id = kern_vtype_id;

	st_ops->kern_vdata = calloc(1, kern_vtype->size);
	if (!st_ops->kern_vdata)
		return -ENOMEM;

	data = st_ops->data;
	kern_data_off = kern_data_member->offset / 8;
	kern_data = st_ops->kern_vdata + kern_data_off;

	member = btf_members(type);
	for (i = 0; i < btf_vlen(type); i++, member++) {
		const struct btf_type *mtype, *kern_mtype;
		__u32 mtype_id, kern_mtype_id;
		void *mdata, *kern_mdata;
		struct bpf_program *prog;
		__s64 msize, kern_msize;
		__u32 moff, kern_moff;
		__u32 kern_member_idx;
		const char *mname;

		mname = btf__name_by_offset(btf, member->name_off);
		moff = member->offset / 8;
		mdata = data + moff;
		msize = btf__resolve_size(btf, member->type);
		if (msize < 0) {
			pr_warn("struct_ops init_kern %s: failed to resolve the size of member %s\n",
				map->name, mname);
			return msize;
		}

		kern_member = find_member_by_name(kern_btf, kern_type, mname);
		if (!kern_member) {
			if (!libbpf_is_mem_zeroed(mdata, msize)) {
				pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
					map->name, mname);
				return -ENOTSUP;
			}

			if (st_ops->progs[i]) {
				/* If we had a declaratively set struct_ops callback, we need to
				 * force its autoload to false, because it doesn't have
				 * a chance of succeeding from POV of the current struct_ops map.
				 * If this program is still referenced somewhere else, though,
				 * then bpf_object_adjust_struct_ops_autoload() will update its
				 * autoload accordingly.
				 */
				st_ops->progs[i]->autoload = false;
				st_ops->progs[i] = NULL;
			}

			/* Skip all-zero/NULL fields if they are not present in the kernel BTF */
			pr_info("struct_ops %s: member %s not found in kernel, skipping it as it's set to zero\n",
				map->name, mname);
			continue;
		}

		kern_member_idx = kern_member - btf_members(kern_type);
		if (btf_member_bitfield_size(type, i) ||
		    btf_member_bitfield_size(kern_type, kern_member_idx)) {
			pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
				map->name, mname);
			return -ENOTSUP;
		}

		kern_moff = kern_member->offset / 8;
		kern_mdata = kern_data + kern_moff;

		mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
		kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
						    &kern_mtype_id);
		if (BTF_INFO_KIND(mtype->info) !=
		    BTF_INFO_KIND(kern_mtype->info)) {
			pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
				map->name, mname, BTF_INFO_KIND(mtype->info),
				BTF_INFO_KIND(kern_mtype->info));
			return -ENOTSUP;
		}

		if (btf_is_ptr(mtype)) {
			prog = *(void **)mdata;
			/* just like for the !kern_member case above, reset a
			 * declaratively set (at compile time) program's autoload
			 * to false if the user replaced it with another program
			 * or NULL
			 */
			if (st_ops->progs[i] && st_ops->progs[i] != prog)
				st_ops->progs[i]->autoload = false;

			/* Update the value from the shadow type */
			st_ops->progs[i] = prog;
			if (!prog)
				continue;

			if (!is_valid_st_ops_program(obj, prog)) {
				pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n",
					map->name, mname);
				return -ENOTSUP;
			}

			kern_mtype = skip_mods_and_typedefs(kern_btf,
							    kern_mtype->type,
							    &kern_mtype_id);

			/* mtype->type must be a func_proto which was
			 * guaranteed in bpf_object__collect_st_ops_relos(),
			 * so only check kern_mtype for func_proto here.
			 */
			if (!btf_is_func_proto(kern_mtype)) {
				pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
					map->name, mname);
				return -ENOTSUP;
			}

			if (mod_btf)
				prog->attach_btf_obj_fd = mod_btf->fd;

			/* if we haven't yet processed this BPF program, record proper
			 * attach_btf_id and member_idx
			 */
			if (!prog->attach_btf_id) {
				prog->attach_btf_id = kern_type_id;
				prog->expected_attach_type = kern_member_idx;
			}

			/* struct_ops BPF prog can be re-used between multiple
			 * .struct_ops & .struct_ops.link as long as it's the
			 * same struct_ops struct definition and the same
			 * function pointer field
			 */
			if (prog->attach_btf_id != kern_type_id) {
				pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n",
					map->name, mname, prog->name, prog->sec_name, prog->type,
					prog->attach_btf_id, kern_type_id);
				return -EINVAL;
			}
			if (prog->expected_attach_type != kern_member_idx) {
				pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n",
					map->name, mname, prog->name, prog->sec_name, prog->type,
					prog->expected_attach_type, kern_member_idx);
				return -EINVAL;
			}

			st_ops->kern_func_off[i] = kern_data_off + kern_moff;

			pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
				 map->name, mname, prog->name, moff,
				 kern_moff);

			continue;
		}

		kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
		if (kern_msize < 0 || msize != kern_msize) {
			pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
				map->name, mname, (ssize_t)msize,
				(ssize_t)kern_msize);
			return -ENOTSUP;
		}

		pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
			 map->name, mname, (unsigned int)msize,
			 moff, kern_moff);
		memcpy(kern_mdata, mdata, msize);
	}

	return 0;
}

static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
{
	struct bpf_map *map;
	size_t i;
	int err;

	for (i = 0; i < obj->nr_maps; i++) {
		map = &obj->maps[i];

		if (!bpf_map__is_struct_ops(map))
			continue;

		if (!map->autocreate)
			continue;

		err = bpf_map__init_kern_struct_ops(map);
		if (err)
			return err;
	}

	return 0;
}

static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
				int shndx, Elf_Data *data)
{
	const struct btf_type *type, *datasec;
	const struct btf_var_secinfo *vsi;
	struct bpf_struct_ops *st_ops;
	const char *tname, *var_name;
	__s32 type_id, datasec_id;
	const struct btf *btf;
	struct bpf_map *map;
	__u32 i;

	if (shndx == -1)
		return 0;

	btf = obj->btf;
	datasec_id = btf__find_by_name_kind(btf, sec_name,
					    BTF_KIND_DATASEC);
	if (datasec_id < 0) {
		pr_warn("struct_ops init: DATASEC %s not found\n",
			sec_name);
		return -EINVAL;
	}

	datasec = btf__type_by_id(btf, datasec_id);
	vsi = btf_var_secinfos(datasec);
	for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
		type = btf__type_by_id(obj->btf, vsi->type);
		var_name = btf__name_by_offset(obj->btf, type->name_off);

		type_id = btf__resolve_type(obj->btf, vsi->type);
		if (type_id < 0) {
			pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
				vsi->type, sec_name);
			return -EINVAL;
		}

		type = btf__type_by_id(obj->btf, type_id);
		tname = btf__name_by_offset(obj->btf, type->name_off);
		if (!tname[0]) {
			pr_warn("struct_ops init: anonymous type is not supported\n");
			return -ENOTSUP;
		}
		if (!btf_is_struct(type)) {
			pr_warn("struct_ops init: %s is not a struct\n", tname);
			return -EINVAL;
		}

		map = bpf_object__add_map(obj);
		if (IS_ERR(map))
			return PTR_ERR(map);

		map->sec_idx = shndx;
		map->sec_offset = vsi->offset;
		map->name = strdup(var_name);
		if (!map->name)
			return -ENOMEM;
		map->btf_value_type_id = type_id;

		/* Follow same convention as for programs autoload:
		 * SEC("?.struct_ops") means map is not created by default.
		 */
		if (sec_name[0] == '?') {
			map->autocreate = false;
			/* from now on forget there was ? in section name */
			sec_name++;
		}

		map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
		map->def.key_size = sizeof(int);
		map->def.value_size = type->size;
		map->def.max_entries = 1;
		map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0;
		map->autoattach = true;

		map->st_ops = calloc(1, sizeof(*map->st_ops));
		if (!map->st_ops)
			return -ENOMEM;
		st_ops = map->st_ops;
		st_ops->data = malloc(type->size);
		st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
		st_ops->kern_func_off = malloc(btf_vlen(type) *
					       sizeof(*st_ops->kern_func_off));
		if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
			return -ENOMEM;

		if (vsi->offset + type->size > data->d_size) {
			pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
				var_name, sec_name);
			return -EINVAL;
		}

		memcpy(st_ops->data,
		       data->d_buf + vsi->offset,
		       type->size);
		st_ops->type_id = type_id;

		pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
			 tname, type_id, var_name, vsi->offset);
	}

	return 0;
}

static int bpf_object_init_struct_ops(struct bpf_object *obj)
{
	const char *sec_name;
	int sec_idx, err;

	for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; ++sec_idx) {
		struct elf_sec_desc *desc = &obj->efile.secs[sec_idx];

		if (desc->sec_type != SEC_ST_OPS)
			continue;

		sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
		if (!sec_name)
			return -LIBBPF_ERRNO__FORMAT;

		err = init_struct_ops_maps(obj, sec_name, sec_idx, desc->data);
		if (err)
			return err;
	}

	return 0;
}

static struct bpf_object *bpf_object__new(const char *path,
					  const void *obj_buf,
					  size_t obj_buf_sz,
					  const char *obj_name)
{
	struct bpf_object *obj;
	char *end;

	obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
	if (!obj) {
		pr_warn("alloc memory failed for %s\n", path);
		return ERR_PTR(-ENOMEM);
	}

	strcpy(obj->path, path);
	if (obj_name) {
		libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
	} else {
		/* Using basename() GNU version which doesn't modify arg. */
		libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
		end = strchr(obj->name, '.');
		if (end)
			*end = 0;
	}

	obj->efile.fd = -1;
	/*
	 * Caller of this function should also call
	 * bpf_object__elf_finish() after data collection to return
	 * obj_buf to user. If not, we should duplicate the buffer to
	 * avoid the user freeing it before ELF processing is finished.
	 */
	obj->efile.obj_buf = obj_buf;
	obj->efile.obj_buf_sz = obj_buf_sz;
	obj->efile.btf_maps_shndx = -1;
	obj->kconfig_map_idx = -1;
	obj->arena_map_idx = -1;

	obj->kern_version = get_kernel_version();
	obj->state = OBJ_OPEN;

	return obj;
}

static void bpf_object__elf_finish(struct bpf_object *obj)
{
	if (!obj->efile.elf)
		return;

	elf_end(obj->efile.elf);
	obj->efile.elf = NULL;
	obj->efile.ehdr = NULL;
	obj->efile.symbols = NULL;
	obj->efile.arena_data = NULL;

	zfree(&obj->efile.secs);
	obj->efile.sec_cnt = 0;
	zclose(obj->efile.fd);
	obj->efile.obj_buf = NULL;
	obj->efile.obj_buf_sz = 0;
}

static int bpf_object__elf_init(struct bpf_object *obj)
{
	Elf64_Ehdr *ehdr;
	int err = 0;
	Elf *elf;

	if (obj->efile.elf) {
		pr_warn("elf: init internal error\n");
		return -LIBBPF_ERRNO__LIBELF;
	}

	if (obj->efile.obj_buf_sz > 0) {
		/* obj_buf should have been validated by bpf_object__open_mem(). */
		elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
	} else {
		obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
		if (obj->efile.fd < 0) {
			err = -errno;
			pr_warn("elf: failed to open %s: %s\n", obj->path, errstr(err));
			return err;
		}

		elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
	}

	if (!elf) {
		pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__LIBELF;
		goto errout;
	}

	obj->efile.elf = elf;

	if (elf_kind(elf) != ELF_K_ELF) {
		err = -LIBBPF_ERRNO__FORMAT;
		pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
		goto errout;
	}

	if (gelf_getclass(elf) != ELFCLASS64) {
		err = -LIBBPF_ERRNO__FORMAT;
		pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
		goto errout;
	}

	obj->efile.ehdr = ehdr = elf64_getehdr(elf);
	if (!obj->efile.ehdr) {
		pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	/* Validate ELF object endianness... */
	if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB &&
	    ehdr->e_ident[EI_DATA] != ELFDATA2MSB) {
		err = -LIBBPF_ERRNO__ENDIAN;
		pr_warn("elf: '%s' has unknown byte order\n", obj->path);
		goto errout;
	}
	/* and save after bpf_object_open() frees ELF data */
	obj->byteorder = ehdr->e_ident[EI_DATA];

	if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
		pr_warn("elf: failed to get section names section index for %s: %s\n",
			obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	/* ELF is corrupted/truncated, avoid calling elf_strptr. */
	if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
		pr_warn("elf: failed to get section names strings from %s: %s\n",
			obj->path, elf_errmsg(-1));
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	/* Old LLVM set e_machine to EM_NONE */
	if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
		pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
		err = -LIBBPF_ERRNO__FORMAT;
		goto errout;
	}

	return 0;
errout:
	bpf_object__elf_finish(obj);
	return err;
}

static bool is_native_endianness(struct bpf_object *obj)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	return obj->byteorder == ELFDATA2LSB;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	return obj->byteorder == ELFDATA2MSB;
#else
# error "Unrecognized __BYTE_ORDER__"
#endif
}

static int
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
{
	if (!data) {
		pr_warn("invalid license section in %s\n", obj->path);
		return -LIBBPF_ERRNO__FORMAT;
	}
	/* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't
	 * go over allowed ELF data section buffer
	 */
	libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
	pr_debug("license of %s is %s\n", obj->path, obj->license);
	return 0;
}

static int
bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
{
	__u32 kver;

	if (!data || size != sizeof(kver)) {
		pr_warn("invalid kver section in %s\n", obj->path);
		return -LIBBPF_ERRNO__FORMAT;
	}
	memcpy(&kver, data, sizeof(kver));
	obj->kern_version = kver;
	pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
	return 0;
}

static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
{
	if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
	    type == BPF_MAP_TYPE_HASH_OF_MAPS)
		return true;
	return false;
}

static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
{
	Elf_Data *data;
	Elf_Scn *scn;

	if (!name)
		return -EINVAL;

	scn = elf_sec_by_name(obj, name);
	data = elf_sec_data(obj, scn);
	if (data) {
		*size = data->d_size;
		return 0; /* found it */
	}

	return -ENOENT;
}

static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name)
{
	Elf_Data *symbols = obj->efile.symbols;
	const char *sname;
	size_t si;

	for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
		Elf64_Sym *sym = elf_sym_by_idx(obj, si);

		if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
			continue;

		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
		    ELF64_ST_BIND(sym->st_info) != STB_WEAK)
			continue;

		sname = elf_sym_str(obj, sym->st_name);
		if (!sname) {
			pr_warn("failed to get sym name string for var %s\n", name);
			return ERR_PTR(-EIO);
		}
		if (strcmp(name, sname) == 0)
			return sym;
	}

	return ERR_PTR(-ENOENT);
}

#ifndef MFD_CLOEXEC
#define MFD_CLOEXEC 0x0001U
#endif
#ifndef MFD_NOEXEC_SEAL
#define MFD_NOEXEC_SEAL 0x0008U
#endif

static int create_placeholder_fd(void)
{
	unsigned int flags = MFD_CLOEXEC | MFD_NOEXEC_SEAL;
	const char *name = "libbpf-placeholder-fd";
	int fd;

	fd = ensure_good_fd(sys_memfd_create(name, flags));
	if (fd >= 0)
		return fd;
	else if (errno != EINVAL)
		return -errno;

	/* Possibly running on kernel without MFD_NOEXEC_SEAL */
	fd = ensure_good_fd(sys_memfd_create(name, flags & ~MFD_NOEXEC_SEAL));
	if (fd < 0)
		return -errno;
	return fd;
}

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
{
	struct bpf_map *map;
	int err;

	err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
				sizeof(*obj->maps), obj->nr_maps + 1);
	if (err)
		return ERR_PTR(err);

	map = &obj->maps[obj->nr_maps++];
	map->obj = obj;
	/* Preallocate map FD without actually creating BPF map just yet.
	 * These map FD "placeholders" will be reused later without changing
	 * FD value when map is actually created in the kernel.
	 *
	 * This is useful to be able to perform BPF program relocations
	 * without having to create BPF maps before that step. This allows us
	 * to finalize and load BTF very late in BPF object's loading phase,
	 * right before BPF maps have to be created and BPF programs have to
	 * be loaded. By having these map FD placeholders we can perform all
	 * the sanitizations, relocations, and any other adjustments before we
	 * start creating actual BPF kernel objects (BTF, maps, progs).
	 */
	map->fd = create_placeholder_fd();
	if (map->fd < 0)
		return ERR_PTR(map->fd);
	map->inner_map_fd = -1;
	map->autocreate = true;

	return map;
}

static size_t array_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
{
	const long page_sz = sysconf(_SC_PAGE_SIZE);
	size_t map_sz;

	map_sz = (size_t)roundup(value_sz, 8) * max_entries;
	map_sz = roundup(map_sz, page_sz);
	return map_sz;
}

static size_t bpf_map_mmap_sz(const struct bpf_map *map)
{
	const long page_sz = sysconf(_SC_PAGE_SIZE);

	switch (map->def.type) {
	case BPF_MAP_TYPE_ARRAY:
		return array_map_mmap_sz(map->def.value_size, map->def.max_entries);
	case BPF_MAP_TYPE_ARENA:
		return page_sz * map->def.max_entries;
	default:
		return 0; /* not supported */
	}
}

static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
{
	void *mmaped;

	if (!map->mmaped)
		return -EINVAL;

	if (old_sz == new_sz)
		return 0;

	mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (mmaped == MAP_FAILED)
		return -errno;

	memcpy(mmaped, map->mmaped, min(old_sz, new_sz));
	munmap(map->mmaped, old_sz);
	map->mmaped = mmaped;
	return 0;
}

static char *internal_map_name(struct bpf_object *obj, const char *real_name)
{
	char map_name[BPF_OBJ_NAME_LEN], *p;
	int pfx_len, sfx_len = max((size_t)7, strlen(real_name));

	/* This is one of the more confusing parts of libbpf for various
	 * reasons, some of which are historical. The original idea for naming
	 * internal maps was to include as much of the BPF object name prefix
	 * as possible, so that it can be distinguished from similar internal
	 * maps of a different BPF object.
	 * As an example, let's say we have a bpf_object named 'my_object_name'
	 * and an internal map corresponding to the '.rodata' ELF section. The
	 * final map name advertised to the user and to the kernel will be
	 * 'my_objec.rodata', taking the first 8 characters of the object name
	 * and the entire 7 characters of '.rodata'.
	 * Somewhat confusingly, if the internal map ELF section name is
	 * shorter than 7 characters, e.g., '.bss', we still reserve 7
	 * characters for the suffix, even though we only have 4 actual
	 * characters, and the resulting map will be called 'my_objec.bss',
	 * not even using all 15 characters allowed by the kernel. Oh well, at
	 * least the truncated object name is somewhat consistent in this
	 * case. But if the map name is '.kconfig', we'll still have the
	 * entirety of '.kconfig' added (8 chars) and thus will be left with
	 * only the first 7 characters of the object name ('my_obje'). Happy
	 * guessing, user, that the final map name will be "my_obje.kconfig".
	 * Now, with libbpf starting to support arbitrarily named .rodata.*
	 * and .data.* data sections, it's possible that the ELF section name
	 * is longer than the allowed 15 chars, so we now need to be careful
	 * to take only up to the first 15 characters of the ELF name, taking
	 * no BPF object name characters at all. So '.rodata.abracadabra' will
	 * result in a '.rodata.abracad' kernel and user-visible name.
	 * We need to keep this convoluted logic intact for .data, .bss and
	 * .rodata maps, but for new custom .data.custom and .rodata.custom
	 * maps we use their ELF names as is, not prepending the bpf_object
	 * name in front. We still need to truncate them to 15 characters for
	 * the kernel. The full name can be recovered for such maps by using
	 * the DATASEC BTF type associated with such a map's value type,
	 * though.
	 */
	if (sfx_len >= BPF_OBJ_NAME_LEN)
		sfx_len = BPF_OBJ_NAME_LEN - 1;

	/* if there are two or more dots in map name, it's a custom dot map */
	if (strchr(real_name + 1, '.') != NULL)
		pfx_len = 0;
	else
		pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));

	snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
		 sfx_len, real_name);

	/* sanitize map name to characters allowed by the kernel */
	for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
		if (!isalnum(*p) && *p != '_' && *p != '.')
			*p = '_';

	return strdup(map_name);
}

static int
map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);

/* Internal BPF map is mmap()'able only if at least one of the corresponding
 * DATASEC's VARs is to be exposed through the BPF skeleton. I.e., it's a
 * GLOBAL variable and it's not marked as __hidden (which turns it into,
 * effectively, a STATIC variable).
 */
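/* Example (hypothetical globals defined in a BPF object):
 *
 *	int pkt_count;			// GLOBAL, lands in .bss, exposed via skeleton
 *	__hidden int scratch;		// hidden visibility, effectively static
 *
 * Only variables like the former make the containing internal map
 * mmap()'able; __hidden is the bpf_helpers.h macro that expands to
 * __attribute__((visibility("hidden"))).
 */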
1900 */ 1901 static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map) 1902 { 1903 const struct btf_type *t, *vt; 1904 struct btf_var_secinfo *vsi; 1905 int i, n; 1906 1907 if (!map->btf_value_type_id) 1908 return false; 1909 1910 t = btf__type_by_id(obj->btf, map->btf_value_type_id); 1911 if (!btf_is_datasec(t)) 1912 return false; 1913 1914 vsi = btf_var_secinfos(t); 1915 for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) { 1916 vt = btf__type_by_id(obj->btf, vsi->type); 1917 if (!btf_is_var(vt)) 1918 continue; 1919 1920 if (btf_var(vt)->linkage != BTF_VAR_STATIC) 1921 return true; 1922 } 1923 1924 return false; 1925 } 1926 1927 static int 1928 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, 1929 const char *real_name, int sec_idx, void *data, size_t data_sz) 1930 { 1931 struct bpf_map_def *def; 1932 struct bpf_map *map; 1933 size_t mmap_sz; 1934 int err; 1935 1936 map = bpf_object__add_map(obj); 1937 if (IS_ERR(map)) 1938 return PTR_ERR(map); 1939 1940 map->libbpf_type = type; 1941 map->sec_idx = sec_idx; 1942 map->sec_offset = 0; 1943 map->real_name = strdup(real_name); 1944 map->name = internal_map_name(obj, real_name); 1945 if (!map->real_name || !map->name) { 1946 zfree(&map->real_name); 1947 zfree(&map->name); 1948 return -ENOMEM; 1949 } 1950 1951 def = &map->def; 1952 def->type = BPF_MAP_TYPE_ARRAY; 1953 def->key_size = sizeof(int); 1954 def->value_size = data_sz; 1955 def->max_entries = 1; 1956 def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG 1957 ? BPF_F_RDONLY_PROG : 0; 1958 1959 /* failures are fine because of maps like .rodata.str1.1 */ 1960 (void) map_fill_btf_type_info(obj, map); 1961 1962 if (map_is_mmapable(obj, map)) 1963 def->map_flags |= BPF_F_MMAPABLE; 1964 1965 pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", 1966 map->name, map->sec_idx, map->sec_offset, def->map_flags); 1967 1968 mmap_sz = bpf_map_mmap_sz(map); 1969 map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, 1970 MAP_SHARED | MAP_ANONYMOUS, -1, 0); 1971 if (map->mmaped == MAP_FAILED) { 1972 err = -errno; 1973 map->mmaped = NULL; 1974 pr_warn("failed to alloc map '%s' content buffer: %s\n", map->name, errstr(err)); 1975 zfree(&map->real_name); 1976 zfree(&map->name); 1977 return err; 1978 } 1979 1980 if (data) 1981 memcpy(map->mmaped, data, data_sz); 1982 1983 pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name); 1984 return 0; 1985 } 1986 1987 static int bpf_object__init_global_data_maps(struct bpf_object *obj) 1988 { 1989 struct elf_sec_desc *sec_desc; 1990 const char *sec_name; 1991 int err = 0, sec_idx; 1992 1993 /* 1994 * Populate obj->maps with libbpf internal maps. 1995 */ 1996 for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) { 1997 sec_desc = &obj->efile.secs[sec_idx]; 1998 1999 /* Skip recognized sections with size 0. 
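		 * This can happen, e.g., when a .data/.bss/.rodata section is
		 * emitted but ended up with no variables in it.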
*/ 2000 if (!sec_desc->data || sec_desc->data->d_size == 0) 2001 continue; 2002 2003 switch (sec_desc->sec_type) { 2004 case SEC_DATA: 2005 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); 2006 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, 2007 sec_name, sec_idx, 2008 sec_desc->data->d_buf, 2009 sec_desc->data->d_size); 2010 break; 2011 case SEC_RODATA: 2012 obj->has_rodata = true; 2013 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); 2014 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, 2015 sec_name, sec_idx, 2016 sec_desc->data->d_buf, 2017 sec_desc->data->d_size); 2018 break; 2019 case SEC_BSS: 2020 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); 2021 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, 2022 sec_name, sec_idx, 2023 NULL, 2024 sec_desc->data->d_size); 2025 break; 2026 default: 2027 /* skip */ 2028 break; 2029 } 2030 if (err) 2031 return err; 2032 } 2033 return 0; 2034 } 2035 2036 2037 static struct extern_desc *find_extern_by_name(const struct bpf_object *obj, 2038 const void *name) 2039 { 2040 int i; 2041 2042 for (i = 0; i < obj->nr_extern; i++) { 2043 if (strcmp(obj->externs[i].name, name) == 0) 2044 return &obj->externs[i]; 2045 } 2046 return NULL; 2047 } 2048 2049 static struct extern_desc *find_extern_by_name_with_len(const struct bpf_object *obj, 2050 const void *name, int len) 2051 { 2052 const char *ext_name; 2053 int i; 2054 2055 for (i = 0; i < obj->nr_extern; i++) { 2056 ext_name = obj->externs[i].name; 2057 if (strlen(ext_name) == len && strncmp(ext_name, name, len) == 0) 2058 return &obj->externs[i]; 2059 } 2060 return NULL; 2061 } 2062 2063 static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val, 2064 char value) 2065 { 2066 switch (ext->kcfg.type) { 2067 case KCFG_BOOL: 2068 if (value == 'm') { 2069 pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n", 2070 ext->name, value); 2071 return -EINVAL; 2072 } 2073 *(bool *)ext_val = value == 'y' ? 
true : false; 2074 break; 2075 case KCFG_TRISTATE: 2076 if (value == 'y') 2077 *(enum libbpf_tristate *)ext_val = TRI_YES; 2078 else if (value == 'm') 2079 *(enum libbpf_tristate *)ext_val = TRI_MODULE; 2080 else /* value == 'n' */ 2081 *(enum libbpf_tristate *)ext_val = TRI_NO; 2082 break; 2083 case KCFG_CHAR: 2084 *(char *)ext_val = value; 2085 break; 2086 case KCFG_UNKNOWN: 2087 case KCFG_INT: 2088 case KCFG_CHAR_ARR: 2089 default: 2090 pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n", 2091 ext->name, value); 2092 return -EINVAL; 2093 } 2094 ext->is_set = true; 2095 return 0; 2096 } 2097 2098 static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, 2099 const char *value) 2100 { 2101 size_t len; 2102 2103 if (ext->kcfg.type != KCFG_CHAR_ARR) { 2104 pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n", 2105 ext->name, value); 2106 return -EINVAL; 2107 } 2108 2109 len = strlen(value); 2110 if (len < 2 || value[len - 1] != '"') { 2111 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n", 2112 ext->name, value); 2113 return -EINVAL; 2114 } 2115 2116 /* strip quotes */ 2117 len -= 2; 2118 if (len >= ext->kcfg.sz) { 2119 pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n", 2120 ext->name, value, len, ext->kcfg.sz - 1); 2121 len = ext->kcfg.sz - 1; 2122 } 2123 memcpy(ext_val, value + 1, len); 2124 ext_val[len] = '\0'; 2125 ext->is_set = true; 2126 return 0; 2127 } 2128 2129 static int parse_u64(const char *value, __u64 *res) 2130 { 2131 char *value_end; 2132 int err; 2133 2134 errno = 0; 2135 *res = strtoull(value, &value_end, 0); 2136 if (errno) { 2137 err = -errno; 2138 pr_warn("failed to parse '%s': %s\n", value, errstr(err)); 2139 return err; 2140 } 2141 if (*value_end) { 2142 pr_warn("failed to parse '%s' as integer completely\n", value); 2143 return -EINVAL; 2144 } 2145 return 0; 2146 } 2147 2148 static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v) 2149 { 2150 int bit_sz = ext->kcfg.sz * 8; 2151 2152 if (ext->kcfg.sz == 8) 2153 return true; 2154 2155 /* Validate that value stored in u64 fits in integer of `ext->sz` 2156 * bytes size without any loss of information. If the target integer 2157 * is signed, we rely on the following limits of integer type of 2158 * Y bits and subsequent transformation: 2159 * 2160 * -2^(Y-1) <= X <= 2^(Y-1) - 1 2161 * 0 <= X + 2^(Y-1) <= 2^Y - 1 2162 * 0 <= X + 2^(Y-1) < 2^Y 2163 * 2164 * For unsigned target integer, check that all the (64 - Y) bits are 2165 * zero. 
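	 *
	 * E.g., with Y = 8 (one-byte signed kcfg extern): X = -128 gives
	 * X + 2^7 = 0 and X = 127 gives 255, both < 2^8, so they pass,
	 * while X = 128 gives 256 and fails, causing set_kcfg_value_num()
	 * to return -ERANGE.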
2166 */ 2167 if (ext->kcfg.is_signed) 2168 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz); 2169 else 2170 return (v >> bit_sz) == 0; 2171 } 2172 2173 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val, 2174 __u64 value) 2175 { 2176 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR && 2177 ext->kcfg.type != KCFG_BOOL) { 2178 pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n", 2179 ext->name, (unsigned long long)value); 2180 return -EINVAL; 2181 } 2182 if (ext->kcfg.type == KCFG_BOOL && value > 1) { 2183 pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n", 2184 ext->name, (unsigned long long)value); 2185 return -EINVAL; 2186 2187 } 2188 if (!is_kcfg_value_in_range(ext, value)) { 2189 pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n", 2190 ext->name, (unsigned long long)value, ext->kcfg.sz); 2191 return -ERANGE; 2192 } 2193 switch (ext->kcfg.sz) { 2194 case 1: 2195 *(__u8 *)ext_val = value; 2196 break; 2197 case 2: 2198 *(__u16 *)ext_val = value; 2199 break; 2200 case 4: 2201 *(__u32 *)ext_val = value; 2202 break; 2203 case 8: 2204 *(__u64 *)ext_val = value; 2205 break; 2206 default: 2207 return -EINVAL; 2208 } 2209 ext->is_set = true; 2210 return 0; 2211 } 2212 2213 static int bpf_object__process_kconfig_line(struct bpf_object *obj, 2214 char *buf, void *data) 2215 { 2216 struct extern_desc *ext; 2217 char *sep, *value; 2218 int len, err = 0; 2219 void *ext_val; 2220 __u64 num; 2221 2222 if (!str_has_pfx(buf, "CONFIG_")) 2223 return 0; 2224 2225 sep = strchr(buf, '='); 2226 if (!sep) { 2227 pr_warn("failed to parse '%s': no separator\n", buf); 2228 return -EINVAL; 2229 } 2230 2231 /* Trim ending '\n' */ 2232 len = strlen(buf); 2233 if (buf[len - 1] == '\n') 2234 buf[len - 1] = '\0'; 2235 /* Split on '=' and ensure that a value is present. */ 2236 *sep = '\0'; 2237 if (!sep[1]) { 2238 *sep = '='; 2239 pr_warn("failed to parse '%s': no value\n", buf); 2240 return -EINVAL; 2241 } 2242 2243 ext = find_extern_by_name(obj, buf); 2244 if (!ext || ext->is_set) 2245 return 0; 2246 2247 ext_val = data + ext->kcfg.data_off; 2248 value = sep + 1; 2249 2250 switch (*value) { 2251 case 'y': case 'n': case 'm': 2252 err = set_kcfg_value_tri(ext, ext_val, *value); 2253 break; 2254 case '"': 2255 err = set_kcfg_value_str(ext, ext_val, value); 2256 break; 2257 default: 2258 /* assume integer */ 2259 err = parse_u64(value, &num); 2260 if (err) { 2261 pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value); 2262 return err; 2263 } 2264 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { 2265 pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value); 2266 return -EINVAL; 2267 } 2268 err = set_kcfg_value_num(ext, ext_val, num); 2269 break; 2270 } 2271 if (err) 2272 return err; 2273 pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value); 2274 return 0; 2275 } 2276 2277 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) 2278 { 2279 char buf[PATH_MAX]; 2280 struct utsname uts; 2281 int len, err = 0; 2282 gzFile file; 2283 2284 uname(&uts); 2285 len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release); 2286 if (len < 0) 2287 return -EINVAL; 2288 else if (len >= PATH_MAX) 2289 return -ENAMETOOLONG; 2290 2291 /* gzopen also accepts uncompressed files. 
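	 * Either way each line is in the usual Kconfig format that
	 * bpf_object__process_kconfig_line() above expects, e.g.
	 * CONFIG_BPF_SYSCALL=y or CONFIG_DEFAULT_HOSTNAME="(none)".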
*/ 2292 file = gzopen(buf, "re"); 2293 if (!file) 2294 file = gzopen("/proc/config.gz", "re"); 2295 2296 if (!file) { 2297 pr_warn("failed to open system Kconfig\n"); 2298 return -ENOENT; 2299 } 2300 2301 while (gzgets(file, buf, sizeof(buf))) { 2302 err = bpf_object__process_kconfig_line(obj, buf, data); 2303 if (err) { 2304 pr_warn("error parsing system Kconfig line '%s': %s\n", 2305 buf, errstr(err)); 2306 goto out; 2307 } 2308 } 2309 2310 out: 2311 gzclose(file); 2312 return err; 2313 } 2314 2315 static int bpf_object__read_kconfig_mem(struct bpf_object *obj, 2316 const char *config, void *data) 2317 { 2318 char buf[PATH_MAX]; 2319 int err = 0; 2320 FILE *file; 2321 2322 file = fmemopen((void *)config, strlen(config), "r"); 2323 if (!file) { 2324 err = -errno; 2325 pr_warn("failed to open in-memory Kconfig: %s\n", errstr(err)); 2326 return err; 2327 } 2328 2329 while (fgets(buf, sizeof(buf), file)) { 2330 err = bpf_object__process_kconfig_line(obj, buf, data); 2331 if (err) { 2332 pr_warn("error parsing in-memory Kconfig line '%s': %s\n", 2333 buf, errstr(err)); 2334 break; 2335 } 2336 } 2337 2338 fclose(file); 2339 return err; 2340 } 2341 2342 static int bpf_object__init_kconfig_map(struct bpf_object *obj) 2343 { 2344 struct extern_desc *last_ext = NULL, *ext; 2345 size_t map_sz; 2346 int i, err; 2347 2348 for (i = 0; i < obj->nr_extern; i++) { 2349 ext = &obj->externs[i]; 2350 if (ext->type == EXT_KCFG) 2351 last_ext = ext; 2352 } 2353 2354 if (!last_ext) 2355 return 0; 2356 2357 map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz; 2358 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG, 2359 ".kconfig", obj->efile.symbols_shndx, 2360 NULL, map_sz); 2361 if (err) 2362 return err; 2363 2364 obj->kconfig_map_idx = obj->nr_maps - 1; 2365 2366 return 0; 2367 } 2368 2369 const struct btf_type * 2370 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) 2371 { 2372 const struct btf_type *t = btf__type_by_id(btf, id); 2373 2374 if (res_id) 2375 *res_id = id; 2376 2377 while (btf_is_mod(t) || btf_is_typedef(t)) { 2378 if (res_id) 2379 *res_id = t->type; 2380 t = btf__type_by_id(btf, t->type); 2381 } 2382 2383 return t; 2384 } 2385 2386 static const struct btf_type * 2387 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id) 2388 { 2389 const struct btf_type *t; 2390 2391 t = skip_mods_and_typedefs(btf, id, NULL); 2392 if (!btf_is_ptr(t)) 2393 return NULL; 2394 2395 t = skip_mods_and_typedefs(btf, t->type, res_id); 2396 2397 return btf_is_func_proto(t) ? 
t : NULL; 2398 } 2399 2400 static const char *__btf_kind_str(__u16 kind) 2401 { 2402 switch (kind) { 2403 case BTF_KIND_UNKN: return "void"; 2404 case BTF_KIND_INT: return "int"; 2405 case BTF_KIND_PTR: return "ptr"; 2406 case BTF_KIND_ARRAY: return "array"; 2407 case BTF_KIND_STRUCT: return "struct"; 2408 case BTF_KIND_UNION: return "union"; 2409 case BTF_KIND_ENUM: return "enum"; 2410 case BTF_KIND_FWD: return "fwd"; 2411 case BTF_KIND_TYPEDEF: return "typedef"; 2412 case BTF_KIND_VOLATILE: return "volatile"; 2413 case BTF_KIND_CONST: return "const"; 2414 case BTF_KIND_RESTRICT: return "restrict"; 2415 case BTF_KIND_FUNC: return "func"; 2416 case BTF_KIND_FUNC_PROTO: return "func_proto"; 2417 case BTF_KIND_VAR: return "var"; 2418 case BTF_KIND_DATASEC: return "datasec"; 2419 case BTF_KIND_FLOAT: return "float"; 2420 case BTF_KIND_DECL_TAG: return "decl_tag"; 2421 case BTF_KIND_TYPE_TAG: return "type_tag"; 2422 case BTF_KIND_ENUM64: return "enum64"; 2423 default: return "unknown"; 2424 } 2425 } 2426 2427 const char *btf_kind_str(const struct btf_type *t) 2428 { 2429 return __btf_kind_str(btf_kind(t)); 2430 } 2431 2432 /* 2433 * Fetch integer attribute of BTF map definition. Such attributes are 2434 * represented using a pointer to an array, in which dimensionality of array 2435 * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY]; 2436 * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF 2437 * type definition, while using only sizeof(void *) space in ELF data section. 2438 */ 2439 static bool get_map_field_int(const char *map_name, const struct btf *btf, 2440 const struct btf_member *m, __u32 *res) 2441 { 2442 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); 2443 const char *name = btf__name_by_offset(btf, m->name_off); 2444 const struct btf_array *arr_info; 2445 const struct btf_type *arr_t; 2446 2447 if (!btf_is_ptr(t)) { 2448 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n", 2449 map_name, name, btf_kind_str(t)); 2450 return false; 2451 } 2452 2453 arr_t = btf__type_by_id(btf, t->type); 2454 if (!arr_t) { 2455 pr_warn("map '%s': attr '%s': type [%u] not found.\n", 2456 map_name, name, t->type); 2457 return false; 2458 } 2459 if (!btf_is_array(arr_t)) { 2460 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n", 2461 map_name, name, btf_kind_str(arr_t)); 2462 return false; 2463 } 2464 arr_info = btf_array(arr_t); 2465 *res = arr_info->nelems; 2466 return true; 2467 } 2468 2469 static bool get_map_field_long(const char *map_name, const struct btf *btf, 2470 const struct btf_member *m, __u64 *res) 2471 { 2472 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); 2473 const char *name = btf__name_by_offset(btf, m->name_off); 2474 2475 if (btf_is_ptr(t)) { 2476 __u32 res32; 2477 bool ret; 2478 2479 ret = get_map_field_int(map_name, btf, m, &res32); 2480 if (ret) 2481 *res = (__u64)res32; 2482 return ret; 2483 } 2484 2485 if (!btf_is_enum(t) && !btf_is_enum64(t)) { 2486 pr_warn("map '%s': attr '%s': expected ENUM or ENUM64, got %s.\n", 2487 map_name, name, btf_kind_str(t)); 2488 return false; 2489 } 2490 2491 if (btf_vlen(t) != 1) { 2492 pr_warn("map '%s': attr '%s': invalid __ulong\n", 2493 map_name, name); 2494 return false; 2495 } 2496 2497 if (btf_is_enum(t)) { 2498 const struct btf_enum *e = btf_enum(t); 2499 2500 *res = e->val; 2501 } else { 2502 const struct btf_enum64 *e = btf_enum64(t); 2503 2504 *res = btf_enum64_value(e); 2505 } 2506 return true; 2507 } 2508 2509 static int 
pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name) 2510 { 2511 int len; 2512 2513 len = snprintf(buf, buf_sz, "%s/%s", path, name); 2514 if (len < 0) 2515 return -EINVAL; 2516 if (len >= buf_sz) 2517 return -ENAMETOOLONG; 2518 2519 return 0; 2520 } 2521 2522 static int build_map_pin_path(struct bpf_map *map, const char *path) 2523 { 2524 char buf[PATH_MAX]; 2525 int err; 2526 2527 if (!path) 2528 path = BPF_FS_DEFAULT_PATH; 2529 2530 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); 2531 if (err) 2532 return err; 2533 2534 return bpf_map__set_pin_path(map, buf); 2535 } 2536 2537 /* should match definition in bpf_helpers.h */ 2538 enum libbpf_pin_type { 2539 LIBBPF_PIN_NONE, 2540 /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ 2541 LIBBPF_PIN_BY_NAME, 2542 }; 2543 2544 int parse_btf_map_def(const char *map_name, struct btf *btf, 2545 const struct btf_type *def_t, bool strict, 2546 struct btf_map_def *map_def, struct btf_map_def *inner_def) 2547 { 2548 const struct btf_type *t; 2549 const struct btf_member *m; 2550 bool is_inner = inner_def == NULL; 2551 int vlen, i; 2552 2553 vlen = btf_vlen(def_t); 2554 m = btf_members(def_t); 2555 for (i = 0; i < vlen; i++, m++) { 2556 const char *name = btf__name_by_offset(btf, m->name_off); 2557 2558 if (!name) { 2559 pr_warn("map '%s': invalid field #%d.\n", map_name, i); 2560 return -EINVAL; 2561 } 2562 if (strcmp(name, "type") == 0) { 2563 if (!get_map_field_int(map_name, btf, m, &map_def->map_type)) 2564 return -EINVAL; 2565 map_def->parts |= MAP_DEF_MAP_TYPE; 2566 } else if (strcmp(name, "max_entries") == 0) { 2567 if (!get_map_field_int(map_name, btf, m, &map_def->max_entries)) 2568 return -EINVAL; 2569 map_def->parts |= MAP_DEF_MAX_ENTRIES; 2570 } else if (strcmp(name, "map_flags") == 0) { 2571 if (!get_map_field_int(map_name, btf, m, &map_def->map_flags)) 2572 return -EINVAL; 2573 map_def->parts |= MAP_DEF_MAP_FLAGS; 2574 } else if (strcmp(name, "numa_node") == 0) { 2575 if (!get_map_field_int(map_name, btf, m, &map_def->numa_node)) 2576 return -EINVAL; 2577 map_def->parts |= MAP_DEF_NUMA_NODE; 2578 } else if (strcmp(name, "key_size") == 0) { 2579 __u32 sz; 2580 2581 if (!get_map_field_int(map_name, btf, m, &sz)) 2582 return -EINVAL; 2583 if (map_def->key_size && map_def->key_size != sz) { 2584 pr_warn("map '%s': conflicting key size %u != %u.\n", 2585 map_name, map_def->key_size, sz); 2586 return -EINVAL; 2587 } 2588 map_def->key_size = sz; 2589 map_def->parts |= MAP_DEF_KEY_SIZE; 2590 } else if (strcmp(name, "key") == 0) { 2591 __s64 sz; 2592 2593 t = btf__type_by_id(btf, m->type); 2594 if (!t) { 2595 pr_warn("map '%s': key type [%d] not found.\n", 2596 map_name, m->type); 2597 return -EINVAL; 2598 } 2599 if (!btf_is_ptr(t)) { 2600 pr_warn("map '%s': key spec is not PTR: %s.\n", 2601 map_name, btf_kind_str(t)); 2602 return -EINVAL; 2603 } 2604 sz = btf__resolve_size(btf, t->type); 2605 if (sz < 0) { 2606 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n", 2607 map_name, t->type, (ssize_t)sz); 2608 return sz; 2609 } 2610 if (map_def->key_size && map_def->key_size != sz) { 2611 pr_warn("map '%s': conflicting key size %u != %zd.\n", 2612 map_name, map_def->key_size, (ssize_t)sz); 2613 return -EINVAL; 2614 } 2615 map_def->key_size = sz; 2616 map_def->key_type_id = t->type; 2617 map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE; 2618 } else if (strcmp(name, "value_size") == 0) { 2619 __u32 sz; 2620 2621 if (!get_map_field_int(map_name, btf, m, &sz)) 2622 return -EINVAL; 
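			/* an explicit value_size must be consistent with any
			 * value size already implied by an earlier member
			 * (e.g., a "value" type spec)
			 */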
2623 if (map_def->value_size && map_def->value_size != sz) { 2624 pr_warn("map '%s': conflicting value size %u != %u.\n", 2625 map_name, map_def->value_size, sz); 2626 return -EINVAL; 2627 } 2628 map_def->value_size = sz; 2629 map_def->parts |= MAP_DEF_VALUE_SIZE; 2630 } else if (strcmp(name, "value") == 0) { 2631 __s64 sz; 2632 2633 t = btf__type_by_id(btf, m->type); 2634 if (!t) { 2635 pr_warn("map '%s': value type [%d] not found.\n", 2636 map_name, m->type); 2637 return -EINVAL; 2638 } 2639 if (!btf_is_ptr(t)) { 2640 pr_warn("map '%s': value spec is not PTR: %s.\n", 2641 map_name, btf_kind_str(t)); 2642 return -EINVAL; 2643 } 2644 sz = btf__resolve_size(btf, t->type); 2645 if (sz < 0) { 2646 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n", 2647 map_name, t->type, (ssize_t)sz); 2648 return sz; 2649 } 2650 if (map_def->value_size && map_def->value_size != sz) { 2651 pr_warn("map '%s': conflicting value size %u != %zd.\n", 2652 map_name, map_def->value_size, (ssize_t)sz); 2653 return -EINVAL; 2654 } 2655 map_def->value_size = sz; 2656 map_def->value_type_id = t->type; 2657 map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE; 2658 } 2659 else if (strcmp(name, "values") == 0) { 2660 bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type); 2661 bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY; 2662 const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value"; 2663 char inner_map_name[128]; 2664 int err; 2665 2666 if (is_inner) { 2667 pr_warn("map '%s': multi-level inner maps not supported.\n", 2668 map_name); 2669 return -ENOTSUP; 2670 } 2671 if (i != vlen - 1) { 2672 pr_warn("map '%s': '%s' member should be last.\n", 2673 map_name, name); 2674 return -EINVAL; 2675 } 2676 if (!is_map_in_map && !is_prog_array) { 2677 pr_warn("map '%s': should be map-in-map or prog-array.\n", 2678 map_name); 2679 return -ENOTSUP; 2680 } 2681 if (map_def->value_size && map_def->value_size != 4) { 2682 pr_warn("map '%s': conflicting value size %u != 4.\n", 2683 map_name, map_def->value_size); 2684 return -EINVAL; 2685 } 2686 map_def->value_size = 4; 2687 t = btf__type_by_id(btf, m->type); 2688 if (!t) { 2689 pr_warn("map '%s': %s type [%d] not found.\n", 2690 map_name, desc, m->type); 2691 return -EINVAL; 2692 } 2693 if (!btf_is_array(t) || btf_array(t)->nelems) { 2694 pr_warn("map '%s': %s spec is not a zero-sized array.\n", 2695 map_name, desc); 2696 return -EINVAL; 2697 } 2698 t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL); 2699 if (!btf_is_ptr(t)) { 2700 pr_warn("map '%s': %s def is of unexpected kind %s.\n", 2701 map_name, desc, btf_kind_str(t)); 2702 return -EINVAL; 2703 } 2704 t = skip_mods_and_typedefs(btf, t->type, NULL); 2705 if (is_prog_array) { 2706 if (!btf_is_func_proto(t)) { 2707 pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n", 2708 map_name, btf_kind_str(t)); 2709 return -EINVAL; 2710 } 2711 continue; 2712 } 2713 if (!btf_is_struct(t)) { 2714 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", 2715 map_name, btf_kind_str(t)); 2716 return -EINVAL; 2717 } 2718 2719 snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name); 2720 err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL); 2721 if (err) 2722 return err; 2723 2724 map_def->parts |= MAP_DEF_INNER_MAP; 2725 } else if (strcmp(name, "pinning") == 0) { 2726 __u32 val; 2727 2728 if (is_inner) { 2729 pr_warn("map '%s': inner def can't be pinned.\n", map_name); 2730 return -EINVAL; 2731 } 2732 if 
(!get_map_field_int(map_name, btf, m, &val)) 2733 return -EINVAL; 2734 if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) { 2735 pr_warn("map '%s': invalid pinning value %u.\n", 2736 map_name, val); 2737 return -EINVAL; 2738 } 2739 map_def->pinning = val; 2740 map_def->parts |= MAP_DEF_PINNING; 2741 } else if (strcmp(name, "map_extra") == 0) { 2742 __u64 map_extra; 2743 2744 if (!get_map_field_long(map_name, btf, m, &map_extra)) 2745 return -EINVAL; 2746 map_def->map_extra = map_extra; 2747 map_def->parts |= MAP_DEF_MAP_EXTRA; 2748 } else { 2749 if (strict) { 2750 pr_warn("map '%s': unknown field '%s'.\n", map_name, name); 2751 return -ENOTSUP; 2752 } 2753 pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name); 2754 } 2755 } 2756 2757 if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) { 2758 pr_warn("map '%s': map type isn't specified.\n", map_name); 2759 return -EINVAL; 2760 } 2761 2762 return 0; 2763 } 2764 2765 static size_t adjust_ringbuf_sz(size_t sz) 2766 { 2767 __u32 page_sz = sysconf(_SC_PAGE_SIZE); 2768 __u32 mul; 2769 2770 /* if user forgot to set any size, make sure they see error */ 2771 if (sz == 0) 2772 return 0; 2773 /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be 2774 * a power-of-2 multiple of kernel's page size. If user diligently 2775 * satisified these conditions, pass the size through. 2776 */ 2777 if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz)) 2778 return sz; 2779 2780 /* Otherwise find closest (page_sz * power_of_2) product bigger than 2781 * user-set size to satisfy both user size request and kernel 2782 * requirements and substitute correct max_entries for map creation. 2783 */ 2784 for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) { 2785 if (mul * page_sz > sz) 2786 return mul * page_sz; 2787 } 2788 2789 /* if it's impossible to satisfy the conditions (i.e., user size is 2790 * very close to UINT_MAX but is not a power-of-2 multiple of 2791 * page_size) then just return original size and let kernel reject it 2792 */ 2793 return sz; 2794 } 2795 2796 static bool map_is_ringbuf(const struct bpf_map *map) 2797 { 2798 return map->def.type == BPF_MAP_TYPE_RINGBUF || 2799 map->def.type == BPF_MAP_TYPE_USER_RINGBUF; 2800 } 2801 2802 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def) 2803 { 2804 map->def.type = def->map_type; 2805 map->def.key_size = def->key_size; 2806 map->def.value_size = def->value_size; 2807 map->def.max_entries = def->max_entries; 2808 map->def.map_flags = def->map_flags; 2809 map->map_extra = def->map_extra; 2810 2811 map->numa_node = def->numa_node; 2812 map->btf_key_type_id = def->key_type_id; 2813 map->btf_value_type_id = def->value_type_id; 2814 2815 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ 2816 if (map_is_ringbuf(map)) 2817 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); 2818 2819 if (def->parts & MAP_DEF_MAP_TYPE) 2820 pr_debug("map '%s': found type = %u.\n", map->name, def->map_type); 2821 2822 if (def->parts & MAP_DEF_KEY_TYPE) 2823 pr_debug("map '%s': found key [%u], sz = %u.\n", 2824 map->name, def->key_type_id, def->key_size); 2825 else if (def->parts & MAP_DEF_KEY_SIZE) 2826 pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size); 2827 2828 if (def->parts & MAP_DEF_VALUE_TYPE) 2829 pr_debug("map '%s': found value [%u], sz = %u.\n", 2830 map->name, def->value_type_id, def->value_size); 2831 else if (def->parts & MAP_DEF_VALUE_SIZE) 2832 pr_debug("map '%s': found value_size = %u.\n", map->name, 
def->value_size); 2833 2834 if (def->parts & MAP_DEF_MAX_ENTRIES) 2835 pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries); 2836 if (def->parts & MAP_DEF_MAP_FLAGS) 2837 pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags); 2838 if (def->parts & MAP_DEF_MAP_EXTRA) 2839 pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name, 2840 (unsigned long long)def->map_extra); 2841 if (def->parts & MAP_DEF_PINNING) 2842 pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning); 2843 if (def->parts & MAP_DEF_NUMA_NODE) 2844 pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node); 2845 2846 if (def->parts & MAP_DEF_INNER_MAP) 2847 pr_debug("map '%s': found inner map definition.\n", map->name); 2848 } 2849 2850 static const char *btf_var_linkage_str(__u32 linkage) 2851 { 2852 switch (linkage) { 2853 case BTF_VAR_STATIC: return "static"; 2854 case BTF_VAR_GLOBAL_ALLOCATED: return "global"; 2855 case BTF_VAR_GLOBAL_EXTERN: return "extern"; 2856 default: return "unknown"; 2857 } 2858 } 2859 2860 static int bpf_object__init_user_btf_map(struct bpf_object *obj, 2861 const struct btf_type *sec, 2862 int var_idx, int sec_idx, 2863 const Elf_Data *data, bool strict, 2864 const char *pin_root_path) 2865 { 2866 struct btf_map_def map_def = {}, inner_def = {}; 2867 const struct btf_type *var, *def; 2868 const struct btf_var_secinfo *vi; 2869 const struct btf_var *var_extra; 2870 const char *map_name; 2871 struct bpf_map *map; 2872 int err; 2873 2874 vi = btf_var_secinfos(sec) + var_idx; 2875 var = btf__type_by_id(obj->btf, vi->type); 2876 var_extra = btf_var(var); 2877 map_name = btf__name_by_offset(obj->btf, var->name_off); 2878 2879 if (map_name == NULL || map_name[0] == '\0') { 2880 pr_warn("map #%d: empty name.\n", var_idx); 2881 return -EINVAL; 2882 } 2883 if ((__u64)vi->offset + vi->size > data->d_size) { 2884 pr_warn("map '%s' BTF data is corrupted.\n", map_name); 2885 return -EINVAL; 2886 } 2887 if (!btf_is_var(var)) { 2888 pr_warn("map '%s': unexpected var kind %s.\n", 2889 map_name, btf_kind_str(var)); 2890 return -EINVAL; 2891 } 2892 if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) { 2893 pr_warn("map '%s': unsupported map linkage %s.\n", 2894 map_name, btf_var_linkage_str(var_extra->linkage)); 2895 return -EOPNOTSUPP; 2896 } 2897 2898 def = skip_mods_and_typedefs(obj->btf, var->type, NULL); 2899 if (!btf_is_struct(def)) { 2900 pr_warn("map '%s': unexpected def kind %s.\n", 2901 map_name, btf_kind_str(var)); 2902 return -EINVAL; 2903 } 2904 if (def->size > vi->size) { 2905 pr_warn("map '%s': invalid def size.\n", map_name); 2906 return -EINVAL; 2907 } 2908 2909 map = bpf_object__add_map(obj); 2910 if (IS_ERR(map)) 2911 return PTR_ERR(map); 2912 map->name = strdup(map_name); 2913 if (!map->name) { 2914 pr_warn("map '%s': failed to alloc map name.\n", map_name); 2915 return -ENOMEM; 2916 } 2917 map->libbpf_type = LIBBPF_MAP_UNSPEC; 2918 map->def.type = BPF_MAP_TYPE_UNSPEC; 2919 map->sec_idx = sec_idx; 2920 map->sec_offset = vi->offset; 2921 map->btf_var_idx = var_idx; 2922 pr_debug("map '%s': at sec_idx %d, offset %zu.\n", 2923 map_name, map->sec_idx, map->sec_offset); 2924 2925 err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def); 2926 if (err) 2927 return err; 2928 2929 fill_map_from_def(map, &map_def); 2930 2931 if (map_def.pinning == LIBBPF_PIN_BY_NAME) { 2932 err = build_map_pin_path(map, pin_root_path); 2933 if (err) { 2934 pr_warn("map '%s': couldn't build pin path.\n", map->name); 2935 
return err; 2936 } 2937 } 2938 2939 if (map_def.parts & MAP_DEF_INNER_MAP) { 2940 map->inner_map = calloc(1, sizeof(*map->inner_map)); 2941 if (!map->inner_map) 2942 return -ENOMEM; 2943 map->inner_map->fd = create_placeholder_fd(); 2944 if (map->inner_map->fd < 0) 2945 return map->inner_map->fd; 2946 map->inner_map->sec_idx = sec_idx; 2947 map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1); 2948 if (!map->inner_map->name) 2949 return -ENOMEM; 2950 sprintf(map->inner_map->name, "%s.inner", map_name); 2951 2952 fill_map_from_def(map->inner_map, &inner_def); 2953 } 2954 2955 err = map_fill_btf_type_info(obj, map); 2956 if (err) 2957 return err; 2958 2959 return 0; 2960 } 2961 2962 static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map, 2963 const char *sec_name, int sec_idx, 2964 void *data, size_t data_sz) 2965 { 2966 const long page_sz = sysconf(_SC_PAGE_SIZE); 2967 size_t mmap_sz; 2968 2969 mmap_sz = bpf_map_mmap_sz(map); 2970 if (roundup(data_sz, page_sz) > mmap_sz) { 2971 pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n", 2972 sec_name, mmap_sz, data_sz); 2973 return -E2BIG; 2974 } 2975 2976 obj->arena_data = malloc(data_sz); 2977 if (!obj->arena_data) 2978 return -ENOMEM; 2979 memcpy(obj->arena_data, data, data_sz); 2980 obj->arena_data_sz = data_sz; 2981 2982 /* make bpf_map__init_value() work for ARENA maps */ 2983 map->mmaped = obj->arena_data; 2984 2985 return 0; 2986 } 2987 2988 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, 2989 const char *pin_root_path) 2990 { 2991 const struct btf_type *sec = NULL; 2992 int nr_types, i, vlen, err; 2993 const struct btf_type *t; 2994 const char *name; 2995 Elf_Data *data; 2996 Elf_Scn *scn; 2997 2998 if (obj->efile.btf_maps_shndx < 0) 2999 return 0; 3000 3001 scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx); 3002 data = elf_sec_data(obj, scn); 3003 if (!scn || !data) { 3004 pr_warn("elf: failed to get %s map definitions for %s\n", 3005 MAPS_ELF_SEC, obj->path); 3006 return -EINVAL; 3007 } 3008 3009 nr_types = btf__type_cnt(obj->btf); 3010 for (i = 1; i < nr_types; i++) { 3011 t = btf__type_by_id(obj->btf, i); 3012 if (!btf_is_datasec(t)) 3013 continue; 3014 name = btf__name_by_offset(obj->btf, t->name_off); 3015 if (strcmp(name, MAPS_ELF_SEC) == 0) { 3016 sec = t; 3017 obj->efile.btf_maps_sec_btf_id = i; 3018 break; 3019 } 3020 } 3021 3022 if (!sec) { 3023 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC); 3024 return -ENOENT; 3025 } 3026 3027 vlen = btf_vlen(sec); 3028 for (i = 0; i < vlen; i++) { 3029 err = bpf_object__init_user_btf_map(obj, sec, i, 3030 obj->efile.btf_maps_shndx, 3031 data, strict, 3032 pin_root_path); 3033 if (err) 3034 return err; 3035 } 3036 3037 for (i = 0; i < obj->nr_maps; i++) { 3038 struct bpf_map *map = &obj->maps[i]; 3039 3040 if (map->def.type != BPF_MAP_TYPE_ARENA) 3041 continue; 3042 3043 if (obj->arena_map_idx >= 0) { 3044 pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n", 3045 map->name, obj->maps[obj->arena_map_idx].name); 3046 return -EINVAL; 3047 } 3048 obj->arena_map_idx = i; 3049 3050 if (obj->efile.arena_data) { 3051 err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx, 3052 obj->efile.arena_data->d_buf, 3053 obj->efile.arena_data->d_size); 3054 if (err) 3055 return err; 3056 } 3057 } 3058 if (obj->efile.arena_data && obj->arena_map_idx < 0) { 3059 pr_warn("elf: sec '%s': to use global __arena variables the 
ARENA map should be explicitly declared in SEC(\".maps\")\n", 3060 ARENA_SEC); 3061 return -ENOENT; 3062 } 3063 3064 return 0; 3065 } 3066 3067 static int bpf_object__init_maps(struct bpf_object *obj, 3068 const struct bpf_object_open_opts *opts) 3069 { 3070 const char *pin_root_path; 3071 bool strict; 3072 int err = 0; 3073 3074 strict = !OPTS_GET(opts, relaxed_maps, false); 3075 pin_root_path = OPTS_GET(opts, pin_root_path, NULL); 3076 3077 err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); 3078 err = err ?: bpf_object__init_global_data_maps(obj); 3079 err = err ?: bpf_object__init_kconfig_map(obj); 3080 err = err ?: bpf_object_init_struct_ops(obj); 3081 3082 return err; 3083 } 3084 3085 static bool section_have_execinstr(struct bpf_object *obj, int idx) 3086 { 3087 Elf64_Shdr *sh; 3088 3089 sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx)); 3090 if (!sh) 3091 return false; 3092 3093 return sh->sh_flags & SHF_EXECINSTR; 3094 } 3095 3096 static bool starts_with_qmark(const char *s) 3097 { 3098 return s && s[0] == '?'; 3099 } 3100 3101 static bool btf_needs_sanitization(struct bpf_object *obj) 3102 { 3103 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); 3104 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); 3105 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); 3106 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); 3107 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); 3108 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); 3109 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); 3110 bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); 3111 3112 return !has_func || !has_datasec || !has_func_global || !has_float || 3113 !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec; 3114 } 3115 3116 static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) 3117 { 3118 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); 3119 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); 3120 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); 3121 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); 3122 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); 3123 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); 3124 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); 3125 bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); 3126 int enum64_placeholder_id = 0; 3127 struct btf_type *t; 3128 int i, j, vlen; 3129 3130 for (i = 1; i < btf__type_cnt(btf); i++) { 3131 t = (struct btf_type *)btf__type_by_id(btf, i); 3132 3133 if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) { 3134 /* replace VAR/DECL_TAG with INT */ 3135 t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); 3136 /* 3137 * using size = 1 is the safest choice, 4 will be too 3138 * big and cause kernel BTF validation failure if 3139 * original variable took less than 4 bytes 3140 */ 3141 t->size = 1; 3142 *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8); 3143 } else if (!has_datasec && btf_is_datasec(t)) { 3144 /* replace DATASEC with STRUCT */ 3145 const struct btf_var_secinfo *v = btf_var_secinfos(t); 3146 struct btf_member *m = btf_members(t); 3147 struct btf_type *vt; 3148 char *name; 3149 3150 name = (char *)btf__name_by_offset(btf, t->name_off); 3151 while (*name) { 3152 if (*name == '.' 
|| *name == '?') 3153 *name = '_'; 3154 name++; 3155 } 3156 3157 vlen = btf_vlen(t); 3158 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen); 3159 for (j = 0; j < vlen; j++, v++, m++) { 3160 /* order of field assignments is important */ 3161 m->offset = v->offset * 8; 3162 m->type = v->type; 3163 /* preserve variable name as member name */ 3164 vt = (void *)btf__type_by_id(btf, v->type); 3165 m->name_off = vt->name_off; 3166 } 3167 } else if (!has_qmark_datasec && btf_is_datasec(t) && 3168 starts_with_qmark(btf__name_by_offset(btf, t->name_off))) { 3169 /* replace '?' prefix with '_' for DATASEC names */ 3170 char *name; 3171 3172 name = (char *)btf__name_by_offset(btf, t->name_off); 3173 if (name[0] == '?') 3174 name[0] = '_'; 3175 } else if (!has_func && btf_is_func_proto(t)) { 3176 /* replace FUNC_PROTO with ENUM */ 3177 vlen = btf_vlen(t); 3178 t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen); 3179 t->size = sizeof(__u32); /* kernel enforced */ 3180 } else if (!has_func && btf_is_func(t)) { 3181 /* replace FUNC with TYPEDEF */ 3182 t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0); 3183 } else if (!has_func_global && btf_is_func(t)) { 3184 /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */ 3185 t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0); 3186 } else if (!has_float && btf_is_float(t)) { 3187 /* replace FLOAT with an equally-sized empty STRUCT; 3188 * since C compilers do not accept e.g. "float" as a 3189 * valid struct name, make it anonymous 3190 */ 3191 t->name_off = 0; 3192 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0); 3193 } else if (!has_type_tag && btf_is_type_tag(t)) { 3194 /* replace TYPE_TAG with a CONST */ 3195 t->name_off = 0; 3196 t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0); 3197 } else if (!has_enum64 && btf_is_enum(t)) { 3198 /* clear the kflag */ 3199 t->info = btf_type_info(btf_kind(t), btf_vlen(t), false); 3200 } else if (!has_enum64 && btf_is_enum64(t)) { 3201 /* replace ENUM64 with a union */ 3202 struct btf_member *m; 3203 3204 if (enum64_placeholder_id == 0) { 3205 enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0); 3206 if (enum64_placeholder_id < 0) 3207 return enum64_placeholder_id; 3208 3209 t = (struct btf_type *)btf__type_by_id(btf, i); 3210 } 3211 3212 m = btf_members(t); 3213 vlen = btf_vlen(t); 3214 t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen); 3215 for (j = 0; j < vlen; j++, m++) { 3216 m->type = enum64_placeholder_id; 3217 m->offset = 0; 3218 } 3219 } 3220 } 3221 3222 return 0; 3223 } 3224 3225 static bool libbpf_needs_btf(const struct bpf_object *obj) 3226 { 3227 return obj->efile.btf_maps_shndx >= 0 || 3228 obj->efile.has_st_ops || 3229 obj->nr_extern > 0; 3230 } 3231 3232 static bool kernel_needs_btf(const struct bpf_object *obj) 3233 { 3234 return obj->efile.has_st_ops; 3235 } 3236 3237 static int bpf_object__init_btf(struct bpf_object *obj, 3238 Elf_Data *btf_data, 3239 Elf_Data *btf_ext_data) 3240 { 3241 int err = -ENOENT; 3242 3243 if (btf_data) { 3244 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); 3245 err = libbpf_get_error(obj->btf); 3246 if (err) { 3247 obj->btf = NULL; 3248 pr_warn("Error loading ELF section %s: %s.\n", BTF_ELF_SEC, errstr(err)); 3249 goto out; 3250 } 3251 /* enforce 8-byte pointers for BPF-targeted BTFs */ 3252 btf__set_pointer_size(obj->btf, 8); 3253 } 3254 if (btf_ext_data) { 3255 struct btf_ext_info *ext_segs[3]; 3256 int seg_num, sec_num; 3257 3258 if (!obj->btf) { 3259 pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", 3260 BTF_EXT_ELF_SEC, BTF_ELF_SEC); 3261 
goto out; 3262 } 3263 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); 3264 err = libbpf_get_error(obj->btf_ext); 3265 if (err) { 3266 pr_warn("Error loading ELF section %s: %s. Ignored and continue.\n", 3267 BTF_EXT_ELF_SEC, errstr(err)); 3268 obj->btf_ext = NULL; 3269 goto out; 3270 } 3271 3272 /* setup .BTF.ext to ELF section mapping */ 3273 ext_segs[0] = &obj->btf_ext->func_info; 3274 ext_segs[1] = &obj->btf_ext->line_info; 3275 ext_segs[2] = &obj->btf_ext->core_relo_info; 3276 for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) { 3277 struct btf_ext_info *seg = ext_segs[seg_num]; 3278 const struct btf_ext_info_sec *sec; 3279 const char *sec_name; 3280 Elf_Scn *scn; 3281 3282 if (seg->sec_cnt == 0) 3283 continue; 3284 3285 seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs)); 3286 if (!seg->sec_idxs) { 3287 err = -ENOMEM; 3288 goto out; 3289 } 3290 3291 sec_num = 0; 3292 for_each_btf_ext_sec(seg, sec) { 3293 /* preventively increment index to avoid doing 3294 * this before every continue below 3295 */ 3296 sec_num++; 3297 3298 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); 3299 if (str_is_empty(sec_name)) 3300 continue; 3301 scn = elf_sec_by_name(obj, sec_name); 3302 if (!scn) 3303 continue; 3304 3305 seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn); 3306 } 3307 } 3308 } 3309 out: 3310 if (err && libbpf_needs_btf(obj)) { 3311 pr_warn("BTF is required, but is missing or corrupted.\n"); 3312 return err; 3313 } 3314 return 0; 3315 } 3316 3317 static int compare_vsi_off(const void *_a, const void *_b) 3318 { 3319 const struct btf_var_secinfo *a = _a; 3320 const struct btf_var_secinfo *b = _b; 3321 3322 return a->offset - b->offset; 3323 } 3324 3325 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, 3326 struct btf_type *t) 3327 { 3328 __u32 size = 0, i, vars = btf_vlen(t); 3329 const char *sec_name = btf__name_by_offset(btf, t->name_off); 3330 struct btf_var_secinfo *vsi; 3331 bool fixup_offsets = false; 3332 int err; 3333 3334 if (!sec_name) { 3335 pr_debug("No name found in string section for DATASEC kind.\n"); 3336 return -ENOENT; 3337 } 3338 3339 /* Extern-backing datasecs (.ksyms, .kconfig) have their size and 3340 * variable offsets set at the previous step. Further, not every 3341 * extern BTF VAR has corresponding ELF symbol preserved, so we skip 3342 * all fixups altogether for such sections and go straight to sorting 3343 * VARs within their DATASEC. 3344 */ 3345 if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0) 3346 goto sort_vars; 3347 3348 /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to 3349 * fix this up. But BPF static linker already fixes this up and fills 3350 * all the sizes and offsets during static linking. So this step has 3351 * to be optional. But the STV_HIDDEN handling is non-optional for any 3352 * non-extern DATASEC, so the variable fixup loop below handles both 3353 * functions at the same time, paying the cost of BTF VAR <-> ELF 3354 * symbol matching just once. 
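	 *
	 * Concretely, if the DATASEC size is still zero it is taken from the
	 * ELF section header (via find_elf_sec_sz()) and each VAR's offset is
	 * then refreshed from its ELF symbol's st_value in the loop below.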
3355 */ 3356 if (t->size == 0) { 3357 err = find_elf_sec_sz(obj, sec_name, &size); 3358 if (err || !size) { 3359 pr_debug("sec '%s': failed to determine size from ELF: size %u, err %s\n", 3360 sec_name, size, errstr(err)); 3361 return -ENOENT; 3362 } 3363 3364 t->size = size; 3365 fixup_offsets = true; 3366 } 3367 3368 for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { 3369 const struct btf_type *t_var; 3370 struct btf_var *var; 3371 const char *var_name; 3372 Elf64_Sym *sym; 3373 3374 t_var = btf__type_by_id(btf, vsi->type); 3375 if (!t_var || !btf_is_var(t_var)) { 3376 pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name); 3377 return -EINVAL; 3378 } 3379 3380 var = btf_var(t_var); 3381 if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN) 3382 continue; 3383 3384 var_name = btf__name_by_offset(btf, t_var->name_off); 3385 if (!var_name) { 3386 pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n", 3387 sec_name, i); 3388 return -ENOENT; 3389 } 3390 3391 sym = find_elf_var_sym(obj, var_name); 3392 if (IS_ERR(sym)) { 3393 pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n", 3394 sec_name, var_name); 3395 return -ENOENT; 3396 } 3397 3398 if (fixup_offsets) 3399 vsi->offset = sym->st_value; 3400 3401 /* if variable is a global/weak symbol, but has restricted 3402 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR 3403 * as static. This follows similar logic for functions (BPF 3404 * subprogs) and influences libbpf's further decisions about 3405 * whether to make global data BPF array maps as 3406 * BPF_F_MMAPABLE. 3407 */ 3408 if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN 3409 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL) 3410 var->linkage = BTF_VAR_STATIC; 3411 } 3412 3413 sort_vars: 3414 qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); 3415 return 0; 3416 } 3417 3418 static int bpf_object_fixup_btf(struct bpf_object *obj) 3419 { 3420 int i, n, err = 0; 3421 3422 if (!obj->btf) 3423 return 0; 3424 3425 n = btf__type_cnt(obj->btf); 3426 for (i = 1; i < n; i++) { 3427 struct btf_type *t = btf_type_by_id(obj->btf, i); 3428 3429 /* Loader needs to fix up some of the things compiler 3430 * couldn't get its hands on while emitting BTF. This 3431 * is section size and global variable offset. We use 3432 * the info from the ELF itself for this purpose. 
3433 */ 3434 if (btf_is_datasec(t)) { 3435 err = btf_fixup_datasec(obj, obj->btf, t); 3436 if (err) 3437 return err; 3438 } 3439 } 3440 3441 return 0; 3442 } 3443 3444 static bool prog_needs_vmlinux_btf(struct bpf_program *prog) 3445 { 3446 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS || 3447 prog->type == BPF_PROG_TYPE_LSM) 3448 return true; 3449 3450 /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs 3451 * also need vmlinux BTF 3452 */ 3453 if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd) 3454 return true; 3455 3456 return false; 3457 } 3458 3459 static bool map_needs_vmlinux_btf(struct bpf_map *map) 3460 { 3461 return bpf_map__is_struct_ops(map); 3462 } 3463 3464 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) 3465 { 3466 struct bpf_program *prog; 3467 struct bpf_map *map; 3468 int i; 3469 3470 /* CO-RE relocations need kernel BTF, only when btf_custom_path 3471 * is not specified 3472 */ 3473 if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path) 3474 return true; 3475 3476 /* Support for typed ksyms needs kernel BTF */ 3477 for (i = 0; i < obj->nr_extern; i++) { 3478 const struct extern_desc *ext; 3479 3480 ext = &obj->externs[i]; 3481 if (ext->type == EXT_KSYM && ext->ksym.type_id) 3482 return true; 3483 } 3484 3485 bpf_object__for_each_program(prog, obj) { 3486 if (!prog->autoload) 3487 continue; 3488 if (prog_needs_vmlinux_btf(prog)) 3489 return true; 3490 } 3491 3492 bpf_object__for_each_map(map, obj) { 3493 if (map_needs_vmlinux_btf(map)) 3494 return true; 3495 } 3496 3497 return false; 3498 } 3499 3500 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) 3501 { 3502 int err; 3503 3504 /* btf_vmlinux could be loaded earlier */ 3505 if (obj->btf_vmlinux || obj->gen_loader) 3506 return 0; 3507 3508 if (!force && !obj_needs_vmlinux_btf(obj)) 3509 return 0; 3510 3511 obj->btf_vmlinux = btf__load_vmlinux_btf(); 3512 err = libbpf_get_error(obj->btf_vmlinux); 3513 if (err) { 3514 pr_warn("Error loading vmlinux BTF: %s\n", errstr(err)); 3515 obj->btf_vmlinux = NULL; 3516 return err; 3517 } 3518 return 0; 3519 } 3520 3521 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) 3522 { 3523 struct btf *kern_btf = obj->btf; 3524 bool btf_mandatory, sanitize; 3525 int i, err = 0; 3526 3527 if (!obj->btf) 3528 return 0; 3529 3530 if (!kernel_supports(obj, FEAT_BTF)) { 3531 if (kernel_needs_btf(obj)) { 3532 err = -EOPNOTSUPP; 3533 goto report; 3534 } 3535 pr_debug("Kernel doesn't support BTF, skipping uploading it.\n"); 3536 return 0; 3537 } 3538 3539 /* Even though some subprogs are global/weak, user might prefer more 3540 * permissive BPF verification process that BPF verifier performs for 3541 * static functions, taking into account more context from the caller 3542 * functions. In such case, they need to mark such subprogs with 3543 * __attribute__((visibility("hidden"))) and libbpf will adjust 3544 * corresponding FUNC BTF type to be marked as static and trigger more 3545 * involved BPF verification process. 
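	 *
	 * For example (illustrative; __hidden is bpf_helpers.h's shorthand for
	 * __attribute__((visibility("hidden")))):
	 *
	 *	__hidden int sum(int a, int b)
	 *	{
	 *		return a + b;
	 *	}
	 *
	 * Such a subprog keeps its global ELF binding, but the loop below
	 * rewrites its FUNC BTF linkage to BTF_FUNC_STATIC, so the verifier
	 * checks it in the context of each caller rather than as an
	 * independent global function.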
3546 */ 3547 for (i = 0; i < obj->nr_programs; i++) { 3548 struct bpf_program *prog = &obj->programs[i]; 3549 struct btf_type *t; 3550 const char *name; 3551 int j, n; 3552 3553 if (!prog->mark_btf_static || !prog_is_subprog(obj, prog)) 3554 continue; 3555 3556 n = btf__type_cnt(obj->btf); 3557 for (j = 1; j < n; j++) { 3558 t = btf_type_by_id(obj->btf, j); 3559 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) 3560 continue; 3561 3562 name = btf__str_by_offset(obj->btf, t->name_off); 3563 if (strcmp(name, prog->name) != 0) 3564 continue; 3565 3566 t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0); 3567 break; 3568 } 3569 } 3570 3571 sanitize = btf_needs_sanitization(obj); 3572 if (sanitize) { 3573 const void *raw_data; 3574 __u32 sz; 3575 3576 /* clone BTF to sanitize a copy and leave the original intact */ 3577 raw_data = btf__raw_data(obj->btf, &sz); 3578 kern_btf = btf__new(raw_data, sz); 3579 err = libbpf_get_error(kern_btf); 3580 if (err) 3581 return err; 3582 3583 /* enforce 8-byte pointers for BPF-targeted BTFs */ 3584 btf__set_pointer_size(obj->btf, 8); 3585 err = bpf_object__sanitize_btf(obj, kern_btf); 3586 if (err) 3587 return err; 3588 } 3589 3590 if (obj->gen_loader) { 3591 __u32 raw_size = 0; 3592 const void *raw_data = btf__raw_data(kern_btf, &raw_size); 3593 3594 if (!raw_data) 3595 return -ENOMEM; 3596 bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size); 3597 /* Pretend to have valid FD to pass various fd >= 0 checks. 3598 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. 3599 */ 3600 btf__set_fd(kern_btf, 0); 3601 } else { 3602 /* currently BPF_BTF_LOAD only supports log_level 1 */ 3603 err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, 3604 obj->log_level ? 1 : 0, obj->token_fd); 3605 } 3606 if (sanitize) { 3607 if (!err) { 3608 /* move fd to libbpf's BTF */ 3609 btf__set_fd(obj->btf, btf__fd(kern_btf)); 3610 btf__set_fd(kern_btf, -1); 3611 } 3612 btf__free(kern_btf); 3613 } 3614 report: 3615 if (err) { 3616 btf_mandatory = kernel_needs_btf(obj); 3617 if (btf_mandatory) { 3618 pr_warn("Error loading .BTF into kernel: %s. BTF is mandatory, can't proceed.\n", 3619 errstr(err)); 3620 } else { 3621 pr_info("Error loading .BTF into kernel: %s. 
BTF is optional, ignoring.\n", 3622 errstr(err)); 3623 err = 0; 3624 } 3625 } 3626 return err; 3627 } 3628 3629 static const char *elf_sym_str(const struct bpf_object *obj, size_t off) 3630 { 3631 const char *name; 3632 3633 name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off); 3634 if (!name) { 3635 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n", 3636 off, obj->path, elf_errmsg(-1)); 3637 return NULL; 3638 } 3639 3640 return name; 3641 } 3642 3643 static const char *elf_sec_str(const struct bpf_object *obj, size_t off) 3644 { 3645 const char *name; 3646 3647 name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off); 3648 if (!name) { 3649 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n", 3650 off, obj->path, elf_errmsg(-1)); 3651 return NULL; 3652 } 3653 3654 return name; 3655 } 3656 3657 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx) 3658 { 3659 Elf_Scn *scn; 3660 3661 scn = elf_getscn(obj->efile.elf, idx); 3662 if (!scn) { 3663 pr_warn("elf: failed to get section(%zu) from %s: %s\n", 3664 idx, obj->path, elf_errmsg(-1)); 3665 return NULL; 3666 } 3667 return scn; 3668 } 3669 3670 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name) 3671 { 3672 Elf_Scn *scn = NULL; 3673 Elf *elf = obj->efile.elf; 3674 const char *sec_name; 3675 3676 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3677 sec_name = elf_sec_name(obj, scn); 3678 if (!sec_name) 3679 return NULL; 3680 3681 if (strcmp(sec_name, name) != 0) 3682 continue; 3683 3684 return scn; 3685 } 3686 return NULL; 3687 } 3688 3689 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn) 3690 { 3691 Elf64_Shdr *shdr; 3692 3693 if (!scn) 3694 return NULL; 3695 3696 shdr = elf64_getshdr(scn); 3697 if (!shdr) { 3698 pr_warn("elf: failed to get section(%zu) header from %s: %s\n", 3699 elf_ndxscn(scn), obj->path, elf_errmsg(-1)); 3700 return NULL; 3701 } 3702 3703 return shdr; 3704 } 3705 3706 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn) 3707 { 3708 const char *name; 3709 Elf64_Shdr *sh; 3710 3711 if (!scn) 3712 return NULL; 3713 3714 sh = elf_sec_hdr(obj, scn); 3715 if (!sh) 3716 return NULL; 3717 3718 name = elf_sec_str(obj, sh->sh_name); 3719 if (!name) { 3720 pr_warn("elf: failed to get section(%zu) name from %s: %s\n", 3721 elf_ndxscn(scn), obj->path, elf_errmsg(-1)); 3722 return NULL; 3723 } 3724 3725 return name; 3726 } 3727 3728 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn) 3729 { 3730 Elf_Data *data; 3731 3732 if (!scn) 3733 return NULL; 3734 3735 data = elf_getdata(scn, 0); 3736 if (!data) { 3737 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n", 3738 elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>", 3739 obj->path, elf_errmsg(-1)); 3740 return NULL; 3741 } 3742 3743 return data; 3744 } 3745 3746 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx) 3747 { 3748 if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym)) 3749 return NULL; 3750 3751 return (Elf64_Sym *)obj->efile.symbols->d_buf + idx; 3752 } 3753 3754 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx) 3755 { 3756 if (idx >= data->d_size / sizeof(Elf64_Rel)) 3757 return NULL; 3758 3759 return (Elf64_Rel *)data->d_buf + idx; 3760 } 3761 3762 static bool is_sec_name_dwarf(const char *name) 3763 { 3764 /* approximation, but the actual list is too long */ 3765 return str_has_pfx(name, ".debug_"); 3766 } 3767 3768 static bool 
ignore_elf_section(Elf64_Shdr *hdr, const char *name) 3769 { 3770 /* no special handling of .strtab */ 3771 if (hdr->sh_type == SHT_STRTAB) 3772 return true; 3773 3774 /* ignore .llvm_addrsig section as well */ 3775 if (hdr->sh_type == SHT_LLVM_ADDRSIG) 3776 return true; 3777 3778 /* no subprograms will lead to an empty .text section, ignore it */ 3779 if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 && 3780 strcmp(name, ".text") == 0) 3781 return true; 3782 3783 /* DWARF sections */ 3784 if (is_sec_name_dwarf(name)) 3785 return true; 3786 3787 if (str_has_pfx(name, ".rel")) { 3788 name += sizeof(".rel") - 1; 3789 /* DWARF section relocations */ 3790 if (is_sec_name_dwarf(name)) 3791 return true; 3792 3793 /* .BTF and .BTF.ext don't need relocations */ 3794 if (strcmp(name, BTF_ELF_SEC) == 0 || 3795 strcmp(name, BTF_EXT_ELF_SEC) == 0) 3796 return true; 3797 } 3798 3799 return false; 3800 } 3801 3802 static int cmp_progs(const void *_a, const void *_b) 3803 { 3804 const struct bpf_program *a = _a; 3805 const struct bpf_program *b = _b; 3806 3807 if (a->sec_idx != b->sec_idx) 3808 return a->sec_idx < b->sec_idx ? -1 : 1; 3809 3810 /* sec_insn_off can't be the same within the section */ 3811 return a->sec_insn_off < b->sec_insn_off ? -1 : 1; 3812 } 3813 3814 static int bpf_object__elf_collect(struct bpf_object *obj) 3815 { 3816 struct elf_sec_desc *sec_desc; 3817 Elf *elf = obj->efile.elf; 3818 Elf_Data *btf_ext_data = NULL; 3819 Elf_Data *btf_data = NULL; 3820 int idx = 0, err = 0; 3821 const char *name; 3822 Elf_Data *data; 3823 Elf_Scn *scn; 3824 Elf64_Shdr *sh; 3825 3826 /* ELF section indices are 0-based, but sec #0 is special "invalid" 3827 * section. Since section count retrieved by elf_getshdrnum() does 3828 * include sec #0, it is already the necessary size of an array to keep 3829 * all the sections. 
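	 * This also means obj->efile.secs below can be indexed directly by
	 * elf_ndxscn() section indices, with secs[0] simply left unused.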
3830 */ 3831 if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) { 3832 pr_warn("elf: failed to get the number of sections for %s: %s\n", 3833 obj->path, elf_errmsg(-1)); 3834 return -LIBBPF_ERRNO__FORMAT; 3835 } 3836 obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs)); 3837 if (!obj->efile.secs) 3838 return -ENOMEM; 3839 3840 /* a bunch of ELF parsing functionality depends on processing symbols, 3841 * so do the first pass and find the symbol table 3842 */ 3843 scn = NULL; 3844 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3845 sh = elf_sec_hdr(obj, scn); 3846 if (!sh) 3847 return -LIBBPF_ERRNO__FORMAT; 3848 3849 if (sh->sh_type == SHT_SYMTAB) { 3850 if (obj->efile.symbols) { 3851 pr_warn("elf: multiple symbol tables in %s\n", obj->path); 3852 return -LIBBPF_ERRNO__FORMAT; 3853 } 3854 3855 data = elf_sec_data(obj, scn); 3856 if (!data) 3857 return -LIBBPF_ERRNO__FORMAT; 3858 3859 idx = elf_ndxscn(scn); 3860 3861 obj->efile.symbols = data; 3862 obj->efile.symbols_shndx = idx; 3863 obj->efile.strtabidx = sh->sh_link; 3864 } 3865 } 3866 3867 if (!obj->efile.symbols) { 3868 pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n", 3869 obj->path); 3870 return -ENOENT; 3871 } 3872 3873 scn = NULL; 3874 while ((scn = elf_nextscn(elf, scn)) != NULL) { 3875 idx = elf_ndxscn(scn); 3876 sec_desc = &obj->efile.secs[idx]; 3877 3878 sh = elf_sec_hdr(obj, scn); 3879 if (!sh) 3880 return -LIBBPF_ERRNO__FORMAT; 3881 3882 name = elf_sec_str(obj, sh->sh_name); 3883 if (!name) 3884 return -LIBBPF_ERRNO__FORMAT; 3885 3886 if (ignore_elf_section(sh, name)) 3887 continue; 3888 3889 data = elf_sec_data(obj, scn); 3890 if (!data) 3891 return -LIBBPF_ERRNO__FORMAT; 3892 3893 pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", 3894 idx, name, (unsigned long)data->d_size, 3895 (int)sh->sh_link, (unsigned long)sh->sh_flags, 3896 (int)sh->sh_type); 3897 3898 if (strcmp(name, "license") == 0) { 3899 err = bpf_object__init_license(obj, data->d_buf, data->d_size); 3900 if (err) 3901 return err; 3902 } else if (strcmp(name, "version") == 0) { 3903 err = bpf_object__init_kversion(obj, data->d_buf, data->d_size); 3904 if (err) 3905 return err; 3906 } else if (strcmp(name, "maps") == 0) { 3907 pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n"); 3908 return -ENOTSUP; 3909 } else if (strcmp(name, MAPS_ELF_SEC) == 0) { 3910 obj->efile.btf_maps_shndx = idx; 3911 } else if (strcmp(name, BTF_ELF_SEC) == 0) { 3912 if (sh->sh_type != SHT_PROGBITS) 3913 return -LIBBPF_ERRNO__FORMAT; 3914 btf_data = data; 3915 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { 3916 if (sh->sh_type != SHT_PROGBITS) 3917 return -LIBBPF_ERRNO__FORMAT; 3918 btf_ext_data = data; 3919 } else if (sh->sh_type == SHT_SYMTAB) { 3920 /* already processed during the first pass above */ 3921 } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) { 3922 if (sh->sh_flags & SHF_EXECINSTR) { 3923 if (strcmp(name, ".text") == 0) 3924 obj->efile.text_shndx = idx; 3925 err = bpf_object__add_programs(obj, data, name, idx); 3926 if (err) 3927 return err; 3928 } else if (strcmp(name, DATA_SEC) == 0 || 3929 str_has_pfx(name, DATA_SEC ".")) { 3930 sec_desc->sec_type = SEC_DATA; 3931 sec_desc->shdr = sh; 3932 sec_desc->data = data; 3933 } else if (strcmp(name, RODATA_SEC) == 0 || 3934 str_has_pfx(name, RODATA_SEC ".")) { 3935 sec_desc->sec_type = SEC_RODATA; 3936 sec_desc->shdr = sh; 3937 sec_desc->data = data; 3938 } else if (strcmp(name, STRUCT_OPS_SEC) == 0 || 3939 
strcmp(name, STRUCT_OPS_LINK_SEC) == 0 || 3940 strcmp(name, "?" STRUCT_OPS_SEC) == 0 || 3941 strcmp(name, "?" STRUCT_OPS_LINK_SEC) == 0) { 3942 sec_desc->sec_type = SEC_ST_OPS; 3943 sec_desc->shdr = sh; 3944 sec_desc->data = data; 3945 obj->efile.has_st_ops = true; 3946 } else if (strcmp(name, ARENA_SEC) == 0) { 3947 obj->efile.arena_data = data; 3948 obj->efile.arena_data_shndx = idx; 3949 } else { 3950 pr_info("elf: skipping unrecognized data section(%d) %s\n", 3951 idx, name); 3952 } 3953 } else if (sh->sh_type == SHT_REL) { 3954 int targ_sec_idx = sh->sh_info; /* points to other section */ 3955 3956 if (sh->sh_entsize != sizeof(Elf64_Rel) || 3957 targ_sec_idx >= obj->efile.sec_cnt) 3958 return -LIBBPF_ERRNO__FORMAT; 3959 3960 /* Only do relo for section with exec instructions */ 3961 if (!section_have_execinstr(obj, targ_sec_idx) && 3962 strcmp(name, ".rel" STRUCT_OPS_SEC) && 3963 strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) && 3964 strcmp(name, ".rel?" STRUCT_OPS_SEC) && 3965 strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) && 3966 strcmp(name, ".rel" MAPS_ELF_SEC)) { 3967 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", 3968 idx, name, targ_sec_idx, 3969 elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>"); 3970 continue; 3971 } 3972 3973 sec_desc->sec_type = SEC_RELO; 3974 sec_desc->shdr = sh; 3975 sec_desc->data = data; 3976 } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 || 3977 str_has_pfx(name, BSS_SEC "."))) { 3978 sec_desc->sec_type = SEC_BSS; 3979 sec_desc->shdr = sh; 3980 sec_desc->data = data; 3981 } else { 3982 pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, 3983 (size_t)sh->sh_size); 3984 } 3985 } 3986 3987 if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) { 3988 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path); 3989 return -LIBBPF_ERRNO__FORMAT; 3990 } 3991 3992 /* change BPF program insns to native endianness for introspection */ 3993 if (!is_native_endianness(obj)) 3994 bpf_object_bswap_progs(obj); 3995 3996 /* sort BPF programs by section name and in-section instruction offset 3997 * for faster search 3998 */ 3999 if (obj->nr_programs) 4000 qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); 4001 4002 return bpf_object__init_btf(obj, btf_data, btf_ext_data); 4003 } 4004 4005 static bool sym_is_extern(const Elf64_Sym *sym) 4006 { 4007 int bind = ELF64_ST_BIND(sym->st_info); 4008 /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */ 4009 return sym->st_shndx == SHN_UNDEF && 4010 (bind == STB_GLOBAL || bind == STB_WEAK) && 4011 ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE; 4012 } 4013 4014 static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx) 4015 { 4016 int bind = ELF64_ST_BIND(sym->st_info); 4017 int type = ELF64_ST_TYPE(sym->st_info); 4018 4019 /* in .text section */ 4020 if (sym->st_shndx != text_shndx) 4021 return false; 4022 4023 /* local function */ 4024 if (bind == STB_LOCAL && type == STT_SECTION) 4025 return true; 4026 4027 /* global function */ 4028 return (bind == STB_GLOBAL || bind == STB_WEAK) && type == STT_FUNC; 4029 } 4030 4031 static int find_extern_btf_id(const struct btf *btf, const char *ext_name) 4032 { 4033 const struct btf_type *t; 4034 const char *tname; 4035 int i, n; 4036 4037 if (!btf) 4038 return -ESRCH; 4039 4040 n = btf__type_cnt(btf); 4041 for (i = 1; i < n; i++) { 4042 t = btf__type_by_id(btf, i); 4043 4044 if (!btf_is_var(t) && !btf_is_func(t)) 4045 continue; 4046 4047 tname = btf__name_by_offset(btf, 
t->name_off); 4048 if (strcmp(tname, ext_name)) 4049 continue; 4050 4051 if (btf_is_var(t) && 4052 btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN) 4053 return -EINVAL; 4054 4055 if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN) 4056 return -EINVAL; 4057 4058 return i; 4059 } 4060 4061 return -ENOENT; 4062 } 4063 4064 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) { 4065 const struct btf_var_secinfo *vs; 4066 const struct btf_type *t; 4067 int i, j, n; 4068 4069 if (!btf) 4070 return -ESRCH; 4071 4072 n = btf__type_cnt(btf); 4073 for (i = 1; i < n; i++) { 4074 t = btf__type_by_id(btf, i); 4075 4076 if (!btf_is_datasec(t)) 4077 continue; 4078 4079 vs = btf_var_secinfos(t); 4080 for (j = 0; j < btf_vlen(t); j++, vs++) { 4081 if (vs->type == ext_btf_id) 4082 return i; 4083 } 4084 } 4085 4086 return -ENOENT; 4087 } 4088 4089 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id, 4090 bool *is_signed) 4091 { 4092 const struct btf_type *t; 4093 const char *name; 4094 4095 t = skip_mods_and_typedefs(btf, id, NULL); 4096 name = btf__name_by_offset(btf, t->name_off); 4097 4098 if (is_signed) 4099 *is_signed = false; 4100 switch (btf_kind(t)) { 4101 case BTF_KIND_INT: { 4102 int enc = btf_int_encoding(t); 4103 4104 if (enc & BTF_INT_BOOL) 4105 return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN; 4106 if (is_signed) 4107 *is_signed = enc & BTF_INT_SIGNED; 4108 if (t->size == 1) 4109 return KCFG_CHAR; 4110 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1))) 4111 return KCFG_UNKNOWN; 4112 return KCFG_INT; 4113 } 4114 case BTF_KIND_ENUM: 4115 if (t->size != 4) 4116 return KCFG_UNKNOWN; 4117 if (strcmp(name, "libbpf_tristate")) 4118 return KCFG_UNKNOWN; 4119 return KCFG_TRISTATE; 4120 case BTF_KIND_ENUM64: 4121 if (strcmp(name, "libbpf_tristate")) 4122 return KCFG_UNKNOWN; 4123 return KCFG_TRISTATE; 4124 case BTF_KIND_ARRAY: 4125 if (btf_array(t)->nelems == 0) 4126 return KCFG_UNKNOWN; 4127 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR) 4128 return KCFG_UNKNOWN; 4129 return KCFG_CHAR_ARR; 4130 default: 4131 return KCFG_UNKNOWN; 4132 } 4133 } 4134 4135 static int cmp_externs(const void *_a, const void *_b) 4136 { 4137 const struct extern_desc *a = _a; 4138 const struct extern_desc *b = _b; 4139 4140 if (a->type != b->type) 4141 return a->type < b->type ? -1 : 1; 4142 4143 if (a->type == EXT_KCFG) { 4144 /* descending order by alignment requirements */ 4145 if (a->kcfg.align != b->kcfg.align) 4146 return a->kcfg.align > b->kcfg.align ? -1 : 1; 4147 /* ascending order by size, within same alignment class */ 4148 if (a->kcfg.sz != b->kcfg.sz) 4149 return a->kcfg.sz < b->kcfg.sz ? 
-1 : 1; 4150 } 4151 4152 /* resolve ties by name */ 4153 return strcmp(a->name, b->name); 4154 } 4155 4156 static int find_int_btf_id(const struct btf *btf) 4157 { 4158 const struct btf_type *t; 4159 int i, n; 4160 4161 n = btf__type_cnt(btf); 4162 for (i = 1; i < n; i++) { 4163 t = btf__type_by_id(btf, i); 4164 4165 if (btf_is_int(t) && btf_int_bits(t) == 32) 4166 return i; 4167 } 4168 4169 return 0; 4170 } 4171 4172 static int add_dummy_ksym_var(struct btf *btf) 4173 { 4174 int i, int_btf_id, sec_btf_id, dummy_var_btf_id; 4175 const struct btf_var_secinfo *vs; 4176 const struct btf_type *sec; 4177 4178 if (!btf) 4179 return 0; 4180 4181 sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC, 4182 BTF_KIND_DATASEC); 4183 if (sec_btf_id < 0) 4184 return 0; 4185 4186 sec = btf__type_by_id(btf, sec_btf_id); 4187 vs = btf_var_secinfos(sec); 4188 for (i = 0; i < btf_vlen(sec); i++, vs++) { 4189 const struct btf_type *vt; 4190 4191 vt = btf__type_by_id(btf, vs->type); 4192 if (btf_is_func(vt)) 4193 break; 4194 } 4195 4196 /* No func in ksyms sec. No need to add dummy var. */ 4197 if (i == btf_vlen(sec)) 4198 return 0; 4199 4200 int_btf_id = find_int_btf_id(btf); 4201 dummy_var_btf_id = btf__add_var(btf, 4202 "dummy_ksym", 4203 BTF_VAR_GLOBAL_ALLOCATED, 4204 int_btf_id); 4205 if (dummy_var_btf_id < 0) 4206 pr_warn("cannot create a dummy_ksym var\n"); 4207 4208 return dummy_var_btf_id; 4209 } 4210 4211 static int bpf_object__collect_externs(struct bpf_object *obj) 4212 { 4213 struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL; 4214 const struct btf_type *t; 4215 struct extern_desc *ext; 4216 int i, n, off, dummy_var_btf_id; 4217 const char *ext_name, *sec_name; 4218 size_t ext_essent_len; 4219 Elf_Scn *scn; 4220 Elf64_Shdr *sh; 4221 4222 if (!obj->efile.symbols) 4223 return 0; 4224 4225 scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx); 4226 sh = elf_sec_hdr(obj, scn); 4227 if (!sh || sh->sh_entsize != sizeof(Elf64_Sym)) 4228 return -LIBBPF_ERRNO__FORMAT; 4229 4230 dummy_var_btf_id = add_dummy_ksym_var(obj->btf); 4231 if (dummy_var_btf_id < 0) 4232 return dummy_var_btf_id; 4233 4234 n = sh->sh_size / sh->sh_entsize; 4235 pr_debug("looking for externs among %d symbols...\n", n); 4236 4237 for (i = 0; i < n; i++) { 4238 Elf64_Sym *sym = elf_sym_by_idx(obj, i); 4239 4240 if (!sym) 4241 return -LIBBPF_ERRNO__FORMAT; 4242 if (!sym_is_extern(sym)) 4243 continue; 4244 ext_name = elf_sym_str(obj, sym->st_name); 4245 if (!ext_name || !ext_name[0]) 4246 continue; 4247 4248 ext = obj->externs; 4249 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext)); 4250 if (!ext) 4251 return -ENOMEM; 4252 obj->externs = ext; 4253 ext = &ext[obj->nr_extern]; 4254 memset(ext, 0, sizeof(*ext)); 4255 obj->nr_extern++; 4256 4257 ext->btf_id = find_extern_btf_id(obj->btf, ext_name); 4258 if (ext->btf_id <= 0) { 4259 pr_warn("failed to find BTF for extern '%s': %d\n", 4260 ext_name, ext->btf_id); 4261 return ext->btf_id; 4262 } 4263 t = btf__type_by_id(obj->btf, ext->btf_id); 4264 ext->name = strdup(btf__name_by_offset(obj->btf, t->name_off)); 4265 if (!ext->name) 4266 return -ENOMEM; 4267 ext->sym_idx = i; 4268 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; 4269 4270 ext_essent_len = bpf_core_essential_name_len(ext->name); 4271 ext->essent_name = NULL; 4272 if (ext_essent_len != strlen(ext->name)) { 4273 ext->essent_name = strndup(ext->name, ext_essent_len); 4274 if (!ext->essent_name) 4275 return -ENOMEM; 4276 } 4277 4278 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); 4279 if 
(ext->sec_btf_id <= 0) { 4280 pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n", 4281 ext_name, ext->btf_id, ext->sec_btf_id); 4282 return ext->sec_btf_id; 4283 } 4284 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id); 4285 sec_name = btf__name_by_offset(obj->btf, sec->name_off); 4286 4287 if (strcmp(sec_name, KCONFIG_SEC) == 0) { 4288 if (btf_is_func(t)) { 4289 pr_warn("extern function %s is unsupported under %s section\n", 4290 ext->name, KCONFIG_SEC); 4291 return -ENOTSUP; 4292 } 4293 kcfg_sec = sec; 4294 ext->type = EXT_KCFG; 4295 ext->kcfg.sz = btf__resolve_size(obj->btf, t->type); 4296 if (ext->kcfg.sz <= 0) { 4297 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n", 4298 ext_name, ext->kcfg.sz); 4299 return ext->kcfg.sz; 4300 } 4301 ext->kcfg.align = btf__align_of(obj->btf, t->type); 4302 if (ext->kcfg.align <= 0) { 4303 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n", 4304 ext_name, ext->kcfg.align); 4305 return -EINVAL; 4306 } 4307 ext->kcfg.type = find_kcfg_type(obj->btf, t->type, 4308 &ext->kcfg.is_signed); 4309 if (ext->kcfg.type == KCFG_UNKNOWN) { 4310 pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name); 4311 return -ENOTSUP; 4312 } 4313 } else if (strcmp(sec_name, KSYMS_SEC) == 0) { 4314 ksym_sec = sec; 4315 ext->type = EXT_KSYM; 4316 skip_mods_and_typedefs(obj->btf, t->type, 4317 &ext->ksym.type_id); 4318 } else { 4319 pr_warn("unrecognized extern section '%s'\n", sec_name); 4320 return -ENOTSUP; 4321 } 4322 } 4323 pr_debug("collected %d externs total\n", obj->nr_extern); 4324 4325 if (!obj->nr_extern) 4326 return 0; 4327 4328 /* sort externs by type, for kcfg ones also by (align, size, name) */ 4329 qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs); 4330 4331 /* for .ksyms section, we need to turn all externs into allocated 4332 * variables in BTF to pass kernel verification; we do this by 4333 * pretending that each extern is a 8-byte variable 4334 */ 4335 if (ksym_sec) { 4336 /* find existing 4-byte integer type in BTF to use for fake 4337 * extern variables in DATASEC 4338 */ 4339 int int_btf_id = find_int_btf_id(obj->btf); 4340 /* For extern function, a dummy_var added earlier 4341 * will be used to replace the vs->type and 4342 * its name string will be used to refill 4343 * the missing param's name. 4344 */ 4345 const struct btf_type *dummy_var; 4346 4347 dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id); 4348 for (i = 0; i < obj->nr_extern; i++) { 4349 ext = &obj->externs[i]; 4350 if (ext->type != EXT_KSYM) 4351 continue; 4352 pr_debug("extern (ksym) #%d: symbol %d, name %s\n", 4353 i, ext->sym_idx, ext->name); 4354 } 4355 4356 sec = ksym_sec; 4357 n = btf_vlen(sec); 4358 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) { 4359 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; 4360 struct btf_type *vt; 4361 4362 vt = (void *)btf__type_by_id(obj->btf, vs->type); 4363 ext_name = btf__name_by_offset(obj->btf, vt->name_off); 4364 ext = find_extern_by_name(obj, ext_name); 4365 if (!ext) { 4366 pr_warn("failed to find extern definition for BTF %s '%s'\n", 4367 btf_kind_str(vt), ext_name); 4368 return -ESRCH; 4369 } 4370 if (btf_is_func(vt)) { 4371 const struct btf_type *func_proto; 4372 struct btf_param *param; 4373 int j; 4374 4375 func_proto = btf__type_by_id(obj->btf, 4376 vt->type); 4377 param = btf_params(func_proto); 4378 /* Reuse the dummy_var string if the 4379 * func proto does not have param name. 
4380 */ 4381 for (j = 0; j < btf_vlen(func_proto); j++) 4382 if (param[j].type && !param[j].name_off) 4383 param[j].name_off = 4384 dummy_var->name_off; 4385 vs->type = dummy_var_btf_id; 4386 vt->info &= ~0xffff; 4387 vt->info |= BTF_FUNC_GLOBAL; 4388 } else { 4389 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED; 4390 vt->type = int_btf_id; 4391 } 4392 vs->offset = off; 4393 vs->size = sizeof(int); 4394 } 4395 sec->size = off; 4396 } 4397 4398 if (kcfg_sec) { 4399 sec = kcfg_sec; 4400 /* for kcfg externs calculate their offsets within a .kconfig map */ 4401 off = 0; 4402 for (i = 0; i < obj->nr_extern; i++) { 4403 ext = &obj->externs[i]; 4404 if (ext->type != EXT_KCFG) 4405 continue; 4406 4407 ext->kcfg.data_off = roundup(off, ext->kcfg.align); 4408 off = ext->kcfg.data_off + ext->kcfg.sz; 4409 pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n", 4410 i, ext->sym_idx, ext->kcfg.data_off, ext->name); 4411 } 4412 sec->size = off; 4413 n = btf_vlen(sec); 4414 for (i = 0; i < n; i++) { 4415 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; 4416 4417 t = btf__type_by_id(obj->btf, vs->type); 4418 ext_name = btf__name_by_offset(obj->btf, t->name_off); 4419 ext = find_extern_by_name(obj, ext_name); 4420 if (!ext) { 4421 pr_warn("failed to find extern definition for BTF var '%s'\n", 4422 ext_name); 4423 return -ESRCH; 4424 } 4425 btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; 4426 vs->offset = ext->kcfg.data_off; 4427 } 4428 } 4429 return 0; 4430 } 4431 4432 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog) 4433 { 4434 return prog->sec_idx == obj->efile.text_shndx; 4435 } 4436 4437 struct bpf_program * 4438 bpf_object__find_program_by_name(const struct bpf_object *obj, 4439 const char *name) 4440 { 4441 struct bpf_program *prog; 4442 4443 bpf_object__for_each_program(prog, obj) { 4444 if (prog_is_subprog(obj, prog)) 4445 continue; 4446 if (!strcmp(prog->name, name)) 4447 return prog; 4448 } 4449 return errno = ENOENT, NULL; 4450 } 4451 4452 static bool bpf_object__shndx_is_data(const struct bpf_object *obj, 4453 int shndx) 4454 { 4455 switch (obj->efile.secs[shndx].sec_type) { 4456 case SEC_BSS: 4457 case SEC_DATA: 4458 case SEC_RODATA: 4459 return true; 4460 default: 4461 return false; 4462 } 4463 } 4464 4465 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, 4466 int shndx) 4467 { 4468 return shndx == obj->efile.btf_maps_shndx; 4469 } 4470 4471 static enum libbpf_map_type 4472 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) 4473 { 4474 if (shndx == obj->efile.symbols_shndx) 4475 return LIBBPF_MAP_KCONFIG; 4476 4477 switch (obj->efile.secs[shndx].sec_type) { 4478 case SEC_BSS: 4479 return LIBBPF_MAP_BSS; 4480 case SEC_DATA: 4481 return LIBBPF_MAP_DATA; 4482 case SEC_RODATA: 4483 return LIBBPF_MAP_RODATA; 4484 default: 4485 return LIBBPF_MAP_UNSPEC; 4486 } 4487 } 4488 4489 static int bpf_prog_compute_hash(struct bpf_program *prog) 4490 { 4491 struct bpf_insn *purged; 4492 int i, err = 0; 4493 4494 purged = calloc(prog->insns_cnt, BPF_INSN_SZ); 4495 if (!purged) 4496 return -ENOMEM; 4497 4498 /* If relocations have been done, the map_fd needs to be 4499 * discarded for the digest calculation. 
4500 */ 4501 for (i = 0; i < prog->insns_cnt; i++) { 4502 purged[i] = prog->insns[i]; 4503 if (purged[i].code == (BPF_LD | BPF_IMM | BPF_DW) && 4504 (purged[i].src_reg == BPF_PSEUDO_MAP_FD || 4505 purged[i].src_reg == BPF_PSEUDO_MAP_VALUE)) { 4506 purged[i].imm = 0; 4507 i++; 4508 if (i >= prog->insns_cnt || 4509 prog->insns[i].code != 0 || 4510 prog->insns[i].dst_reg != 0 || 4511 prog->insns[i].src_reg != 0 || 4512 prog->insns[i].off != 0) { 4513 err = -EINVAL; 4514 goto out; 4515 } 4516 purged[i] = prog->insns[i]; 4517 purged[i].imm = 0; 4518 } 4519 } 4520 libbpf_sha256(purged, prog->insns_cnt * sizeof(struct bpf_insn), 4521 prog->hash); 4522 out: 4523 free(purged); 4524 return err; 4525 } 4526 4527 static int bpf_program__record_reloc(struct bpf_program *prog, 4528 struct reloc_desc *reloc_desc, 4529 __u32 insn_idx, const char *sym_name, 4530 const Elf64_Sym *sym, const Elf64_Rel *rel) 4531 { 4532 struct bpf_insn *insn = &prog->insns[insn_idx]; 4533 size_t map_idx, nr_maps = prog->obj->nr_maps; 4534 struct bpf_object *obj = prog->obj; 4535 __u32 shdr_idx = sym->st_shndx; 4536 enum libbpf_map_type type; 4537 const char *sym_sec_name; 4538 struct bpf_map *map; 4539 4540 if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) { 4541 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n", 4542 prog->name, sym_name, insn_idx, insn->code); 4543 return -LIBBPF_ERRNO__RELOC; 4544 } 4545 4546 if (sym_is_extern(sym)) { 4547 int sym_idx = ELF64_R_SYM(rel->r_info); 4548 int i, n = obj->nr_extern; 4549 struct extern_desc *ext; 4550 4551 for (i = 0; i < n; i++) { 4552 ext = &obj->externs[i]; 4553 if (ext->sym_idx == sym_idx) 4554 break; 4555 } 4556 if (i >= n) { 4557 pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n", 4558 prog->name, sym_name, sym_idx); 4559 return -LIBBPF_ERRNO__RELOC; 4560 } 4561 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n", 4562 prog->name, i, ext->name, ext->sym_idx, insn_idx); 4563 if (insn->code == (BPF_JMP | BPF_CALL)) 4564 reloc_desc->type = RELO_EXTERN_CALL; 4565 else 4566 reloc_desc->type = RELO_EXTERN_LD64; 4567 reloc_desc->insn_idx = insn_idx; 4568 reloc_desc->ext_idx = i; 4569 return 0; 4570 } 4571 4572 /* sub-program call relocation */ 4573 if (is_call_insn(insn)) { 4574 if (insn->src_reg != BPF_PSEUDO_CALL) { 4575 pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name); 4576 return -LIBBPF_ERRNO__RELOC; 4577 } 4578 /* text_shndx can be 0, if no default "main" program exists */ 4579 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) { 4580 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); 4581 pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n", 4582 prog->name, sym_name, sym_sec_name); 4583 return -LIBBPF_ERRNO__RELOC; 4584 } 4585 if (sym->st_value % BPF_INSN_SZ) { 4586 pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n", 4587 prog->name, sym_name, (size_t)sym->st_value); 4588 return -LIBBPF_ERRNO__RELOC; 4589 } 4590 reloc_desc->type = RELO_CALL; 4591 reloc_desc->insn_idx = insn_idx; 4592 reloc_desc->sym_off = sym->st_value; 4593 return 0; 4594 } 4595 4596 if (!shdr_idx || shdr_idx >= SHN_LORESERVE) { 4597 pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n", 4598 prog->name, sym_name, shdr_idx); 4599 return -LIBBPF_ERRNO__RELOC; 4600 } 4601 4602 /* loading subprog addresses */ 4603 if (sym_is_subprog(sym, obj->efile.text_shndx)) { 4604 /* global_func: sym->st_value = offset in the section, insn->imm = 0. 
4605 * local_func: sym->st_value = 0, insn->imm = offset in the section. 4606 */ 4607 if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) { 4608 pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n", 4609 prog->name, sym_name, (size_t)sym->st_value, insn->imm); 4610 return -LIBBPF_ERRNO__RELOC; 4611 } 4612 4613 reloc_desc->type = RELO_SUBPROG_ADDR; 4614 reloc_desc->insn_idx = insn_idx; 4615 reloc_desc->sym_off = sym->st_value; 4616 return 0; 4617 } 4618 4619 type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); 4620 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); 4621 4622 /* arena data relocation */ 4623 if (shdr_idx == obj->efile.arena_data_shndx) { 4624 if (obj->arena_map_idx < 0) { 4625 pr_warn("prog '%s': bad arena data relocation at insn %u, no arena maps defined\n", 4626 prog->name, insn_idx); 4627 return -LIBBPF_ERRNO__RELOC; 4628 } 4629 reloc_desc->type = RELO_DATA; 4630 reloc_desc->insn_idx = insn_idx; 4631 reloc_desc->map_idx = obj->arena_map_idx; 4632 reloc_desc->sym_off = sym->st_value; 4633 4634 map = &obj->maps[obj->arena_map_idx]; 4635 pr_debug("prog '%s': found arena map %d (%s, sec %d, off %zu) for insn %u\n", 4636 prog->name, obj->arena_map_idx, map->name, map->sec_idx, 4637 map->sec_offset, insn_idx); 4638 return 0; 4639 } 4640 4641 /* generic map reference relocation */ 4642 if (type == LIBBPF_MAP_UNSPEC) { 4643 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { 4644 pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n", 4645 prog->name, sym_name, sym_sec_name); 4646 return -LIBBPF_ERRNO__RELOC; 4647 } 4648 for (map_idx = 0; map_idx < nr_maps; map_idx++) { 4649 map = &obj->maps[map_idx]; 4650 if (map->libbpf_type != type || 4651 map->sec_idx != sym->st_shndx || 4652 map->sec_offset != sym->st_value) 4653 continue; 4654 pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n", 4655 prog->name, map_idx, map->name, map->sec_idx, 4656 map->sec_offset, insn_idx); 4657 break; 4658 } 4659 if (map_idx >= nr_maps) { 4660 pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n", 4661 prog->name, sym_sec_name, (size_t)sym->st_value); 4662 return -LIBBPF_ERRNO__RELOC; 4663 } 4664 reloc_desc->type = RELO_LD64; 4665 reloc_desc->insn_idx = insn_idx; 4666 reloc_desc->map_idx = map_idx; 4667 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */ 4668 return 0; 4669 } 4670 4671 /* global data map relocation */ 4672 if (!bpf_object__shndx_is_data(obj, shdr_idx)) { 4673 pr_warn("prog '%s': bad data relo against section '%s'\n", 4674 prog->name, sym_sec_name); 4675 return -LIBBPF_ERRNO__RELOC; 4676 } 4677 for (map_idx = 0; map_idx < nr_maps; map_idx++) { 4678 map = &obj->maps[map_idx]; 4679 if (map->libbpf_type != type || map->sec_idx != sym->st_shndx) 4680 continue; 4681 pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n", 4682 prog->name, map_idx, map->name, map->sec_idx, 4683 map->sec_offset, insn_idx); 4684 break; 4685 } 4686 if (map_idx >= nr_maps) { 4687 pr_warn("prog '%s': data relo failed to find map for section '%s'\n", 4688 prog->name, sym_sec_name); 4689 return -LIBBPF_ERRNO__RELOC; 4690 } 4691 4692 reloc_desc->type = RELO_DATA; 4693 reloc_desc->insn_idx = insn_idx; 4694 reloc_desc->map_idx = map_idx; 4695 reloc_desc->sym_off = sym->st_value; 4696 return 0; 4697 } 4698 4699 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx) 4700 { 4701 return insn_idx >= prog->sec_insn_off && 4702 insn_idx < prog->sec_insn_off + 
prog->sec_insn_cnt; 4703 } 4704 4705 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj, 4706 size_t sec_idx, size_t insn_idx) 4707 { 4708 int l = 0, r = obj->nr_programs - 1, m; 4709 struct bpf_program *prog; 4710 4711 if (!obj->nr_programs) 4712 return NULL; 4713 4714 while (l < r) { 4715 m = l + (r - l + 1) / 2; 4716 prog = &obj->programs[m]; 4717 4718 if (prog->sec_idx < sec_idx || 4719 (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx)) 4720 l = m; 4721 else 4722 r = m - 1; 4723 } 4724 /* matching program could be at index l, but it still might be the 4725 * wrong one, so we need to double check conditions for the last time 4726 */ 4727 prog = &obj->programs[l]; 4728 if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx)) 4729 return prog; 4730 return NULL; 4731 } 4732 4733 static int 4734 bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) 4735 { 4736 const char *relo_sec_name, *sec_name; 4737 size_t sec_idx = shdr->sh_info, sym_idx; 4738 struct bpf_program *prog; 4739 struct reloc_desc *relos; 4740 int err, i, nrels; 4741 const char *sym_name; 4742 __u32 insn_idx; 4743 Elf_Scn *scn; 4744 Elf_Data *scn_data; 4745 Elf64_Sym *sym; 4746 Elf64_Rel *rel; 4747 4748 if (sec_idx >= obj->efile.sec_cnt) 4749 return -EINVAL; 4750 4751 scn = elf_sec_by_idx(obj, sec_idx); 4752 scn_data = elf_sec_data(obj, scn); 4753 if (!scn_data) 4754 return -LIBBPF_ERRNO__FORMAT; 4755 4756 relo_sec_name = elf_sec_str(obj, shdr->sh_name); 4757 sec_name = elf_sec_name(obj, scn); 4758 if (!relo_sec_name || !sec_name) 4759 return -EINVAL; 4760 4761 pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n", 4762 relo_sec_name, sec_idx, sec_name); 4763 nrels = shdr->sh_size / shdr->sh_entsize; 4764 4765 for (i = 0; i < nrels; i++) { 4766 rel = elf_rel_by_idx(data, i); 4767 if (!rel) { 4768 pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i); 4769 return -LIBBPF_ERRNO__FORMAT; 4770 } 4771 4772 sym_idx = ELF64_R_SYM(rel->r_info); 4773 sym = elf_sym_by_idx(obj, sym_idx); 4774 if (!sym) { 4775 pr_warn("sec '%s': symbol #%zu not found for relo #%d\n", 4776 relo_sec_name, sym_idx, i); 4777 return -LIBBPF_ERRNO__FORMAT; 4778 } 4779 4780 if (sym->st_shndx >= obj->efile.sec_cnt) { 4781 pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n", 4782 relo_sec_name, sym_idx, (size_t)sym->st_shndx, i); 4783 return -LIBBPF_ERRNO__FORMAT; 4784 } 4785 4786 if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) { 4787 pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n", 4788 relo_sec_name, (size_t)rel->r_offset, i); 4789 return -LIBBPF_ERRNO__FORMAT; 4790 } 4791 4792 insn_idx = rel->r_offset / BPF_INSN_SZ; 4793 /* relocations against static functions are recorded as 4794 * relocations against the section that contains a function; 4795 * in such case, symbol will be STT_SECTION and sym.st_name 4796 * will point to empty string (0), so fetch section name 4797 * instead 4798 */ 4799 if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0) 4800 sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx)); 4801 else 4802 sym_name = elf_sym_str(obj, sym->st_name); 4803 sym_name = sym_name ?: "<?"; 4804 4805 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n", 4806 relo_sec_name, i, insn_idx, sym_name); 4807 4808 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); 4809 if (!prog) { 4810 pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn 
#%u, probably overridden weak function, skipping...\n", 4811 relo_sec_name, i, sec_name, insn_idx); 4812 continue; 4813 } 4814 4815 relos = libbpf_reallocarray(prog->reloc_desc, 4816 prog->nr_reloc + 1, sizeof(*relos)); 4817 if (!relos) 4818 return -ENOMEM; 4819 prog->reloc_desc = relos; 4820 4821 /* adjust insn_idx to local BPF program frame of reference */ 4822 insn_idx -= prog->sec_insn_off; 4823 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc], 4824 insn_idx, sym_name, sym, rel); 4825 if (err) 4826 return err; 4827 4828 prog->nr_reloc++; 4829 } 4830 return 0; 4831 } 4832 4833 static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map) 4834 { 4835 int id; 4836 4837 if (!obj->btf) 4838 return -ENOENT; 4839 4840 /* if it's BTF-defined map, we don't need to search for type IDs. 4841 * For struct_ops map, it does not need btf_key_type_id and 4842 * btf_value_type_id. 4843 */ 4844 if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map)) 4845 return 0; 4846 4847 /* 4848 * LLVM annotates global data differently in BTF, that is, 4849 * only as '.data', '.bss' or '.rodata'. 4850 */ 4851 if (!bpf_map__is_internal(map)) 4852 return -ENOENT; 4853 4854 id = btf__find_by_name(obj->btf, map->real_name); 4855 if (id < 0) 4856 return id; 4857 4858 map->btf_key_type_id = 0; 4859 map->btf_value_type_id = id; 4860 return 0; 4861 } 4862 4863 static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) 4864 { 4865 char file[PATH_MAX], buff[4096]; 4866 FILE *fp; 4867 __u32 val; 4868 int err; 4869 4870 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); 4871 memset(info, 0, sizeof(*info)); 4872 4873 fp = fopen(file, "re"); 4874 if (!fp) { 4875 err = -errno; 4876 pr_warn("failed to open %s: %s. 
No procfs support?\n", file, 4877 errstr(err)); 4878 return err; 4879 } 4880 4881 while (fgets(buff, sizeof(buff), fp)) { 4882 if (sscanf(buff, "map_type:\t%u", &val) == 1) 4883 info->type = val; 4884 else if (sscanf(buff, "key_size:\t%u", &val) == 1) 4885 info->key_size = val; 4886 else if (sscanf(buff, "value_size:\t%u", &val) == 1) 4887 info->value_size = val; 4888 else if (sscanf(buff, "max_entries:\t%u", &val) == 1) 4889 info->max_entries = val; 4890 else if (sscanf(buff, "map_flags:\t%i", &val) == 1) 4891 info->map_flags = val; 4892 } 4893 4894 fclose(fp); 4895 4896 return 0; 4897 } 4898 4899 static bool map_is_created(const struct bpf_map *map) 4900 { 4901 return map->obj->state >= OBJ_PREPARED || map->reused; 4902 } 4903 4904 bool bpf_map__autocreate(const struct bpf_map *map) 4905 { 4906 return map->autocreate; 4907 } 4908 4909 int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) 4910 { 4911 if (map_is_created(map)) 4912 return libbpf_err(-EBUSY); 4913 4914 map->autocreate = autocreate; 4915 return 0; 4916 } 4917 4918 int bpf_map__set_autoattach(struct bpf_map *map, bool autoattach) 4919 { 4920 if (!bpf_map__is_struct_ops(map)) 4921 return libbpf_err(-EINVAL); 4922 4923 map->autoattach = autoattach; 4924 return 0; 4925 } 4926 4927 bool bpf_map__autoattach(const struct bpf_map *map) 4928 { 4929 return map->autoattach; 4930 } 4931 4932 int bpf_map__reuse_fd(struct bpf_map *map, int fd) 4933 { 4934 struct bpf_map_info info; 4935 __u32 len = sizeof(info), name_len; 4936 int new_fd, err; 4937 char *new_name; 4938 4939 memset(&info, 0, len); 4940 err = bpf_map_get_info_by_fd(fd, &info, &len); 4941 if (err && errno == EINVAL) 4942 err = bpf_get_map_info_from_fdinfo(fd, &info); 4943 if (err) 4944 return libbpf_err(err); 4945 4946 name_len = strlen(info.name); 4947 if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0) 4948 new_name = strdup(map->name); 4949 else 4950 new_name = strdup(info.name); 4951 4952 if (!new_name) 4953 return libbpf_err(-errno); 4954 4955 /* 4956 * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set. 4957 * This is similar to what we do in ensure_good_fd(), but without 4958 * closing original FD. 
4959 */ 4960 new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); 4961 if (new_fd < 0) { 4962 err = -errno; 4963 goto err_free_new_name; 4964 } 4965 4966 err = reuse_fd(map->fd, new_fd); 4967 if (err) 4968 goto err_free_new_name; 4969 4970 free(map->name); 4971 4972 map->name = new_name; 4973 map->def.type = info.type; 4974 map->def.key_size = info.key_size; 4975 map->def.value_size = info.value_size; 4976 map->def.max_entries = info.max_entries; 4977 map->def.map_flags = info.map_flags; 4978 map->btf_key_type_id = info.btf_key_type_id; 4979 map->btf_value_type_id = info.btf_value_type_id; 4980 map->reused = true; 4981 map->map_extra = info.map_extra; 4982 4983 return 0; 4984 4985 err_free_new_name: 4986 free(new_name); 4987 return libbpf_err(err); 4988 } 4989 4990 __u32 bpf_map__max_entries(const struct bpf_map *map) 4991 { 4992 return map->def.max_entries; 4993 } 4994 4995 struct bpf_map *bpf_map__inner_map(struct bpf_map *map) 4996 { 4997 if (!bpf_map_type__is_map_in_map(map->def.type)) 4998 return errno = EINVAL, NULL; 4999 5000 return map->inner_map; 5001 } 5002 5003 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) 5004 { 5005 if (map_is_created(map)) 5006 return libbpf_err(-EBUSY); 5007 5008 map->def.max_entries = max_entries; 5009 5010 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ 5011 if (map_is_ringbuf(map)) 5012 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); 5013 5014 return 0; 5015 } 5016 5017 static int bpf_object_prepare_token(struct bpf_object *obj) 5018 { 5019 const char *bpffs_path; 5020 int bpffs_fd = -1, token_fd, err; 5021 bool mandatory; 5022 enum libbpf_print_level level; 5023 5024 /* token is explicitly prevented */ 5025 if (obj->token_path && obj->token_path[0] == '\0') { 5026 pr_debug("object '%s': token is prevented, skipping...\n", obj->name); 5027 return 0; 5028 } 5029 5030 mandatory = obj->token_path != NULL; 5031 level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG; 5032 5033 bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH; 5034 bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); 5035 if (bpffs_fd < 0) { 5036 err = -errno; 5037 __pr(level, "object '%s': failed (%s) to open BPF FS mount at '%s'%s\n", 5038 obj->name, errstr(err), bpffs_path, 5039 mandatory ? "" : ", skipping optional step..."); 5040 return mandatory ? err : 0; 5041 } 5042 5043 token_fd = bpf_token_create(bpffs_fd, 0); 5044 close(bpffs_fd); 5045 if (token_fd < 0) { 5046 if (!mandatory && token_fd == -ENOENT) { 5047 pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n", 5048 obj->name, bpffs_path); 5049 return 0; 5050 } 5051 __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n", 5052 obj->name, token_fd, bpffs_path, 5053 mandatory ? "" : ", skipping optional step..."); 5054 return mandatory ? token_fd : 0; 5055 } 5056 5057 obj->feat_cache = calloc(1, sizeof(*obj->feat_cache)); 5058 if (!obj->feat_cache) { 5059 close(token_fd); 5060 return -ENOMEM; 5061 } 5062 5063 obj->token_fd = token_fd; 5064 obj->feat_cache->token_fd = token_fd; 5065 5066 return 0; 5067 } 5068 5069 static int 5070 bpf_object__probe_loading(struct bpf_object *obj) 5071 { 5072 struct bpf_insn insns[] = { 5073 BPF_MOV64_IMM(BPF_REG_0, 0), 5074 BPF_EXIT_INSN(), 5075 }; 5076 int ret, insn_cnt = ARRAY_SIZE(insns); 5077 LIBBPF_OPTS(bpf_prog_load_opts, opts, 5078 .token_fd = obj->token_fd, 5079 .prog_flags = obj->token_fd ? 
BPF_F_TOKEN_FD : 0, 5080 ); 5081 5082 if (obj->gen_loader) 5083 return 0; 5084 5085 ret = bump_rlimit_memlock(); 5086 if (ret) 5087 pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %s), you might need to do it explicitly!\n", 5088 errstr(ret)); 5089 5090 /* make sure basic loading works */ 5091 ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); 5092 if (ret < 0) 5093 ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); 5094 if (ret < 0) { 5095 ret = errno; 5096 pr_warn("Error in %s(): %s. Couldn't load trivial BPF program. Make sure your kernel supports BPF (CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is set to big enough value.\n", 5097 __func__, errstr(ret)); 5098 return -ret; 5099 } 5100 close(ret); 5101 5102 return 0; 5103 } 5104 5105 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) 5106 { 5107 if (obj->gen_loader) 5108 /* To generate loader program assume the latest kernel 5109 * to avoid doing extra prog_load, map_create syscalls. 5110 */ 5111 return true; 5112 5113 if (obj->token_fd) 5114 return feat_supported(obj->feat_cache, feat_id); 5115 5116 return feat_supported(NULL, feat_id); 5117 } 5118 5119 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) 5120 { 5121 struct bpf_map_info map_info; 5122 __u32 map_info_len = sizeof(map_info); 5123 int err; 5124 5125 memset(&map_info, 0, map_info_len); 5126 err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len); 5127 if (err && errno == EINVAL) 5128 err = bpf_get_map_info_from_fdinfo(map_fd, &map_info); 5129 if (err) { 5130 pr_warn("failed to get map info for map FD %d: %s\n", map_fd, 5131 errstr(err)); 5132 return false; 5133 } 5134 5135 /* 5136 * bpf_get_map_info_by_fd() for DEVMAP will always return flags with 5137 * BPF_F_RDONLY_PROG set, but it generally is not set at map creation time. 5138 * Thus, ignore the BPF_F_RDONLY_PROG flag in the flags returned from 5139 * bpf_get_map_info_by_fd() when checking for compatibility with an 5140 * existing DEVMAP. 
5141 */ 5142 if (map->def.type == BPF_MAP_TYPE_DEVMAP || map->def.type == BPF_MAP_TYPE_DEVMAP_HASH) 5143 map_info.map_flags &= ~BPF_F_RDONLY_PROG; 5144 5145 return (map_info.type == map->def.type && 5146 map_info.key_size == map->def.key_size && 5147 map_info.value_size == map->def.value_size && 5148 map_info.max_entries == map->def.max_entries && 5149 map_info.map_flags == map->def.map_flags && 5150 map_info.map_extra == map->map_extra); 5151 } 5152 5153 static int 5154 bpf_object__reuse_map(struct bpf_map *map) 5155 { 5156 int err, pin_fd; 5157 5158 pin_fd = bpf_obj_get(map->pin_path); 5159 if (pin_fd < 0) { 5160 err = -errno; 5161 if (err == -ENOENT) { 5162 pr_debug("found no pinned map to reuse at '%s'\n", 5163 map->pin_path); 5164 return 0; 5165 } 5166 5167 pr_warn("couldn't retrieve pinned map '%s': %s\n", 5168 map->pin_path, errstr(err)); 5169 return err; 5170 } 5171 5172 if (!map_is_reuse_compat(map, pin_fd)) { 5173 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n", 5174 map->pin_path); 5175 close(pin_fd); 5176 return -EINVAL; 5177 } 5178 5179 err = bpf_map__reuse_fd(map, pin_fd); 5180 close(pin_fd); 5181 if (err) 5182 return err; 5183 5184 map->pinned = true; 5185 pr_debug("reused pinned map at '%s'\n", map->pin_path); 5186 5187 return 0; 5188 } 5189 5190 static int 5191 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) 5192 { 5193 enum libbpf_map_type map_type = map->libbpf_type; 5194 int err, zero = 0; 5195 size_t mmap_sz; 5196 5197 if (obj->gen_loader) { 5198 bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps, 5199 map->mmaped, map->def.value_size); 5200 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) 5201 bpf_gen__map_freeze(obj->gen_loader, map - obj->maps); 5202 return 0; 5203 } 5204 5205 err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); 5206 if (err) { 5207 err = -errno; 5208 pr_warn("map '%s': failed to set initial contents: %s\n", 5209 bpf_map__name(map), errstr(err)); 5210 return err; 5211 } 5212 5213 /* Freeze .rodata and .kconfig map as read-only from syscall side. */ 5214 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) { 5215 err = bpf_map_freeze(map->fd); 5216 if (err) { 5217 err = -errno; 5218 pr_warn("map '%s': failed to freeze as read-only: %s\n", 5219 bpf_map__name(map), errstr(err)); 5220 return err; 5221 } 5222 } 5223 5224 /* Remap anonymous mmap()-ed "map initialization image" as 5225 * a BPF map-backed mmap()-ed memory, but preserving the same 5226 * memory address. This will cause kernel to change process' 5227 * page table to point to a different piece of kernel memory, 5228 * but from userspace point of view memory address (and its 5229 * contents, being identical at this point) will stay the 5230 * same. This mapping will be released by bpf_object__close() 5231 * as per normal clean up procedure. 
5232 */ 5233 mmap_sz = bpf_map_mmap_sz(map); 5234 if (map->def.map_flags & BPF_F_MMAPABLE) { 5235 void *mmaped; 5236 int prot; 5237 5238 if (map->def.map_flags & BPF_F_RDONLY_PROG) 5239 prot = PROT_READ; 5240 else 5241 prot = PROT_READ | PROT_WRITE; 5242 mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map->fd, 0); 5243 if (mmaped == MAP_FAILED) { 5244 err = -errno; 5245 pr_warn("map '%s': failed to re-mmap() contents: %s\n", 5246 bpf_map__name(map), errstr(err)); 5247 return err; 5248 } 5249 map->mmaped = mmaped; 5250 } else if (map->mmaped) { 5251 munmap(map->mmaped, mmap_sz); 5252 map->mmaped = NULL; 5253 } 5254 5255 return 0; 5256 } 5257 5258 static void bpf_map__destroy(struct bpf_map *map); 5259 5260 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) 5261 { 5262 LIBBPF_OPTS(bpf_map_create_opts, create_attr); 5263 struct bpf_map_def *def = &map->def; 5264 const char *map_name = NULL; 5265 int err = 0, map_fd; 5266 5267 if (kernel_supports(obj, FEAT_PROG_NAME)) 5268 map_name = map->name; 5269 create_attr.map_ifindex = map->map_ifindex; 5270 create_attr.map_flags = def->map_flags; 5271 create_attr.numa_node = map->numa_node; 5272 create_attr.map_extra = map->map_extra; 5273 create_attr.token_fd = obj->token_fd; 5274 if (obj->token_fd) 5275 create_attr.map_flags |= BPF_F_TOKEN_FD; 5276 if (map->excl_prog) { 5277 err = bpf_prog_compute_hash(map->excl_prog); 5278 if (err) 5279 return err; 5280 5281 create_attr.excl_prog_hash = map->excl_prog->hash; 5282 create_attr.excl_prog_hash_size = SHA256_DIGEST_LENGTH; 5283 } 5284 5285 if (bpf_map__is_struct_ops(map)) { 5286 create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; 5287 if (map->mod_btf_fd >= 0) { 5288 create_attr.value_type_btf_obj_fd = map->mod_btf_fd; 5289 create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD; 5290 } 5291 } 5292 5293 if (obj->btf && btf__fd(obj->btf) >= 0) { 5294 create_attr.btf_fd = btf__fd(obj->btf); 5295 create_attr.btf_key_type_id = map->btf_key_type_id; 5296 create_attr.btf_value_type_id = map->btf_value_type_id; 5297 } 5298 5299 if (bpf_map_type__is_map_in_map(def->type)) { 5300 if (map->inner_map) { 5301 err = map_set_def_max_entries(map->inner_map); 5302 if (err) 5303 return err; 5304 err = bpf_object__create_map(obj, map->inner_map, true); 5305 if (err) { 5306 pr_warn("map '%s': failed to create inner map: %s\n", 5307 map->name, errstr(err)); 5308 return err; 5309 } 5310 map->inner_map_fd = map->inner_map->fd; 5311 } 5312 if (map->inner_map_fd >= 0) 5313 create_attr.inner_map_fd = map->inner_map_fd; 5314 } 5315 5316 switch (def->type) { 5317 case BPF_MAP_TYPE_PERF_EVENT_ARRAY: 5318 case BPF_MAP_TYPE_CGROUP_ARRAY: 5319 case BPF_MAP_TYPE_STACK_TRACE: 5320 case BPF_MAP_TYPE_ARRAY_OF_MAPS: 5321 case BPF_MAP_TYPE_HASH_OF_MAPS: 5322 case BPF_MAP_TYPE_DEVMAP: 5323 case BPF_MAP_TYPE_DEVMAP_HASH: 5324 case BPF_MAP_TYPE_CPUMAP: 5325 case BPF_MAP_TYPE_XSKMAP: 5326 case BPF_MAP_TYPE_SOCKMAP: 5327 case BPF_MAP_TYPE_SOCKHASH: 5328 case BPF_MAP_TYPE_QUEUE: 5329 case BPF_MAP_TYPE_STACK: 5330 case BPF_MAP_TYPE_ARENA: 5331 create_attr.btf_fd = 0; 5332 create_attr.btf_key_type_id = 0; 5333 create_attr.btf_value_type_id = 0; 5334 map->btf_key_type_id = 0; 5335 map->btf_value_type_id = 0; 5336 break; 5337 case BPF_MAP_TYPE_STRUCT_OPS: 5338 create_attr.btf_value_type_id = 0; 5339 break; 5340 default: 5341 break; 5342 } 5343 5344 if (obj->gen_loader) { 5345 bpf_gen__map_create(obj->gen_loader, def->type, map_name, 5346 def->key_size, def->value_size, 
def->max_entries, 5347 &create_attr, is_inner ? -1 : map - obj->maps); 5348 /* We keep pretending we have valid FD to pass various fd >= 0 5349 * checks by just keeping original placeholder FDs in place. 5350 * See bpf_object__add_map() comment. 5351 * This placeholder fd will not be used with any syscall and 5352 * will be reset to -1 eventually. 5353 */ 5354 map_fd = map->fd; 5355 } else { 5356 map_fd = bpf_map_create(def->type, map_name, 5357 def->key_size, def->value_size, 5358 def->max_entries, &create_attr); 5359 } 5360 if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { 5361 err = -errno; 5362 pr_warn("Error in bpf_create_map_xattr(%s): %s. Retrying without BTF.\n", 5363 map->name, errstr(err)); 5364 create_attr.btf_fd = 0; 5365 create_attr.btf_key_type_id = 0; 5366 create_attr.btf_value_type_id = 0; 5367 map->btf_key_type_id = 0; 5368 map->btf_value_type_id = 0; 5369 map_fd = bpf_map_create(def->type, map_name, 5370 def->key_size, def->value_size, 5371 def->max_entries, &create_attr); 5372 } 5373 5374 if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) { 5375 if (obj->gen_loader) 5376 map->inner_map->fd = -1; 5377 bpf_map__destroy(map->inner_map); 5378 zfree(&map->inner_map); 5379 } 5380 5381 if (map_fd < 0) 5382 return map_fd; 5383 5384 /* obj->gen_loader case, prevent reuse_fd() from closing map_fd */ 5385 if (map->fd == map_fd) 5386 return 0; 5387 5388 /* Keep placeholder FD value but now point it to the BPF map object. 5389 * This way everything that relied on this map's FD (e.g., relocated 5390 * ldimm64 instructions) will stay valid and won't need adjustments. 5391 * map->fd stays valid but now points to what map_fd points to. 5392 */ 5393 return reuse_fd(map->fd, map_fd); 5394 } 5395 5396 static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) 5397 { 5398 const struct bpf_map *targ_map; 5399 unsigned int i; 5400 int fd, err = 0; 5401 5402 for (i = 0; i < map->init_slots_sz; i++) { 5403 if (!map->init_slots[i]) 5404 continue; 5405 5406 targ_map = map->init_slots[i]; 5407 fd = targ_map->fd; 5408 5409 if (obj->gen_loader) { 5410 bpf_gen__populate_outer_map(obj->gen_loader, 5411 map - obj->maps, i, 5412 targ_map - obj->maps); 5413 } else { 5414 err = bpf_map_update_elem(map->fd, &i, &fd, 0); 5415 } 5416 if (err) { 5417 err = -errno; 5418 pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %s\n", 5419 map->name, i, targ_map->name, fd, errstr(err)); 5420 return err; 5421 } 5422 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", 5423 map->name, i, targ_map->name, fd); 5424 } 5425 5426 zfree(&map->init_slots); 5427 map->init_slots_sz = 0; 5428 5429 return 0; 5430 } 5431 5432 static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map) 5433 { 5434 const struct bpf_program *targ_prog; 5435 unsigned int i; 5436 int fd, err; 5437 5438 if (obj->gen_loader) 5439 return -ENOTSUP; 5440 5441 for (i = 0; i < map->init_slots_sz; i++) { 5442 if (!map->init_slots[i]) 5443 continue; 5444 5445 targ_prog = map->init_slots[i]; 5446 fd = bpf_program__fd(targ_prog); 5447 5448 err = bpf_map_update_elem(map->fd, &i, &fd, 0); 5449 if (err) { 5450 err = -errno; 5451 pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %s\n", 5452 map->name, i, targ_prog->name, fd, errstr(err)); 5453 return err; 5454 } 5455 pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n", 5456 map->name, i, targ_prog->name, fd); 5457 } 5458 5459 zfree(&map->init_slots); 5460 map->init_slots_sz = 0; 5461 5462 return
0; 5463 } 5464 5465 static int bpf_object_init_prog_arrays(struct bpf_object *obj) 5466 { 5467 struct bpf_map *map; 5468 int i, err; 5469 5470 for (i = 0; i < obj->nr_maps; i++) { 5471 map = &obj->maps[i]; 5472 5473 if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY) 5474 continue; 5475 5476 err = init_prog_array_slots(obj, map); 5477 if (err < 0) 5478 return err; 5479 } 5480 return 0; 5481 } 5482 5483 static int map_set_def_max_entries(struct bpf_map *map) 5484 { 5485 if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) { 5486 int nr_cpus; 5487 5488 nr_cpus = libbpf_num_possible_cpus(); 5489 if (nr_cpus < 0) { 5490 pr_warn("map '%s': failed to determine number of system CPUs: %d\n", 5491 map->name, nr_cpus); 5492 return nr_cpus; 5493 } 5494 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); 5495 map->def.max_entries = nr_cpus; 5496 } 5497 5498 return 0; 5499 } 5500 5501 static int 5502 bpf_object__create_maps(struct bpf_object *obj) 5503 { 5504 struct bpf_map *map; 5505 unsigned int i, j; 5506 int err; 5507 bool retried; 5508 5509 for (i = 0; i < obj->nr_maps; i++) { 5510 map = &obj->maps[i]; 5511 5512 /* To support old kernels, we skip creating global data maps 5513 * (.rodata, .data, .kconfig, etc); later on, during program 5514 * loading, if we detect that at least one of the to-be-loaded 5515 * programs is referencing any global data map, we'll error 5516 * out with program name and relocation index logged. 5517 * This approach allows us to accommodate Clang emitting 5518 * unnecessary .rodata.str1.1 sections for string literals, 5519 * but it also allows CO-RE applications to use global 5520 * variables in some BPF programs, but not others. 5521 * If those global variable-using programs are not loaded at 5522 * runtime due to bpf_program__set_autoload(prog, false), 5523 * bpf_object loading will succeed just fine even on old 5524 * kernels. 5525 */ 5526 if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA)) 5527 map->autocreate = false; 5528 5529 if (!map->autocreate) { 5530 pr_debug("map '%s': skipped auto-creating...\n", map->name); 5531 continue; 5532 } 5533 5534 err = map_set_def_max_entries(map); 5535 if (err) 5536 goto err_out; 5537 5538 retried = false; 5539 retry: 5540 if (map->pin_path) { 5541 err = bpf_object__reuse_map(map); 5542 if (err) { 5543 pr_warn("map '%s': error reusing pinned map\n", 5544 map->name); 5545 goto err_out; 5546 } 5547 if (retried && map->fd < 0) { 5548 pr_warn("map '%s': cannot find pinned map\n", 5549 map->name); 5550 err = -ENOENT; 5551 goto err_out; 5552 } 5553 } 5554 5555 if (map->reused) { 5556 pr_debug("map '%s': skipping creation (preset fd=%d)\n", 5557 map->name, map->fd); 5558 } else { 5559 err = bpf_object__create_map(obj, map, false); 5560 if (err) 5561 goto err_out; 5562 5563 pr_debug("map '%s': created successfully, fd=%d\n", 5564 map->name, map->fd); 5565 5566 if (bpf_map__is_internal(map)) { 5567 err = bpf_object__populate_internal_map(obj, map); 5568 if (err < 0) 5569 goto err_out; 5570 } else if (map->def.type == BPF_MAP_TYPE_ARENA) { 5571 map->mmaped = mmap((void *)(long)map->map_extra, 5572 bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, 5573 map->map_extra ?
MAP_SHARED | MAP_FIXED : MAP_SHARED, 5574 map->fd, 0); 5575 if (map->mmaped == MAP_FAILED) { 5576 err = -errno; 5577 map->mmaped = NULL; 5578 pr_warn("map '%s': failed to mmap arena: %s\n", 5579 map->name, errstr(err)); 5580 return err; 5581 } 5582 if (obj->arena_data) { 5583 memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz); 5584 zfree(&obj->arena_data); 5585 } 5586 } 5587 if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { 5588 err = init_map_in_map_slots(obj, map); 5589 if (err < 0) 5590 goto err_out; 5591 } 5592 } 5593 5594 if (map->pin_path && !map->pinned) { 5595 err = bpf_map__pin(map, NULL); 5596 if (err) { 5597 if (!retried && err == -EEXIST) { 5598 retried = true; 5599 goto retry; 5600 } 5601 pr_warn("map '%s': failed to auto-pin at '%s': %s\n", 5602 map->name, map->pin_path, errstr(err)); 5603 goto err_out; 5604 } 5605 } 5606 } 5607 5608 return 0; 5609 5610 err_out: 5611 pr_warn("map '%s': failed to create: %s\n", map->name, errstr(err)); 5612 pr_perm_msg(err); 5613 for (j = 0; j < i; j++) 5614 zclose(obj->maps[j].fd); 5615 return err; 5616 } 5617 5618 static bool bpf_core_is_flavor_sep(const char *s) 5619 { 5620 /* check X___Y name pattern, where X and Y are not underscores */ 5621 return s[0] != '_' && /* X */ 5622 s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */ 5623 s[4] != '_'; /* Y */ 5624 } 5625 5626 /* Given 'some_struct_name___with_flavor' return the length of a name prefix 5627 * before last triple underscore. Struct name part after last triple 5628 * underscore is ignored by BPF CO-RE relocation during relocation matching. 5629 */ 5630 size_t bpf_core_essential_name_len(const char *name) 5631 { 5632 size_t n = strlen(name); 5633 int i; 5634 5635 for (i = n - 5; i >= 0; i--) { 5636 if (bpf_core_is_flavor_sep(name + i)) 5637 return i + 1; 5638 } 5639 return n; 5640 } 5641 5642 void bpf_core_free_cands(struct bpf_core_cand_list *cands) 5643 { 5644 if (!cands) 5645 return; 5646 5647 free(cands->cands); 5648 free(cands); 5649 } 5650 5651 int bpf_core_add_cands(struct bpf_core_cand *local_cand, 5652 size_t local_essent_len, 5653 const struct btf *targ_btf, 5654 const char *targ_btf_name, 5655 int targ_start_id, 5656 struct bpf_core_cand_list *cands) 5657 { 5658 struct bpf_core_cand *new_cands, *cand; 5659 const struct btf_type *t, *local_t; 5660 const char *targ_name, *local_name; 5661 size_t targ_essent_len; 5662 int n, i; 5663 5664 local_t = btf__type_by_id(local_cand->btf, local_cand->id); 5665 local_name = btf__str_by_offset(local_cand->btf, local_t->name_off); 5666 5667 n = btf__type_cnt(targ_btf); 5668 for (i = targ_start_id; i < n; i++) { 5669 t = btf__type_by_id(targ_btf, i); 5670 if (!btf_kind_core_compat(t, local_t)) 5671 continue; 5672 5673 targ_name = btf__name_by_offset(targ_btf, t->name_off); 5674 if (str_is_empty(targ_name)) 5675 continue; 5676 5677 targ_essent_len = bpf_core_essential_name_len(targ_name); 5678 if (targ_essent_len != local_essent_len) 5679 continue; 5680 5681 if (strncmp(local_name, targ_name, local_essent_len) != 0) 5682 continue; 5683 5684 pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n", 5685 local_cand->id, btf_kind_str(local_t), 5686 local_name, i, btf_kind_str(t), targ_name, 5687 targ_btf_name); 5688 new_cands = libbpf_reallocarray(cands->cands, cands->len + 1, 5689 sizeof(*cands->cands)); 5690 if (!new_cands) 5691 return -ENOMEM; 5692 5693 cand = &new_cands[cands->len]; 5694 cand->btf = targ_btf; 5695 cand->id = i; 5696 5697 cands->cands = new_cands; 5698 cands->len++; 
5699 } 5700 return 0; 5701 } 5702 5703 static int load_module_btfs(struct bpf_object *obj) 5704 { 5705 struct bpf_btf_info info; 5706 struct module_btf *mod_btf; 5707 struct btf *btf; 5708 char name[64]; 5709 __u32 id = 0, len; 5710 int err, fd; 5711 5712 if (obj->btf_modules_loaded) 5713 return 0; 5714 5715 if (obj->gen_loader) 5716 return 0; 5717 5718 /* don't do this again, even if we find no module BTFs */ 5719 obj->btf_modules_loaded = true; 5720 5721 /* kernel too old to support module BTFs */ 5722 if (!kernel_supports(obj, FEAT_MODULE_BTF)) 5723 return 0; 5724 5725 while (true) { 5726 err = bpf_btf_get_next_id(id, &id); 5727 if (err && errno == ENOENT) 5728 return 0; 5729 if (err && errno == EPERM) { 5730 pr_debug("skipping module BTFs loading, missing privileges\n"); 5731 return 0; 5732 } 5733 if (err) { 5734 err = -errno; 5735 pr_warn("failed to iterate BTF objects: %s\n", errstr(err)); 5736 return err; 5737 } 5738 5739 fd = bpf_btf_get_fd_by_id(id); 5740 if (fd < 0) { 5741 if (errno == ENOENT) 5742 continue; /* expected race: BTF was unloaded */ 5743 err = -errno; 5744 pr_warn("failed to get BTF object #%d FD: %s\n", id, errstr(err)); 5745 return err; 5746 } 5747 5748 len = sizeof(info); 5749 memset(&info, 0, sizeof(info)); 5750 info.name = ptr_to_u64(name); 5751 info.name_len = sizeof(name); 5752 5753 err = bpf_btf_get_info_by_fd(fd, &info, &len); 5754 if (err) { 5755 err = -errno; 5756 pr_warn("failed to get BTF object #%d info: %s\n", id, errstr(err)); 5757 goto err_out; 5758 } 5759 5760 /* ignore non-module BTFs */ 5761 if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) { 5762 close(fd); 5763 continue; 5764 } 5765 5766 btf = btf_get_from_fd(fd, obj->btf_vmlinux); 5767 err = libbpf_get_error(btf); 5768 if (err) { 5769 pr_warn("failed to load module [%s]'s BTF object #%d: %s\n", 5770 name, id, errstr(err)); 5771 goto err_out; 5772 } 5773 5774 err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap, 5775 sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); 5776 if (err) 5777 goto err_out; 5778 5779 mod_btf = &obj->btf_modules[obj->btf_module_cnt++]; 5780 5781 mod_btf->btf = btf; 5782 mod_btf->id = id; 5783 mod_btf->fd = fd; 5784 mod_btf->name = strdup(name); 5785 if (!mod_btf->name) { 5786 err = -ENOMEM; 5787 goto err_out; 5788 } 5789 continue; 5790 5791 err_out: 5792 close(fd); 5793 return err; 5794 } 5795 5796 return 0; 5797 } 5798 5799 static struct bpf_core_cand_list * 5800 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id) 5801 { 5802 struct bpf_core_cand local_cand = {}; 5803 struct bpf_core_cand_list *cands; 5804 const struct btf *main_btf; 5805 const struct btf_type *local_t; 5806 const char *local_name; 5807 size_t local_essent_len; 5808 int err, i; 5809 5810 local_cand.btf = local_btf; 5811 local_cand.id = local_type_id; 5812 local_t = btf__type_by_id(local_btf, local_type_id); 5813 if (!local_t) 5814 return ERR_PTR(-EINVAL); 5815 5816 local_name = btf__name_by_offset(local_btf, local_t->name_off); 5817 if (str_is_empty(local_name)) 5818 return ERR_PTR(-EINVAL); 5819 local_essent_len = bpf_core_essential_name_len(local_name); 5820 5821 cands = calloc(1, sizeof(*cands)); 5822 if (!cands) 5823 return ERR_PTR(-ENOMEM); 5824 5825 /* Attempt to find target candidates in vmlinux BTF first */ 5826 main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux; 5827 err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands); 5828 if (err) 5829 goto err_out; 5830 5831 /* if vmlinux BTF has any 
candidate, don't go for module BTFs */ 5832 if (cands->len) 5833 return cands; 5834 5835 /* if vmlinux BTF was overridden, don't attempt to load module BTFs */ 5836 if (obj->btf_vmlinux_override) 5837 return cands; 5838 5839 /* now look through module BTFs, trying to still find candidates */ 5840 err = load_module_btfs(obj); 5841 if (err) 5842 goto err_out; 5843 5844 for (i = 0; i < obj->btf_module_cnt; i++) { 5845 err = bpf_core_add_cands(&local_cand, local_essent_len, 5846 obj->btf_modules[i].btf, 5847 obj->btf_modules[i].name, 5848 btf__type_cnt(obj->btf_vmlinux), 5849 cands); 5850 if (err) 5851 goto err_out; 5852 } 5853 5854 return cands; 5855 err_out: 5856 bpf_core_free_cands(cands); 5857 return ERR_PTR(err); 5858 } 5859 5860 /* Check local and target types for compatibility. This check is used for 5861 * type-based CO-RE relocations and follows slightly different rules than 5862 * field-based relocations. This function assumes that root types were already 5863 * checked for name match. Beyond that initial root-level name check, names 5864 * are completely ignored. Compatibility rules are as follows: 5865 * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but 5866 * kind should match for local and target types (i.e., STRUCT is not 5867 * compatible with UNION); 5868 * - for ENUMs, the size is ignored; 5869 * - for INT, size and signedness are ignored; 5870 * - for ARRAY, dimensionality is ignored, element types are checked for 5871 * compatibility recursively; 5872 * - CONST/VOLATILE/RESTRICT modifiers are ignored; 5873 * - TYPEDEFs/PTRs are compatible if the types they point to are compatible; 5874 * - FUNC_PROTOs are compatible if they have compatible signature: same 5875 * number of input args and compatible return and argument types. 5876 * These rules are not set in stone and probably will be adjusted as we get 5877 * more experience with using BPF CO-RE relocations.
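 *
 * As a minimal illustration of these rules (hypothetical type names, not
 * taken from any real BTF): a local 'struct sample' is compatible with a
 * target 'struct sample' regardless of how their members differ, since
 * struct/union members are not inspected by this check, but it is not
 * compatible with a target 'union sample', since the kind has to match;
 * similarly, a FUNC_PROTO taking 'int' is compatible with one taking 'long',
 * since INT size and signedness are ignored.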
5878 */ 5879 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, 5880 const struct btf *targ_btf, __u32 targ_id) 5881 { 5882 return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32); 5883 } 5884 5885 int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, 5886 const struct btf *targ_btf, __u32 targ_id) 5887 { 5888 return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32); 5889 } 5890 5891 static size_t bpf_core_hash_fn(const long key, void *ctx) 5892 { 5893 return key; 5894 } 5895 5896 static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx) 5897 { 5898 return k1 == k2; 5899 } 5900 5901 static int record_relo_core(struct bpf_program *prog, 5902 const struct bpf_core_relo *core_relo, int insn_idx) 5903 { 5904 struct reloc_desc *relos, *relo; 5905 5906 relos = libbpf_reallocarray(prog->reloc_desc, 5907 prog->nr_reloc + 1, sizeof(*relos)); 5908 if (!relos) 5909 return -ENOMEM; 5910 relo = &relos[prog->nr_reloc]; 5911 relo->type = RELO_CORE; 5912 relo->insn_idx = insn_idx; 5913 relo->core_relo = core_relo; 5914 prog->reloc_desc = relos; 5915 prog->nr_reloc++; 5916 return 0; 5917 } 5918 5919 static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx) 5920 { 5921 struct reloc_desc *relo; 5922 int i; 5923 5924 for (i = 0; i < prog->nr_reloc; i++) { 5925 relo = &prog->reloc_desc[i]; 5926 if (relo->type != RELO_CORE || relo->insn_idx != insn_idx) 5927 continue; 5928 5929 return relo->core_relo; 5930 } 5931 5932 return NULL; 5933 } 5934 5935 static int bpf_core_resolve_relo(struct bpf_program *prog, 5936 const struct bpf_core_relo *relo, 5937 int relo_idx, 5938 const struct btf *local_btf, 5939 struct hashmap *cand_cache, 5940 struct bpf_core_relo_res *targ_res) 5941 { 5942 struct bpf_core_spec specs_scratch[3] = {}; 5943 struct bpf_core_cand_list *cands = NULL; 5944 const char *prog_name = prog->name; 5945 const struct btf_type *local_type; 5946 const char *local_name; 5947 __u32 local_id = relo->type_id; 5948 int err; 5949 5950 local_type = btf__type_by_id(local_btf, local_id); 5951 if (!local_type) 5952 return -EINVAL; 5953 5954 local_name = btf__name_by_offset(local_btf, local_type->name_off); 5955 if (!local_name) 5956 return -EINVAL; 5957 5958 if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && 5959 !hashmap__find(cand_cache, local_id, &cands)) { 5960 cands = bpf_core_find_cands(prog->obj, local_btf, local_id); 5961 if (IS_ERR(cands)) { 5962 pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n", 5963 prog_name, relo_idx, local_id, btf_kind_str(local_type), 5964 local_name, PTR_ERR(cands)); 5965 return PTR_ERR(cands); 5966 } 5967 err = hashmap__set(cand_cache, local_id, cands, NULL, NULL); 5968 if (err) { 5969 bpf_core_free_cands(cands); 5970 return err; 5971 } 5972 } 5973 5974 return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch, 5975 targ_res); 5976 } 5977 5978 static int 5979 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) 5980 { 5981 const struct btf_ext_info_sec *sec; 5982 struct bpf_core_relo_res targ_res; 5983 const struct bpf_core_relo *rec; 5984 const struct btf_ext_info *seg; 5985 struct hashmap_entry *entry; 5986 struct hashmap *cand_cache = NULL; 5987 struct bpf_program *prog; 5988 struct bpf_insn *insn; 5989 const char *sec_name; 5990 int i, err = 0, insn_idx, sec_idx, sec_num; 5991 5992 if (obj->btf_ext->core_relo_info.len == 0) 5993 return 0; 5994 5995 if 
(targ_btf_path) { 5996 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL); 5997 err = libbpf_get_error(obj->btf_vmlinux_override); 5998 if (err) { 5999 pr_warn("failed to parse target BTF: %s\n", errstr(err)); 6000 return err; 6001 } 6002 } 6003 6004 cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL); 6005 if (IS_ERR(cand_cache)) { 6006 err = PTR_ERR(cand_cache); 6007 goto out; 6008 } 6009 6010 seg = &obj->btf_ext->core_relo_info; 6011 sec_num = 0; 6012 for_each_btf_ext_sec(seg, sec) { 6013 sec_idx = seg->sec_idxs[sec_num]; 6014 sec_num++; 6015 6016 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); 6017 if (str_is_empty(sec_name)) { 6018 err = -EINVAL; 6019 goto out; 6020 } 6021 6022 pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info); 6023 6024 for_each_btf_ext_rec(seg, sec, i, rec) { 6025 if (rec->insn_off % BPF_INSN_SZ) 6026 return -EINVAL; 6027 insn_idx = rec->insn_off / BPF_INSN_SZ; 6028 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); 6029 if (!prog) { 6030 /* When __weak subprog is "overridden" by another instance 6031 * of the subprog from a different object file, linker still 6032 * appends all the .BTF.ext info that used to belong to that 6033 * eliminated subprogram. 6034 * This is similar to what x86-64 linker does for relocations. 6035 * So just ignore such relocations just like we ignore 6036 * subprog instructions when discovering subprograms. 6037 */ 6038 pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n", 6039 sec_name, i, insn_idx); 6040 continue; 6041 } 6042 /* no need to apply CO-RE relocation if the program is 6043 * not going to be loaded 6044 */ 6045 if (!prog->autoload) 6046 continue; 6047 6048 /* adjust insn_idx from section frame of reference to the local 6049 * program's frame of reference; (sub-)program code is not yet 6050 * relocated, so it's enough to just subtract in-section offset 6051 */ 6052 insn_idx = insn_idx - prog->sec_insn_off; 6053 if (insn_idx >= prog->insns_cnt) 6054 return -EINVAL; 6055 insn = &prog->insns[insn_idx]; 6056 6057 err = record_relo_core(prog, rec, insn_idx); 6058 if (err) { 6059 pr_warn("prog '%s': relo #%d: failed to record relocation: %s\n", 6060 prog->name, i, errstr(err)); 6061 goto out; 6062 } 6063 6064 if (prog->obj->gen_loader) 6065 continue; 6066 6067 err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res); 6068 if (err) { 6069 pr_warn("prog '%s': relo #%d: failed to relocate: %s\n", 6070 prog->name, i, errstr(err)); 6071 goto out; 6072 } 6073 6074 err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res); 6075 if (err) { 6076 pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %s\n", 6077 prog->name, i, insn_idx, errstr(err)); 6078 goto out; 6079 } 6080 } 6081 } 6082 6083 out: 6084 /* obj->btf_vmlinux and module BTFs are freed after object load */ 6085 btf__free(obj->btf_vmlinux_override); 6086 obj->btf_vmlinux_override = NULL; 6087 6088 if (!IS_ERR_OR_NULL(cand_cache)) { 6089 hashmap__for_each_entry(cand_cache, entry, i) { 6090 bpf_core_free_cands(entry->pvalue); 6091 } 6092 hashmap__free(cand_cache); 6093 } 6094 return err; 6095 } 6096 6097 /* base map load ldimm64 special constant, used also for log fixup logic */ 6098 #define POISON_LDIMM64_MAP_BASE 2001000000 6099 #define POISON_LDIMM64_MAP_PFX "200100" 6100 6101 static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx, 6102 int insn_idx, struct bpf_insn *insn, 6103 int map_idx, const struct bpf_map 
*map) 6104 { 6105 int i; 6106 6107 pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n", 6108 prog->name, relo_idx, insn_idx, map_idx, map->name); 6109 6110 /* we turn single ldimm64 into two identical invalid calls */ 6111 for (i = 0; i < 2; i++) { 6112 insn->code = BPF_JMP | BPF_CALL; 6113 insn->dst_reg = 0; 6114 insn->src_reg = 0; 6115 insn->off = 0; 6116 /* if this instruction is reachable (not dead code), 6117 * verifier will complain with something like: 6118 * invalid func unknown#2001000123 6119 * where lower 123 is map index into obj->maps[] array 6120 */ 6121 insn->imm = POISON_LDIMM64_MAP_BASE + map_idx; 6122 6123 insn++; 6124 } 6125 } 6126 6127 /* unresolved kfunc call special constant, used also for log fixup logic */ 6128 #define POISON_CALL_KFUNC_BASE 2002000000 6129 #define POISON_CALL_KFUNC_PFX "2002" 6130 6131 static void poison_kfunc_call(struct bpf_program *prog, int relo_idx, 6132 int insn_idx, struct bpf_insn *insn, 6133 int ext_idx, const struct extern_desc *ext) 6134 { 6135 pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n", 6136 prog->name, relo_idx, insn_idx, ext->name); 6137 6138 /* we turn kfunc call into invalid helper call with identifiable constant */ 6139 insn->code = BPF_JMP | BPF_CALL; 6140 insn->dst_reg = 0; 6141 insn->src_reg = 0; 6142 insn->off = 0; 6143 /* if this instruction is reachable (not dead code), 6144 * verifier will complain with something like: 6145 * invalid func unknown#2002000123 6146 * where lower 123 is extern index into obj->externs[] array 6147 */ 6148 insn->imm = POISON_CALL_KFUNC_BASE + ext_idx; 6149 } 6150 6151 /* Relocate data references within program code: 6152 * - map references; 6153 * - global variable references; 6154 * - extern references.
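 *
 * For example (illustrative BPF-side C; __kconfig/__ksym come from
 * bpf_helpers.h, 'my_map' and 'my_kfunc' are hypothetical names):
 *
 *	extern int CONFIG_HZ __kconfig;
 *	extern void my_kfunc(void) __ksym;
 *	... bpf_map_lookup_elem(&my_map, &key) ...
 *
 * Each such reference leaves behind a reloc_desc (RELO_LD64, RELO_DATA,
 * RELO_EXTERN_LD64, RELO_EXTERN_CALL, ...), and the switch below patches the
 * corresponding ld_imm64/call instruction with the real map FD, kconfig data
 * offset, kernel BTF ID, or a poisoned value.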
6155 */ 6156 static int 6157 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) 6158 { 6159 int i; 6160 6161 for (i = 0; i < prog->nr_reloc; i++) { 6162 struct reloc_desc *relo = &prog->reloc_desc[i]; 6163 struct bpf_insn *insn = &prog->insns[relo->insn_idx]; 6164 const struct bpf_map *map; 6165 struct extern_desc *ext; 6166 6167 switch (relo->type) { 6168 case RELO_LD64: 6169 map = &obj->maps[relo->map_idx]; 6170 if (obj->gen_loader) { 6171 insn[0].src_reg = BPF_PSEUDO_MAP_IDX; 6172 insn[0].imm = relo->map_idx; 6173 } else if (map->autocreate) { 6174 insn[0].src_reg = BPF_PSEUDO_MAP_FD; 6175 insn[0].imm = map->fd; 6176 } else { 6177 poison_map_ldimm64(prog, i, relo->insn_idx, insn, 6178 relo->map_idx, map); 6179 } 6180 break; 6181 case RELO_DATA: 6182 map = &obj->maps[relo->map_idx]; 6183 insn[1].imm = insn[0].imm + relo->sym_off; 6184 if (obj->gen_loader) { 6185 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; 6186 insn[0].imm = relo->map_idx; 6187 } else if (map->autocreate) { 6188 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; 6189 insn[0].imm = map->fd; 6190 } else { 6191 poison_map_ldimm64(prog, i, relo->insn_idx, insn, 6192 relo->map_idx, map); 6193 } 6194 break; 6195 case RELO_EXTERN_LD64: 6196 ext = &obj->externs[relo->ext_idx]; 6197 if (ext->type == EXT_KCFG) { 6198 if (obj->gen_loader) { 6199 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; 6200 insn[0].imm = obj->kconfig_map_idx; 6201 } else { 6202 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; 6203 insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; 6204 } 6205 insn[1].imm = ext->kcfg.data_off; 6206 } else /* EXT_KSYM */ { 6207 if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */ 6208 insn[0].src_reg = BPF_PSEUDO_BTF_ID; 6209 insn[0].imm = ext->ksym.kernel_btf_id; 6210 insn[1].imm = ext->ksym.kernel_btf_obj_fd; 6211 } else { /* typeless ksyms or unresolved typed ksyms */ 6212 insn[0].imm = (__u32)ext->ksym.addr; 6213 insn[1].imm = ext->ksym.addr >> 32; 6214 } 6215 } 6216 break; 6217 case RELO_EXTERN_CALL: 6218 ext = &obj->externs[relo->ext_idx]; 6219 insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL; 6220 if (ext->is_set) { 6221 insn[0].imm = ext->ksym.kernel_btf_id; 6222 insn[0].off = ext->ksym.btf_fd_idx; 6223 } else { /* unresolved weak kfunc call */ 6224 poison_kfunc_call(prog, i, relo->insn_idx, insn, 6225 relo->ext_idx, ext); 6226 } 6227 break; 6228 case RELO_SUBPROG_ADDR: 6229 if (insn[0].src_reg != BPF_PSEUDO_FUNC) { 6230 pr_warn("prog '%s': relo #%d: bad insn\n", 6231 prog->name, i); 6232 return -EINVAL; 6233 } 6234 /* handled already */ 6235 break; 6236 case RELO_CALL: 6237 /* handled already */ 6238 break; 6239 case RELO_CORE: 6240 /* will be handled by bpf_program_record_relos() */ 6241 break; 6242 default: 6243 pr_warn("prog '%s': relo #%d: bad relo type %d\n", 6244 prog->name, i, relo->type); 6245 return -EINVAL; 6246 } 6247 } 6248 6249 return 0; 6250 } 6251 6252 static int adjust_prog_btf_ext_info(const struct bpf_object *obj, 6253 const struct bpf_program *prog, 6254 const struct btf_ext_info *ext_info, 6255 void **prog_info, __u32 *prog_rec_cnt, 6256 __u32 *prog_rec_sz) 6257 { 6258 void *copy_start = NULL, *copy_end = NULL; 6259 void *rec, *rec_end, *new_prog_info; 6260 const struct btf_ext_info_sec *sec; 6261 size_t old_sz, new_sz; 6262 int i, sec_num, sec_idx, off_adj; 6263 6264 sec_num = 0; 6265 for_each_btf_ext_sec(ext_info, sec) { 6266 sec_idx = ext_info->sec_idxs[sec_num]; 6267 sec_num++; 6268 if (prog->sec_idx != sec_idx) 6269 continue; 6270 6271 for_each_btf_ext_rec(ext_info, sec, i, rec) { 6272 __u32 
insn_off = *(__u32 *)rec / BPF_INSN_SZ; 6273 6274 if (insn_off < prog->sec_insn_off) 6275 continue; 6276 if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt) 6277 break; 6278 6279 if (!copy_start) 6280 copy_start = rec; 6281 copy_end = rec + ext_info->rec_size; 6282 } 6283 6284 if (!copy_start) 6285 return -ENOENT; 6286 6287 /* append func/line info of a given (sub-)program to the main 6288 * program func/line info 6289 */ 6290 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size; 6291 new_sz = old_sz + (copy_end - copy_start); 6292 new_prog_info = realloc(*prog_info, new_sz); 6293 if (!new_prog_info) 6294 return -ENOMEM; 6295 *prog_info = new_prog_info; 6296 *prog_rec_cnt = new_sz / ext_info->rec_size; 6297 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start); 6298 6299 /* Kernel instruction offsets are in units of 8-byte 6300 * instructions, while .BTF.ext instruction offsets generated 6301 * by Clang are in units of bytes. So convert Clang offsets 6302 * into kernel offsets and adjust offset according to program 6303 * relocated position. 6304 */ 6305 off_adj = prog->sub_insn_off - prog->sec_insn_off; 6306 rec = new_prog_info + old_sz; 6307 rec_end = new_prog_info + new_sz; 6308 for (; rec < rec_end; rec += ext_info->rec_size) { 6309 __u32 *insn_off = rec; 6310 6311 *insn_off = *insn_off / BPF_INSN_SZ + off_adj; 6312 } 6313 *prog_rec_sz = ext_info->rec_size; 6314 return 0; 6315 } 6316 6317 return -ENOENT; 6318 } 6319 6320 static int 6321 reloc_prog_func_and_line_info(const struct bpf_object *obj, 6322 struct bpf_program *main_prog, 6323 const struct bpf_program *prog) 6324 { 6325 int err; 6326 6327 /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't 6328 * support func/line info 6329 */ 6330 if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC)) 6331 return 0; 6332 6333 /* only attempt func info relocation if main program's func_info 6334 * relocation was successful 6335 */ 6336 if (main_prog != prog && !main_prog->func_info) 6337 goto line_info; 6338 6339 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info, 6340 &main_prog->func_info, 6341 &main_prog->func_info_cnt, 6342 &main_prog->func_info_rec_size); 6343 if (err) { 6344 if (err != -ENOENT) { 6345 pr_warn("prog '%s': error relocating .BTF.ext function info: %s\n", 6346 prog->name, errstr(err)); 6347 return err; 6348 } 6349 if (main_prog->func_info) { 6350 /* 6351 * Some info has already been found but has problem 6352 * in the last btf_ext reloc. Must have to error out. 6353 */ 6354 pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name); 6355 return err; 6356 } 6357 /* Have problem loading the very first info. Ignore the rest. */ 6358 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n", 6359 prog->name); 6360 } 6361 6362 line_info: 6363 /* don't relocate line info if main program's relocation failed */ 6364 if (main_prog != prog && !main_prog->line_info) 6365 return 0; 6366 6367 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info, 6368 &main_prog->line_info, 6369 &main_prog->line_info_cnt, 6370 &main_prog->line_info_rec_size); 6371 if (err) { 6372 if (err != -ENOENT) { 6373 pr_warn("prog '%s': error relocating .BTF.ext line info: %s\n", 6374 prog->name, errstr(err)); 6375 return err; 6376 } 6377 if (main_prog->line_info) { 6378 /* 6379 * Some info has already been found but has problem 6380 * in the last btf_ext reloc. Must have to error out. 
6381 */ 6382 pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name); 6383 return err; 6384 } 6385 /* Have problem loading the very first info. Ignore the rest. */ 6386 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n", 6387 prog->name); 6388 } 6389 return 0; 6390 } 6391 6392 static int cmp_relo_by_insn_idx(const void *key, const void *elem) 6393 { 6394 size_t insn_idx = *(const size_t *)key; 6395 const struct reloc_desc *relo = elem; 6396 6397 if (insn_idx == relo->insn_idx) 6398 return 0; 6399 return insn_idx < relo->insn_idx ? -1 : 1; 6400 } 6401 6402 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx) 6403 { 6404 if (!prog->nr_reloc) 6405 return NULL; 6406 return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc, 6407 sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx); 6408 } 6409 6410 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog) 6411 { 6412 int new_cnt = main_prog->nr_reloc + subprog->nr_reloc; 6413 struct reloc_desc *relos; 6414 int i; 6415 6416 if (main_prog == subprog) 6417 return 0; 6418 relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos)); 6419 /* if new count is zero, reallocarray can return a valid NULL result; 6420 * in this case the previous pointer will be freed, so we *have to* 6421 * reassign old pointer to the new value (even if it's NULL) 6422 */ 6423 if (!relos && new_cnt) 6424 return -ENOMEM; 6425 if (subprog->nr_reloc) 6426 memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, 6427 sizeof(*relos) * subprog->nr_reloc); 6428 6429 for (i = main_prog->nr_reloc; i < new_cnt; i++) 6430 relos[i].insn_idx += subprog->sub_insn_off; 6431 /* After insn_idx adjustment the 'relos' array is still sorted 6432 * by insn_idx and doesn't break bsearch. 6433 */ 6434 main_prog->reloc_desc = relos; 6435 main_prog->nr_reloc = new_cnt; 6436 return 0; 6437 } 6438 6439 static int 6440 bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog, 6441 struct bpf_program *subprog) 6442 { 6443 struct bpf_insn *insns; 6444 size_t new_cnt; 6445 int err; 6446 6447 subprog->sub_insn_off = main_prog->insns_cnt; 6448 6449 new_cnt = main_prog->insns_cnt + subprog->insns_cnt; 6450 insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); 6451 if (!insns) { 6452 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); 6453 return -ENOMEM; 6454 } 6455 main_prog->insns = insns; 6456 main_prog->insns_cnt = new_cnt; 6457 6458 memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, 6459 subprog->insns_cnt * sizeof(*insns)); 6460 6461 pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", 6462 main_prog->name, subprog->insns_cnt, subprog->name); 6463 6464 /* The subprog insns are now appended. Append its relos too. 
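 * Carrying the relos over lets bpf_object__relocate_data() later patch data
 * references against the main program's final, fully relocated instruction
 * array.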
*/ 6465 err = append_subprog_relos(main_prog, subprog); 6466 if (err) 6467 return err; 6468 return 0; 6469 } 6470 6471 static int 6472 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, 6473 struct bpf_program *prog) 6474 { 6475 size_t sub_insn_idx, insn_idx; 6476 struct bpf_program *subprog; 6477 struct reloc_desc *relo; 6478 struct bpf_insn *insn; 6479 int err; 6480 6481 err = reloc_prog_func_and_line_info(obj, main_prog, prog); 6482 if (err) 6483 return err; 6484 6485 for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) { 6486 insn = &main_prog->insns[prog->sub_insn_off + insn_idx]; 6487 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn)) 6488 continue; 6489 6490 relo = find_prog_insn_relo(prog, insn_idx); 6491 if (relo && relo->type == RELO_EXTERN_CALL) 6492 /* kfunc relocations will be handled later 6493 * in bpf_object__relocate_data() 6494 */ 6495 continue; 6496 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) { 6497 pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n", 6498 prog->name, insn_idx, relo->type); 6499 return -LIBBPF_ERRNO__RELOC; 6500 } 6501 if (relo) { 6502 /* sub-program instruction index is a combination of 6503 * an offset of a symbol pointed to by relocation and 6504 * call instruction's imm field; for global functions, 6505 * call always has imm = -1, but for static functions 6506 * relocation is against STT_SECTION and insn->imm 6507 * points to a start of a static function 6508 * 6509 * for subprog addr relocation, the relo->sym_off + insn->imm is 6510 * the byte offset in the corresponding section. 6511 */ 6512 if (relo->type == RELO_CALL) 6513 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1; 6514 else 6515 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ; 6516 } else if (insn_is_pseudo_func(insn)) { 6517 /* 6518 * RELO_SUBPROG_ADDR relo is always emitted even if both 6519 * functions are in the same section, so it shouldn't reach here. 6520 */ 6521 pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n", 6522 prog->name, insn_idx); 6523 return -LIBBPF_ERRNO__RELOC; 6524 } else { 6525 /* if subprogram call is to a static function within 6526 * the same ELF section, there won't be any relocation 6527 * emitted, but it also means there is no additional 6528 * offset necessary, insns->imm is relative to 6529 * instruction's original position within the section 6530 */ 6531 sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1; 6532 } 6533 6534 /* we enforce that sub-programs should be in .text section */ 6535 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx); 6536 if (!subprog) { 6537 pr_warn("prog '%s': no .text section found yet sub-program call exists\n", 6538 prog->name); 6539 return -LIBBPF_ERRNO__RELOC; 6540 } 6541 6542 /* if it's the first call instruction calling into this 6543 * subprogram (meaning this subprog hasn't been processed 6544 * yet) within the context of current main program: 6545 * - append it at the end of main program's instruction block; 6546 * - process it recursively, while current program is put on hold; 6547 * - if that subprogram calls some other not yet processed 6548 * subprogram, same thing will happen recursively until 6549 * there are no more unprocessed subprograms left to append 6550 * and relocate.
6551 */ 6552 if (subprog->sub_insn_off == 0) { 6553 err = bpf_object__append_subprog_code(obj, main_prog, subprog); 6554 if (err) 6555 return err; 6556 err = bpf_object__reloc_code(obj, main_prog, subprog); 6557 if (err) 6558 return err; 6559 } 6560 6561 /* main_prog->insns memory could have been re-allocated, so 6562 * calculate pointer again 6563 */ 6564 insn = &main_prog->insns[prog->sub_insn_off + insn_idx]; 6565 /* calculate correct instruction position within current main 6566 * prog; each main prog can have a different set of 6567 * subprograms appended (potentially in different order as 6568 * well), so position of any subprog can be different for 6569 * different main programs 6570 */ 6571 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1; 6572 6573 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n", 6574 prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off); 6575 } 6576 6577 return 0; 6578 } 6579 6580 /* 6581 * Relocate sub-program calls. 6582 * 6583 * The algorithm operates as follows. Each entry-point BPF program (referred to as 6584 * main prog) is processed separately. Each subprog (a non-entry function 6585 * that can be called from either entry progs or other subprogs) gets its 6586 * sub_insn_off reset to zero. This serves as an indicator that this subprogram 6587 * hasn't yet been appended and relocated within current main prog. Once it's 6588 * relocated, sub_insn_off will point at the position within current main prog 6589 * where given subprog was appended. This will further be used to relocate all 6590 * the call instructions jumping into this subprog. 6591 * 6592 * We start with main program and process all call instructions. If the call 6593 * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off 6594 * is zero), subprog instructions are appended at the end of main program's 6595 * instruction array. Then main program is "put on hold" while we recursively 6596 * process newly appended subprogram. If that subprogram calls into another 6597 * subprogram that hasn't been appended, new subprogram is appended again to 6598 * the *main* prog's instructions (subprog's instructions are always left 6599 * untouched, as they need to be in unmodified state for subsequent main progs 6600 * and subprog instructions are always sent only as part of a main prog) and 6601 * the process continues recursively. Once all the subprogs called from a main 6602 * prog or any of its subprogs are appended (and relocated), all their 6603 * positions within finalized instructions array are known, so it's easy to 6604 * rewrite call instructions with correct relative offsets, corresponding to 6605 * desired target subprog. 6606 * 6607 * It's important to realize that some subprogs might not be called from some 6608 * main prog or any of its called/used subprogs. Those will keep their 6609 * subprog->sub_insn_off as zero at all times and won't be appended to current 6610 * main prog and won't be relocated within the context of current main prog. 6611 * They might still be used from other main progs later. 6612 * 6613 * Visually this process can be shown as below. Suppose we have two main 6614 * programs mainA and mainB and BPF object contains three subprogs: subA, 6615 * subB, and subC.
mainA calls only subA, mainB calls only subC, but subA and 6616 * subC both call subB: 6617 * 6618 * +--------+ +-------+ 6619 * | v v | 6620 * +--+---+ +--+-+-+ +---+--+ 6621 * | subA | | subB | | subC | 6622 * +--+---+ +------+ +---+--+ 6623 * ^ ^ 6624 * | | 6625 * +---+-------+ +------+----+ 6626 * | mainA | | mainB | 6627 * +-----------+ +-----------+ 6628 * 6629 * We'll start relocating mainA, will find subA, append it and start 6630 * processing sub A recursively: 6631 * 6632 * +-----------+------+ 6633 * | mainA | subA | 6634 * +-----------+------+ 6635 * 6636 * At this point we notice that subB is used from subA, so we append it and 6637 * relocate (there are no further subcalls from subB): 6638 * 6639 * +-----------+------+------+ 6640 * | mainA | subA | subB | 6641 * +-----------+------+------+ 6642 * 6643 * At this point, we relocate subA calls, then go one level up and finish with 6644 * relocatin mainA calls. mainA is done. 6645 * 6646 * For mainB process is similar but results in different order. We start with 6647 * mainB and skip subA and subB, as mainB never calls them (at least 6648 * directly), but we see subC is needed, so we append and start processing it: 6649 * 6650 * +-----------+------+ 6651 * | mainB | subC | 6652 * +-----------+------+ 6653 * Now we see subC needs subB, so we go back to it, append and relocate it: 6654 * 6655 * +-----------+------+------+ 6656 * | mainB | subC | subB | 6657 * +-----------+------+------+ 6658 * 6659 * At this point we unwind recursion, relocate calls in subC, then in mainB. 6660 */ 6661 static int 6662 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) 6663 { 6664 struct bpf_program *subprog; 6665 int i, err; 6666 6667 /* mark all subprogs as not relocated (yet) within the context of 6668 * current main program 6669 */ 6670 for (i = 0; i < obj->nr_programs; i++) { 6671 subprog = &obj->programs[i]; 6672 if (!prog_is_subprog(obj, subprog)) 6673 continue; 6674 6675 subprog->sub_insn_off = 0; 6676 } 6677 6678 err = bpf_object__reloc_code(obj, prog, prog); 6679 if (err) 6680 return err; 6681 6682 return 0; 6683 } 6684 6685 static void 6686 bpf_object__free_relocs(struct bpf_object *obj) 6687 { 6688 struct bpf_program *prog; 6689 int i; 6690 6691 /* free up relocation descriptors */ 6692 for (i = 0; i < obj->nr_programs; i++) { 6693 prog = &obj->programs[i]; 6694 zfree(&prog->reloc_desc); 6695 prog->nr_reloc = 0; 6696 } 6697 } 6698 6699 static int cmp_relocs(const void *_a, const void *_b) 6700 { 6701 const struct reloc_desc *a = _a; 6702 const struct reloc_desc *b = _b; 6703 6704 if (a->insn_idx != b->insn_idx) 6705 return a->insn_idx < b->insn_idx ? -1 : 1; 6706 6707 /* no two relocations should have the same insn_idx, but ... */ 6708 if (a->type != b->type) 6709 return a->type < b->type ? 
-1 : 1; 6710 6711 return 0; 6712 } 6713 6714 static void bpf_object__sort_relos(struct bpf_object *obj) 6715 { 6716 int i; 6717 6718 for (i = 0; i < obj->nr_programs; i++) { 6719 struct bpf_program *p = &obj->programs[i]; 6720 6721 if (!p->nr_reloc) 6722 continue; 6723 6724 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); 6725 } 6726 } 6727 6728 static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog) 6729 { 6730 const char *str = "exception_callback:"; 6731 size_t pfx_len = strlen(str); 6732 int i, j, n; 6733 6734 if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG)) 6735 return 0; 6736 6737 n = btf__type_cnt(obj->btf); 6738 for (i = 1; i < n; i++) { 6739 const char *name; 6740 struct btf_type *t; 6741 6742 t = btf_type_by_id(obj->btf, i); 6743 if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1) 6744 continue; 6745 6746 name = btf__str_by_offset(obj->btf, t->name_off); 6747 if (strncmp(name, str, pfx_len) != 0) 6748 continue; 6749 6750 t = btf_type_by_id(obj->btf, t->type); 6751 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) { 6752 pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n", 6753 prog->name); 6754 return -EINVAL; 6755 } 6756 if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0) 6757 continue; 6758 /* Multiple callbacks are specified for the same prog, 6759 * the verifier will eventually return an error for this 6760 * case, hence simply skip appending a subprog. 6761 */ 6762 if (prog->exception_cb_idx >= 0) { 6763 prog->exception_cb_idx = -1; 6764 break; 6765 } 6766 6767 name += pfx_len; 6768 if (str_is_empty(name)) { 6769 pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n", 6770 prog->name); 6771 return -EINVAL; 6772 } 6773 6774 for (j = 0; j < obj->nr_programs; j++) { 6775 struct bpf_program *subprog = &obj->programs[j]; 6776 6777 if (!prog_is_subprog(obj, subprog)) 6778 continue; 6779 if (strcmp(name, subprog->name) != 0) 6780 continue; 6781 /* Enforce non-hidden, as from verifier point of 6782 * view it expects global functions, whereas the 6783 * mark_btf_static fixes up linkage as static. 
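 *
 * For reference, the decl tag handled here is typically emitted from BPF C
 * along these lines (illustrative sketch; __exception_cb() is the
 * convenience macro from selftests' bpf_experimental.h wrapping
 * btf_decl_tag("exception_callback:<name>")):
 *
 *	__noinline int my_exc_cb(u64 cookie) { return 0; }
 *
 *	SEC("tc")
 *	__exception_cb(my_exc_cb)
 *	int main_prog(struct __sk_buff *skb) { ... }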
6784 */ 6785 if (!subprog->sym_global || subprog->mark_btf_static) { 6786 pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n", 6787 prog->name, subprog->name); 6788 return -EINVAL; 6789 } 6790 /* Let's see if we already saw a static exception callback with the same name */ 6791 if (prog->exception_cb_idx >= 0) { 6792 pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n", 6793 prog->name, subprog->name); 6794 return -EINVAL; 6795 } 6796 prog->exception_cb_idx = j; 6797 break; 6798 } 6799 6800 if (prog->exception_cb_idx >= 0) 6801 continue; 6802 6803 pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name); 6804 return -ENOENT; 6805 } 6806 6807 return 0; 6808 } 6809 6810 static struct { 6811 enum bpf_prog_type prog_type; 6812 const char *ctx_name; 6813 } global_ctx_map[] = { 6814 { BPF_PROG_TYPE_CGROUP_DEVICE, "bpf_cgroup_dev_ctx" }, 6815 { BPF_PROG_TYPE_CGROUP_SKB, "__sk_buff" }, 6816 { BPF_PROG_TYPE_CGROUP_SOCK, "bpf_sock" }, 6817 { BPF_PROG_TYPE_CGROUP_SOCK_ADDR, "bpf_sock_addr" }, 6818 { BPF_PROG_TYPE_CGROUP_SOCKOPT, "bpf_sockopt" }, 6819 { BPF_PROG_TYPE_CGROUP_SYSCTL, "bpf_sysctl" }, 6820 { BPF_PROG_TYPE_FLOW_DISSECTOR, "__sk_buff" }, 6821 { BPF_PROG_TYPE_KPROBE, "bpf_user_pt_regs_t" }, 6822 { BPF_PROG_TYPE_LWT_IN, "__sk_buff" }, 6823 { BPF_PROG_TYPE_LWT_OUT, "__sk_buff" }, 6824 { BPF_PROG_TYPE_LWT_SEG6LOCAL, "__sk_buff" }, 6825 { BPF_PROG_TYPE_LWT_XMIT, "__sk_buff" }, 6826 { BPF_PROG_TYPE_NETFILTER, "bpf_nf_ctx" }, 6827 { BPF_PROG_TYPE_PERF_EVENT, "bpf_perf_event_data" }, 6828 { BPF_PROG_TYPE_RAW_TRACEPOINT, "bpf_raw_tracepoint_args" }, 6829 { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" }, 6830 { BPF_PROG_TYPE_SCHED_ACT, "__sk_buff" }, 6831 { BPF_PROG_TYPE_SCHED_CLS, "__sk_buff" }, 6832 { BPF_PROG_TYPE_SK_LOOKUP, "bpf_sk_lookup" }, 6833 { BPF_PROG_TYPE_SK_MSG, "sk_msg_md" }, 6834 { BPF_PROG_TYPE_SK_REUSEPORT, "sk_reuseport_md" }, 6835 { BPF_PROG_TYPE_SK_SKB, "__sk_buff" }, 6836 { BPF_PROG_TYPE_SOCK_OPS, "bpf_sock_ops" }, 6837 { BPF_PROG_TYPE_SOCKET_FILTER, "__sk_buff" }, 6838 { BPF_PROG_TYPE_XDP, "xdp_md" }, 6839 /* all other program types don't have "named" context structs */ 6840 }; 6841 6842 /* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef, 6843 * for below __builtin_types_compatible_p() checks; 6844 * with this approach we don't need any extra arch-specific #ifdef guards 6845 */ 6846 struct pt_regs; 6847 struct user_pt_regs; 6848 struct user_regs_struct; 6849 6850 static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog, 6851 const char *subprog_name, int arg_idx, 6852 int arg_type_id, const char *ctx_name) 6853 { 6854 const struct btf_type *t; 6855 const char *tname; 6856 6857 /* check if existing parameter already matches verifier expectations */ 6858 t = skip_mods_and_typedefs(btf, arg_type_id, NULL); 6859 if (!btf_is_ptr(t)) 6860 goto out_warn; 6861 6862 /* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe 6863 * and perf_event programs, so check this case early on and forget 6864 * about it for subsequent checks 6865 */ 6866 while (btf_is_mod(t)) 6867 t = btf__type_by_id(btf, t->type); 6868 if (btf_is_typedef(t) && 6869 (prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) { 6870 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>"; 6871 if (strcmp(tname, "bpf_user_pt_regs_t") == 0) 6872 return false; /* canonical type for kprobe/perf_event */ 6873 } 6874 
6875 /* now we can ignore typedefs moving forward */ 6876 t = skip_mods_and_typedefs(btf, t->type, NULL); 6877 6878 /* if it's `void *`, definitely fix up BTF info */ 6879 if (btf_is_void(t)) 6880 return true; 6881 6882 /* if it's already proper canonical type, no need to fix up */ 6883 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>"; 6884 if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0) 6885 return false; 6886 6887 /* special cases */ 6888 switch (prog->type) { 6889 case BPF_PROG_TYPE_KPROBE: 6890 /* `struct pt_regs *` is expected, but we need to fix up */ 6891 if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) 6892 return true; 6893 break; 6894 case BPF_PROG_TYPE_PERF_EVENT: 6895 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) && 6896 btf_is_struct(t) && strcmp(tname, "pt_regs") == 0) 6897 return true; 6898 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) && 6899 btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0) 6900 return true; 6901 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) && 6902 btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0) 6903 return true; 6904 break; 6905 case BPF_PROG_TYPE_RAW_TRACEPOINT: 6906 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: 6907 /* allow u64* as ctx */ 6908 if (btf_is_int(t) && t->size == 8) 6909 return true; 6910 break; 6911 default: 6912 break; 6913 } 6914 6915 out_warn: 6916 pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n", 6917 prog->name, subprog_name, arg_idx, ctx_name); 6918 return false; 6919 } 6920 6921 static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog) 6922 { 6923 int fn_id, fn_proto_id, ret_type_id, orig_proto_id; 6924 int i, err, arg_cnt, fn_name_off, linkage; 6925 struct btf_type *fn_t, *fn_proto_t, *t; 6926 struct btf_param *p; 6927 6928 /* caller already validated FUNC -> FUNC_PROTO validity */ 6929 fn_t = btf_type_by_id(btf, orig_fn_id); 6930 fn_proto_t = btf_type_by_id(btf, fn_t->type); 6931 6932 /* Note that each btf__add_xxx() operation invalidates 6933 * all btf_type and string pointers, so we need to be 6934 * very careful when cloning BTF types. BTF type 6935 * pointers have to be always refetched. And to avoid 6936 * problems with invalidated string pointers, we 6937 * add empty strings initially, then just fix up 6938 * name_off offsets in place. Offsets are stable for 6939 * existing strings, so that works out. 
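 *
 * Schematically, the code below therefore always follows an "add, then
 * refetch by ID" pattern, e.g. (sketch of the steps below):
 *
 *	fn_proto_id = btf__add_func_proto(btf, ret_type_id);
 *	...
 *	fn_proto_t = btf_type_by_id(btf, fn_proto_id);
 *
 * and never reuses a btf_type pointer across an addition.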
6940 */ 6941 fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */ 6942 linkage = btf_func_linkage(fn_t); 6943 orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */ 6944 ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */ 6945 arg_cnt = btf_vlen(fn_proto_t); 6946 6947 /* clone FUNC_PROTO and its params */ 6948 fn_proto_id = btf__add_func_proto(btf, ret_type_id); 6949 if (fn_proto_id < 0) 6950 return -EINVAL; 6951 6952 for (i = 0; i < arg_cnt; i++) { 6953 int name_off; 6954 6955 /* copy original parameter data */ 6956 t = btf_type_by_id(btf, orig_proto_id); 6957 p = &btf_params(t)[i]; 6958 name_off = p->name_off; 6959 6960 err = btf__add_func_param(btf, "", p->type); 6961 if (err) 6962 return err; 6963 6964 fn_proto_t = btf_type_by_id(btf, fn_proto_id); 6965 p = &btf_params(fn_proto_t)[i]; 6966 p->name_off = name_off; /* use remembered str offset */ 6967 } 6968 6969 /* clone FUNC now, btf__add_func() enforces non-empty name, so use 6970 * entry program's name as a placeholder, which we replace immediately 6971 * with original name_off 6972 */ 6973 fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id); 6974 if (fn_id < 0) 6975 return -EINVAL; 6976 6977 fn_t = btf_type_by_id(btf, fn_id); 6978 fn_t->name_off = fn_name_off; /* reuse original string */ 6979 6980 return fn_id; 6981 } 6982 6983 /* Check if main program or global subprog's function prototype has `arg:ctx` 6984 * argument tags, and, if necessary, substitute correct type to match what BPF 6985 * verifier would expect, taking into account specific program type. This 6986 * allows to support __arg_ctx tag transparently on old kernels that don't yet 6987 * have a native support for it in the verifier, making user's life much 6988 * easier. 6989 */ 6990 static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog) 6991 { 6992 const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name; 6993 struct bpf_func_info_min *func_rec; 6994 struct btf_type *fn_t, *fn_proto_t; 6995 struct btf *btf = obj->btf; 6996 const struct btf_type *t; 6997 struct btf_param *p; 6998 int ptr_id = 0, struct_id, tag_id, orig_fn_id; 6999 int i, n, arg_idx, arg_cnt, err, rec_idx; 7000 int *orig_ids; 7001 7002 /* no .BTF.ext, no problem */ 7003 if (!obj->btf_ext || !prog->func_info) 7004 return 0; 7005 7006 /* don't do any fix ups if kernel natively supports __arg_ctx */ 7007 if (kernel_supports(obj, FEAT_ARG_CTX_TAG)) 7008 return 0; 7009 7010 /* some BPF program types just don't have named context structs, so 7011 * this fallback mechanism doesn't work for them 7012 */ 7013 for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) { 7014 if (global_ctx_map[i].prog_type != prog->type) 7015 continue; 7016 ctx_name = global_ctx_map[i].ctx_name; 7017 break; 7018 } 7019 if (!ctx_name) 7020 return 0; 7021 7022 /* remember original func BTF IDs to detect if we already cloned them */ 7023 orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids)); 7024 if (!orig_ids) 7025 return -ENOMEM; 7026 for (i = 0; i < prog->func_info_cnt; i++) { 7027 func_rec = prog->func_info + prog->func_info_rec_size * i; 7028 orig_ids[i] = func_rec->type_id; 7029 } 7030 7031 /* go through each DECL_TAG with "arg:ctx" and see if it points to one 7032 * of our subprogs; if yes and subprog is global and needs adjustment, 7033 * clone and adjust FUNC -> FUNC_PROTO combo 7034 */ 7035 for (i = 1, n = btf__type_cnt(btf); i < n; i++) { 7036 /* only DECL_TAG with "arg:ctx" value are interesting */ 7037 t = btf__type_by_id(btf, i); 7038 if 
(!btf_is_decl_tag(t)) 7039 continue; 7040 if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0) 7041 continue; 7042 7043 /* only global funcs need adjustment, if at all */ 7044 orig_fn_id = t->type; 7045 fn_t = btf_type_by_id(btf, orig_fn_id); 7046 if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL) 7047 continue; 7048 7049 /* sanity check FUNC -> FUNC_PROTO chain, just in case */ 7050 fn_proto_t = btf_type_by_id(btf, fn_t->type); 7051 if (!fn_proto_t || !btf_is_func_proto(fn_proto_t)) 7052 continue; 7053 7054 /* find corresponding func_info record */ 7055 func_rec = NULL; 7056 for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) { 7057 if (orig_ids[rec_idx] == t->type) { 7058 func_rec = prog->func_info + prog->func_info_rec_size * rec_idx; 7059 break; 7060 } 7061 } 7062 /* current main program doesn't call into this subprog */ 7063 if (!func_rec) 7064 continue; 7065 7066 /* some more sanity checking of DECL_TAG */ 7067 arg_cnt = btf_vlen(fn_proto_t); 7068 arg_idx = btf_decl_tag(t)->component_idx; 7069 if (arg_idx < 0 || arg_idx >= arg_cnt) 7070 continue; 7071 7072 /* check if we should fix up argument type */ 7073 p = &btf_params(fn_proto_t)[arg_idx]; 7074 fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>"; 7075 if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name)) 7076 continue; 7077 7078 /* clone fn/fn_proto, unless we already did it for another arg */ 7079 if (func_rec->type_id == orig_fn_id) { 7080 int fn_id; 7081 7082 fn_id = clone_func_btf_info(btf, orig_fn_id, prog); 7083 if (fn_id < 0) { 7084 err = fn_id; 7085 goto err_out; 7086 } 7087 7088 /* point func_info record to a cloned FUNC type */ 7089 func_rec->type_id = fn_id; 7090 } 7091 7092 /* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument; 7093 * we do it just once per main BPF program, as all global 7094 * funcs share the same program type, so need only PTR -> 7095 * STRUCT type chain 7096 */ 7097 if (ptr_id == 0) { 7098 struct_id = btf__add_struct(btf, ctx_name, 0); 7099 ptr_id = btf__add_ptr(btf, struct_id); 7100 if (ptr_id < 0 || struct_id < 0) { 7101 err = -EINVAL; 7102 goto err_out; 7103 } 7104 } 7105 7106 /* for completeness, clone DECL_TAG and point it to cloned param */ 7107 tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx); 7108 if (tag_id < 0) { 7109 err = -EINVAL; 7110 goto err_out; 7111 } 7112 7113 /* all the BTF manipulations invalidated pointers, refetch them */ 7114 fn_t = btf_type_by_id(btf, func_rec->type_id); 7115 fn_proto_t = btf_type_by_id(btf, fn_t->type); 7116 7117 /* fix up type ID pointed to by param */ 7118 p = &btf_params(fn_proto_t)[arg_idx]; 7119 p->type = ptr_id; 7120 } 7121 7122 free(orig_ids); 7123 return 0; 7124 err_out: 7125 free(orig_ids); 7126 return err; 7127 } 7128 7129 static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) 7130 { 7131 struct bpf_program *prog; 7132 size_t i, j; 7133 int err; 7134 7135 if (obj->btf_ext) { 7136 err = bpf_object__relocate_core(obj, targ_btf_path); 7137 if (err) { 7138 pr_warn("failed to perform CO-RE relocations: %s\n", 7139 errstr(err)); 7140 return err; 7141 } 7142 bpf_object__sort_relos(obj); 7143 } 7144 7145 /* Before relocating calls pre-process relocations and mark 7146 * few ld_imm64 instructions that points to subprogs. 7147 * Otherwise bpf_object__reloc_code() later would have to consider 7148 * all ld_imm64 insns as relocation candidates. 
That would 7149 * reduce relocation speed, since amount of find_prog_insn_relo() 7150 * would increase and most of them will fail to find a relo. 7151 */ 7152 for (i = 0; i < obj->nr_programs; i++) { 7153 prog = &obj->programs[i]; 7154 for (j = 0; j < prog->nr_reloc; j++) { 7155 struct reloc_desc *relo = &prog->reloc_desc[j]; 7156 struct bpf_insn *insn = &prog->insns[relo->insn_idx]; 7157 7158 /* mark the insn, so it's recognized by insn_is_pseudo_func() */ 7159 if (relo->type == RELO_SUBPROG_ADDR) 7160 insn[0].src_reg = BPF_PSEUDO_FUNC; 7161 } 7162 } 7163 7164 /* relocate subprogram calls and append used subprograms to main 7165 * programs; each copy of subprogram code needs to be relocated 7166 * differently for each main program, because its code location might 7167 * have changed. 7168 * Append subprog relos to main programs to allow data relos to be 7169 * processed after text is completely relocated. 7170 */ 7171 for (i = 0; i < obj->nr_programs; i++) { 7172 prog = &obj->programs[i]; 7173 /* sub-program's sub-calls are relocated within the context of 7174 * its main program only 7175 */ 7176 if (prog_is_subprog(obj, prog)) 7177 continue; 7178 if (!prog->autoload) 7179 continue; 7180 7181 err = bpf_object__relocate_calls(obj, prog); 7182 if (err) { 7183 pr_warn("prog '%s': failed to relocate calls: %s\n", 7184 prog->name, errstr(err)); 7185 return err; 7186 } 7187 7188 err = bpf_prog_assign_exc_cb(obj, prog); 7189 if (err) 7190 return err; 7191 /* Now, also append exception callback if it has not been done already. */ 7192 if (prog->exception_cb_idx >= 0) { 7193 struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx]; 7194 7195 /* Calling exception callback directly is disallowed, which the 7196 * verifier will reject later. In case it was processed already, 7197 * we can skip this step, otherwise for all other valid cases we 7198 * have to append exception callback now. 
7199 */ 7200 if (subprog->sub_insn_off == 0) { 7201 err = bpf_object__append_subprog_code(obj, prog, subprog); 7202 if (err) 7203 return err; 7204 err = bpf_object__reloc_code(obj, prog, subprog); 7205 if (err) 7206 return err; 7207 } 7208 } 7209 } 7210 for (i = 0; i < obj->nr_programs; i++) { 7211 prog = &obj->programs[i]; 7212 if (prog_is_subprog(obj, prog)) 7213 continue; 7214 if (!prog->autoload) 7215 continue; 7216 7217 /* Process data relos for main programs */ 7218 err = bpf_object__relocate_data(obj, prog); 7219 if (err) { 7220 pr_warn("prog '%s': failed to relocate data references: %s\n", 7221 prog->name, errstr(err)); 7222 return err; 7223 } 7224 7225 /* Fix up .BTF.ext information, if necessary */ 7226 err = bpf_program_fixup_func_info(obj, prog); 7227 if (err) { 7228 pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %s\n", 7229 prog->name, errstr(err)); 7230 return err; 7231 } 7232 } 7233 7234 return 0; 7235 } 7236 7237 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, 7238 Elf64_Shdr *shdr, Elf_Data *data); 7239 7240 static int bpf_object__collect_map_relos(struct bpf_object *obj, 7241 Elf64_Shdr *shdr, Elf_Data *data) 7242 { 7243 const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *); 7244 int i, j, nrels, new_sz; 7245 const struct btf_var_secinfo *vi = NULL; 7246 const struct btf_type *sec, *var, *def; 7247 struct bpf_map *map = NULL, *targ_map = NULL; 7248 struct bpf_program *targ_prog = NULL; 7249 bool is_prog_array, is_map_in_map; 7250 const struct btf_member *member; 7251 const char *name, *mname, *type; 7252 unsigned int moff; 7253 Elf64_Sym *sym; 7254 Elf64_Rel *rel; 7255 void *tmp; 7256 7257 if (!obj->efile.btf_maps_sec_btf_id || !obj->btf) 7258 return -EINVAL; 7259 sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id); 7260 if (!sec) 7261 return -EINVAL; 7262 7263 nrels = shdr->sh_size / shdr->sh_entsize; 7264 for (i = 0; i < nrels; i++) { 7265 rel = elf_rel_by_idx(data, i); 7266 if (!rel) { 7267 pr_warn(".maps relo #%d: failed to get ELF relo\n", i); 7268 return -LIBBPF_ERRNO__FORMAT; 7269 } 7270 7271 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); 7272 if (!sym) { 7273 pr_warn(".maps relo #%d: symbol %zx not found\n", 7274 i, (size_t)ELF64_R_SYM(rel->r_info)); 7275 return -LIBBPF_ERRNO__FORMAT; 7276 } 7277 name = elf_sym_str(obj, sym->st_name) ?: "<?>"; 7278 7279 pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n", 7280 i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value, 7281 (size_t)rel->r_offset, sym->st_name, name); 7282 7283 for (j = 0; j < obj->nr_maps; j++) { 7284 map = &obj->maps[j]; 7285 if (map->sec_idx != obj->efile.btf_maps_shndx) 7286 continue; 7287 7288 vi = btf_var_secinfos(sec) + map->btf_var_idx; 7289 if (vi->offset <= rel->r_offset && 7290 rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size) 7291 break; 7292 } 7293 if (j == obj->nr_maps) { 7294 pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n", 7295 i, name, (size_t)rel->r_offset); 7296 return -EINVAL; 7297 } 7298 7299 is_map_in_map = bpf_map_type__is_map_in_map(map->def.type); 7300 is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY; 7301 type = is_map_in_map ? 
"map" : "prog"; 7302 if (is_map_in_map) { 7303 if (sym->st_shndx != obj->efile.btf_maps_shndx) { 7304 pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", 7305 i, name); 7306 return -LIBBPF_ERRNO__RELOC; 7307 } 7308 if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS && 7309 map->def.key_size != sizeof(int)) { 7310 pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n", 7311 i, map->name, sizeof(int)); 7312 return -EINVAL; 7313 } 7314 targ_map = bpf_object__find_map_by_name(obj, name); 7315 if (!targ_map) { 7316 pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n", 7317 i, name); 7318 return -ESRCH; 7319 } 7320 } else if (is_prog_array) { 7321 targ_prog = bpf_object__find_program_by_name(obj, name); 7322 if (!targ_prog) { 7323 pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n", 7324 i, name); 7325 return -ESRCH; 7326 } 7327 if (targ_prog->sec_idx != sym->st_shndx || 7328 targ_prog->sec_insn_off * 8 != sym->st_value || 7329 prog_is_subprog(obj, targ_prog)) { 7330 pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n", 7331 i, name); 7332 return -LIBBPF_ERRNO__RELOC; 7333 } 7334 } else { 7335 return -EINVAL; 7336 } 7337 7338 var = btf__type_by_id(obj->btf, vi->type); 7339 def = skip_mods_and_typedefs(obj->btf, var->type, NULL); 7340 if (btf_vlen(def) == 0) 7341 return -EINVAL; 7342 member = btf_members(def) + btf_vlen(def) - 1; 7343 mname = btf__name_by_offset(obj->btf, member->name_off); 7344 if (strcmp(mname, "values")) 7345 return -EINVAL; 7346 7347 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8; 7348 if (rel->r_offset - vi->offset < moff) 7349 return -EINVAL; 7350 7351 moff = rel->r_offset - vi->offset - moff; 7352 /* here we use BPF pointer size, which is always 64 bit, as we 7353 * are parsing ELF that was built for BPF target 7354 */ 7355 if (moff % bpf_ptr_sz) 7356 return -EINVAL; 7357 moff /= bpf_ptr_sz; 7358 if (moff >= map->init_slots_sz) { 7359 new_sz = moff + 1; 7360 tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz); 7361 if (!tmp) 7362 return -ENOMEM; 7363 map->init_slots = tmp; 7364 memset(map->init_slots + map->init_slots_sz, 0, 7365 (new_sz - map->init_slots_sz) * host_ptr_sz); 7366 map->init_slots_sz = new_sz; 7367 } 7368 map->init_slots[moff] = is_map_in_map ? 
(void *)targ_map : (void *)targ_prog; 7369 7370 pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n", 7371 i, map->name, moff, type, name); 7372 } 7373 7374 return 0; 7375 } 7376 7377 static int bpf_object__collect_relos(struct bpf_object *obj) 7378 { 7379 int i, err; 7380 7381 for (i = 0; i < obj->efile.sec_cnt; i++) { 7382 struct elf_sec_desc *sec_desc = &obj->efile.secs[i]; 7383 Elf64_Shdr *shdr; 7384 Elf_Data *data; 7385 int idx; 7386 7387 if (sec_desc->sec_type != SEC_RELO) 7388 continue; 7389 7390 shdr = sec_desc->shdr; 7391 data = sec_desc->data; 7392 idx = shdr->sh_info; 7393 7394 if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) { 7395 pr_warn("internal error at %d\n", __LINE__); 7396 return -LIBBPF_ERRNO__INTERNAL; 7397 } 7398 7399 if (obj->efile.secs[idx].sec_type == SEC_ST_OPS) 7400 err = bpf_object__collect_st_ops_relos(obj, shdr, data); 7401 else if (idx == obj->efile.btf_maps_shndx) 7402 err = bpf_object__collect_map_relos(obj, shdr, data); 7403 else 7404 err = bpf_object__collect_prog_relos(obj, shdr, data); 7405 if (err) 7406 return err; 7407 } 7408 7409 bpf_object__sort_relos(obj); 7410 return 0; 7411 } 7412 7413 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id) 7414 { 7415 if (BPF_CLASS(insn->code) == BPF_JMP && 7416 BPF_OP(insn->code) == BPF_CALL && 7417 BPF_SRC(insn->code) == BPF_K && 7418 insn->src_reg == 0 && 7419 insn->dst_reg == 0) { 7420 *func_id = insn->imm; 7421 return true; 7422 } 7423 return false; 7424 } 7425 7426 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog) 7427 { 7428 struct bpf_insn *insn = prog->insns; 7429 enum bpf_func_id func_id; 7430 int i; 7431 7432 if (obj->gen_loader) 7433 return 0; 7434 7435 for (i = 0; i < prog->insns_cnt; i++, insn++) { 7436 if (!insn_is_helper_call(insn, &func_id)) 7437 continue; 7438 7439 /* on kernels that don't yet support 7440 * bpf_probe_read_{kernel,user}[_str] helpers, fall back 7441 * to bpf_probe_read() which works well for old kernels 7442 */ 7443 switch (func_id) { 7444 case BPF_FUNC_probe_read_kernel: 7445 case BPF_FUNC_probe_read_user: 7446 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN)) 7447 insn->imm = BPF_FUNC_probe_read; 7448 break; 7449 case BPF_FUNC_probe_read_kernel_str: 7450 case BPF_FUNC_probe_read_user_str: 7451 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN)) 7452 insn->imm = BPF_FUNC_probe_read_str; 7453 break; 7454 default: 7455 break; 7456 } 7457 } 7458 return 0; 7459 } 7460 7461 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, 7462 int *btf_obj_fd, int *btf_type_id); 7463 7464 /* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */ 7465 static int libbpf_prepare_prog_load(struct bpf_program *prog, 7466 struct bpf_prog_load_opts *opts, long cookie) 7467 { 7468 enum sec_def_flags def = cookie; 7469 7470 /* old kernels might not support specifying expected_attach_type */ 7471 if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE)) 7472 opts->expected_attach_type = 0; 7473 7474 if (def & SEC_SLEEPABLE) 7475 opts->prog_flags |= BPF_F_SLEEPABLE; 7476 7477 if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) 7478 opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; 7479 7480 /* special check for usdt to use uprobe_multi link */ 7481 if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) { 7482 /* for BPF_TRACE_UPROBE_MULTI, user might want to query expected_attach_type 7483 * in 
prog, while the expected_attach_type we pass to the kernel comes from opts, so we
7484 * update both.
7485 */
7486 prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI;
7487 opts->expected_attach_type = BPF_TRACE_UPROBE_MULTI;
7488 }
7489
7490 if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
7491 int btf_obj_fd = 0, btf_type_id = 0, err;
7492 const char *attach_name;
7493
7494 attach_name = strchr(prog->sec_name, '/');
7495 if (!attach_name) {
7496 /* if BPF program is annotated with just SEC("fentry")
7497 * (or similar) without declaratively specifying
7498 * target, then it is expected that target will be
7499 * specified with bpf_program__set_attach_target() at
7500 * runtime before BPF object load step. If not, then
7501 * there is nothing to load into the kernel as BPF
7502 * verifier won't be able to validate BPF program
7503 * correctness anyway.
7504 */
7505 pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
7506 prog->name);
7507 return -EINVAL;
7508 }
7509 attach_name++; /* skip over / */
7510
7511 err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
7512 if (err)
7513 return err;
7514
7515 /* cache resolved BTF FD and BTF type ID in the prog */
7516 prog->attach_btf_obj_fd = btf_obj_fd;
7517 prog->attach_btf_id = btf_type_id;
7518
7519 /* note that by now libbpf's common logic no longer uses
7520 * prog->attach_btf_obj_fd/prog->attach_btf_id, because this
7521 * callback is called after opts were already populated by
7522 * libbpf, so this callback has to update opts explicitly here
7523 */
7524 opts->attach_btf_obj_fd = btf_obj_fd;
7525 opts->attach_btf_id = btf_type_id;
7526 }
7527 return 0;
7528 }
7529
7530 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
7531
7532 static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
7533 struct bpf_insn *insns, int insns_cnt,
7534 const char *license, __u32 kern_version, int *prog_fd)
7535 {
7536 LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
7537 const char *prog_name = NULL;
7538 size_t log_buf_size = 0;
7539 char *log_buf = NULL, *tmp;
7540 bool own_log_buf = true;
7541 __u32 log_level = prog->log_level;
7542 int ret, err;
7543
7544 /* Be more helpful by rejecting programs that can't be validated early
7545 * with a more meaningful and actionable error message.
7546 */
7547 switch (prog->type) {
7548 case BPF_PROG_TYPE_UNSPEC:
7549 /*
7550 * The program type must be set. Most likely we couldn't find a proper
7551 * section definition at load time, and thus we didn't infer the type.
7552 */ 7553 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n", 7554 prog->name, prog->sec_name); 7555 return -EINVAL; 7556 case BPF_PROG_TYPE_STRUCT_OPS: 7557 if (prog->attach_btf_id == 0) { 7558 pr_warn("prog '%s': SEC(\"struct_ops\") program isn't referenced anywhere, did you forget to use it?\n", 7559 prog->name); 7560 return -EINVAL; 7561 } 7562 break; 7563 default: 7564 break; 7565 } 7566 7567 if (!insns || !insns_cnt) 7568 return -EINVAL; 7569 7570 if (kernel_supports(obj, FEAT_PROG_NAME)) 7571 prog_name = prog->name; 7572 load_attr.attach_prog_fd = prog->attach_prog_fd; 7573 load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; 7574 load_attr.attach_btf_id = prog->attach_btf_id; 7575 load_attr.kern_version = kern_version; 7576 load_attr.prog_ifindex = prog->prog_ifindex; 7577 load_attr.expected_attach_type = prog->expected_attach_type; 7578 7579 /* specify func_info/line_info only if kernel supports them */ 7580 if (obj->btf && btf__fd(obj->btf) >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { 7581 load_attr.prog_btf_fd = btf__fd(obj->btf); 7582 load_attr.func_info = prog->func_info; 7583 load_attr.func_info_rec_size = prog->func_info_rec_size; 7584 load_attr.func_info_cnt = prog->func_info_cnt; 7585 load_attr.line_info = prog->line_info; 7586 load_attr.line_info_rec_size = prog->line_info_rec_size; 7587 load_attr.line_info_cnt = prog->line_info_cnt; 7588 } 7589 load_attr.log_level = log_level; 7590 load_attr.prog_flags = prog->prog_flags; 7591 load_attr.fd_array = obj->fd_array; 7592 7593 load_attr.token_fd = obj->token_fd; 7594 if (obj->token_fd) 7595 load_attr.prog_flags |= BPF_F_TOKEN_FD; 7596 7597 /* adjust load_attr if sec_def provides custom preload callback */ 7598 if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) { 7599 err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie); 7600 if (err < 0) { 7601 pr_warn("prog '%s': failed to prepare load attributes: %s\n", 7602 prog->name, errstr(err)); 7603 return err; 7604 } 7605 insns = prog->insns; 7606 insns_cnt = prog->insns_cnt; 7607 } 7608 7609 if (obj->gen_loader) { 7610 bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name, 7611 license, insns, insns_cnt, &load_attr, 7612 prog - obj->programs); 7613 *prog_fd = -1; 7614 return 0; 7615 } 7616 7617 retry_load: 7618 /* if log_level is zero, we don't request logs initially even if 7619 * custom log_buf is specified; if the program load fails, then we'll 7620 * bump log_level to 1 and use either custom log_buf or we'll allocate 7621 * our own and retry the load to get details on what failed 7622 */ 7623 if (log_level) { 7624 if (prog->log_buf) { 7625 log_buf = prog->log_buf; 7626 log_buf_size = prog->log_size; 7627 own_log_buf = false; 7628 } else if (obj->log_buf) { 7629 log_buf = obj->log_buf; 7630 log_buf_size = obj->log_size; 7631 own_log_buf = false; 7632 } else { 7633 log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2); 7634 tmp = realloc(log_buf, log_buf_size); 7635 if (!tmp) { 7636 ret = -ENOMEM; 7637 goto out; 7638 } 7639 log_buf = tmp; 7640 log_buf[0] = '\0'; 7641 own_log_buf = true; 7642 } 7643 } 7644 7645 load_attr.log_buf = log_buf; 7646 load_attr.log_size = log_buf_size; 7647 load_attr.log_level = log_level; 7648 7649 ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr); 7650 if (ret >= 0) { 7651 if (log_level && own_log_buf) { 7652 pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", 7653 prog->name, log_buf); 7654 } 7655 7656 if 
(obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) { 7657 struct bpf_map *map; 7658 int i; 7659 7660 for (i = 0; i < obj->nr_maps; i++) { 7661 map = &prog->obj->maps[i]; 7662 if (map->libbpf_type != LIBBPF_MAP_RODATA) 7663 continue; 7664 7665 if (bpf_prog_bind_map(ret, map->fd, NULL)) { 7666 pr_warn("prog '%s': failed to bind map '%s': %s\n", 7667 prog->name, map->real_name, errstr(errno)); 7668 /* Don't fail hard if can't bind rodata. */ 7669 } 7670 } 7671 } 7672 7673 *prog_fd = ret; 7674 ret = 0; 7675 goto out; 7676 } 7677 7678 if (log_level == 0) { 7679 log_level = 1; 7680 goto retry_load; 7681 } 7682 /* On ENOSPC, increase log buffer size and retry, unless custom 7683 * log_buf is specified. 7684 * Be careful to not overflow u32, though. Kernel's log buf size limit 7685 * isn't part of UAPI so it can always be bumped to full 4GB. So don't 7686 * multiply by 2 unless we are sure we'll fit within 32 bits. 7687 * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2). 7688 */ 7689 if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2) 7690 goto retry_load; 7691 7692 ret = -errno; 7693 7694 /* post-process verifier log to improve error descriptions */ 7695 fixup_verifier_log(prog, log_buf, log_buf_size); 7696 7697 pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, errstr(errno)); 7698 pr_perm_msg(ret); 7699 7700 if (own_log_buf && log_buf && log_buf[0] != '\0') { 7701 pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", 7702 prog->name, log_buf); 7703 } 7704 7705 out: 7706 if (own_log_buf) 7707 free(log_buf); 7708 return ret; 7709 } 7710 7711 static char *find_prev_line(char *buf, char *cur) 7712 { 7713 char *p; 7714 7715 if (cur == buf) /* end of a log buf */ 7716 return NULL; 7717 7718 p = cur - 1; 7719 while (p - 1 >= buf && *(p - 1) != '\n') 7720 p--; 7721 7722 return p; 7723 } 7724 7725 static void patch_log(char *buf, size_t buf_sz, size_t log_sz, 7726 char *orig, size_t orig_sz, const char *patch) 7727 { 7728 /* size of the remaining log content to the right from the to-be-replaced part */ 7729 size_t rem_sz = (buf + log_sz) - (orig + orig_sz); 7730 size_t patch_sz = strlen(patch); 7731 7732 if (patch_sz != orig_sz) { 7733 /* If patch line(s) are longer than original piece of verifier log, 7734 * shift log contents by (patch_sz - orig_sz) bytes to the right 7735 * starting from after to-be-replaced part of the log. 7736 * 7737 * If patch line(s) are shorter than original piece of verifier log, 7738 * shift log contents by (orig_sz - patch_sz) bytes to the left 7739 * starting from after to-be-replaced part of the log 7740 * 7741 * We need to be careful about not overflowing available 7742 * buf_sz capacity. If that's the case, we'll truncate the end 7743 * of the original log, as necessary. 
7744 */ 7745 if (patch_sz > orig_sz) { 7746 if (orig + patch_sz >= buf + buf_sz) { 7747 /* patch is big enough to cover remaining space completely */ 7748 patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1; 7749 rem_sz = 0; 7750 } else if (patch_sz - orig_sz > buf_sz - log_sz) { 7751 /* patch causes part of remaining log to be truncated */ 7752 rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz); 7753 } 7754 } 7755 /* shift remaining log to the right by calculated amount */ 7756 memmove(orig + patch_sz, orig + orig_sz, rem_sz); 7757 } 7758 7759 memcpy(orig, patch, patch_sz); 7760 } 7761 7762 static void fixup_log_failed_core_relo(struct bpf_program *prog, 7763 char *buf, size_t buf_sz, size_t log_sz, 7764 char *line1, char *line2, char *line3) 7765 { 7766 /* Expected log for failed and not properly guarded CO-RE relocation: 7767 * line1 -> 123: (85) call unknown#195896080 7768 * line2 -> invalid func unknown#195896080 7769 * line3 -> <anything else or end of buffer> 7770 * 7771 * "123" is the index of the instruction that was poisoned. We extract 7772 * instruction index to find corresponding CO-RE relocation and 7773 * replace this part of the log with more relevant information about 7774 * failed CO-RE relocation. 7775 */ 7776 const struct bpf_core_relo *relo; 7777 struct bpf_core_spec spec; 7778 char patch[512], spec_buf[256]; 7779 int insn_idx, err, spec_len; 7780 7781 if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1) 7782 return; 7783 7784 relo = find_relo_core(prog, insn_idx); 7785 if (!relo) 7786 return; 7787 7788 err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec); 7789 if (err) 7790 return; 7791 7792 spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); 7793 snprintf(patch, sizeof(patch), 7794 "%d: <invalid CO-RE relocation>\n" 7795 "failed to resolve CO-RE relocation %s%s\n", 7796 insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : ""); 7797 7798 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 7799 } 7800 7801 static void fixup_log_missing_map_load(struct bpf_program *prog, 7802 char *buf, size_t buf_sz, size_t log_sz, 7803 char *line1, char *line2, char *line3) 7804 { 7805 /* Expected log for failed and not properly guarded map reference: 7806 * line1 -> 123: (85) call unknown#2001000345 7807 * line2 -> invalid func unknown#2001000345 7808 * line3 -> <anything else or end of buffer> 7809 * 7810 * "123" is the index of the instruction that was poisoned. 7811 * "345" in "2001000345" is a map index in obj->maps to fetch map name. 
7812 */ 7813 struct bpf_object *obj = prog->obj; 7814 const struct bpf_map *map; 7815 int insn_idx, map_idx; 7816 char patch[128]; 7817 7818 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2) 7819 return; 7820 7821 map_idx -= POISON_LDIMM64_MAP_BASE; 7822 if (map_idx < 0 || map_idx >= obj->nr_maps) 7823 return; 7824 map = &obj->maps[map_idx]; 7825 7826 snprintf(patch, sizeof(patch), 7827 "%d: <invalid BPF map reference>\n" 7828 "BPF map '%s' is referenced but wasn't created\n", 7829 insn_idx, map->name); 7830 7831 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 7832 } 7833 7834 static void fixup_log_missing_kfunc_call(struct bpf_program *prog, 7835 char *buf, size_t buf_sz, size_t log_sz, 7836 char *line1, char *line2, char *line3) 7837 { 7838 /* Expected log for failed and not properly guarded kfunc call: 7839 * line1 -> 123: (85) call unknown#2002000345 7840 * line2 -> invalid func unknown#2002000345 7841 * line3 -> <anything else or end of buffer> 7842 * 7843 * "123" is the index of the instruction that was poisoned. 7844 * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name. 7845 */ 7846 struct bpf_object *obj = prog->obj; 7847 const struct extern_desc *ext; 7848 int insn_idx, ext_idx; 7849 char patch[128]; 7850 7851 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2) 7852 return; 7853 7854 ext_idx -= POISON_CALL_KFUNC_BASE; 7855 if (ext_idx < 0 || ext_idx >= obj->nr_extern) 7856 return; 7857 ext = &obj->externs[ext_idx]; 7858 7859 snprintf(patch, sizeof(patch), 7860 "%d: <invalid kfunc call>\n" 7861 "kfunc '%s' is referenced but wasn't resolved\n", 7862 insn_idx, ext->name); 7863 7864 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); 7865 } 7866 7867 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz) 7868 { 7869 /* look for familiar error patterns in last N lines of the log */ 7870 const size_t max_last_line_cnt = 10; 7871 char *prev_line, *cur_line, *next_line; 7872 size_t log_sz; 7873 int i; 7874 7875 if (!buf) 7876 return; 7877 7878 log_sz = strlen(buf) + 1; 7879 next_line = buf + log_sz - 1; 7880 7881 for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) { 7882 cur_line = find_prev_line(buf, next_line); 7883 if (!cur_line) 7884 return; 7885 7886 if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) { 7887 prev_line = find_prev_line(buf, cur_line); 7888 if (!prev_line) 7889 continue; 7890 7891 /* failed CO-RE relocation case */ 7892 fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz, 7893 prev_line, cur_line, next_line); 7894 return; 7895 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) { 7896 prev_line = find_prev_line(buf, cur_line); 7897 if (!prev_line) 7898 continue; 7899 7900 /* reference to uncreated BPF map */ 7901 fixup_log_missing_map_load(prog, buf, buf_sz, log_sz, 7902 prev_line, cur_line, next_line); 7903 return; 7904 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) { 7905 prev_line = find_prev_line(buf, cur_line); 7906 if (!prev_line) 7907 continue; 7908 7909 /* reference to unresolved kfunc */ 7910 fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz, 7911 prev_line, cur_line, next_line); 7912 return; 7913 } 7914 } 7915 } 7916 7917 static int bpf_program_record_relos(struct bpf_program *prog) 7918 { 7919 struct bpf_object *obj = prog->obj; 7920 int i; 7921 7922 for (i = 0; i < prog->nr_reloc; i++) { 7923 struct reloc_desc *relo = &prog->reloc_desc[i]; 7924 
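/* Note: ext is only consulted for extern relocations
 * (RELO_EXTERN_LD64 and RELO_EXTERN_CALL) handled below; for other
 * relocation kinds the pointer is computed but never dereferenced.
 */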
struct extern_desc *ext = &obj->externs[relo->ext_idx]; 7925 int kind; 7926 7927 switch (relo->type) { 7928 case RELO_EXTERN_LD64: 7929 if (ext->type != EXT_KSYM) 7930 continue; 7931 kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ? 7932 BTF_KIND_VAR : BTF_KIND_FUNC; 7933 bpf_gen__record_extern(obj->gen_loader, ext->name, 7934 ext->is_weak, !ext->ksym.type_id, 7935 true, kind, relo->insn_idx); 7936 break; 7937 case RELO_EXTERN_CALL: 7938 bpf_gen__record_extern(obj->gen_loader, ext->name, 7939 ext->is_weak, false, false, BTF_KIND_FUNC, 7940 relo->insn_idx); 7941 break; 7942 case RELO_CORE: { 7943 struct bpf_core_relo cr = { 7944 .insn_off = relo->insn_idx * 8, 7945 .type_id = relo->core_relo->type_id, 7946 .access_str_off = relo->core_relo->access_str_off, 7947 .kind = relo->core_relo->kind, 7948 }; 7949 7950 bpf_gen__record_relo_core(obj->gen_loader, &cr); 7951 break; 7952 } 7953 default: 7954 continue; 7955 } 7956 } 7957 return 0; 7958 } 7959 7960 static int 7961 bpf_object__load_progs(struct bpf_object *obj, int log_level) 7962 { 7963 struct bpf_program *prog; 7964 size_t i; 7965 int err; 7966 7967 for (i = 0; i < obj->nr_programs; i++) { 7968 prog = &obj->programs[i]; 7969 if (prog_is_subprog(obj, prog)) 7970 continue; 7971 if (!prog->autoload) { 7972 pr_debug("prog '%s': skipped loading\n", prog->name); 7973 continue; 7974 } 7975 prog->log_level |= log_level; 7976 7977 if (obj->gen_loader) 7978 bpf_program_record_relos(prog); 7979 7980 err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt, 7981 obj->license, obj->kern_version, &prog->fd); 7982 if (err) { 7983 pr_warn("prog '%s': failed to load: %s\n", prog->name, errstr(err)); 7984 return err; 7985 } 7986 } 7987 7988 bpf_object__free_relocs(obj); 7989 return 0; 7990 } 7991 7992 static int bpf_object_prepare_progs(struct bpf_object *obj) 7993 { 7994 struct bpf_program *prog; 7995 size_t i; 7996 int err; 7997 7998 for (i = 0; i < obj->nr_programs; i++) { 7999 prog = &obj->programs[i]; 8000 err = bpf_object__sanitize_prog(obj, prog); 8001 if (err) 8002 return err; 8003 } 8004 return 0; 8005 } 8006 8007 static const struct bpf_sec_def *find_sec_def(const char *sec_name); 8008 8009 static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts) 8010 { 8011 struct bpf_program *prog; 8012 int err; 8013 8014 bpf_object__for_each_program(prog, obj) { 8015 prog->sec_def = find_sec_def(prog->sec_name); 8016 if (!prog->sec_def) { 8017 /* couldn't guess, but user might manually specify */ 8018 pr_debug("prog '%s': unrecognized ELF section name '%s'\n", 8019 prog->name, prog->sec_name); 8020 continue; 8021 } 8022 8023 prog->type = prog->sec_def->prog_type; 8024 prog->expected_attach_type = prog->sec_def->expected_attach_type; 8025 8026 /* sec_def can have custom callback which should be called 8027 * after bpf_program is initialized to adjust its properties 8028 */ 8029 if (prog->sec_def->prog_setup_fn) { 8030 err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie); 8031 if (err < 0) { 8032 pr_warn("prog '%s': failed to initialize: %s\n", 8033 prog->name, errstr(err)); 8034 return err; 8035 } 8036 } 8037 } 8038 8039 return 0; 8040 } 8041 8042 static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, 8043 const char *obj_name, 8044 const struct bpf_object_open_opts *opts) 8045 { 8046 const char *kconfig, *btf_tmp_path, *token_path; 8047 struct bpf_object *obj; 8048 int err; 8049 char *log_buf; 8050 size_t log_size; 8051 __u32 log_level; 8052 
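/* Validate inputs before allocating the object: an in-memory buffer
 * needs an explicit name, libelf must initialize, opts must be valid,
 * and any kernel log buffer/size/level settings must be consistent.
 */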
8053 if (obj_buf && !obj_name) 8054 return ERR_PTR(-EINVAL); 8055 8056 if (elf_version(EV_CURRENT) == EV_NONE) { 8057 pr_warn("failed to init libelf for %s\n", 8058 path ? : "(mem buf)"); 8059 return ERR_PTR(-LIBBPF_ERRNO__LIBELF); 8060 } 8061 8062 if (!OPTS_VALID(opts, bpf_object_open_opts)) 8063 return ERR_PTR(-EINVAL); 8064 8065 obj_name = OPTS_GET(opts, object_name, NULL) ?: obj_name; 8066 if (obj_buf) { 8067 path = obj_name; 8068 pr_debug("loading object '%s' from buffer\n", obj_name); 8069 } else { 8070 pr_debug("loading object from %s\n", path); 8071 } 8072 8073 log_buf = OPTS_GET(opts, kernel_log_buf, NULL); 8074 log_size = OPTS_GET(opts, kernel_log_size, 0); 8075 log_level = OPTS_GET(opts, kernel_log_level, 0); 8076 if (log_size > UINT_MAX) 8077 return ERR_PTR(-EINVAL); 8078 if (log_size && !log_buf) 8079 return ERR_PTR(-EINVAL); 8080 8081 token_path = OPTS_GET(opts, bpf_token_path, NULL); 8082 /* if user didn't specify bpf_token_path explicitly, check if 8083 * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path 8084 * option 8085 */ 8086 if (!token_path) 8087 token_path = getenv("LIBBPF_BPF_TOKEN_PATH"); 8088 if (token_path && strlen(token_path) >= PATH_MAX) 8089 return ERR_PTR(-ENAMETOOLONG); 8090 8091 obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); 8092 if (IS_ERR(obj)) 8093 return obj; 8094 8095 obj->log_buf = log_buf; 8096 obj->log_size = log_size; 8097 obj->log_level = log_level; 8098 8099 if (token_path) { 8100 obj->token_path = strdup(token_path); 8101 if (!obj->token_path) { 8102 err = -ENOMEM; 8103 goto out; 8104 } 8105 } 8106 8107 btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); 8108 if (btf_tmp_path) { 8109 if (strlen(btf_tmp_path) >= PATH_MAX) { 8110 err = -ENAMETOOLONG; 8111 goto out; 8112 } 8113 obj->btf_custom_path = strdup(btf_tmp_path); 8114 if (!obj->btf_custom_path) { 8115 err = -ENOMEM; 8116 goto out; 8117 } 8118 } 8119 8120 kconfig = OPTS_GET(opts, kconfig, NULL); 8121 if (kconfig) { 8122 obj->kconfig = strdup(kconfig); 8123 if (!obj->kconfig) { 8124 err = -ENOMEM; 8125 goto out; 8126 } 8127 } 8128 8129 err = bpf_object__elf_init(obj); 8130 err = err ? : bpf_object__elf_collect(obj); 8131 err = err ? : bpf_object__collect_externs(obj); 8132 err = err ? : bpf_object_fixup_btf(obj); 8133 err = err ? : bpf_object__init_maps(obj, opts); 8134 err = err ? : bpf_object_init_progs(obj, opts); 8135 err = err ? 
: bpf_object__collect_relos(obj); 8136 if (err) 8137 goto out; 8138 8139 bpf_object__elf_finish(obj); 8140 8141 return obj; 8142 out: 8143 bpf_object__close(obj); 8144 return ERR_PTR(err); 8145 } 8146 8147 struct bpf_object * 8148 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) 8149 { 8150 if (!path) 8151 return libbpf_err_ptr(-EINVAL); 8152 8153 return libbpf_ptr(bpf_object_open(path, NULL, 0, NULL, opts)); 8154 } 8155 8156 struct bpf_object *bpf_object__open(const char *path) 8157 { 8158 return bpf_object__open_file(path, NULL); 8159 } 8160 8161 struct bpf_object * 8162 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, 8163 const struct bpf_object_open_opts *opts) 8164 { 8165 char tmp_name[64]; 8166 8167 if (!obj_buf || obj_buf_sz == 0) 8168 return libbpf_err_ptr(-EINVAL); 8169 8170 /* create a (quite useless) default "name" for this memory buffer object */ 8171 snprintf(tmp_name, sizeof(tmp_name), "%lx-%zx", (unsigned long)obj_buf, obj_buf_sz); 8172 8173 return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, tmp_name, opts)); 8174 } 8175 8176 static int bpf_object_unload(struct bpf_object *obj) 8177 { 8178 size_t i; 8179 8180 if (!obj) 8181 return libbpf_err(-EINVAL); 8182 8183 for (i = 0; i < obj->nr_maps; i++) { 8184 zclose(obj->maps[i].fd); 8185 if (obj->maps[i].st_ops) 8186 zfree(&obj->maps[i].st_ops->kern_vdata); 8187 } 8188 8189 for (i = 0; i < obj->nr_programs; i++) 8190 bpf_program__unload(&obj->programs[i]); 8191 8192 return 0; 8193 } 8194 8195 static int bpf_object__sanitize_maps(struct bpf_object *obj) 8196 { 8197 struct bpf_map *m; 8198 8199 bpf_object__for_each_map(m, obj) { 8200 if (!bpf_map__is_internal(m)) 8201 continue; 8202 if (!kernel_supports(obj, FEAT_ARRAY_MMAP)) 8203 m->def.map_flags &= ~BPF_F_MMAPABLE; 8204 } 8205 8206 return 0; 8207 } 8208 8209 typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type, 8210 const char *sym_name, void *ctx); 8211 8212 static int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx) 8213 { 8214 char sym_type, sym_name[500]; 8215 unsigned long long sym_addr; 8216 int ret, err = 0; 8217 FILE *f; 8218 8219 f = fopen("/proc/kallsyms", "re"); 8220 if (!f) { 8221 err = -errno; 8222 pr_warn("failed to open /proc/kallsyms: %s\n", errstr(err)); 8223 return err; 8224 } 8225 8226 while (true) { 8227 ret = fscanf(f, "%llx %c %499s%*[^\n]\n", 8228 &sym_addr, &sym_type, sym_name); 8229 if (ret == EOF && feof(f)) 8230 break; 8231 if (ret != 3) { 8232 pr_warn("failed to read kallsyms entry: %d\n", ret); 8233 err = -EINVAL; 8234 break; 8235 } 8236 8237 err = cb(sym_addr, sym_type, sym_name, ctx); 8238 if (err) 8239 break; 8240 } 8241 8242 fclose(f); 8243 return err; 8244 } 8245 8246 static int kallsyms_cb(unsigned long long sym_addr, char sym_type, 8247 const char *sym_name, void *ctx) 8248 { 8249 struct bpf_object *obj = ctx; 8250 const struct btf_type *t; 8251 struct extern_desc *ext; 8252 char *res; 8253 8254 res = strstr(sym_name, ".llvm."); 8255 if (sym_type == 'd' && res) 8256 ext = find_extern_by_name_with_len(obj, sym_name, res - sym_name); 8257 else 8258 ext = find_extern_by_name(obj, sym_name); 8259 if (!ext || ext->type != EXT_KSYM) 8260 return 0; 8261 8262 t = btf__type_by_id(obj->btf, ext->btf_id); 8263 if (!btf_is_var(t)) 8264 return 0; 8265 8266 if (ext->is_set && ext->ksym.addr != sym_addr) { 8267 pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n", 8268 sym_name, ext->ksym.addr, sym_addr); 8269 return -EINVAL; 8270 } 8271 if (!ext->is_set) { 8272 
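/* first time this ksym is seen in /proc/kallsyms: record its address */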
ext->is_set = true; 8273 ext->ksym.addr = sym_addr; 8274 pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr); 8275 } 8276 return 0; 8277 } 8278 8279 static int bpf_object__read_kallsyms_file(struct bpf_object *obj) 8280 { 8281 return libbpf_kallsyms_parse(kallsyms_cb, obj); 8282 } 8283 8284 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, 8285 __u16 kind, struct btf **res_btf, 8286 struct module_btf **res_mod_btf) 8287 { 8288 struct module_btf *mod_btf; 8289 struct btf *btf; 8290 int i, id, err; 8291 8292 btf = obj->btf_vmlinux; 8293 mod_btf = NULL; 8294 id = btf__find_by_name_kind(btf, ksym_name, kind); 8295 8296 if (id == -ENOENT) { 8297 err = load_module_btfs(obj); 8298 if (err) 8299 return err; 8300 8301 for (i = 0; i < obj->btf_module_cnt; i++) { 8302 /* we assume module_btf's BTF FD is always >0 */ 8303 mod_btf = &obj->btf_modules[i]; 8304 btf = mod_btf->btf; 8305 id = btf__find_by_name_kind_own(btf, ksym_name, kind); 8306 if (id != -ENOENT) 8307 break; 8308 } 8309 } 8310 if (id <= 0) 8311 return -ESRCH; 8312 8313 *res_btf = btf; 8314 *res_mod_btf = mod_btf; 8315 return id; 8316 } 8317 8318 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj, 8319 struct extern_desc *ext) 8320 { 8321 const struct btf_type *targ_var, *targ_type; 8322 __u32 targ_type_id, local_type_id; 8323 struct module_btf *mod_btf = NULL; 8324 const char *targ_var_name; 8325 struct btf *btf = NULL; 8326 int id, err; 8327 8328 id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf); 8329 if (id < 0) { 8330 if (id == -ESRCH && ext->is_weak) 8331 return 0; 8332 pr_warn("extern (var ksym) '%s': not found in kernel BTF\n", 8333 ext->name); 8334 return id; 8335 } 8336 8337 /* find local type_id */ 8338 local_type_id = ext->ksym.type_id; 8339 8340 /* find target type_id */ 8341 targ_var = btf__type_by_id(btf, id); 8342 targ_var_name = btf__name_by_offset(btf, targ_var->name_off); 8343 targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id); 8344 8345 err = bpf_core_types_are_compat(obj->btf, local_type_id, 8346 btf, targ_type_id); 8347 if (err <= 0) { 8348 const struct btf_type *local_type; 8349 const char *targ_name, *local_name; 8350 8351 local_type = btf__type_by_id(obj->btf, local_type_id); 8352 local_name = btf__name_by_offset(obj->btf, local_type->name_off); 8353 targ_name = btf__name_by_offset(btf, targ_type->name_off); 8354 8355 pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n", 8356 ext->name, local_type_id, 8357 btf_kind_str(local_type), local_name, targ_type_id, 8358 btf_kind_str(targ_type), targ_name); 8359 return -EINVAL; 8360 } 8361 8362 ext->is_set = true; 8363 ext->ksym.kernel_btf_obj_fd = mod_btf ? 
mod_btf->fd : 0; 8364 ext->ksym.kernel_btf_id = id; 8365 pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n", 8366 ext->name, id, btf_kind_str(targ_var), targ_var_name); 8367 8368 return 0; 8369 } 8370 8371 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj, 8372 struct extern_desc *ext) 8373 { 8374 int local_func_proto_id, kfunc_proto_id, kfunc_id; 8375 struct module_btf *mod_btf = NULL; 8376 const struct btf_type *kern_func; 8377 struct btf *kern_btf = NULL; 8378 int ret; 8379 8380 local_func_proto_id = ext->ksym.type_id; 8381 8382 kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf, 8383 &mod_btf); 8384 if (kfunc_id < 0) { 8385 if (kfunc_id == -ESRCH && ext->is_weak) 8386 return 0; 8387 pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n", 8388 ext->name); 8389 return kfunc_id; 8390 } 8391 8392 kern_func = btf__type_by_id(kern_btf, kfunc_id); 8393 kfunc_proto_id = kern_func->type; 8394 8395 ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id, 8396 kern_btf, kfunc_proto_id); 8397 if (ret <= 0) { 8398 if (ext->is_weak) 8399 return 0; 8400 8401 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n", 8402 ext->name, local_func_proto_id, 8403 mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id); 8404 return -EINVAL; 8405 } 8406 8407 /* set index for module BTF fd in fd_array, if unset */ 8408 if (mod_btf && !mod_btf->fd_array_idx) { 8409 /* insn->off is s16 */ 8410 if (obj->fd_array_cnt == INT16_MAX) { 8411 pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n", 8412 ext->name, mod_btf->fd_array_idx); 8413 return -E2BIG; 8414 } 8415 /* Cannot use index 0 for module BTF fd */ 8416 if (!obj->fd_array_cnt) 8417 obj->fd_array_cnt = 1; 8418 8419 ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int), 8420 obj->fd_array_cnt + 1); 8421 if (ret) 8422 return ret; 8423 mod_btf->fd_array_idx = obj->fd_array_cnt; 8424 /* we assume module BTF FD is always >0 */ 8425 obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd; 8426 } 8427 8428 ext->is_set = true; 8429 ext->ksym.kernel_btf_id = kfunc_id; 8430 ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0; 8431 /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data() 8432 * populates FD into ld_imm64 insn when it's used to point to kfunc. 8433 * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call. 8434 * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64. 8435 */ 8436 ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0; 8437 pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n", 8438 ext->name, mod_btf ? 
mod_btf->name : "vmlinux", kfunc_id); 8439 8440 return 0; 8441 } 8442 8443 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) 8444 { 8445 const struct btf_type *t; 8446 struct extern_desc *ext; 8447 int i, err; 8448 8449 for (i = 0; i < obj->nr_extern; i++) { 8450 ext = &obj->externs[i]; 8451 if (ext->type != EXT_KSYM || !ext->ksym.type_id) 8452 continue; 8453 8454 if (obj->gen_loader) { 8455 ext->is_set = true; 8456 ext->ksym.kernel_btf_obj_fd = 0; 8457 ext->ksym.kernel_btf_id = 0; 8458 continue; 8459 } 8460 t = btf__type_by_id(obj->btf, ext->btf_id); 8461 if (btf_is_var(t)) 8462 err = bpf_object__resolve_ksym_var_btf_id(obj, ext); 8463 else 8464 err = bpf_object__resolve_ksym_func_btf_id(obj, ext); 8465 if (err) 8466 return err; 8467 } 8468 return 0; 8469 } 8470 8471 static int bpf_object__resolve_externs(struct bpf_object *obj, 8472 const char *extra_kconfig) 8473 { 8474 bool need_config = false, need_kallsyms = false; 8475 bool need_vmlinux_btf = false; 8476 struct extern_desc *ext; 8477 void *kcfg_data = NULL; 8478 int err, i; 8479 8480 if (obj->nr_extern == 0) 8481 return 0; 8482 8483 if (obj->kconfig_map_idx >= 0) 8484 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped; 8485 8486 for (i = 0; i < obj->nr_extern; i++) { 8487 ext = &obj->externs[i]; 8488 8489 if (ext->type == EXT_KSYM) { 8490 if (ext->ksym.type_id) 8491 need_vmlinux_btf = true; 8492 else 8493 need_kallsyms = true; 8494 continue; 8495 } else if (ext->type == EXT_KCFG) { 8496 void *ext_ptr = kcfg_data + ext->kcfg.data_off; 8497 __u64 value = 0; 8498 8499 /* Kconfig externs need actual /proc/config.gz */ 8500 if (str_has_pfx(ext->name, "CONFIG_")) { 8501 need_config = true; 8502 continue; 8503 } 8504 8505 /* Virtual kcfg externs are customly handled by libbpf */ 8506 if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { 8507 value = get_kernel_version(); 8508 if (!value) { 8509 pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name); 8510 return -EINVAL; 8511 } 8512 } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) { 8513 value = kernel_supports(obj, FEAT_BPF_COOKIE); 8514 } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) { 8515 value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER); 8516 } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) { 8517 /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed 8518 * __kconfig externs, where LINUX_ ones are virtual and filled out 8519 * customly by libbpf (their values don't come from Kconfig). 8520 * If LINUX_xxx variable is not recognized by libbpf, but is marked 8521 * __weak, it defaults to zero value, just like for CONFIG_xxx 8522 * externs. 
8523 */ 8524 pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name); 8525 return -EINVAL; 8526 } 8527 8528 err = set_kcfg_value_num(ext, ext_ptr, value); 8529 if (err) 8530 return err; 8531 pr_debug("extern (kcfg) '%s': set to 0x%llx\n", 8532 ext->name, (long long)value); 8533 } else { 8534 pr_warn("extern '%s': unrecognized extern kind\n", ext->name); 8535 return -EINVAL; 8536 } 8537 } 8538 if (need_config && extra_kconfig) { 8539 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data); 8540 if (err) 8541 return -EINVAL; 8542 need_config = false; 8543 for (i = 0; i < obj->nr_extern; i++) { 8544 ext = &obj->externs[i]; 8545 if (ext->type == EXT_KCFG && !ext->is_set) { 8546 need_config = true; 8547 break; 8548 } 8549 } 8550 } 8551 if (need_config) { 8552 err = bpf_object__read_kconfig_file(obj, kcfg_data); 8553 if (err) 8554 return -EINVAL; 8555 } 8556 if (need_kallsyms) { 8557 err = bpf_object__read_kallsyms_file(obj); 8558 if (err) 8559 return -EINVAL; 8560 } 8561 if (need_vmlinux_btf) { 8562 err = bpf_object__resolve_ksyms_btf_id(obj); 8563 if (err) 8564 return -EINVAL; 8565 } 8566 for (i = 0; i < obj->nr_extern; i++) { 8567 ext = &obj->externs[i]; 8568 8569 if (!ext->is_set && !ext->is_weak) { 8570 pr_warn("extern '%s' (strong): not resolved\n", ext->name); 8571 return -ESRCH; 8572 } else if (!ext->is_set) { 8573 pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n", 8574 ext->name); 8575 } 8576 } 8577 8578 return 0; 8579 } 8580 8581 static void bpf_map_prepare_vdata(const struct bpf_map *map) 8582 { 8583 const struct btf_type *type; 8584 struct bpf_struct_ops *st_ops; 8585 __u32 i; 8586 8587 st_ops = map->st_ops; 8588 type = btf__type_by_id(map->obj->btf, st_ops->type_id); 8589 for (i = 0; i < btf_vlen(type); i++) { 8590 struct bpf_program *prog = st_ops->progs[i]; 8591 void *kern_data; 8592 int prog_fd; 8593 8594 if (!prog) 8595 continue; 8596 8597 prog_fd = bpf_program__fd(prog); 8598 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i]; 8599 *(unsigned long *)kern_data = prog_fd; 8600 } 8601 } 8602 8603 static int bpf_object_prepare_struct_ops(struct bpf_object *obj) 8604 { 8605 struct bpf_map *map; 8606 int i; 8607 8608 for (i = 0; i < obj->nr_maps; i++) { 8609 map = &obj->maps[i]; 8610 8611 if (!bpf_map__is_struct_ops(map)) 8612 continue; 8613 8614 if (!map->autocreate) 8615 continue; 8616 8617 bpf_map_prepare_vdata(map); 8618 } 8619 8620 return 0; 8621 } 8622 8623 static void bpf_object_unpin(struct bpf_object *obj) 8624 { 8625 int i; 8626 8627 /* unpin any maps that were auto-pinned during load */ 8628 for (i = 0; i < obj->nr_maps; i++) 8629 if (obj->maps[i].pinned && !obj->maps[i].reused) 8630 bpf_map__unpin(&obj->maps[i], NULL); 8631 } 8632 8633 static void bpf_object_post_load_cleanup(struct bpf_object *obj) 8634 { 8635 int i; 8636 8637 /* clean up fd_array */ 8638 zfree(&obj->fd_array); 8639 8640 /* clean up module BTFs */ 8641 for (i = 0; i < obj->btf_module_cnt; i++) { 8642 close(obj->btf_modules[i].fd); 8643 btf__free(obj->btf_modules[i].btf); 8644 free(obj->btf_modules[i].name); 8645 } 8646 obj->btf_module_cnt = 0; 8647 zfree(&obj->btf_modules); 8648 8649 /* clean up vmlinux BTF */ 8650 btf__free(obj->btf_vmlinux); 8651 obj->btf_vmlinux = NULL; 8652 } 8653 8654 static int bpf_object_prepare(struct bpf_object *obj, const char *target_btf_path) 8655 { 8656 int err; 8657 8658 if (obj->state >= OBJ_PREPARED) { 8659 pr_warn("object '%s': prepare loading can't be attempted twice\n", obj->name); 8660 return -EINVAL; 8661 } 8662 8663 
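/* Preparation pipeline: acquire a BPF token, probe basic BPF support,
 * load vmlinux BTF, resolve externs, sanitize maps, set up struct_ops,
 * relocate programs, load BTF, create maps and sanitize programs.
 * The 'err ? :' chaining stops at the first failing step.
 */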
err = bpf_object_prepare_token(obj); 8664 err = err ? : bpf_object__probe_loading(obj); 8665 err = err ? : bpf_object__load_vmlinux_btf(obj, false); 8666 err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); 8667 err = err ? : bpf_object__sanitize_maps(obj); 8668 err = err ? : bpf_object__init_kern_struct_ops_maps(obj); 8669 err = err ? : bpf_object_adjust_struct_ops_autoload(obj); 8670 err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); 8671 err = err ? : bpf_object__sanitize_and_load_btf(obj); 8672 err = err ? : bpf_object__create_maps(obj); 8673 err = err ? : bpf_object_prepare_progs(obj); 8674 8675 if (err) { 8676 bpf_object_unpin(obj); 8677 bpf_object_unload(obj); 8678 obj->state = OBJ_LOADED; 8679 return err; 8680 } 8681 8682 obj->state = OBJ_PREPARED; 8683 return 0; 8684 } 8685 8686 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) 8687 { 8688 int err; 8689 8690 if (!obj) 8691 return libbpf_err(-EINVAL); 8692 8693 if (obj->state >= OBJ_LOADED) { 8694 pr_warn("object '%s': load can't be attempted twice\n", obj->name); 8695 return libbpf_err(-EINVAL); 8696 } 8697 8698 /* Disallow kernel loading programs of non-native endianness but 8699 * permit cross-endian creation of "light skeleton". 8700 */ 8701 if (obj->gen_loader) { 8702 bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); 8703 } else if (!is_native_endianness(obj)) { 8704 pr_warn("object '%s': loading non-native endianness is unsupported\n", obj->name); 8705 return libbpf_err(-LIBBPF_ERRNO__ENDIAN); 8706 } 8707 8708 if (obj->state < OBJ_PREPARED) { 8709 err = bpf_object_prepare(obj, target_btf_path); 8710 if (err) 8711 return libbpf_err(err); 8712 } 8713 err = bpf_object__load_progs(obj, extra_log_level); 8714 err = err ? : bpf_object_init_prog_arrays(obj); 8715 err = err ? 
: bpf_object_prepare_struct_ops(obj); 8716 8717 if (obj->gen_loader) { 8718 /* reset FDs */ 8719 if (obj->btf) 8720 btf__set_fd(obj->btf, -1); 8721 if (!err) 8722 err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); 8723 } 8724 8725 bpf_object_post_load_cleanup(obj); 8726 obj->state = OBJ_LOADED; /* doesn't matter if successfully or not */ 8727 8728 if (err) { 8729 bpf_object_unpin(obj); 8730 bpf_object_unload(obj); 8731 pr_warn("failed to load object '%s'\n", obj->path); 8732 return libbpf_err(err); 8733 } 8734 8735 return 0; 8736 } 8737 8738 int bpf_object__prepare(struct bpf_object *obj) 8739 { 8740 return libbpf_err(bpf_object_prepare(obj, NULL)); 8741 } 8742 8743 int bpf_object__load(struct bpf_object *obj) 8744 { 8745 return bpf_object_load(obj, 0, NULL); 8746 } 8747 8748 static int make_parent_dir(const char *path) 8749 { 8750 char *dname, *dir; 8751 int err = 0; 8752 8753 dname = strdup(path); 8754 if (dname == NULL) 8755 return -ENOMEM; 8756 8757 dir = dirname(dname); 8758 if (mkdir(dir, 0700) && errno != EEXIST) 8759 err = -errno; 8760 8761 free(dname); 8762 if (err) { 8763 pr_warn("failed to mkdir %s: %s\n", path, errstr(err)); 8764 } 8765 return err; 8766 } 8767 8768 static int check_path(const char *path) 8769 { 8770 struct statfs st_fs; 8771 char *dname, *dir; 8772 int err = 0; 8773 8774 if (path == NULL) 8775 return -EINVAL; 8776 8777 dname = strdup(path); 8778 if (dname == NULL) 8779 return -ENOMEM; 8780 8781 dir = dirname(dname); 8782 if (statfs(dir, &st_fs)) { 8783 pr_warn("failed to statfs %s: %s\n", dir, errstr(errno)); 8784 err = -errno; 8785 } 8786 free(dname); 8787 8788 if (!err && st_fs.f_type != BPF_FS_MAGIC) { 8789 pr_warn("specified path %s is not on BPF FS\n", path); 8790 err = -EINVAL; 8791 } 8792 8793 return err; 8794 } 8795 8796 int bpf_program__pin(struct bpf_program *prog, const char *path) 8797 { 8798 int err; 8799 8800 if (prog->fd < 0) { 8801 pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name); 8802 return libbpf_err(-EINVAL); 8803 } 8804 8805 err = make_parent_dir(path); 8806 if (err) 8807 return libbpf_err(err); 8808 8809 err = check_path(path); 8810 if (err) 8811 return libbpf_err(err); 8812 8813 if (bpf_obj_pin(prog->fd, path)) { 8814 err = -errno; 8815 pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, errstr(err)); 8816 return libbpf_err(err); 8817 } 8818 8819 pr_debug("prog '%s': pinned at '%s'\n", prog->name, path); 8820 return 0; 8821 } 8822 8823 int bpf_program__unpin(struct bpf_program *prog, const char *path) 8824 { 8825 int err; 8826 8827 if (prog->fd < 0) { 8828 pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name); 8829 return libbpf_err(-EINVAL); 8830 } 8831 8832 err = check_path(path); 8833 if (err) 8834 return libbpf_err(err); 8835 8836 err = unlink(path); 8837 if (err) 8838 return libbpf_err(-errno); 8839 8840 pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path); 8841 return 0; 8842 } 8843 8844 int bpf_map__pin(struct bpf_map *map, const char *path) 8845 { 8846 int err; 8847 8848 if (map == NULL) { 8849 pr_warn("invalid map pointer\n"); 8850 return libbpf_err(-EINVAL); 8851 } 8852 8853 if (map->fd < 0) { 8854 pr_warn("map '%s': can't pin BPF map without FD (was it created?)\n", map->name); 8855 return libbpf_err(-EINVAL); 8856 } 8857 8858 if (map->pin_path) { 8859 if (path && strcmp(path, map->pin_path)) { 8860 pr_warn("map '%s' already has pin path '%s' different from '%s'\n", 8861 bpf_map__name(map), map->pin_path, path); 8862 return 
libbpf_err(-EINVAL); 8863 } else if (map->pinned) { 8864 pr_debug("map '%s' already pinned at '%s'; not re-pinning\n", 8865 bpf_map__name(map), map->pin_path); 8866 return 0; 8867 } 8868 } else { 8869 if (!path) { 8870 pr_warn("missing a path to pin map '%s' at\n", 8871 bpf_map__name(map)); 8872 return libbpf_err(-EINVAL); 8873 } else if (map->pinned) { 8874 pr_warn("map '%s' already pinned\n", bpf_map__name(map)); 8875 return libbpf_err(-EEXIST); 8876 } 8877 8878 map->pin_path = strdup(path); 8879 if (!map->pin_path) { 8880 err = -errno; 8881 goto out_err; 8882 } 8883 } 8884 8885 err = make_parent_dir(map->pin_path); 8886 if (err) 8887 return libbpf_err(err); 8888 8889 err = check_path(map->pin_path); 8890 if (err) 8891 return libbpf_err(err); 8892 8893 if (bpf_obj_pin(map->fd, map->pin_path)) { 8894 err = -errno; 8895 goto out_err; 8896 } 8897 8898 map->pinned = true; 8899 pr_debug("pinned map '%s'\n", map->pin_path); 8900 8901 return 0; 8902 8903 out_err: 8904 pr_warn("failed to pin map: %s\n", errstr(err)); 8905 return libbpf_err(err); 8906 } 8907 8908 int bpf_map__unpin(struct bpf_map *map, const char *path) 8909 { 8910 int err; 8911 8912 if (map == NULL) { 8913 pr_warn("invalid map pointer\n"); 8914 return libbpf_err(-EINVAL); 8915 } 8916 8917 if (map->pin_path) { 8918 if (path && strcmp(path, map->pin_path)) { 8919 pr_warn("map '%s' already has pin path '%s' different from '%s'\n", 8920 bpf_map__name(map), map->pin_path, path); 8921 return libbpf_err(-EINVAL); 8922 } 8923 path = map->pin_path; 8924 } else if (!path) { 8925 pr_warn("no path to unpin map '%s' from\n", 8926 bpf_map__name(map)); 8927 return libbpf_err(-EINVAL); 8928 } 8929 8930 err = check_path(path); 8931 if (err) 8932 return libbpf_err(err); 8933 8934 err = unlink(path); 8935 if (err != 0) 8936 return libbpf_err(-errno); 8937 8938 map->pinned = false; 8939 pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path); 8940 8941 return 0; 8942 } 8943 8944 int bpf_map__set_pin_path(struct bpf_map *map, const char *path) 8945 { 8946 char *new = NULL; 8947 8948 if (path) { 8949 new = strdup(path); 8950 if (!new) 8951 return libbpf_err(-errno); 8952 } 8953 8954 free(map->pin_path); 8955 map->pin_path = new; 8956 return 0; 8957 } 8958 8959 __alias(bpf_map__pin_path) 8960 const char *bpf_map__get_pin_path(const struct bpf_map *map); 8961 8962 const char *bpf_map__pin_path(const struct bpf_map *map) 8963 { 8964 return map->pin_path; 8965 } 8966 8967 bool bpf_map__is_pinned(const struct bpf_map *map) 8968 { 8969 return map->pinned; 8970 } 8971 8972 static void sanitize_pin_path(char *s) 8973 { 8974 /* bpffs disallows periods in path names */ 8975 while (*s) { 8976 if (*s == '.') 8977 *s = '_'; 8978 s++; 8979 } 8980 } 8981 8982 int bpf_object__pin_maps(struct bpf_object *obj, const char *path) 8983 { 8984 struct bpf_map *map; 8985 int err; 8986 8987 if (!obj) 8988 return libbpf_err(-ENOENT); 8989 8990 if (obj->state < OBJ_PREPARED) { 8991 pr_warn("object not yet loaded; load it first\n"); 8992 return libbpf_err(-ENOENT); 8993 } 8994 8995 bpf_object__for_each_map(map, obj) { 8996 char *pin_path = NULL; 8997 char buf[PATH_MAX]; 8998 8999 if (!map->autocreate) 9000 continue; 9001 9002 if (path) { 9003 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); 9004 if (err) 9005 goto err_unpin_maps; 9006 sanitize_pin_path(buf); 9007 pin_path = buf; 9008 } else if (!map->pin_path) { 9009 continue; 9010 } 9011 9012 err = bpf_map__pin(map, pin_path); 9013 if (err) 9014 goto err_unpin_maps; 9015 } 9016 9017 return 0; 9018 
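/* Error path: walk back over already-processed maps and unpin any
 * that have a pin path, so a partially failed pinning attempt leaves
 * no stale entries behind in bpffs.
 */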
9019 err_unpin_maps: 9020 while ((map = bpf_object__prev_map(obj, map))) { 9021 if (!map->pin_path) 9022 continue; 9023 9024 bpf_map__unpin(map, NULL); 9025 } 9026 9027 return libbpf_err(err); 9028 } 9029 9030 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) 9031 { 9032 struct bpf_map *map; 9033 int err; 9034 9035 if (!obj) 9036 return libbpf_err(-ENOENT); 9037 9038 bpf_object__for_each_map(map, obj) { 9039 char *pin_path = NULL; 9040 char buf[PATH_MAX]; 9041 9042 if (path) { 9043 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); 9044 if (err) 9045 return libbpf_err(err); 9046 sanitize_pin_path(buf); 9047 pin_path = buf; 9048 } else if (!map->pin_path) { 9049 continue; 9050 } 9051 9052 err = bpf_map__unpin(map, pin_path); 9053 if (err) 9054 return libbpf_err(err); 9055 } 9056 9057 return 0; 9058 } 9059 9060 int bpf_object__pin_programs(struct bpf_object *obj, const char *path) 9061 { 9062 struct bpf_program *prog; 9063 char buf[PATH_MAX]; 9064 int err; 9065 9066 if (!obj) 9067 return libbpf_err(-ENOENT); 9068 9069 if (obj->state < OBJ_LOADED) { 9070 pr_warn("object not yet loaded; load it first\n"); 9071 return libbpf_err(-ENOENT); 9072 } 9073 9074 bpf_object__for_each_program(prog, obj) { 9075 err = pathname_concat(buf, sizeof(buf), path, prog->name); 9076 if (err) 9077 goto err_unpin_programs; 9078 9079 err = bpf_program__pin(prog, buf); 9080 if (err) 9081 goto err_unpin_programs; 9082 } 9083 9084 return 0; 9085 9086 err_unpin_programs: 9087 while ((prog = bpf_object__prev_program(obj, prog))) { 9088 if (pathname_concat(buf, sizeof(buf), path, prog->name)) 9089 continue; 9090 9091 bpf_program__unpin(prog, buf); 9092 } 9093 9094 return libbpf_err(err); 9095 } 9096 9097 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) 9098 { 9099 struct bpf_program *prog; 9100 int err; 9101 9102 if (!obj) 9103 return libbpf_err(-ENOENT); 9104 9105 bpf_object__for_each_program(prog, obj) { 9106 char buf[PATH_MAX]; 9107 9108 err = pathname_concat(buf, sizeof(buf), path, prog->name); 9109 if (err) 9110 return libbpf_err(err); 9111 9112 err = bpf_program__unpin(prog, buf); 9113 if (err) 9114 return libbpf_err(err); 9115 } 9116 9117 return 0; 9118 } 9119 9120 int bpf_object__pin(struct bpf_object *obj, const char *path) 9121 { 9122 int err; 9123 9124 err = bpf_object__pin_maps(obj, path); 9125 if (err) 9126 return libbpf_err(err); 9127 9128 err = bpf_object__pin_programs(obj, path); 9129 if (err) { 9130 bpf_object__unpin_maps(obj, path); 9131 return libbpf_err(err); 9132 } 9133 9134 return 0; 9135 } 9136 9137 int bpf_object__unpin(struct bpf_object *obj, const char *path) 9138 { 9139 int err; 9140 9141 err = bpf_object__unpin_programs(obj, path); 9142 if (err) 9143 return libbpf_err(err); 9144 9145 err = bpf_object__unpin_maps(obj, path); 9146 if (err) 9147 return libbpf_err(err); 9148 9149 return 0; 9150 } 9151 9152 static void bpf_map__destroy(struct bpf_map *map) 9153 { 9154 if (map->inner_map) { 9155 bpf_map__destroy(map->inner_map); 9156 zfree(&map->inner_map); 9157 } 9158 9159 zfree(&map->init_slots); 9160 map->init_slots_sz = 0; 9161 9162 if (map->mmaped && map->mmaped != map->obj->arena_data) 9163 munmap(map->mmaped, bpf_map_mmap_sz(map)); 9164 map->mmaped = NULL; 9165 9166 if (map->st_ops) { 9167 zfree(&map->st_ops->data); 9168 zfree(&map->st_ops->progs); 9169 zfree(&map->st_ops->kern_func_off); 9170 zfree(&map->st_ops); 9171 } 9172 9173 zfree(&map->name); 9174 zfree(&map->real_name); 9175 zfree(&map->pin_path); 9176 9177 if (map->fd >= 0) 
9178 zclose(map->fd); 9179 } 9180 9181 void bpf_object__close(struct bpf_object *obj) 9182 { 9183 size_t i; 9184 9185 if (IS_ERR_OR_NULL(obj)) 9186 return; 9187 9188 /* 9189 * if user called bpf_object__prepare() without ever getting to 9190 * bpf_object__load(), we need to clean up stuff that is normally 9191 * cleaned up at the end of loading step 9192 */ 9193 bpf_object_post_load_cleanup(obj); 9194 9195 usdt_manager_free(obj->usdt_man); 9196 obj->usdt_man = NULL; 9197 9198 bpf_gen__free(obj->gen_loader); 9199 bpf_object__elf_finish(obj); 9200 bpf_object_unload(obj); 9201 btf__free(obj->btf); 9202 btf__free(obj->btf_vmlinux); 9203 btf_ext__free(obj->btf_ext); 9204 9205 for (i = 0; i < obj->nr_maps; i++) 9206 bpf_map__destroy(&obj->maps[i]); 9207 9208 zfree(&obj->btf_custom_path); 9209 zfree(&obj->kconfig); 9210 9211 for (i = 0; i < obj->nr_extern; i++) { 9212 zfree(&obj->externs[i].name); 9213 zfree(&obj->externs[i].essent_name); 9214 } 9215 9216 zfree(&obj->externs); 9217 obj->nr_extern = 0; 9218 9219 zfree(&obj->maps); 9220 obj->nr_maps = 0; 9221 9222 if (obj->programs && obj->nr_programs) { 9223 for (i = 0; i < obj->nr_programs; i++) 9224 bpf_program__exit(&obj->programs[i]); 9225 } 9226 zfree(&obj->programs); 9227 9228 zfree(&obj->feat_cache); 9229 zfree(&obj->token_path); 9230 if (obj->token_fd > 0) 9231 close(obj->token_fd); 9232 9233 zfree(&obj->arena_data); 9234 9235 free(obj); 9236 } 9237 9238 const char *bpf_object__name(const struct bpf_object *obj) 9239 { 9240 return obj ? obj->name : libbpf_err_ptr(-EINVAL); 9241 } 9242 9243 unsigned int bpf_object__kversion(const struct bpf_object *obj) 9244 { 9245 return obj ? obj->kern_version : 0; 9246 } 9247 9248 int bpf_object__token_fd(const struct bpf_object *obj) 9249 { 9250 return obj->token_fd ?: -1; 9251 } 9252 9253 struct btf *bpf_object__btf(const struct bpf_object *obj) 9254 { 9255 return obj ? obj->btf : NULL; 9256 } 9257 9258 int bpf_object__btf_fd(const struct bpf_object *obj) 9259 { 9260 return obj->btf ? btf__fd(obj->btf) : -1; 9261 } 9262 9263 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version) 9264 { 9265 if (obj->state >= OBJ_LOADED) 9266 return libbpf_err(-EINVAL); 9267 9268 obj->kern_version = kern_version; 9269 9270 return 0; 9271 } 9272 9273 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) 9274 { 9275 struct bpf_gen *gen; 9276 9277 if (!opts) 9278 return libbpf_err(-EFAULT); 9279 if (!OPTS_VALID(opts, gen_loader_opts)) 9280 return libbpf_err(-EINVAL); 9281 gen = calloc(1, sizeof(*gen)); 9282 if (!gen) 9283 return libbpf_err(-ENOMEM); 9284 gen->opts = opts; 9285 gen->swapped_endian = !is_native_endianness(obj); 9286 obj->gen_loader = gen; 9287 return 0; 9288 } 9289 9290 static struct bpf_program * 9291 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, 9292 bool forward) 9293 { 9294 size_t nr_programs = obj->nr_programs; 9295 ssize_t idx; 9296 9297 if (!nr_programs) 9298 return NULL; 9299 9300 if (!p) 9301 /* Iter from the beginning */ 9302 return forward ? &obj->programs[0] : 9303 &obj->programs[nr_programs - 1]; 9304 9305 if (p->obj != obj) { 9306 pr_warn("error: program handler doesn't match object\n"); 9307 return errno = EINVAL, NULL; 9308 } 9309 9310 idx = (p - obj->programs) + (forward ? 
1 : -1); 9311 if (idx >= obj->nr_programs || idx < 0) 9312 return NULL; 9313 return &obj->programs[idx]; 9314 } 9315 9316 struct bpf_program * 9317 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) 9318 { 9319 struct bpf_program *prog = prev; 9320 9321 do { 9322 prog = __bpf_program__iter(prog, obj, true); 9323 } while (prog && prog_is_subprog(obj, prog)); 9324 9325 return prog; 9326 } 9327 9328 struct bpf_program * 9329 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next) 9330 { 9331 struct bpf_program *prog = next; 9332 9333 do { 9334 prog = __bpf_program__iter(prog, obj, false); 9335 } while (prog && prog_is_subprog(obj, prog)); 9336 9337 return prog; 9338 } 9339 9340 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) 9341 { 9342 prog->prog_ifindex = ifindex; 9343 } 9344 9345 const char *bpf_program__name(const struct bpf_program *prog) 9346 { 9347 return prog->name; 9348 } 9349 9350 const char *bpf_program__section_name(const struct bpf_program *prog) 9351 { 9352 return prog->sec_name; 9353 } 9354 9355 bool bpf_program__autoload(const struct bpf_program *prog) 9356 { 9357 return prog->autoload; 9358 } 9359 9360 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) 9361 { 9362 if (prog->obj->state >= OBJ_LOADED) 9363 return libbpf_err(-EINVAL); 9364 9365 prog->autoload = autoload; 9366 return 0; 9367 } 9368 9369 bool bpf_program__autoattach(const struct bpf_program *prog) 9370 { 9371 return prog->autoattach; 9372 } 9373 9374 void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach) 9375 { 9376 prog->autoattach = autoattach; 9377 } 9378 9379 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog) 9380 { 9381 return prog->insns; 9382 } 9383 9384 size_t bpf_program__insn_cnt(const struct bpf_program *prog) 9385 { 9386 return prog->insns_cnt; 9387 } 9388 9389 int bpf_program__set_insns(struct bpf_program *prog, 9390 struct bpf_insn *new_insns, size_t new_insn_cnt) 9391 { 9392 struct bpf_insn *insns; 9393 9394 if (prog->obj->state >= OBJ_LOADED) 9395 return libbpf_err(-EBUSY); 9396 9397 insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns)); 9398 /* NULL is a valid return from reallocarray if the new count is zero */ 9399 if (!insns && new_insn_cnt) { 9400 pr_warn("prog '%s': failed to realloc prog code\n", prog->name); 9401 return libbpf_err(-ENOMEM); 9402 } 9403 memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns)); 9404 9405 prog->insns = insns; 9406 prog->insns_cnt = new_insn_cnt; 9407 return 0; 9408 } 9409 9410 int bpf_program__fd(const struct bpf_program *prog) 9411 { 9412 if (!prog) 9413 return libbpf_err(-EINVAL); 9414 9415 if (prog->fd < 0) 9416 return libbpf_err(-ENOENT); 9417 9418 return prog->fd; 9419 } 9420 9421 __alias(bpf_program__type) 9422 enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); 9423 9424 enum bpf_prog_type bpf_program__type(const struct bpf_program *prog) 9425 { 9426 return prog->type; 9427 } 9428 9429 static size_t custom_sec_def_cnt; 9430 static struct bpf_sec_def *custom_sec_defs; 9431 static struct bpf_sec_def custom_fallback_def; 9432 static bool has_custom_fallback_def; 9433 static int last_custom_sec_def_handler_id; 9434 9435 int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) 9436 { 9437 if (prog->obj->state >= OBJ_LOADED) 9438 return libbpf_err(-EBUSY); 9439 9440 /* if type is not changed, do nothing */ 9441 if (prog->type == type) 9442 return 0; 9443 9444 
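/* note: only the cached SEC() handler is reset below; the program's
 * expected_attach_type is left untouched and can be adjusted
 * separately via bpf_program__set_expected_attach_type()
 */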
prog->type = type; 9445 9446 /* If a program type was changed, we need to reset associated SEC() 9447 * handler, as it will be invalid now. The only exception is a generic 9448 * fallback handler, which by definition is program type-agnostic and 9449 * is a catch-all custom handler, optionally set by the application, 9450 * so should be able to handle any type of BPF program. 9451 */ 9452 if (prog->sec_def != &custom_fallback_def) 9453 prog->sec_def = NULL; 9454 return 0; 9455 } 9456 9457 __alias(bpf_program__expected_attach_type) 9458 enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); 9459 9460 enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog) 9461 { 9462 return prog->expected_attach_type; 9463 } 9464 9465 int bpf_program__set_expected_attach_type(struct bpf_program *prog, 9466 enum bpf_attach_type type) 9467 { 9468 if (prog->obj->state >= OBJ_LOADED) 9469 return libbpf_err(-EBUSY); 9470 9471 prog->expected_attach_type = type; 9472 return 0; 9473 } 9474 9475 __u32 bpf_program__flags(const struct bpf_program *prog) 9476 { 9477 return prog->prog_flags; 9478 } 9479 9480 int bpf_program__set_flags(struct bpf_program *prog, __u32 flags) 9481 { 9482 if (prog->obj->state >= OBJ_LOADED) 9483 return libbpf_err(-EBUSY); 9484 9485 prog->prog_flags = flags; 9486 return 0; 9487 } 9488 9489 __u32 bpf_program__log_level(const struct bpf_program *prog) 9490 { 9491 return prog->log_level; 9492 } 9493 9494 int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level) 9495 { 9496 if (prog->obj->state >= OBJ_LOADED) 9497 return libbpf_err(-EBUSY); 9498 9499 prog->log_level = log_level; 9500 return 0; 9501 } 9502 9503 const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size) 9504 { 9505 *log_size = prog->log_size; 9506 return prog->log_buf; 9507 } 9508 9509 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size) 9510 { 9511 if (log_size && !log_buf) 9512 return libbpf_err(-EINVAL); 9513 if (prog->log_size > UINT_MAX) 9514 return libbpf_err(-EINVAL); 9515 if (prog->obj->state >= OBJ_LOADED) 9516 return libbpf_err(-EBUSY); 9517 9518 prog->log_buf = log_buf; 9519 prog->log_size = log_size; 9520 return 0; 9521 } 9522 9523 struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog) 9524 { 9525 if (prog->func_info_rec_size != sizeof(struct bpf_func_info)) 9526 return libbpf_err_ptr(-EOPNOTSUPP); 9527 return prog->func_info; 9528 } 9529 9530 __u32 bpf_program__func_info_cnt(const struct bpf_program *prog) 9531 { 9532 return prog->func_info_cnt; 9533 } 9534 9535 struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog) 9536 { 9537 if (prog->line_info_rec_size != sizeof(struct bpf_line_info)) 9538 return libbpf_err_ptr(-EOPNOTSUPP); 9539 return prog->line_info; 9540 } 9541 9542 __u32 bpf_program__line_info_cnt(const struct bpf_program *prog) 9543 { 9544 return prog->line_info_cnt; 9545 } 9546 9547 #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) 
{ \ 9548 .sec = (char *)sec_pfx, \ 9549 .prog_type = BPF_PROG_TYPE_##ptype, \ 9550 .expected_attach_type = atype, \ 9551 .cookie = (long)(flags), \ 9552 .prog_prepare_load_fn = libbpf_prepare_prog_load, \ 9553 __VA_ARGS__ \ 9554 } 9555 9556 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9557 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9558 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9559 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9560 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9561 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9562 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9563 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9564 static int attach_kprobe_session(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9565 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9566 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9567 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link); 9568 9569 static const struct bpf_sec_def section_defs[] = { 9570 SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE), 9571 SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE), 9572 SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE), 9573 SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), 9574 SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), 9575 SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), 9576 SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), 9577 SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), 9578 SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), 9579 SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 9580 SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 9581 SEC_DEF("kprobe.session+", KPROBE, BPF_TRACE_KPROBE_SESSION, SEC_NONE, attach_kprobe_session), 9582 SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), 9583 SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), 9584 SEC_DEF("uprobe.session+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_NONE, attach_uprobe_multi), 9585 SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), 9586 SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), 9587 SEC_DEF("uprobe.session.s+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_SLEEPABLE, attach_uprobe_multi), 9588 SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), 9589 SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), 9590 SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt), 9591 SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt), 9592 SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */ 9593 SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */ 9594 SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), 9595 
SEC_DEF("tcx/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), 9596 SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ 9597 SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ 9598 SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */ 9599 SEC_DEF("netkit/primary", SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE), 9600 SEC_DEF("netkit/peer", SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE), 9601 SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp), 9602 SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp), 9603 SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), 9604 SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), 9605 SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), 9606 SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), 9607 SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), 9608 SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), 9609 SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), 9610 SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), 9611 SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), 9612 SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), 9613 SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), 9614 SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace), 9615 SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), 9616 SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), 9617 SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF), 9618 SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), 9619 SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), 9620 SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), 9621 SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), 9622 SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), 9623 SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS), 9624 SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), 9625 SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS), 9626 SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT), 9627 SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE), 9628 SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE), 9629 SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE), 9630 SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE), 9631 SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE), 9632 SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT), 9633 SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT), 9634 SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT), 9635 SEC_DEF("sk_skb/verdict", SK_SKB, BPF_SK_SKB_VERDICT, SEC_ATTACHABLE_OPT), 9636 SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE), 9637 SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT), 9638 SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT), 9639 SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT), 9640 SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT), 9641 SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT), 9642 SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE), 9643 SEC_DEF("cgroup/sock_create", 
CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE), 9644 SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE), 9645 SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT), 9646 SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE), 9647 SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE), 9648 SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE), 9649 SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE), 9650 SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE), 9651 SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE), 9652 SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE), 9653 SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE), 9654 SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE), 9655 SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE), 9656 SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE), 9657 SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE), 9658 SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE), 9659 SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE), 9660 SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE), 9661 SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE), 9662 SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE), 9663 SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE), 9664 SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE), 9665 SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE), 9666 SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE), 9667 SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE), 9668 SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT), 9669 SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE), 9670 SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE), 9671 SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), 9672 SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE), 9673 }; 9674 9675 int libbpf_register_prog_handler(const char *sec, 9676 enum bpf_prog_type prog_type, 9677 enum bpf_attach_type exp_attach_type, 9678 const struct libbpf_prog_handler_opts *opts) 9679 { 9680 struct bpf_sec_def *sec_def; 9681 9682 if (!OPTS_VALID(opts, libbpf_prog_handler_opts)) 9683 return libbpf_err(-EINVAL); 9684 9685 if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */ 9686 return libbpf_err(-E2BIG); 9687 9688 if (sec) { 9689 sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1, 9690 sizeof(*sec_def)); 9691 if (!sec_def) 9692 return libbpf_err(-ENOMEM); 9693 9694 custom_sec_defs = sec_def; 9695 sec_def = &custom_sec_defs[custom_sec_def_cnt]; 9696 } else { 9697 if (has_custom_fallback_def) 9698 return libbpf_err(-EBUSY); 9699 9700 sec_def = &custom_fallback_def; 9701 } 9702 9703 sec_def->sec = sec ? 
strdup(sec) : NULL; 9704 if (sec && !sec_def->sec) 9705 return libbpf_err(-ENOMEM); 9706 9707 sec_def->prog_type = prog_type; 9708 sec_def->expected_attach_type = exp_attach_type; 9709 sec_def->cookie = OPTS_GET(opts, cookie, 0); 9710 9711 sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL); 9712 sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL); 9713 sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL); 9714 9715 sec_def->handler_id = ++last_custom_sec_def_handler_id; 9716 9717 if (sec) 9718 custom_sec_def_cnt++; 9719 else 9720 has_custom_fallback_def = true; 9721 9722 return sec_def->handler_id; 9723 } 9724 9725 int libbpf_unregister_prog_handler(int handler_id) 9726 { 9727 struct bpf_sec_def *sec_defs; 9728 int i; 9729 9730 if (handler_id <= 0) 9731 return libbpf_err(-EINVAL); 9732 9733 if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) { 9734 memset(&custom_fallback_def, 0, sizeof(custom_fallback_def)); 9735 has_custom_fallback_def = false; 9736 return 0; 9737 } 9738 9739 for (i = 0; i < custom_sec_def_cnt; i++) { 9740 if (custom_sec_defs[i].handler_id == handler_id) 9741 break; 9742 } 9743 9744 if (i == custom_sec_def_cnt) 9745 return libbpf_err(-ENOENT); 9746 9747 free(custom_sec_defs[i].sec); 9748 for (i = i + 1; i < custom_sec_def_cnt; i++) 9749 custom_sec_defs[i - 1] = custom_sec_defs[i]; 9750 custom_sec_def_cnt--; 9751 9752 /* try to shrink the array, but it's ok if we couldn't */ 9753 sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs)); 9754 /* if new count is zero, reallocarray can return a valid NULL result; 9755 * in this case the previous pointer will be freed, so we *have to* 9756 * reassign old pointer to the new value (even if it's NULL) 9757 */ 9758 if (sec_defs || custom_sec_def_cnt == 0) 9759 custom_sec_defs = sec_defs; 9760 9761 return 0; 9762 } 9763 9764 static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name) 9765 { 9766 size_t len = strlen(sec_def->sec); 9767 9768 /* "type/" always has to have proper SEC("type/extras") form */ 9769 if (sec_def->sec[len - 1] == '/') { 9770 if (str_has_pfx(sec_name, sec_def->sec)) 9771 return true; 9772 return false; 9773 } 9774 9775 /* "type+" means it can be either exact SEC("type") or 9776 * well-formed SEC("type/extras") with proper '/' separator 9777 */ 9778 if (sec_def->sec[len - 1] == '+') { 9779 len--; 9780 /* not even a prefix */ 9781 if (strncmp(sec_name, sec_def->sec, len) != 0) 9782 return false; 9783 /* exact match or has '/' separator */ 9784 if (sec_name[len] == '\0' || sec_name[len] == '/') 9785 return true; 9786 return false; 9787 } 9788 9789 return strcmp(sec_name, sec_def->sec) == 0; 9790 } 9791 9792 static const struct bpf_sec_def *find_sec_def(const char *sec_name) 9793 { 9794 const struct bpf_sec_def *sec_def; 9795 int i, n; 9796 9797 n = custom_sec_def_cnt; 9798 for (i = 0; i < n; i++) { 9799 sec_def = &custom_sec_defs[i]; 9800 if (sec_def_matches(sec_def, sec_name)) 9801 return sec_def; 9802 } 9803 9804 n = ARRAY_SIZE(section_defs); 9805 for (i = 0; i < n; i++) { 9806 sec_def = &section_defs[i]; 9807 if (sec_def_matches(sec_def, sec_name)) 9808 return sec_def; 9809 } 9810 9811 if (has_custom_fallback_def) 9812 return &custom_fallback_def; 9813 9814 return NULL; 9815 } 9816 9817 #define MAX_TYPE_NAME_SIZE 32 9818 9819 static char *libbpf_get_type_names(bool attach_type) 9820 { 9821 int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE; 9822 char *buf; 9823 9824 buf = malloc(len);
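/* one MAX_TYPE_NAME_SIZE slot is reserved per section definition; the loop
 * below still re-checks remaining space before each strcat() and gives up
 * with NULL if a name wouldn't fit
 */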
9825 if (!buf) 9826 return NULL; 9827 9828 buf[0] = '\0'; 9829 /* Forge string buf with all available names */ 9830 for (i = 0; i < ARRAY_SIZE(section_defs); i++) { 9831 const struct bpf_sec_def *sec_def = &section_defs[i]; 9832 9833 if (attach_type) { 9834 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) 9835 continue; 9836 9837 if (!(sec_def->cookie & SEC_ATTACHABLE)) 9838 continue; 9839 } 9840 9841 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) { 9842 free(buf); 9843 return NULL; 9844 } 9845 strcat(buf, " "); 9846 strcat(buf, section_defs[i].sec); 9847 } 9848 9849 return buf; 9850 } 9851 9852 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, 9853 enum bpf_attach_type *expected_attach_type) 9854 { 9855 const struct bpf_sec_def *sec_def; 9856 char *type_names; 9857 9858 if (!name) 9859 return libbpf_err(-EINVAL); 9860 9861 sec_def = find_sec_def(name); 9862 if (sec_def) { 9863 *prog_type = sec_def->prog_type; 9864 *expected_attach_type = sec_def->expected_attach_type; 9865 return 0; 9866 } 9867 9868 pr_debug("failed to guess program type from ELF section '%s'\n", name); 9869 type_names = libbpf_get_type_names(false); 9870 if (type_names != NULL) { 9871 pr_debug("supported section(type) names are:%s\n", type_names); 9872 free(type_names); 9873 } 9874 9875 return libbpf_err(-ESRCH); 9876 } 9877 9878 const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t) 9879 { 9880 if (t < 0 || t >= ARRAY_SIZE(attach_type_name)) 9881 return NULL; 9882 9883 return attach_type_name[t]; 9884 } 9885 9886 const char *libbpf_bpf_link_type_str(enum bpf_link_type t) 9887 { 9888 if (t < 0 || t >= ARRAY_SIZE(link_type_name)) 9889 return NULL; 9890 9891 return link_type_name[t]; 9892 } 9893 9894 const char *libbpf_bpf_map_type_str(enum bpf_map_type t) 9895 { 9896 if (t < 0 || t >= ARRAY_SIZE(map_type_name)) 9897 return NULL; 9898 9899 return map_type_name[t]; 9900 } 9901 9902 const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t) 9903 { 9904 if (t < 0 || t >= ARRAY_SIZE(prog_type_name)) 9905 return NULL; 9906 9907 return prog_type_name[t]; 9908 } 9909 9910 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, 9911 int sec_idx, 9912 size_t offset) 9913 { 9914 struct bpf_map *map; 9915 size_t i; 9916 9917 for (i = 0; i < obj->nr_maps; i++) { 9918 map = &obj->maps[i]; 9919 if (!bpf_map__is_struct_ops(map)) 9920 continue; 9921 if (map->sec_idx == sec_idx && 9922 map->sec_offset <= offset && 9923 offset - map->sec_offset < map->def.value_size) 9924 return map; 9925 } 9926 9927 return NULL; 9928 } 9929 9930 /* Collect the reloc from ELF, populate the st_ops->progs[], and update 9931 * st_ops->data for shadow type.
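 * Each relocation in a struct_ops section points from a function pointer
 * member of the map's value type to a BPF program defined in this object;
 * the member is recovered from the relocation offset and the resolved
 * program is stored both in st_ops->progs[] and in the shadow copy at the
 * corresponding offset of st_ops->data.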
9932 */ 9933 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, 9934 Elf64_Shdr *shdr, Elf_Data *data) 9935 { 9936 const struct btf_type *type; 9937 const struct btf_member *member; 9938 struct bpf_struct_ops *st_ops; 9939 struct bpf_program *prog; 9940 unsigned int shdr_idx; 9941 const struct btf *btf; 9942 struct bpf_map *map; 9943 unsigned int moff, insn_idx; 9944 const char *name; 9945 __u32 member_idx; 9946 Elf64_Sym *sym; 9947 Elf64_Rel *rel; 9948 int i, nrels; 9949 9950 btf = obj->btf; 9951 nrels = shdr->sh_size / shdr->sh_entsize; 9952 for (i = 0; i < nrels; i++) { 9953 rel = elf_rel_by_idx(data, i); 9954 if (!rel) { 9955 pr_warn("struct_ops reloc: failed to get %d reloc\n", i); 9956 return -LIBBPF_ERRNO__FORMAT; 9957 } 9958 9959 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); 9960 if (!sym) { 9961 pr_warn("struct_ops reloc: symbol %zx not found\n", 9962 (size_t)ELF64_R_SYM(rel->r_info)); 9963 return -LIBBPF_ERRNO__FORMAT; 9964 } 9965 9966 name = elf_sym_str(obj, sym->st_name) ?: "<?>"; 9967 map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset); 9968 if (!map) { 9969 pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n", 9970 (size_t)rel->r_offset); 9971 return -EINVAL; 9972 } 9973 9974 moff = rel->r_offset - map->sec_offset; 9975 shdr_idx = sym->st_shndx; 9976 st_ops = map->st_ops; 9977 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n", 9978 map->name, 9979 (long long)(rel->r_info >> 32), 9980 (long long)sym->st_value, 9981 shdr_idx, (size_t)rel->r_offset, 9982 map->sec_offset, sym->st_name, name); 9983 9984 if (shdr_idx >= SHN_LORESERVE) { 9985 pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n", 9986 map->name, (size_t)rel->r_offset, shdr_idx); 9987 return -LIBBPF_ERRNO__RELOC; 9988 } 9989 if (sym->st_value % BPF_INSN_SZ) { 9990 pr_warn("struct_ops reloc %s: invalid target program offset %llu\n", 9991 map->name, (unsigned long long)sym->st_value); 9992 return -LIBBPF_ERRNO__FORMAT; 9993 } 9994 insn_idx = sym->st_value / BPF_INSN_SZ; 9995 9996 type = btf__type_by_id(btf, st_ops->type_id); 9997 member = find_member_by_offset(type, moff * 8); 9998 if (!member) { 9999 pr_warn("struct_ops reloc %s: cannot find member at moff %u\n", 10000 map->name, moff); 10001 return -EINVAL; 10002 } 10003 member_idx = member - btf_members(type); 10004 name = btf__name_by_offset(btf, member->name_off); 10005 10006 if (!resolve_func_ptr(btf, member->type, NULL)) { 10007 pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n", 10008 map->name, name); 10009 return -EINVAL; 10010 } 10011 10012 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx); 10013 if (!prog) { 10014 pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n", 10015 map->name, shdr_idx, name); 10016 return -EINVAL; 10017 } 10018 10019 /* prevent the use of BPF prog with invalid type */ 10020 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) { 10021 pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n", 10022 map->name, prog->name); 10023 return -EINVAL; 10024 } 10025 10026 st_ops->progs[member_idx] = prog; 10027 10028 /* st_ops->data will be exposed to users, being returned by 10029 * bpf_map__initial_value() as a pointer to the shadow 10030 * type. All function pointers in the original struct type 10031 * should be converted to a pointer to struct bpf_program 10032 * in the shadow type. 
10033 */ 10034 *((struct bpf_program **)(st_ops->data + moff)) = prog; 10035 } 10036 10037 return 0; 10038 } 10039 10040 #define BTF_TRACE_PREFIX "btf_trace_" 10041 #define BTF_LSM_PREFIX "bpf_lsm_" 10042 #define BTF_ITER_PREFIX "bpf_iter_" 10043 #define BTF_MAX_NAME_SIZE 128 10044 10045 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, 10046 const char **prefix, int *kind) 10047 { 10048 switch (attach_type) { 10049 case BPF_TRACE_RAW_TP: 10050 *prefix = BTF_TRACE_PREFIX; 10051 *kind = BTF_KIND_TYPEDEF; 10052 break; 10053 case BPF_LSM_MAC: 10054 case BPF_LSM_CGROUP: 10055 *prefix = BTF_LSM_PREFIX; 10056 *kind = BTF_KIND_FUNC; 10057 break; 10058 case BPF_TRACE_ITER: 10059 *prefix = BTF_ITER_PREFIX; 10060 *kind = BTF_KIND_FUNC; 10061 break; 10062 default: 10063 *prefix = ""; 10064 *kind = BTF_KIND_FUNC; 10065 } 10066 } 10067 10068 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, 10069 const char *name, __u32 kind) 10070 { 10071 char btf_type_name[BTF_MAX_NAME_SIZE]; 10072 int ret; 10073 10074 ret = snprintf(btf_type_name, sizeof(btf_type_name), 10075 "%s%s", prefix, name); 10076 /* snprintf returns the number of characters written excluding the 10077 * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it 10078 * indicates truncation. 10079 */ 10080 if (ret < 0 || ret >= sizeof(btf_type_name)) 10081 return -ENAMETOOLONG; 10082 return btf__find_by_name_kind(btf, btf_type_name, kind); 10083 } 10084 10085 static inline int find_attach_btf_id(struct btf *btf, const char *name, 10086 enum bpf_attach_type attach_type) 10087 { 10088 const char *prefix; 10089 int kind; 10090 10091 btf_get_kernel_prefix_kind(attach_type, &prefix, &kind); 10092 return find_btf_by_prefix_kind(btf, prefix, name, kind); 10093 } 10094 10095 int libbpf_find_vmlinux_btf_id(const char *name, 10096 enum bpf_attach_type attach_type) 10097 { 10098 struct btf *btf; 10099 int err; 10100 10101 btf = btf__load_vmlinux_btf(); 10102 err = libbpf_get_error(btf); 10103 if (err) { 10104 pr_warn("vmlinux BTF is not found\n"); 10105 return libbpf_err(err); 10106 } 10107 10108 err = find_attach_btf_id(btf, name, attach_type); 10109 if (err <= 0) 10110 pr_warn("%s is not found in vmlinux BTF\n", name); 10111 10112 btf__free(btf); 10113 return libbpf_err(err); 10114 } 10115 10116 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd, int token_fd) 10117 { 10118 struct bpf_prog_info info; 10119 __u32 info_len = sizeof(info); 10120 struct btf *btf; 10121 int err; 10122 10123 memset(&info, 0, info_len); 10124 err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len); 10125 if (err) { 10126 pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %s\n", 10127 attach_prog_fd, errstr(err)); 10128 return err; 10129 } 10130 10131 err = -EINVAL; 10132 if (!info.btf_id) { 10133 pr_warn("The target program doesn't have BTF\n"); 10134 goto out; 10135 } 10136 btf = btf_load_from_kernel(info.btf_id, NULL, token_fd); 10137 err = libbpf_get_error(btf); 10138 if (err) { 10139 pr_warn("Failed to get BTF %d of the program: %s\n", info.btf_id, errstr(err)); 10140 goto out; 10141 } 10142 err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); 10143 btf__free(btf); 10144 if (err <= 0) { 10145 pr_warn("%s is not found in prog's BTF\n", name); 10146 goto out; 10147 } 10148 out: 10149 return err; 10150 } 10151 10152 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, 10153 enum bpf_attach_type attach_type, 10154 int *btf_obj_fd, int *btf_type_id) 10155 { 10156 int 
ret, i, mod_len = 0; 10157 const char *fn_name, *mod_name = NULL; 10158 10159 fn_name = strchr(attach_name, ':'); 10160 if (fn_name) { 10161 mod_name = attach_name; 10162 mod_len = fn_name - mod_name; 10163 fn_name++; 10164 } 10165 10166 if (!mod_name || strncmp(mod_name, "vmlinux", mod_len) == 0) { 10167 ret = find_attach_btf_id(obj->btf_vmlinux, 10168 mod_name ? fn_name : attach_name, 10169 attach_type); 10170 if (ret > 0) { 10171 *btf_obj_fd = 0; /* vmlinux BTF */ 10172 *btf_type_id = ret; 10173 return 0; 10174 } 10175 if (ret != -ENOENT) 10176 return ret; 10177 } 10178 10179 ret = load_module_btfs(obj); 10180 if (ret) 10181 return ret; 10182 10183 for (i = 0; i < obj->btf_module_cnt; i++) { 10184 const struct module_btf *mod = &obj->btf_modules[i]; 10185 10186 if (mod_name && strncmp(mod->name, mod_name, mod_len) != 0) 10187 continue; 10188 10189 ret = find_attach_btf_id(mod->btf, 10190 mod_name ? fn_name : attach_name, 10191 attach_type); 10192 if (ret > 0) { 10193 *btf_obj_fd = mod->fd; 10194 *btf_type_id = ret; 10195 return 0; 10196 } 10197 if (ret == -ENOENT) 10198 continue; 10199 10200 return ret; 10201 } 10202 10203 return -ESRCH; 10204 } 10205 10206 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, 10207 int *btf_obj_fd, int *btf_type_id) 10208 { 10209 enum bpf_attach_type attach_type = prog->expected_attach_type; 10210 __u32 attach_prog_fd = prog->attach_prog_fd; 10211 int err = 0; 10212 10213 /* BPF program's BTF ID */ 10214 if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) { 10215 if (!attach_prog_fd) { 10216 pr_warn("prog '%s': attach program FD is not set\n", prog->name); 10217 return -EINVAL; 10218 } 10219 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd, prog->obj->token_fd); 10220 if (err < 0) { 10221 pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %s\n", 10222 prog->name, attach_prog_fd, attach_name, errstr(err)); 10223 return err; 10224 } 10225 *btf_obj_fd = 0; 10226 *btf_type_id = err; 10227 return 0; 10228 } 10229 10230 /* kernel/module BTF ID */ 10231 if (prog->obj->gen_loader) { 10232 bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type); 10233 *btf_obj_fd = 0; 10234 *btf_type_id = 1; 10235 } else { 10236 err = find_kernel_btf_id(prog->obj, attach_name, 10237 attach_type, btf_obj_fd, 10238 btf_type_id); 10239 } 10240 if (err) { 10241 pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %s\n", 10242 prog->name, attach_name, errstr(err)); 10243 return err; 10244 } 10245 return 0; 10246 } 10247 10248 int libbpf_attach_type_by_name(const char *name, 10249 enum bpf_attach_type *attach_type) 10250 { 10251 char *type_names; 10252 const struct bpf_sec_def *sec_def; 10253 10254 if (!name) 10255 return libbpf_err(-EINVAL); 10256 10257 sec_def = find_sec_def(name); 10258 if (!sec_def) { 10259 pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); 10260 type_names = libbpf_get_type_names(true); 10261 if (type_names != NULL) { 10262 pr_debug("attachable section(type) names are:%s\n", type_names); 10263 free(type_names); 10264 } 10265 10266 return libbpf_err(-EINVAL); 10267 } 10268 10269 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) 10270 return libbpf_err(-EINVAL); 10271 if (!(sec_def->cookie & SEC_ATTACHABLE)) 10272 return libbpf_err(-EINVAL); 10273 10274 *attach_type = sec_def->expected_attach_type; 10275 return 0; 10276 } 10277 10278 int bpf_map__fd(const struct bpf_map *map) 10279 { 10280 if (!map) 10281 return 
libbpf_err(-EINVAL); 10282 if (!map_is_created(map)) 10283 return -1; 10284 return map->fd; 10285 } 10286 10287 static bool map_uses_real_name(const struct bpf_map *map) 10288 { 10289 /* Since libbpf started to support custom .data.* and .rodata.* maps, 10290 * their user-visible name differs from kernel-visible name. Users see 10291 * such map's corresponding ELF section name as a map name. 10292 * This check distinguishes .data/.rodata from .data.* and .rodata.* 10293 * maps to know which name has to be returned to the user. 10294 */ 10295 if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0) 10296 return true; 10297 if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0) 10298 return true; 10299 return false; 10300 } 10301 10302 const char *bpf_map__name(const struct bpf_map *map) 10303 { 10304 if (!map) 10305 return NULL; 10306 10307 if (map_uses_real_name(map)) 10308 return map->real_name; 10309 10310 return map->name; 10311 } 10312 10313 enum bpf_map_type bpf_map__type(const struct bpf_map *map) 10314 { 10315 return map->def.type; 10316 } 10317 10318 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type) 10319 { 10320 if (map_is_created(map)) 10321 return libbpf_err(-EBUSY); 10322 map->def.type = type; 10323 return 0; 10324 } 10325 10326 __u32 bpf_map__map_flags(const struct bpf_map *map) 10327 { 10328 return map->def.map_flags; 10329 } 10330 10331 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) 10332 { 10333 if (map_is_created(map)) 10334 return libbpf_err(-EBUSY); 10335 map->def.map_flags = flags; 10336 return 0; 10337 } 10338 10339 __u64 bpf_map__map_extra(const struct bpf_map *map) 10340 { 10341 return map->map_extra; 10342 } 10343 10344 int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra) 10345 { 10346 if (map_is_created(map)) 10347 return libbpf_err(-EBUSY); 10348 map->map_extra = map_extra; 10349 return 0; 10350 } 10351 10352 __u32 bpf_map__numa_node(const struct bpf_map *map) 10353 { 10354 return map->numa_node; 10355 } 10356 10357 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node) 10358 { 10359 if (map_is_created(map)) 10360 return libbpf_err(-EBUSY); 10361 map->numa_node = numa_node; 10362 return 0; 10363 } 10364 10365 __u32 bpf_map__key_size(const struct bpf_map *map) 10366 { 10367 return map->def.key_size; 10368 } 10369 10370 int bpf_map__set_key_size(struct bpf_map *map, __u32 size) 10371 { 10372 if (map_is_created(map)) 10373 return libbpf_err(-EBUSY); 10374 map->def.key_size = size; 10375 return 0; 10376 } 10377 10378 __u32 bpf_map__value_size(const struct bpf_map *map) 10379 { 10380 return map->def.value_size; 10381 } 10382 10383 static int map_btf_datasec_resize(struct bpf_map *map, __u32 size) 10384 { 10385 struct btf *btf; 10386 struct btf_type *datasec_type, *var_type; 10387 struct btf_var_secinfo *var; 10388 const struct btf_type *array_type; 10389 const struct btf_array *array; 10390 int vlen, element_sz, new_array_id; 10391 __u32 nr_elements; 10392 10393 /* check btf existence */ 10394 btf = bpf_object__btf(map->obj); 10395 if (!btf) 10396 return -ENOENT; 10397 10398 /* verify map is datasec */ 10399 datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map)); 10400 if (!btf_is_datasec(datasec_type)) { 10401 pr_warn("map '%s': cannot be resized, map value type is not a datasec\n", 10402 bpf_map__name(map)); 10403 return -EINVAL; 10404 } 10405 10406 /* verify datasec has at least one var */ 10407 vlen = btf_vlen(datasec_type); 10408 if (vlen == 0) 
{ 10409 pr_warn("map '%s': cannot be resized, map value datasec is empty\n", 10410 bpf_map__name(map)); 10411 return -EINVAL; 10412 } 10413 10414 /* verify last var in the datasec is an array */ 10415 var = &btf_var_secinfos(datasec_type)[vlen - 1]; 10416 var_type = btf_type_by_id(btf, var->type); 10417 array_type = skip_mods_and_typedefs(btf, var_type->type, NULL); 10418 if (!btf_is_array(array_type)) { 10419 pr_warn("map '%s': cannot be resized, last var must be an array\n", 10420 bpf_map__name(map)); 10421 return -EINVAL; 10422 } 10423 10424 /* verify request size aligns with array */ 10425 array = btf_array(array_type); 10426 element_sz = btf__resolve_size(btf, array->type); 10427 if (element_sz <= 0 || (size - var->offset) % element_sz != 0) { 10428 pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n", 10429 bpf_map__name(map), element_sz, size); 10430 return -EINVAL; 10431 } 10432 10433 /* create a new array based on the existing array, but with new length */ 10434 nr_elements = (size - var->offset) / element_sz; 10435 new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements); 10436 if (new_array_id < 0) 10437 return new_array_id; 10438 10439 /* adding a new btf type invalidates existing pointers to btf objects, 10440 * so refresh pointers before proceeding 10441 */ 10442 datasec_type = btf_type_by_id(btf, map->btf_value_type_id); 10443 var = &btf_var_secinfos(datasec_type)[vlen - 1]; 10444 var_type = btf_type_by_id(btf, var->type); 10445 10446 /* finally update btf info */ 10447 datasec_type->size = size; 10448 var->size = size - var->offset; 10449 var_type->type = new_array_id; 10450 10451 return 0; 10452 } 10453 10454 int bpf_map__set_value_size(struct bpf_map *map, __u32 size) 10455 { 10456 if (map_is_created(map)) 10457 return libbpf_err(-EBUSY); 10458 10459 if (map->mmaped) { 10460 size_t mmap_old_sz, mmap_new_sz; 10461 int err; 10462 10463 if (map->def.type != BPF_MAP_TYPE_ARRAY) 10464 return libbpf_err(-EOPNOTSUPP); 10465 10466 mmap_old_sz = bpf_map_mmap_sz(map); 10467 mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries); 10468 err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz); 10469 if (err) { 10470 pr_warn("map '%s': failed to resize memory-mapped region: %s\n", 10471 bpf_map__name(map), errstr(err)); 10472 return libbpf_err(err); 10473 } 10474 err = map_btf_datasec_resize(map, size); 10475 if (err && err != -ENOENT) { 10476 pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %s\n", 10477 bpf_map__name(map), errstr(err)); 10478 map->btf_value_type_id = 0; 10479 map->btf_key_type_id = 0; 10480 } 10481 } 10482 10483 map->def.value_size = size; 10484 return 0; 10485 } 10486 10487 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map) 10488 { 10489 return map ? map->btf_key_type_id : 0; 10490 } 10491 10492 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map) 10493 { 10494 return map ? 
map->btf_value_type_id : 0; 10495 } 10496 10497 int bpf_map__set_initial_value(struct bpf_map *map, 10498 const void *data, size_t size) 10499 { 10500 size_t actual_sz; 10501 10502 if (map_is_created(map)) 10503 return libbpf_err(-EBUSY); 10504 10505 if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG) 10506 return libbpf_err(-EINVAL); 10507 10508 if (map->def.type == BPF_MAP_TYPE_ARENA) 10509 actual_sz = map->obj->arena_data_sz; 10510 else 10511 actual_sz = map->def.value_size; 10512 if (size != actual_sz) 10513 return libbpf_err(-EINVAL); 10514 10515 memcpy(map->mmaped, data, size); 10516 return 0; 10517 } 10518 10519 void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize) 10520 { 10521 if (bpf_map__is_struct_ops(map)) { 10522 if (psize) 10523 *psize = map->def.value_size; 10524 return map->st_ops->data; 10525 } 10526 10527 if (!map->mmaped) 10528 return NULL; 10529 10530 if (map->def.type == BPF_MAP_TYPE_ARENA) 10531 *psize = map->obj->arena_data_sz; 10532 else 10533 *psize = map->def.value_size; 10534 10535 return map->mmaped; 10536 } 10537 10538 bool bpf_map__is_internal(const struct bpf_map *map) 10539 { 10540 return map->libbpf_type != LIBBPF_MAP_UNSPEC; 10541 } 10542 10543 __u32 bpf_map__ifindex(const struct bpf_map *map) 10544 { 10545 return map->map_ifindex; 10546 } 10547 10548 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) 10549 { 10550 if (map_is_created(map)) 10551 return libbpf_err(-EBUSY); 10552 map->map_ifindex = ifindex; 10553 return 0; 10554 } 10555 10556 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) 10557 { 10558 if (!bpf_map_type__is_map_in_map(map->def.type)) { 10559 pr_warn("error: unsupported map type\n"); 10560 return libbpf_err(-EINVAL); 10561 } 10562 if (map->inner_map_fd != -1) { 10563 pr_warn("error: inner_map_fd already specified\n"); 10564 return libbpf_err(-EINVAL); 10565 } 10566 if (map->inner_map) { 10567 bpf_map__destroy(map->inner_map); 10568 zfree(&map->inner_map); 10569 } 10570 map->inner_map_fd = fd; 10571 return 0; 10572 } 10573 10574 int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog) 10575 { 10576 if (map_is_created(map)) { 10577 pr_warn("exclusive programs must be set before map creation\n"); 10578 return libbpf_err(-EINVAL); 10579 } 10580 10581 if (map->obj != prog->obj) { 10582 pr_warn("excl_prog and map must be from the same bpf object\n"); 10583 return libbpf_err(-EINVAL); 10584 } 10585 10586 map->excl_prog = prog; 10587 return 0; 10588 } 10589 10590 struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map) 10591 { 10592 return map->excl_prog; 10593 } 10594 10595 static struct bpf_map * 10596 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) 10597 { 10598 ssize_t idx; 10599 struct bpf_map *s, *e; 10600 10601 if (!obj || !obj->maps) 10602 return errno = EINVAL, NULL; 10603 10604 s = obj->maps; 10605 e = obj->maps + obj->nr_maps; 10606 10607 if ((m < s) || (m >= e)) { 10608 pr_warn("error in %s: map handler doesn't belong to object\n", 10609 __func__); 10610 return errno = EINVAL, NULL; 10611 } 10612 10613 idx = (m - obj->maps) + i; 10614 if (idx >= obj->nr_maps || idx < 0) 10615 return NULL; 10616 return &obj->maps[idx]; 10617 } 10618 10619 struct bpf_map * 10620 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) 10621 { 10622 if (prev == NULL && obj != NULL) 10623 return obj->maps; 10624 10625 return __bpf_map__iter(prev, obj, 1); 10626 } 10627 10628 struct bpf_map * 10629 bpf_object__prev_map(const 
struct bpf_object *obj, const struct bpf_map *next) 10630 { 10631 if (next == NULL && obj != NULL) { 10632 if (!obj->nr_maps) 10633 return NULL; 10634 return obj->maps + obj->nr_maps - 1; 10635 } 10636 10637 return __bpf_map__iter(next, obj, -1); 10638 } 10639 10640 struct bpf_map * 10641 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name) 10642 { 10643 struct bpf_map *pos; 10644 10645 bpf_object__for_each_map(pos, obj) { 10646 /* if it's a special internal map name (which always starts 10647 * with dot) then check if that special name matches the 10648 * real map name (ELF section name) 10649 */ 10650 if (name[0] == '.') { 10651 if (pos->real_name && strcmp(pos->real_name, name) == 0) 10652 return pos; 10653 continue; 10654 } 10655 /* otherwise map name has to be an exact match */ 10656 if (map_uses_real_name(pos)) { 10657 if (strcmp(pos->real_name, name) == 0) 10658 return pos; 10659 continue; 10660 } 10661 if (strcmp(pos->name, name) == 0) 10662 return pos; 10663 } 10664 return errno = ENOENT, NULL; 10665 } 10666 10667 int 10668 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name) 10669 { 10670 return bpf_map__fd(bpf_object__find_map_by_name(obj, name)); 10671 } 10672 10673 static int validate_map_op(const struct bpf_map *map, size_t key_sz, 10674 size_t value_sz, bool check_value_sz) 10675 { 10676 if (!map_is_created(map)) /* map is not yet created */ 10677 return -ENOENT; 10678 10679 if (map->def.key_size != key_sz) { 10680 pr_warn("map '%s': unexpected key size %zu provided, expected %u\n", 10681 map->name, key_sz, map->def.key_size); 10682 return -EINVAL; 10683 } 10684 10685 if (map->fd < 0) { 10686 pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name); 10687 return -EINVAL; 10688 } 10689 10690 if (!check_value_sz) 10691 return 0; 10692 10693 switch (map->def.type) { 10694 case BPF_MAP_TYPE_PERCPU_ARRAY: 10695 case BPF_MAP_TYPE_PERCPU_HASH: 10696 case BPF_MAP_TYPE_LRU_PERCPU_HASH: 10697 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: { 10698 int num_cpu = libbpf_num_possible_cpus(); 10699 size_t elem_sz = roundup(map->def.value_size, 8); 10700 10701 if (value_sz != num_cpu * elem_sz) { 10702 pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n", 10703 map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz); 10704 return -EINVAL; 10705 } 10706 break; 10707 } 10708 default: 10709 if (map->def.value_size != value_sz) { 10710 pr_warn("map '%s': unexpected value size %zu provided, expected %u\n", 10711 map->name, value_sz, map->def.value_size); 10712 return -EINVAL; 10713 } 10714 break; 10715 } 10716 return 0; 10717 } 10718 10719 int bpf_map__lookup_elem(const struct bpf_map *map, 10720 const void *key, size_t key_sz, 10721 void *value, size_t value_sz, __u64 flags) 10722 { 10723 int err; 10724 10725 err = validate_map_op(map, key_sz, value_sz, true); 10726 if (err) 10727 return libbpf_err(err); 10728 10729 return bpf_map_lookup_elem_flags(map->fd, key, value, flags); 10730 } 10731 10732 int bpf_map__update_elem(const struct bpf_map *map, 10733 const void *key, size_t key_sz, 10734 const void *value, size_t value_sz, __u64 flags) 10735 { 10736 int err; 10737 10738 err = validate_map_op(map, key_sz, value_sz, true); 10739 if (err) 10740 return libbpf_err(err); 10741 10742 return bpf_map_update_elem(map->fd, key, value, flags); 10743 } 10744 10745 int bpf_map__delete_elem(const struct bpf_map *map, 10746 const void *key, size_t key_sz, __u64 flags) 10747 { 10748 int err; 
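/* only the key size matters for delete (and for get_next_key below), so
 * validate_map_op() is called with check_value_sz == false
 */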
10749 10750 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); 10751 if (err) 10752 return libbpf_err(err); 10753 10754 return bpf_map_delete_elem_flags(map->fd, key, flags); 10755 } 10756 10757 int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, 10758 const void *key, size_t key_sz, 10759 void *value, size_t value_sz, __u64 flags) 10760 { 10761 int err; 10762 10763 err = validate_map_op(map, key_sz, value_sz, true); 10764 if (err) 10765 return libbpf_err(err); 10766 10767 return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags); 10768 } 10769 10770 int bpf_map__get_next_key(const struct bpf_map *map, 10771 const void *cur_key, void *next_key, size_t key_sz) 10772 { 10773 int err; 10774 10775 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); 10776 if (err) 10777 return libbpf_err(err); 10778 10779 return bpf_map_get_next_key(map->fd, cur_key, next_key); 10780 } 10781 10782 long libbpf_get_error(const void *ptr) 10783 { 10784 if (!IS_ERR_OR_NULL(ptr)) 10785 return 0; 10786 10787 if (IS_ERR(ptr)) 10788 errno = -PTR_ERR(ptr); 10789 10790 /* If ptr == NULL, then errno should be already set by the failing 10791 * API, because libbpf never returns NULL on success and it now always 10792 * sets errno on error. So no extra errno handling for ptr == NULL 10793 * case. 10794 */ 10795 return -errno; 10796 } 10797 10798 /* Replace link's underlying BPF program with the new one */ 10799 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) 10800 { 10801 int ret; 10802 int prog_fd = bpf_program__fd(prog); 10803 10804 if (prog_fd < 0) { 10805 pr_warn("prog '%s': can't use BPF program without FD (was it loaded?)\n", 10806 prog->name); 10807 return libbpf_err(-EINVAL); 10808 } 10809 10810 ret = bpf_link_update(bpf_link__fd(link), prog_fd, NULL); 10811 return libbpf_err_errno(ret); 10812 } 10813 10814 /* Release "ownership" of the underlying BPF resource (typically, a BPF program 10815 * attached to some BPF hook, e.g., tracepoint, kprobe, etc). A disconnected 10816 * link, when destroyed through a bpf_link__destroy() call, won't attempt to 10817 * detach/unregister that BPF resource. This is useful in situations where, 10818 * say, the attached BPF program has to outlive the userspace program that attached it 10819 * in the system. Depending on the type of BPF program, though, there might be 10820 * additional steps (like pinning the BPF program in BPF FS) necessary to ensure 10821 * that the exit of the userspace program doesn't trigger automatic detachment and cleanup 10822 * inside the kernel.
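 *
 * A minimal usage sketch (for illustration only; "prog" and the pin path
 * below are hypothetical and not defined in this file):
 *
 *	struct bpf_link *link = bpf_program__attach(prog);
 *
 *	if (link && !bpf_link__pin(link, "/sys/fs/bpf/my_link")) {
 *		bpf_link__disconnect(link);
 *		bpf_link__destroy(link); // frees the handle, pinned attachment stays
 *	}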
10823 */ 10824 void bpf_link__disconnect(struct bpf_link *link) 10825 { 10826 link->disconnected = true; 10827 } 10828 10829 int bpf_link__destroy(struct bpf_link *link) 10830 { 10831 int err = 0; 10832 10833 if (IS_ERR_OR_NULL(link)) 10834 return 0; 10835 10836 if (!link->disconnected && link->detach) 10837 err = link->detach(link); 10838 if (link->pin_path) 10839 free(link->pin_path); 10840 if (link->dealloc) 10841 link->dealloc(link); 10842 else 10843 free(link); 10844 10845 return libbpf_err(err); 10846 } 10847 10848 int bpf_link__fd(const struct bpf_link *link) 10849 { 10850 return link->fd; 10851 } 10852 10853 const char *bpf_link__pin_path(const struct bpf_link *link) 10854 { 10855 return link->pin_path; 10856 } 10857 10858 static int bpf_link__detach_fd(struct bpf_link *link) 10859 { 10860 return libbpf_err_errno(close(link->fd)); 10861 } 10862 10863 struct bpf_link *bpf_link__open(const char *path) 10864 { 10865 struct bpf_link *link; 10866 int fd; 10867 10868 fd = bpf_obj_get(path); 10869 if (fd < 0) { 10870 fd = -errno; 10871 pr_warn("failed to open link at %s: %d\n", path, fd); 10872 return libbpf_err_ptr(fd); 10873 } 10874 10875 link = calloc(1, sizeof(*link)); 10876 if (!link) { 10877 close(fd); 10878 return libbpf_err_ptr(-ENOMEM); 10879 } 10880 link->detach = &bpf_link__detach_fd; 10881 link->fd = fd; 10882 10883 link->pin_path = strdup(path); 10884 if (!link->pin_path) { 10885 bpf_link__destroy(link); 10886 return libbpf_err_ptr(-ENOMEM); 10887 } 10888 10889 return link; 10890 } 10891 10892 int bpf_link__detach(struct bpf_link *link) 10893 { 10894 return bpf_link_detach(link->fd) ? -errno : 0; 10895 } 10896 10897 int bpf_link__pin(struct bpf_link *link, const char *path) 10898 { 10899 int err; 10900 10901 if (link->pin_path) 10902 return libbpf_err(-EBUSY); 10903 err = make_parent_dir(path); 10904 if (err) 10905 return libbpf_err(err); 10906 err = check_path(path); 10907 if (err) 10908 return libbpf_err(err); 10909 10910 link->pin_path = strdup(path); 10911 if (!link->pin_path) 10912 return libbpf_err(-ENOMEM); 10913 10914 if (bpf_obj_pin(link->fd, link->pin_path)) { 10915 err = -errno; 10916 zfree(&link->pin_path); 10917 return libbpf_err(err); 10918 } 10919 10920 pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path); 10921 return 0; 10922 } 10923 10924 int bpf_link__unpin(struct bpf_link *link) 10925 { 10926 int err; 10927 10928 if (!link->pin_path) 10929 return libbpf_err(-EINVAL); 10930 10931 err = unlink(link->pin_path); 10932 if (err != 0) 10933 return -errno; 10934 10935 pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path); 10936 zfree(&link->pin_path); 10937 return 0; 10938 } 10939 10940 struct bpf_link_perf { 10941 struct bpf_link link; 10942 int perf_event_fd; 10943 /* legacy kprobe support: keep track of probe identifier and type */ 10944 char *legacy_probe_name; 10945 bool legacy_is_kprobe; 10946 bool legacy_is_retprobe; 10947 }; 10948 10949 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe); 10950 static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe); 10951 10952 static int bpf_link_perf_detach(struct bpf_link *link) 10953 { 10954 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 10955 int err = 0; 10956 10957 if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0) 10958 err = -errno; 10959 10960 if (perf_link->perf_event_fd != link->fd) 10961 close(perf_link->perf_event_fd); 10962 close(link->fd); 10963 10964 /* legacy uprobe/kprobe needs to 
be removed after perf event fd closure */ 10965 if (perf_link->legacy_probe_name) { 10966 if (perf_link->legacy_is_kprobe) { 10967 err = remove_kprobe_event_legacy(perf_link->legacy_probe_name, 10968 perf_link->legacy_is_retprobe); 10969 } else { 10970 err = remove_uprobe_event_legacy(perf_link->legacy_probe_name, 10971 perf_link->legacy_is_retprobe); 10972 } 10973 } 10974 10975 return err; 10976 } 10977 10978 static void bpf_link_perf_dealloc(struct bpf_link *link) 10979 { 10980 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 10981 10982 free(perf_link->legacy_probe_name); 10983 free(perf_link); 10984 } 10985 10986 struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, 10987 const struct bpf_perf_event_opts *opts) 10988 { 10989 struct bpf_link_perf *link; 10990 int prog_fd, link_fd = -1, err; 10991 bool force_ioctl_attach; 10992 10993 if (!OPTS_VALID(opts, bpf_perf_event_opts)) 10994 return libbpf_err_ptr(-EINVAL); 10995 10996 if (pfd < 0) { 10997 pr_warn("prog '%s': invalid perf event FD %d\n", 10998 prog->name, pfd); 10999 return libbpf_err_ptr(-EINVAL); 11000 } 11001 prog_fd = bpf_program__fd(prog); 11002 if (prog_fd < 0) { 11003 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", 11004 prog->name); 11005 return libbpf_err_ptr(-EINVAL); 11006 } 11007 11008 link = calloc(1, sizeof(*link)); 11009 if (!link) 11010 return libbpf_err_ptr(-ENOMEM); 11011 link->link.detach = &bpf_link_perf_detach; 11012 link->link.dealloc = &bpf_link_perf_dealloc; 11013 link->perf_event_fd = pfd; 11014 11015 force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false); 11016 if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) { 11017 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts, 11018 .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0)); 11019 11020 link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts); 11021 if (link_fd < 0) { 11022 err = -errno; 11023 pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %s\n", 11024 prog->name, pfd, errstr(err)); 11025 goto err_out; 11026 } 11027 link->link.fd = link_fd; 11028 } else { 11029 if (OPTS_GET(opts, bpf_cookie, 0)) { 11030 pr_warn("prog '%s': user context value is not supported\n", prog->name); 11031 err = -EOPNOTSUPP; 11032 goto err_out; 11033 } 11034 11035 if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { 11036 err = -errno; 11037 pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n", 11038 prog->name, pfd, errstr(err)); 11039 if (err == -EPROTO) 11040 pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", 11041 prog->name, pfd); 11042 goto err_out; 11043 } 11044 link->link.fd = pfd; 11045 } 11046 11047 if (!OPTS_GET(opts, dont_enable, false)) { 11048 if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { 11049 err = -errno; 11050 pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", 11051 prog->name, pfd, errstr(err)); 11052 goto err_out; 11053 } 11054 } 11055 11056 return &link->link; 11057 err_out: 11058 if (link_fd >= 0) 11059 close(link_fd); 11060 free(link); 11061 return libbpf_err_ptr(err); 11062 } 11063 11064 struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd) 11065 { 11066 return bpf_program__attach_perf_event_opts(prog, pfd, NULL); 11067 } 11068 11069 /* 11070 * this function is expected to parse integer in the range of [0, 2^31-1] from 11071 * given file using scanf format string fmt. 
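 * (for example, determine_kprobe_perf_type() below reads "%d\n" from
 * /sys/bus/event_source/devices/kprobe/type, and the retprobe helpers read
 * "config:%d\n" from the corresponding format/retprobe file).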
If actual parsed value is 11072 * negative, the result might be indistinguishable from error 11073 */ 11074 static int parse_uint_from_file(const char *file, const char *fmt) 11075 { 11076 int err, ret; 11077 FILE *f; 11078 11079 f = fopen(file, "re"); 11080 if (!f) { 11081 err = -errno; 11082 pr_debug("failed to open '%s': %s\n", file, errstr(err)); 11083 return err; 11084 } 11085 err = fscanf(f, fmt, &ret); 11086 if (err != 1) { 11087 err = err == EOF ? -EIO : -errno; 11088 pr_debug("failed to parse '%s': %s\n", file, errstr(err)); 11089 fclose(f); 11090 return err; 11091 } 11092 fclose(f); 11093 return ret; 11094 } 11095 11096 static int determine_kprobe_perf_type(void) 11097 { 11098 const char *file = "/sys/bus/event_source/devices/kprobe/type"; 11099 11100 return parse_uint_from_file(file, "%d\n"); 11101 } 11102 11103 static int determine_uprobe_perf_type(void) 11104 { 11105 const char *file = "/sys/bus/event_source/devices/uprobe/type"; 11106 11107 return parse_uint_from_file(file, "%d\n"); 11108 } 11109 11110 static int determine_kprobe_retprobe_bit(void) 11111 { 11112 const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe"; 11113 11114 return parse_uint_from_file(file, "config:%d\n"); 11115 } 11116 11117 static int determine_uprobe_retprobe_bit(void) 11118 { 11119 const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe"; 11120 11121 return parse_uint_from_file(file, "config:%d\n"); 11122 } 11123 11124 #define PERF_UPROBE_REF_CTR_OFFSET_BITS 32 11125 #define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32 11126 11127 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, 11128 uint64_t offset, int pid, size_t ref_ctr_off) 11129 { 11130 const size_t attr_sz = sizeof(struct perf_event_attr); 11131 struct perf_event_attr attr; 11132 int type, pfd; 11133 11134 if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) 11135 return -EINVAL; 11136 11137 memset(&attr, 0, attr_sz); 11138 11139 type = uprobe ? determine_uprobe_perf_type() 11140 : determine_kprobe_perf_type(); 11141 if (type < 0) { 11142 pr_warn("failed to determine %s perf type: %s\n", 11143 uprobe ? "uprobe" : "kprobe", 11144 errstr(type)); 11145 return type; 11146 } 11147 if (retprobe) { 11148 int bit = uprobe ? determine_uprobe_retprobe_bit() 11149 : determine_kprobe_retprobe_bit(); 11150 11151 if (bit < 0) { 11152 pr_warn("failed to determine %s retprobe bit: %s\n", 11153 uprobe ? "uprobe" : "kprobe", 11154 errstr(bit)); 11155 return bit; 11156 } 11157 attr.config |= 1 << bit; 11158 } 11159 attr.size = attr_sz; 11160 attr.type = type; 11161 attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT; 11162 attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */ 11163 attr.config2 = offset; /* kprobe_addr or probe_offset */ 11164 11165 /* pid filter is meaningful only for uprobes */ 11166 pfd = syscall(__NR_perf_event_open, &attr, 11167 pid < 0 ? -1 : pid /* pid */, 11168 pid == -1 ? 0 : -1 /* cpu */, 11169 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 11170 return pfd >= 0 ? pfd : -errno; 11171 } 11172 11173 static int append_to_file(const char *file, const char *fmt, ...) 
11174 { 11175 int fd, n, err = 0; 11176 va_list ap; 11177 char buf[1024]; 11178 11179 va_start(ap, fmt); 11180 n = vsnprintf(buf, sizeof(buf), fmt, ap); 11181 va_end(ap); 11182 11183 if (n < 0 || n >= sizeof(buf)) 11184 return -EINVAL; 11185 11186 fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0); 11187 if (fd < 0) 11188 return -errno; 11189 11190 if (write(fd, buf, n) < 0) 11191 err = -errno; 11192 11193 close(fd); 11194 return err; 11195 } 11196 11197 #define DEBUGFS "/sys/kernel/debug/tracing" 11198 #define TRACEFS "/sys/kernel/tracing" 11199 11200 static bool use_debugfs(void) 11201 { 11202 static int has_debugfs = -1; 11203 11204 if (has_debugfs < 0) 11205 has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0; 11206 11207 return has_debugfs == 1; 11208 } 11209 11210 static const char *tracefs_path(void) 11211 { 11212 return use_debugfs() ? DEBUGFS : TRACEFS; 11213 } 11214 11215 static const char *tracefs_kprobe_events(void) 11216 { 11217 return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events"; 11218 } 11219 11220 static const char *tracefs_uprobe_events(void) 11221 { 11222 return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events"; 11223 } 11224 11225 static const char *tracefs_available_filter_functions(void) 11226 { 11227 return use_debugfs() ? DEBUGFS"/available_filter_functions" 11228 : TRACEFS"/available_filter_functions"; 11229 } 11230 11231 static const char *tracefs_available_filter_functions_addrs(void) 11232 { 11233 return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs" 11234 : TRACEFS"/available_filter_functions_addrs"; 11235 } 11236 11237 static void gen_probe_legacy_event_name(char *buf, size_t buf_sz, 11238 const char *name, size_t offset) 11239 { 11240 static int index = 0; 11241 int i; 11242 11243 snprintf(buf, buf_sz, "libbpf_%u_%d_%s_0x%zx", getpid(), 11244 __sync_fetch_and_add(&index, 1), name, offset); 11245 11246 /* sanitize name in the probe name */ 11247 for (i = 0; buf[i]; i++) { 11248 if (!isalnum(buf[i])) 11249 buf[i] = '_'; 11250 } 11251 } 11252 11253 static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, 11254 const char *kfunc_name, size_t offset) 11255 { 11256 return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx", 11257 retprobe ? 'r' : 'p', 11258 retprobe ? "kretprobes" : "kprobes", 11259 probe_name, kfunc_name, offset); 11260 } 11261 11262 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe) 11263 { 11264 return append_to_file(tracefs_kprobe_events(), "-:%s/%s", 11265 retprobe ? "kretprobes" : "kprobes", probe_name); 11266 } 11267 11268 static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe) 11269 { 11270 char file[256]; 11271 11272 snprintf(file, sizeof(file), "%s/events/%s/%s/id", 11273 tracefs_path(), retprobe ? 
"kretprobes" : "kprobes", probe_name); 11274 11275 return parse_uint_from_file(file, "%d\n"); 11276 } 11277 11278 static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, 11279 const char *kfunc_name, size_t offset, int pid) 11280 { 11281 const size_t attr_sz = sizeof(struct perf_event_attr); 11282 struct perf_event_attr attr; 11283 int type, pfd, err; 11284 11285 err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset); 11286 if (err < 0) { 11287 pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n", 11288 kfunc_name, offset, 11289 errstr(err)); 11290 return err; 11291 } 11292 type = determine_kprobe_perf_type_legacy(probe_name, retprobe); 11293 if (type < 0) { 11294 err = type; 11295 pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n", 11296 kfunc_name, offset, 11297 errstr(err)); 11298 goto err_clean_legacy; 11299 } 11300 11301 memset(&attr, 0, attr_sz); 11302 attr.size = attr_sz; 11303 attr.config = type; 11304 attr.type = PERF_TYPE_TRACEPOINT; 11305 11306 pfd = syscall(__NR_perf_event_open, &attr, 11307 pid < 0 ? -1 : pid, /* pid */ 11308 pid == -1 ? 0 : -1, /* cpu */ 11309 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 11310 if (pfd < 0) { 11311 err = -errno; 11312 pr_warn("legacy kprobe perf_event_open() failed: %s\n", 11313 errstr(err)); 11314 goto err_clean_legacy; 11315 } 11316 return pfd; 11317 11318 err_clean_legacy: 11319 /* Clear the newly added legacy kprobe_event */ 11320 remove_kprobe_event_legacy(probe_name, retprobe); 11321 return err; 11322 } 11323 11324 static const char *arch_specific_syscall_pfx(void) 11325 { 11326 #if defined(__x86_64__) 11327 return "x64"; 11328 #elif defined(__i386__) 11329 return "ia32"; 11330 #elif defined(__s390x__) 11331 return "s390x"; 11332 #elif defined(__s390__) 11333 return "s390"; 11334 #elif defined(__arm__) 11335 return "arm"; 11336 #elif defined(__aarch64__) 11337 return "arm64"; 11338 #elif defined(__mips__) 11339 return "mips"; 11340 #elif defined(__riscv) 11341 return "riscv"; 11342 #elif defined(__powerpc__) 11343 return "powerpc"; 11344 #elif defined(__powerpc64__) 11345 return "powerpc64"; 11346 #else 11347 return NULL; 11348 #endif 11349 } 11350 11351 int probe_kern_syscall_wrapper(int token_fd) 11352 { 11353 char syscall_name[64]; 11354 const char *ksys_pfx; 11355 11356 ksys_pfx = arch_specific_syscall_pfx(); 11357 if (!ksys_pfx) 11358 return 0; 11359 11360 snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx); 11361 11362 if (determine_kprobe_perf_type() >= 0) { 11363 int pfd; 11364 11365 pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0); 11366 if (pfd >= 0) 11367 close(pfd); 11368 11369 return pfd >= 0 ? 
1 : 0; 11370 } else { /* legacy mode */ 11371 char probe_name[MAX_EVENT_NAME_LEN]; 11372 11373 gen_probe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); 11374 if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) 11375 return 0; 11376 11377 (void)remove_kprobe_event_legacy(probe_name, false); 11378 return 1; 11379 } 11380 } 11381 11382 struct bpf_link * 11383 bpf_program__attach_kprobe_opts(const struct bpf_program *prog, 11384 const char *func_name, 11385 const struct bpf_kprobe_opts *opts) 11386 { 11387 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); 11388 enum probe_attach_mode attach_mode; 11389 char *legacy_probe = NULL; 11390 struct bpf_link *link; 11391 size_t offset; 11392 bool retprobe, legacy; 11393 int pfd, err; 11394 11395 if (!OPTS_VALID(opts, bpf_kprobe_opts)) 11396 return libbpf_err_ptr(-EINVAL); 11397 11398 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT); 11399 retprobe = OPTS_GET(opts, retprobe, false); 11400 offset = OPTS_GET(opts, offset, 0); 11401 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 11402 11403 legacy = determine_kprobe_perf_type() < 0; 11404 switch (attach_mode) { 11405 case PROBE_ATTACH_MODE_LEGACY: 11406 legacy = true; 11407 pe_opts.force_ioctl_attach = true; 11408 break; 11409 case PROBE_ATTACH_MODE_PERF: 11410 if (legacy) 11411 return libbpf_err_ptr(-ENOTSUP); 11412 pe_opts.force_ioctl_attach = true; 11413 break; 11414 case PROBE_ATTACH_MODE_LINK: 11415 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK)) 11416 return libbpf_err_ptr(-ENOTSUP); 11417 break; 11418 case PROBE_ATTACH_MODE_DEFAULT: 11419 break; 11420 default: 11421 return libbpf_err_ptr(-EINVAL); 11422 } 11423 11424 if (!legacy) { 11425 pfd = perf_event_open_probe(false /* uprobe */, retprobe, 11426 func_name, offset, 11427 -1 /* pid */, 0 /* ref_ctr_off */); 11428 } else { 11429 char probe_name[MAX_EVENT_NAME_LEN]; 11430 11431 gen_probe_legacy_event_name(probe_name, sizeof(probe_name), 11432 func_name, offset); 11433 11434 legacy_probe = strdup(probe_name); 11435 if (!legacy_probe) 11436 return libbpf_err_ptr(-ENOMEM); 11437 11438 pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name, 11439 offset, -1 /* pid */); 11440 } 11441 if (pfd < 0) { 11442 err = -errno; 11443 pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n", 11444 prog->name, retprobe ? "kretprobe" : "kprobe", 11445 func_name, offset, 11446 errstr(err)); 11447 goto err_out; 11448 } 11449 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); 11450 err = libbpf_get_error(link); 11451 if (err) { 11452 close(pfd); 11453 pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n", 11454 prog->name, retprobe ? 
"kretprobe" : "kprobe", 11455 func_name, offset, 11456 errstr(err)); 11457 goto err_clean_legacy; 11458 } 11459 if (legacy) { 11460 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 11461 11462 perf_link->legacy_probe_name = legacy_probe; 11463 perf_link->legacy_is_kprobe = true; 11464 perf_link->legacy_is_retprobe = retprobe; 11465 } 11466 11467 return link; 11468 11469 err_clean_legacy: 11470 if (legacy) 11471 remove_kprobe_event_legacy(legacy_probe, retprobe); 11472 err_out: 11473 free(legacy_probe); 11474 return libbpf_err_ptr(err); 11475 } 11476 11477 struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog, 11478 bool retprobe, 11479 const char *func_name) 11480 { 11481 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts, 11482 .retprobe = retprobe, 11483 ); 11484 11485 return bpf_program__attach_kprobe_opts(prog, func_name, &opts); 11486 } 11487 11488 struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog, 11489 const char *syscall_name, 11490 const struct bpf_ksyscall_opts *opts) 11491 { 11492 LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); 11493 char func_name[128]; 11494 11495 if (!OPTS_VALID(opts, bpf_ksyscall_opts)) 11496 return libbpf_err_ptr(-EINVAL); 11497 11498 if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) { 11499 /* arch_specific_syscall_pfx() should never return NULL here 11500 * because it is guarded by kernel_supports(). However, since 11501 * compiler does not know that we have an explicit conditional 11502 * as well. 11503 */ 11504 snprintf(func_name, sizeof(func_name), "__%s_sys_%s", 11505 arch_specific_syscall_pfx() ? : "", syscall_name); 11506 } else { 11507 snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name); 11508 } 11509 11510 kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false); 11511 kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 11512 11513 return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts); 11514 } 11515 11516 /* Adapted from perf/util/string.c */ 11517 bool glob_match(const char *str, const char *pat) 11518 { 11519 while (*str && *pat && *pat != '*') { 11520 if (*pat == '?') { /* Matches any single character */ 11521 str++; 11522 pat++; 11523 continue; 11524 } 11525 if (*str != *pat) 11526 return false; 11527 str++; 11528 pat++; 11529 } 11530 /* Check wild card */ 11531 if (*pat == '*') { 11532 while (*pat == '*') 11533 pat++; 11534 if (!*pat) /* Tail wild card matches all */ 11535 return true; 11536 while (*str) 11537 if (glob_match(str++, pat)) 11538 return true; 11539 } 11540 return !*str && !*pat; 11541 } 11542 11543 struct kprobe_multi_resolve { 11544 const char *pattern; 11545 unsigned long *addrs; 11546 size_t cap; 11547 size_t cnt; 11548 }; 11549 11550 struct avail_kallsyms_data { 11551 char **syms; 11552 size_t cnt; 11553 struct kprobe_multi_resolve *res; 11554 }; 11555 11556 static int avail_func_cmp(const void *a, const void *b) 11557 { 11558 return strcmp(*(const char **)a, *(const char **)b); 11559 } 11560 11561 static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type, 11562 const char *sym_name, void *ctx) 11563 { 11564 struct avail_kallsyms_data *data = ctx; 11565 struct kprobe_multi_resolve *res = data->res; 11566 int err; 11567 11568 if (!glob_match(sym_name, res->pattern)) 11569 return 0; 11570 11571 if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) { 11572 /* Some versions of kernel strip out .llvm.<hash> suffix from 11573 * function names reported in available_filter_functions, 
but 11574 * don't do so for kallsyms. While this is clearly a kernel 11575 * bug (fixed by [0]) we try to accommodate that in libbpf to 11576 * make multi-kprobe usability a bit better: if no match is 11577 * found, we will strip .llvm. suffix and try one more time. 11578 * 11579 * [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG") 11580 */ 11581 char sym_trim[256], *psym_trim = sym_trim, *sym_sfx; 11582 11583 if (!(sym_sfx = strstr(sym_name, ".llvm."))) 11584 return 0; 11585 11586 /* psym_trim vs sym_trim dance is done to avoid pointer vs array 11587 * coercion differences and get proper `const char **` pointer 11588 * which avail_func_cmp() expects 11589 */ 11590 snprintf(sym_trim, sizeof(sym_trim), "%.*s", (int)(sym_sfx - sym_name), sym_name); 11591 if (!bsearch(&psym_trim, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) 11592 return 0; 11593 } 11594 11595 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1); 11596 if (err) 11597 return err; 11598 11599 res->addrs[res->cnt++] = (unsigned long)sym_addr; 11600 return 0; 11601 } 11602 11603 static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res) 11604 { 11605 const char *available_functions_file = tracefs_available_filter_functions(); 11606 struct avail_kallsyms_data data; 11607 char sym_name[500]; 11608 FILE *f; 11609 int err = 0, ret, i; 11610 char **syms = NULL; 11611 size_t cap = 0, cnt = 0; 11612 11613 f = fopen(available_functions_file, "re"); 11614 if (!f) { 11615 err = -errno; 11616 pr_warn("failed to open %s: %s\n", available_functions_file, errstr(err)); 11617 return err; 11618 } 11619 11620 while (true) { 11621 char *name; 11622 11623 ret = fscanf(f, "%499s%*[^\n]\n", sym_name); 11624 if (ret == EOF && feof(f)) 11625 break; 11626 11627 if (ret != 1) { 11628 pr_warn("failed to parse available_filter_functions entry: %d\n", ret); 11629 err = -EINVAL; 11630 goto cleanup; 11631 } 11632 11633 if (!glob_match(sym_name, res->pattern)) 11634 continue; 11635 11636 err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1); 11637 if (err) 11638 goto cleanup; 11639 11640 name = strdup(sym_name); 11641 if (!name) { 11642 err = -errno; 11643 goto cleanup; 11644 } 11645 11646 syms[cnt++] = name; 11647 } 11648 11649 /* no entries found, bail out */ 11650 if (cnt == 0) { 11651 err = -ENOENT; 11652 goto cleanup; 11653 } 11654 11655 /* sort available functions */ 11656 qsort(syms, cnt, sizeof(*syms), avail_func_cmp); 11657 11658 data.syms = syms; 11659 data.res = res; 11660 data.cnt = cnt; 11661 libbpf_kallsyms_parse(avail_kallsyms_cb, &data); 11662 11663 if (res->cnt == 0) 11664 err = -ENOENT; 11665 11666 cleanup: 11667 for (i = 0; i < cnt; i++) 11668 free((char *)syms[i]); 11669 free(syms); 11670 11671 fclose(f); 11672 return err; 11673 } 11674 11675 static bool has_available_filter_functions_addrs(void) 11676 { 11677 return access(tracefs_available_filter_functions_addrs(), R_OK) != -1; 11678 } 11679 11680 static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res) 11681 { 11682 const char *available_path = tracefs_available_filter_functions_addrs(); 11683 char sym_name[500]; 11684 FILE *f; 11685 int ret, err = 0; 11686 unsigned long long sym_addr; 11687 11688 f = fopen(available_path, "re"); 11689 if (!f) { 11690 err = -errno; 11691 pr_warn("failed to open %s: %s\n", available_path, errstr(err)); 11692 return err; 11693 } 11694 11695 while (true) { 11696 ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name); 11697 
if (ret == EOF && feof(f)) 11698 break; 11699 11700 if (ret != 2) { 11701 pr_warn("failed to parse available_filter_functions_addrs entry: %d\n", 11702 ret); 11703 err = -EINVAL; 11704 goto cleanup; 11705 } 11706 11707 if (!glob_match(sym_name, res->pattern)) 11708 continue; 11709 11710 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, 11711 sizeof(*res->addrs), res->cnt + 1); 11712 if (err) 11713 goto cleanup; 11714 11715 res->addrs[res->cnt++] = (unsigned long)sym_addr; 11716 } 11717 11718 if (res->cnt == 0) 11719 err = -ENOENT; 11720 11721 cleanup: 11722 fclose(f); 11723 return err; 11724 } 11725 11726 struct bpf_link * 11727 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, 11728 const char *pattern, 11729 const struct bpf_kprobe_multi_opts *opts) 11730 { 11731 LIBBPF_OPTS(bpf_link_create_opts, lopts); 11732 struct kprobe_multi_resolve res = { 11733 .pattern = pattern, 11734 }; 11735 enum bpf_attach_type attach_type; 11736 struct bpf_link *link = NULL; 11737 const unsigned long *addrs; 11738 int err, link_fd, prog_fd; 11739 bool retprobe, session, unique_match; 11740 const __u64 *cookies; 11741 const char **syms; 11742 size_t cnt; 11743 11744 if (!OPTS_VALID(opts, bpf_kprobe_multi_opts)) 11745 return libbpf_err_ptr(-EINVAL); 11746 11747 prog_fd = bpf_program__fd(prog); 11748 if (prog_fd < 0) { 11749 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", 11750 prog->name); 11751 return libbpf_err_ptr(-EINVAL); 11752 } 11753 11754 syms = OPTS_GET(opts, syms, false); 11755 addrs = OPTS_GET(opts, addrs, false); 11756 cnt = OPTS_GET(opts, cnt, false); 11757 cookies = OPTS_GET(opts, cookies, false); 11758 unique_match = OPTS_GET(opts, unique_match, false); 11759 11760 if (!pattern && !addrs && !syms) 11761 return libbpf_err_ptr(-EINVAL); 11762 if (pattern && (addrs || syms || cookies || cnt)) 11763 return libbpf_err_ptr(-EINVAL); 11764 if (!pattern && !cnt) 11765 return libbpf_err_ptr(-EINVAL); 11766 if (!pattern && unique_match) 11767 return libbpf_err_ptr(-EINVAL); 11768 if (addrs && syms) 11769 return libbpf_err_ptr(-EINVAL); 11770 11771 if (pattern) { 11772 if (has_available_filter_functions_addrs()) 11773 err = libbpf_available_kprobes_parse(&res); 11774 else 11775 err = libbpf_available_kallsyms_parse(&res); 11776 if (err) 11777 goto error; 11778 11779 if (unique_match && res.cnt != 1) { 11780 pr_warn("prog '%s': failed to find a unique match for '%s' (%zu matches)\n", 11781 prog->name, pattern, res.cnt); 11782 err = -EINVAL; 11783 goto error; 11784 } 11785 11786 addrs = res.addrs; 11787 cnt = res.cnt; 11788 } 11789 11790 retprobe = OPTS_GET(opts, retprobe, false); 11791 session = OPTS_GET(opts, session, false); 11792 11793 if (retprobe && session) 11794 return libbpf_err_ptr(-EINVAL); 11795 11796 attach_type = session ? BPF_TRACE_KPROBE_SESSION : BPF_TRACE_KPROBE_MULTI; 11797 11798 lopts.kprobe_multi.syms = syms; 11799 lopts.kprobe_multi.addrs = addrs; 11800 lopts.kprobe_multi.cookies = cookies; 11801 lopts.kprobe_multi.cnt = cnt; 11802 lopts.kprobe_multi.flags = retprobe ? 
BPF_F_KPROBE_MULTI_RETURN : 0; 11803 11804 link = calloc(1, sizeof(*link)); 11805 if (!link) { 11806 err = -ENOMEM; 11807 goto error; 11808 } 11809 link->detach = &bpf_link__detach_fd; 11810 11811 link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts); 11812 if (link_fd < 0) { 11813 err = -errno; 11814 pr_warn("prog '%s': failed to attach: %s\n", 11815 prog->name, errstr(err)); 11816 goto error; 11817 } 11818 link->fd = link_fd; 11819 free(res.addrs); 11820 return link; 11821 11822 error: 11823 free(link); 11824 free(res.addrs); 11825 return libbpf_err_ptr(err); 11826 } 11827 11828 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11829 { 11830 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); 11831 unsigned long offset = 0; 11832 const char *func_name; 11833 char *func; 11834 int n; 11835 11836 *link = NULL; 11837 11838 /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */ 11839 if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0) 11840 return 0; 11841 11842 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/"); 11843 if (opts.retprobe) 11844 func_name = prog->sec_name + sizeof("kretprobe/") - 1; 11845 else 11846 func_name = prog->sec_name + sizeof("kprobe/") - 1; 11847 11848 n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); 11849 if (n < 1) { 11850 pr_warn("kprobe name is invalid: %s\n", func_name); 11851 return -EINVAL; 11852 } 11853 if (opts.retprobe && offset != 0) { 11854 free(func); 11855 pr_warn("kretprobes do not support offset specification\n"); 11856 return -EINVAL; 11857 } 11858 11859 opts.offset = offset; 11860 *link = bpf_program__attach_kprobe_opts(prog, func, &opts); 11861 free(func); 11862 return libbpf_get_error(*link); 11863 } 11864 11865 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11866 { 11867 LIBBPF_OPTS(bpf_ksyscall_opts, opts); 11868 const char *syscall_name; 11869 11870 *link = NULL; 11871 11872 /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */ 11873 if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0) 11874 return 0; 11875 11876 opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/"); 11877 if (opts.retprobe) 11878 syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1; 11879 else 11880 syscall_name = prog->sec_name + sizeof("ksyscall/") - 1; 11881 11882 *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts); 11883 return *link ? 
0 : -errno; 11884 } 11885 11886 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11887 { 11888 LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); 11889 const char *spec; 11890 char *pattern; 11891 int n; 11892 11893 *link = NULL; 11894 11895 /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */ 11896 if (strcmp(prog->sec_name, "kprobe.multi") == 0 || 11897 strcmp(prog->sec_name, "kretprobe.multi") == 0) 11898 return 0; 11899 11900 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/"); 11901 if (opts.retprobe) 11902 spec = prog->sec_name + sizeof("kretprobe.multi/") - 1; 11903 else 11904 spec = prog->sec_name + sizeof("kprobe.multi/") - 1; 11905 11906 n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern); 11907 if (n < 1) { 11908 pr_warn("kprobe multi pattern is invalid: %s\n", spec); 11909 return -EINVAL; 11910 } 11911 11912 *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts); 11913 free(pattern); 11914 return libbpf_get_error(*link); 11915 } 11916 11917 static int attach_kprobe_session(const struct bpf_program *prog, long cookie, 11918 struct bpf_link **link) 11919 { 11920 LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, .session = true); 11921 const char *spec; 11922 char *pattern; 11923 int n; 11924 11925 *link = NULL; 11926 11927 /* no auto-attach for SEC("kprobe.session") */ 11928 if (strcmp(prog->sec_name, "kprobe.session") == 0) 11929 return 0; 11930 11931 spec = prog->sec_name + sizeof("kprobe.session/") - 1; 11932 n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern); 11933 if (n < 1) { 11934 pr_warn("kprobe session pattern is invalid: %s\n", spec); 11935 return -EINVAL; 11936 } 11937 11938 *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts); 11939 free(pattern); 11940 return *link ? 0 : -errno; 11941 } 11942 11943 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) 11944 { 11945 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; 11946 LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); 11947 int n, ret = -EINVAL; 11948 11949 *link = NULL; 11950 11951 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", 11952 &probe_type, &binary_path, &func_name); 11953 switch (n) { 11954 case 1: 11955 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ 11956 ret = 0; 11957 break; 11958 case 3: 11959 opts.session = str_has_pfx(probe_type, "uprobe.session"); 11960 opts.retprobe = str_has_pfx(probe_type, "uretprobe.multi"); 11961 11962 *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts); 11963 ret = libbpf_get_error(*link); 11964 break; 11965 default: 11966 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, 11967 prog->sec_name); 11968 break; 11969 } 11970 free(probe_type); 11971 free(binary_path); 11972 free(func_name); 11973 return ret; 11974 } 11975 11976 static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, 11977 const char *binary_path, size_t offset) 11978 { 11979 return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx", 11980 retprobe ? 'r' : 'p', 11981 retprobe ? "uretprobes" : "uprobes", 11982 probe_name, binary_path, offset); 11983 } 11984 11985 static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe) 11986 { 11987 return append_to_file(tracefs_uprobe_events(), "-:%s/%s", 11988 retprobe ? 
"uretprobes" : "uprobes", probe_name); 11989 } 11990 11991 static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe) 11992 { 11993 char file[512]; 11994 11995 snprintf(file, sizeof(file), "%s/events/%s/%s/id", 11996 tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name); 11997 11998 return parse_uint_from_file(file, "%d\n"); 11999 } 12000 12001 static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, 12002 const char *binary_path, size_t offset, int pid) 12003 { 12004 const size_t attr_sz = sizeof(struct perf_event_attr); 12005 struct perf_event_attr attr; 12006 int type, pfd, err; 12007 12008 err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset); 12009 if (err < 0) { 12010 pr_warn("failed to add legacy uprobe event for %s:0x%zx: %s\n", 12011 binary_path, (size_t)offset, errstr(err)); 12012 return err; 12013 } 12014 type = determine_uprobe_perf_type_legacy(probe_name, retprobe); 12015 if (type < 0) { 12016 err = type; 12017 pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %s\n", 12018 binary_path, offset, errstr(err)); 12019 goto err_clean_legacy; 12020 } 12021 12022 memset(&attr, 0, attr_sz); 12023 attr.size = attr_sz; 12024 attr.config = type; 12025 attr.type = PERF_TYPE_TRACEPOINT; 12026 12027 pfd = syscall(__NR_perf_event_open, &attr, 12028 pid < 0 ? -1 : pid, /* pid */ 12029 pid == -1 ? 0 : -1, /* cpu */ 12030 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); 12031 if (pfd < 0) { 12032 err = -errno; 12033 pr_warn("legacy uprobe perf_event_open() failed: %s\n", errstr(err)); 12034 goto err_clean_legacy; 12035 } 12036 return pfd; 12037 12038 err_clean_legacy: 12039 /* Clear the newly added legacy uprobe_event */ 12040 remove_uprobe_event_legacy(probe_name, retprobe); 12041 return err; 12042 } 12043 12044 /* Find offset of function name in archive specified by path. Currently 12045 * supported are .zip files that do not compress their contents, as used on 12046 * Android in the form of APKs, for example. "file_name" is the name of the ELF 12047 * file inside the archive. "func_name" matches symbol name or name@@LIB for 12048 * library functions. 
12049 * 12050 * An overview of the APK format specifically provided here: 12051 * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents 12052 */ 12053 static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name, 12054 const char *func_name) 12055 { 12056 struct zip_archive *archive; 12057 struct zip_entry entry; 12058 long ret; 12059 Elf *elf; 12060 12061 archive = zip_archive_open(archive_path); 12062 if (IS_ERR(archive)) { 12063 ret = PTR_ERR(archive); 12064 pr_warn("zip: failed to open %s: %ld\n", archive_path, ret); 12065 return ret; 12066 } 12067 12068 ret = zip_archive_find_entry(archive, file_name, &entry); 12069 if (ret) { 12070 pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name, 12071 archive_path, ret); 12072 goto out; 12073 } 12074 pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path, 12075 (unsigned long)entry.data_offset); 12076 12077 if (entry.compression) { 12078 pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name, 12079 archive_path); 12080 ret = -LIBBPF_ERRNO__FORMAT; 12081 goto out; 12082 } 12083 12084 elf = elf_memory((void *)entry.data, entry.data_length); 12085 if (!elf) { 12086 pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path, 12087 elf_errmsg(-1)); 12088 ret = -LIBBPF_ERRNO__LIBELF; 12089 goto out; 12090 } 12091 12092 ret = elf_find_func_offset(elf, file_name, func_name); 12093 if (ret > 0) { 12094 pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n", 12095 func_name, file_name, archive_path, entry.data_offset, ret, 12096 ret + entry.data_offset); 12097 ret += entry.data_offset; 12098 } 12099 elf_end(elf); 12100 12101 out: 12102 zip_archive_close(archive); 12103 return ret; 12104 } 12105 12106 static const char *arch_specific_lib_paths(void) 12107 { 12108 /* 12109 * Based on https://packages.debian.org/sid/libc6. 12110 * 12111 * Assume that the traced program is built for the same architecture 12112 * as libbpf, which should cover the vast majority of cases. 12113 */ 12114 #if defined(__x86_64__) 12115 return "/lib/x86_64-linux-gnu"; 12116 #elif defined(__i386__) 12117 return "/lib/i386-linux-gnu"; 12118 #elif defined(__s390x__) 12119 return "/lib/s390x-linux-gnu"; 12120 #elif defined(__s390__) 12121 return "/lib/s390-linux-gnu"; 12122 #elif defined(__arm__) && defined(__SOFTFP__) 12123 return "/lib/arm-linux-gnueabi"; 12124 #elif defined(__arm__) && !defined(__SOFTFP__) 12125 return "/lib/arm-linux-gnueabihf"; 12126 #elif defined(__aarch64__) 12127 return "/lib/aarch64-linux-gnu"; 12128 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64 12129 return "/lib/mips64el-linux-gnuabi64"; 12130 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32 12131 return "/lib/mipsel-linux-gnu"; 12132 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 12133 return "/lib/powerpc64le-linux-gnu"; 12134 #elif defined(__sparc__) && defined(__arch64__) 12135 return "/lib/sparc64-linux-gnu"; 12136 #elif defined(__riscv) && __riscv_xlen == 64 12137 return "/lib/riscv64-linux-gnu"; 12138 #else 12139 return NULL; 12140 #endif 12141 } 12142 12143 /* Get full path to program/shared library. 
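 *
 * Shared libraries (names ending in ".so" or containing ".so.") are looked
 * up in LD_LIBRARY_PATH, then in /usr/lib64:/usr/lib, then in the
 * architecture-specific multiarch directory, and must be readable.
 * Executables are looked up in PATH, then in /usr/bin:/usr/sbin, and must
 * be both readable and executable. The first match is returned.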
*/ 12144 static int resolve_full_path(const char *file, char *result, size_t result_sz) 12145 { 12146 const char *search_paths[3] = {}; 12147 int i, perm; 12148 12149 if (str_has_sfx(file, ".so") || strstr(file, ".so.")) { 12150 search_paths[0] = getenv("LD_LIBRARY_PATH"); 12151 search_paths[1] = "/usr/lib64:/usr/lib"; 12152 search_paths[2] = arch_specific_lib_paths(); 12153 perm = R_OK; 12154 } else { 12155 search_paths[0] = getenv("PATH"); 12156 search_paths[1] = "/usr/bin:/usr/sbin"; 12157 perm = R_OK | X_OK; 12158 } 12159 12160 for (i = 0; i < ARRAY_SIZE(search_paths); i++) { 12161 const char *s; 12162 12163 if (!search_paths[i]) 12164 continue; 12165 for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) { 12166 char *next_path; 12167 int seg_len; 12168 12169 if (s[0] == ':') 12170 s++; 12171 next_path = strchr(s, ':'); 12172 seg_len = next_path ? next_path - s : strlen(s); 12173 if (!seg_len) 12174 continue; 12175 snprintf(result, result_sz, "%.*s/%s", seg_len, s, file); 12176 /* ensure it has required permissions */ 12177 if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0) 12178 continue; 12179 pr_debug("resolved '%s' to '%s'\n", file, result); 12180 return 0; 12181 } 12182 } 12183 return -ENOENT; 12184 } 12185 12186 struct bpf_link * 12187 bpf_program__attach_uprobe_multi(const struct bpf_program *prog, 12188 pid_t pid, 12189 const char *path, 12190 const char *func_pattern, 12191 const struct bpf_uprobe_multi_opts *opts) 12192 { 12193 const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL; 12194 LIBBPF_OPTS(bpf_link_create_opts, lopts); 12195 unsigned long *resolved_offsets = NULL; 12196 enum bpf_attach_type attach_type; 12197 int err = 0, link_fd, prog_fd; 12198 struct bpf_link *link = NULL; 12199 char full_path[PATH_MAX]; 12200 bool retprobe, session; 12201 const __u64 *cookies; 12202 const char **syms; 12203 size_t cnt; 12204 12205 if (!OPTS_VALID(opts, bpf_uprobe_multi_opts)) 12206 return libbpf_err_ptr(-EINVAL); 12207 12208 prog_fd = bpf_program__fd(prog); 12209 if (prog_fd < 0) { 12210 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", 12211 prog->name); 12212 return libbpf_err_ptr(-EINVAL); 12213 } 12214 12215 syms = OPTS_GET(opts, syms, NULL); 12216 offsets = OPTS_GET(opts, offsets, NULL); 12217 ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL); 12218 cookies = OPTS_GET(opts, cookies, NULL); 12219 cnt = OPTS_GET(opts, cnt, 0); 12220 retprobe = OPTS_GET(opts, retprobe, false); 12221 session = OPTS_GET(opts, session, false); 12222 12223 /* 12224 * User can specify 2 mutually exclusive set of inputs: 12225 * 12226 * 1) use only path/func_pattern/pid arguments 12227 * 12228 * 2) use path/pid with allowed combinations of: 12229 * syms/offsets/ref_ctr_offsets/cookies/cnt 12230 * 12231 * - syms and offsets are mutually exclusive 12232 * - ref_ctr_offsets and cookies are optional 12233 * 12234 * Any other usage results in error. 
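 *
 * A minimal illustrative use of variant 1), attaching to all glibc
 * functions matching a glob pattern in every process (the library name
 * and pattern below are examples only):
 *
 *   LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
 *   struct bpf_link *link;
 *
 *   link = bpf_program__attach_uprobe_multi(prog, -1, "libc.so.6",
 *                                           "pthread_mutex_*", &opts);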
12235 */ 12236 12237 if (!path) 12238 return libbpf_err_ptr(-EINVAL); 12239 if (!func_pattern && cnt == 0) 12240 return libbpf_err_ptr(-EINVAL); 12241 12242 if (func_pattern) { 12243 if (syms || offsets || ref_ctr_offsets || cookies || cnt) 12244 return libbpf_err_ptr(-EINVAL); 12245 } else { 12246 if (!!syms == !!offsets) 12247 return libbpf_err_ptr(-EINVAL); 12248 } 12249 12250 if (retprobe && session) 12251 return libbpf_err_ptr(-EINVAL); 12252 12253 if (func_pattern) { 12254 if (!strchr(path, '/')) { 12255 err = resolve_full_path(path, full_path, sizeof(full_path)); 12256 if (err) { 12257 pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", 12258 prog->name, path, errstr(err)); 12259 return libbpf_err_ptr(err); 12260 } 12261 path = full_path; 12262 } 12263 12264 err = elf_resolve_pattern_offsets(path, func_pattern, 12265 &resolved_offsets, &cnt); 12266 if (err < 0) 12267 return libbpf_err_ptr(err); 12268 offsets = resolved_offsets; 12269 } else if (syms) { 12270 err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC); 12271 if (err < 0) 12272 return libbpf_err_ptr(err); 12273 offsets = resolved_offsets; 12274 } 12275 12276 attach_type = session ? BPF_TRACE_UPROBE_SESSION : BPF_TRACE_UPROBE_MULTI; 12277 12278 lopts.uprobe_multi.path = path; 12279 lopts.uprobe_multi.offsets = offsets; 12280 lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets; 12281 lopts.uprobe_multi.cookies = cookies; 12282 lopts.uprobe_multi.cnt = cnt; 12283 lopts.uprobe_multi.flags = retprobe ? BPF_F_UPROBE_MULTI_RETURN : 0; 12284 12285 if (pid == 0) 12286 pid = getpid(); 12287 if (pid > 0) 12288 lopts.uprobe_multi.pid = pid; 12289 12290 link = calloc(1, sizeof(*link)); 12291 if (!link) { 12292 err = -ENOMEM; 12293 goto error; 12294 } 12295 link->detach = &bpf_link__detach_fd; 12296 12297 link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts); 12298 if (link_fd < 0) { 12299 err = -errno; 12300 pr_warn("prog '%s': failed to attach multi-uprobe: %s\n", 12301 prog->name, errstr(err)); 12302 goto error; 12303 } 12304 link->fd = link_fd; 12305 free(resolved_offsets); 12306 return link; 12307 12308 error: 12309 free(resolved_offsets); 12310 free(link); 12311 return libbpf_err_ptr(err); 12312 } 12313 12314 LIBBPF_API struct bpf_link * 12315 bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, 12316 const char *binary_path, size_t func_offset, 12317 const struct bpf_uprobe_opts *opts) 12318 { 12319 const char *archive_path = NULL, *archive_sep = NULL; 12320 char *legacy_probe = NULL; 12321 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); 12322 enum probe_attach_mode attach_mode; 12323 char full_path[PATH_MAX]; 12324 struct bpf_link *link; 12325 size_t ref_ctr_off; 12326 int pfd, err; 12327 bool retprobe, legacy; 12328 const char *func_name; 12329 12330 if (!OPTS_VALID(opts, bpf_uprobe_opts)) 12331 return libbpf_err_ptr(-EINVAL); 12332 12333 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT); 12334 retprobe = OPTS_GET(opts, retprobe, false); 12335 ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); 12336 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 12337 12338 if (!binary_path) 12339 return libbpf_err_ptr(-EINVAL); 12340 12341 /* Check if "binary_path" refers to an archive. 
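 * Archives are specified as "<archive path>!/<member path>"; for instance
 * (an illustrative path only) "/data/app/test.apk!/lib/arm64-v8a/libtest.so"
 * names the libtest.so member inside the APK. The text before "!/" becomes
 * the archive path and the remainder is resolved within the archive.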
*/ 12342 archive_sep = strstr(binary_path, "!/"); 12343 if (archive_sep) { 12344 full_path[0] = '\0'; 12345 libbpf_strlcpy(full_path, binary_path, 12346 min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1))); 12347 archive_path = full_path; 12348 binary_path = archive_sep + 2; 12349 } else if (!strchr(binary_path, '/')) { 12350 err = resolve_full_path(binary_path, full_path, sizeof(full_path)); 12351 if (err) { 12352 pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", 12353 prog->name, binary_path, errstr(err)); 12354 return libbpf_err_ptr(err); 12355 } 12356 binary_path = full_path; 12357 } 12358 func_name = OPTS_GET(opts, func_name, NULL); 12359 if (func_name) { 12360 long sym_off; 12361 12362 if (archive_path) { 12363 sym_off = elf_find_func_offset_from_archive(archive_path, binary_path, 12364 func_name); 12365 binary_path = archive_path; 12366 } else { 12367 sym_off = elf_find_func_offset_from_file(binary_path, func_name); 12368 } 12369 if (sym_off < 0) 12370 return libbpf_err_ptr(sym_off); 12371 func_offset += sym_off; 12372 } 12373 12374 legacy = determine_uprobe_perf_type() < 0; 12375 switch (attach_mode) { 12376 case PROBE_ATTACH_MODE_LEGACY: 12377 legacy = true; 12378 pe_opts.force_ioctl_attach = true; 12379 break; 12380 case PROBE_ATTACH_MODE_PERF: 12381 if (legacy) 12382 return libbpf_err_ptr(-ENOTSUP); 12383 pe_opts.force_ioctl_attach = true; 12384 break; 12385 case PROBE_ATTACH_MODE_LINK: 12386 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK)) 12387 return libbpf_err_ptr(-ENOTSUP); 12388 break; 12389 case PROBE_ATTACH_MODE_DEFAULT: 12390 break; 12391 default: 12392 return libbpf_err_ptr(-EINVAL); 12393 } 12394 12395 if (!legacy) { 12396 pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, 12397 func_offset, pid, ref_ctr_off); 12398 } else { 12399 char probe_name[MAX_EVENT_NAME_LEN]; 12400 12401 if (ref_ctr_off) 12402 return libbpf_err_ptr(-EINVAL); 12403 12404 gen_probe_legacy_event_name(probe_name, sizeof(probe_name), 12405 strrchr(binary_path, '/') ? : binary_path, 12406 func_offset); 12407 12408 legacy_probe = strdup(probe_name); 12409 if (!legacy_probe) 12410 return libbpf_err_ptr(-ENOMEM); 12411 12412 pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe, 12413 binary_path, func_offset, pid); 12414 } 12415 if (pfd < 0) { 12416 err = -errno; 12417 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", 12418 prog->name, retprobe ? "uretprobe" : "uprobe", 12419 binary_path, func_offset, 12420 errstr(err)); 12421 goto err_out; 12422 } 12423 12424 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); 12425 err = libbpf_get_error(link); 12426 if (err) { 12427 close(pfd); 12428 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n", 12429 prog->name, retprobe ? 
"uretprobe" : "uprobe", 12430 binary_path, func_offset, 12431 errstr(err)); 12432 goto err_clean_legacy; 12433 } 12434 if (legacy) { 12435 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link); 12436 12437 perf_link->legacy_probe_name = legacy_probe; 12438 perf_link->legacy_is_kprobe = false; 12439 perf_link->legacy_is_retprobe = retprobe; 12440 } 12441 return link; 12442 12443 err_clean_legacy: 12444 if (legacy) 12445 remove_uprobe_event_legacy(legacy_probe, retprobe); 12446 err_out: 12447 free(legacy_probe); 12448 return libbpf_err_ptr(err); 12449 } 12450 12451 /* Format of u[ret]probe section definition supporting auto-attach: 12452 * u[ret]probe/binary:function[+offset] 12453 * 12454 * binary can be an absolute/relative path or a filename; the latter is resolved to a 12455 * full binary path via bpf_program__attach_uprobe_opts. 12456 * 12457 * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be 12458 * specified (and auto-attach is not possible) or the above format is specified for 12459 * auto-attach. 12460 */ 12461 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12462 { 12463 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); 12464 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off; 12465 int n, c, ret = -EINVAL; 12466 long offset = 0; 12467 12468 *link = NULL; 12469 12470 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", 12471 &probe_type, &binary_path, &func_name); 12472 switch (n) { 12473 case 1: 12474 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ 12475 ret = 0; 12476 break; 12477 case 2: 12478 pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n", 12479 prog->name, prog->sec_name); 12480 break; 12481 case 3: 12482 /* check if user specifies `+offset`, if yes, this should be 12483 * the last part of the string, make sure sscanf read to EOL 12484 */ 12485 func_off = strrchr(func_name, '+'); 12486 if (func_off) { 12487 n = sscanf(func_off, "+%li%n", &offset, &c); 12488 if (n == 1 && *(func_off + c) == '\0') 12489 func_off[0] = '\0'; 12490 else 12491 offset = 0; 12492 } 12493 opts.retprobe = strcmp(probe_type, "uretprobe") == 0 || 12494 strcmp(probe_type, "uretprobe.s") == 0; 12495 if (opts.retprobe && offset != 0) { 12496 pr_warn("prog '%s': uretprobes do not support offset specification\n", 12497 prog->name); 12498 break; 12499 } 12500 opts.func_name = func_name; 12501 *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts); 12502 ret = libbpf_get_error(*link); 12503 break; 12504 default: 12505 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, 12506 prog->sec_name); 12507 break; 12508 } 12509 free(probe_type); 12510 free(binary_path); 12511 free(func_name); 12512 12513 return ret; 12514 } 12515 12516 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, 12517 bool retprobe, pid_t pid, 12518 const char *binary_path, 12519 size_t func_offset) 12520 { 12521 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe); 12522 12523 return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts); 12524 } 12525 12526 struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, 12527 pid_t pid, const char *binary_path, 12528 const char *usdt_provider, const char *usdt_name, 12529 const struct bpf_usdt_opts *opts) 12530 { 12531 char resolved_path[512]; 12532 struct bpf_object *obj = prog->obj; 12533 struct 
bpf_link *link; 12534 __u64 usdt_cookie; 12535 int err; 12536 12537 if (!OPTS_VALID(opts, bpf_uprobe_opts)) 12538 return libbpf_err_ptr(-EINVAL); 12539 12540 if (bpf_program__fd(prog) < 0) { 12541 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", 12542 prog->name); 12543 return libbpf_err_ptr(-EINVAL); 12544 } 12545 12546 if (!binary_path) 12547 return libbpf_err_ptr(-EINVAL); 12548 12549 if (!strchr(binary_path, '/')) { 12550 err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path)); 12551 if (err) { 12552 pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", 12553 prog->name, binary_path, errstr(err)); 12554 return libbpf_err_ptr(err); 12555 } 12556 binary_path = resolved_path; 12557 } 12558 12559 /* USDT manager is instantiated lazily on first USDT attach. It will 12560 * be destroyed together with BPF object in bpf_object__close(). 12561 */ 12562 if (IS_ERR(obj->usdt_man)) 12563 return libbpf_ptr(obj->usdt_man); 12564 if (!obj->usdt_man) { 12565 obj->usdt_man = usdt_manager_new(obj); 12566 if (IS_ERR(obj->usdt_man)) 12567 return libbpf_ptr(obj->usdt_man); 12568 } 12569 12570 usdt_cookie = OPTS_GET(opts, usdt_cookie, 0); 12571 link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path, 12572 usdt_provider, usdt_name, usdt_cookie); 12573 err = libbpf_get_error(link); 12574 if (err) 12575 return libbpf_err_ptr(err); 12576 return link; 12577 } 12578 12579 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12580 { 12581 char *path = NULL, *provider = NULL, *name = NULL; 12582 const char *sec_name; 12583 int n, err; 12584 12585 sec_name = bpf_program__section_name(prog); 12586 if (strcmp(sec_name, "usdt") == 0) { 12587 /* no auto-attach for just SEC("usdt") */ 12588 *link = NULL; 12589 return 0; 12590 } 12591 12592 n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name); 12593 if (n != 3) { 12594 pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n", 12595 sec_name); 12596 err = -EINVAL; 12597 } else { 12598 *link = bpf_program__attach_usdt(prog, -1 /* any process */, path, 12599 provider, name, NULL); 12600 err = libbpf_get_error(*link); 12601 } 12602 free(path); 12603 free(provider); 12604 free(name); 12605 return err; 12606 } 12607 12608 static int determine_tracepoint_id(const char *tp_category, 12609 const char *tp_name) 12610 { 12611 char file[PATH_MAX]; 12612 int ret; 12613 12614 ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id", 12615 tracefs_path(), tp_category, tp_name); 12616 if (ret < 0) 12617 return -errno; 12618 if (ret >= sizeof(file)) { 12619 pr_debug("tracepoint %s/%s path is too long\n", 12620 tp_category, tp_name); 12621 return -E2BIG; 12622 } 12623 return parse_uint_from_file(file, "%d\n"); 12624 } 12625 12626 static int perf_event_open_tracepoint(const char *tp_category, 12627 const char *tp_name) 12628 { 12629 const size_t attr_sz = sizeof(struct perf_event_attr); 12630 struct perf_event_attr attr; 12631 int tp_id, pfd, err; 12632 12633 tp_id = determine_tracepoint_id(tp_category, tp_name); 12634 if (tp_id < 0) { 12635 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n", 12636 tp_category, tp_name, 12637 errstr(tp_id)); 12638 return tp_id; 12639 } 12640 12641 memset(&attr, 0, attr_sz); 12642 attr.type = PERF_TYPE_TRACEPOINT; 12643 attr.size = attr_sz; 12644 attr.config = tp_id; 12645 12646 pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */, 12647 -1 /* group_fd */, 
PERF_FLAG_FD_CLOEXEC); 12648 if (pfd < 0) { 12649 err = -errno; 12650 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n", 12651 tp_category, tp_name, 12652 errstr(err)); 12653 return err; 12654 } 12655 return pfd; 12656 } 12657 12658 struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, 12659 const char *tp_category, 12660 const char *tp_name, 12661 const struct bpf_tracepoint_opts *opts) 12662 { 12663 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); 12664 struct bpf_link *link; 12665 int pfd, err; 12666 12667 if (!OPTS_VALID(opts, bpf_tracepoint_opts)) 12668 return libbpf_err_ptr(-EINVAL); 12669 12670 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); 12671 12672 pfd = perf_event_open_tracepoint(tp_category, tp_name); 12673 if (pfd < 0) { 12674 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n", 12675 prog->name, tp_category, tp_name, 12676 errstr(pfd)); 12677 return libbpf_err_ptr(pfd); 12678 } 12679 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); 12680 err = libbpf_get_error(link); 12681 if (err) { 12682 close(pfd); 12683 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n", 12684 prog->name, tp_category, tp_name, 12685 errstr(err)); 12686 return libbpf_err_ptr(err); 12687 } 12688 return link; 12689 } 12690 12691 struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog, 12692 const char *tp_category, 12693 const char *tp_name) 12694 { 12695 return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL); 12696 } 12697 12698 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12699 { 12700 char *sec_name, *tp_cat, *tp_name; 12701 12702 *link = NULL; 12703 12704 /* no auto-attach for SEC("tp") or SEC("tracepoint") */ 12705 if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0) 12706 return 0; 12707 12708 sec_name = strdup(prog->sec_name); 12709 if (!sec_name) 12710 return -ENOMEM; 12711 12712 /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */ 12713 if (str_has_pfx(prog->sec_name, "tp/")) 12714 tp_cat = sec_name + sizeof("tp/") - 1; 12715 else 12716 tp_cat = sec_name + sizeof("tracepoint/") - 1; 12717 tp_name = strchr(tp_cat, '/'); 12718 if (!tp_name) { 12719 free(sec_name); 12720 return -EINVAL; 12721 } 12722 *tp_name = '\0'; 12723 tp_name++; 12724 12725 *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name); 12726 free(sec_name); 12727 return libbpf_get_error(*link); 12728 } 12729 12730 struct bpf_link * 12731 bpf_program__attach_raw_tracepoint_opts(const struct bpf_program *prog, 12732 const char *tp_name, 12733 struct bpf_raw_tracepoint_opts *opts) 12734 { 12735 LIBBPF_OPTS(bpf_raw_tp_opts, raw_opts); 12736 struct bpf_link *link; 12737 int prog_fd, pfd; 12738 12739 if (!OPTS_VALID(opts, bpf_raw_tracepoint_opts)) 12740 return libbpf_err_ptr(-EINVAL); 12741 12742 prog_fd = bpf_program__fd(prog); 12743 if (prog_fd < 0) { 12744 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12745 return libbpf_err_ptr(-EINVAL); 12746 } 12747 12748 link = calloc(1, sizeof(*link)); 12749 if (!link) 12750 return libbpf_err_ptr(-ENOMEM); 12751 link->detach = &bpf_link__detach_fd; 12752 12753 raw_opts.tp_name = tp_name; 12754 raw_opts.cookie = OPTS_GET(opts, cookie, 0); 12755 pfd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_opts); 12756 if (pfd < 0) { 12757 pfd = -errno; 12758 free(link); 12759 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n", 12760 
prog->name, tp_name, errstr(pfd)); 12761 return libbpf_err_ptr(pfd); 12762 } 12763 link->fd = pfd; 12764 return link; 12765 } 12766 12767 struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, 12768 const char *tp_name) 12769 { 12770 return bpf_program__attach_raw_tracepoint_opts(prog, tp_name, NULL); 12771 } 12772 12773 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12774 { 12775 static const char *const prefixes[] = { 12776 "raw_tp", 12777 "raw_tracepoint", 12778 "raw_tp.w", 12779 "raw_tracepoint.w", 12780 }; 12781 size_t i; 12782 const char *tp_name = NULL; 12783 12784 *link = NULL; 12785 12786 for (i = 0; i < ARRAY_SIZE(prefixes); i++) { 12787 size_t pfx_len; 12788 12789 if (!str_has_pfx(prog->sec_name, prefixes[i])) 12790 continue; 12791 12792 pfx_len = strlen(prefixes[i]); 12793 /* no auto-attach case of, e.g., SEC("raw_tp") */ 12794 if (prog->sec_name[pfx_len] == '\0') 12795 return 0; 12796 12797 if (prog->sec_name[pfx_len] != '/') 12798 continue; 12799 12800 tp_name = prog->sec_name + pfx_len + 1; 12801 break; 12802 } 12803 12804 if (!tp_name) { 12805 pr_warn("prog '%s': invalid section name '%s'\n", 12806 prog->name, prog->sec_name); 12807 return -EINVAL; 12808 } 12809 12810 *link = bpf_program__attach_raw_tracepoint(prog, tp_name); 12811 return libbpf_get_error(*link); 12812 } 12813 12814 /* Common logic for all BPF program types that attach to a btf_id */ 12815 static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog, 12816 const struct bpf_trace_opts *opts) 12817 { 12818 LIBBPF_OPTS(bpf_link_create_opts, link_opts); 12819 struct bpf_link *link; 12820 int prog_fd, pfd; 12821 12822 if (!OPTS_VALID(opts, bpf_trace_opts)) 12823 return libbpf_err_ptr(-EINVAL); 12824 12825 prog_fd = bpf_program__fd(prog); 12826 if (prog_fd < 0) { 12827 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12828 return libbpf_err_ptr(-EINVAL); 12829 } 12830 12831 link = calloc(1, sizeof(*link)); 12832 if (!link) 12833 return libbpf_err_ptr(-ENOMEM); 12834 link->detach = &bpf_link__detach_fd; 12835 12836 /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */ 12837 link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0); 12838 pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts); 12839 if (pfd < 0) { 12840 pfd = -errno; 12841 free(link); 12842 pr_warn("prog '%s': failed to attach: %s\n", 12843 prog->name, errstr(pfd)); 12844 return libbpf_err_ptr(pfd); 12845 } 12846 link->fd = pfd; 12847 return link; 12848 } 12849 12850 struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog) 12851 { 12852 return bpf_program__attach_btf_id(prog, NULL); 12853 } 12854 12855 struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog, 12856 const struct bpf_trace_opts *opts) 12857 { 12858 return bpf_program__attach_btf_id(prog, opts); 12859 } 12860 12861 struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog) 12862 { 12863 return bpf_program__attach_btf_id(prog, NULL); 12864 } 12865 12866 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12867 { 12868 *link = bpf_program__attach_trace(prog); 12869 return libbpf_get_error(*link); 12870 } 12871 12872 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link) 12873 { 12874 *link = bpf_program__attach_lsm(prog); 12875 return libbpf_get_error(*link); 12876 } 12877 12878 static struct 
bpf_link * 12879 bpf_program_attach_fd(const struct bpf_program *prog, 12880 int target_fd, const char *target_name, 12881 const struct bpf_link_create_opts *opts) 12882 { 12883 enum bpf_attach_type attach_type; 12884 struct bpf_link *link; 12885 int prog_fd, link_fd; 12886 12887 prog_fd = bpf_program__fd(prog); 12888 if (prog_fd < 0) { 12889 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 12890 return libbpf_err_ptr(-EINVAL); 12891 } 12892 12893 link = calloc(1, sizeof(*link)); 12894 if (!link) 12895 return libbpf_err_ptr(-ENOMEM); 12896 link->detach = &bpf_link__detach_fd; 12897 12898 attach_type = bpf_program__expected_attach_type(prog); 12899 link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts); 12900 if (link_fd < 0) { 12901 link_fd = -errno; 12902 free(link); 12903 pr_warn("prog '%s': failed to attach to %s: %s\n", 12904 prog->name, target_name, 12905 errstr(link_fd)); 12906 return libbpf_err_ptr(link_fd); 12907 } 12908 link->fd = link_fd; 12909 return link; 12910 } 12911 12912 struct bpf_link * 12913 bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd) 12914 { 12915 return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL); 12916 } 12917 12918 struct bpf_link * 12919 bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd) 12920 { 12921 return bpf_program_attach_fd(prog, netns_fd, "netns", NULL); 12922 } 12923 12924 struct bpf_link * 12925 bpf_program__attach_sockmap(const struct bpf_program *prog, int map_fd) 12926 { 12927 return bpf_program_attach_fd(prog, map_fd, "sockmap", NULL); 12928 } 12929 12930 struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex) 12931 { 12932 /* target_fd/target_ifindex use the same field in LINK_CREATE */ 12933 return bpf_program_attach_fd(prog, ifindex, "xdp", NULL); 12934 } 12935 12936 struct bpf_link * 12937 bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd, 12938 const struct bpf_cgroup_opts *opts) 12939 { 12940 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); 12941 __u32 relative_id; 12942 int relative_fd; 12943 12944 if (!OPTS_VALID(opts, bpf_cgroup_opts)) 12945 return libbpf_err_ptr(-EINVAL); 12946 12947 relative_id = OPTS_GET(opts, relative_id, 0); 12948 relative_fd = OPTS_GET(opts, relative_fd, 0); 12949 12950 if (relative_fd && relative_id) { 12951 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", 12952 prog->name); 12953 return libbpf_err_ptr(-EINVAL); 12954 } 12955 12956 link_create_opts.cgroup.expected_revision = OPTS_GET(opts, expected_revision, 0); 12957 link_create_opts.cgroup.relative_fd = relative_fd; 12958 link_create_opts.cgroup.relative_id = relative_id; 12959 link_create_opts.flags = OPTS_GET(opts, flags, 0); 12960 12961 return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", &link_create_opts); 12962 } 12963 12964 struct bpf_link * 12965 bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex, 12966 const struct bpf_tcx_opts *opts) 12967 { 12968 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); 12969 __u32 relative_id; 12970 int relative_fd; 12971 12972 if (!OPTS_VALID(opts, bpf_tcx_opts)) 12973 return libbpf_err_ptr(-EINVAL); 12974 12975 relative_id = OPTS_GET(opts, relative_id, 0); 12976 relative_fd = OPTS_GET(opts, relative_fd, 0); 12977 12978 /* validate we don't have unexpected combinations of non-zero fields */ 12979 if (!ifindex) { 12980 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n", 12981 prog->name); 12982 return 
libbpf_err_ptr(-EINVAL); 12983 } 12984 if (relative_fd && relative_id) { 12985 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", 12986 prog->name); 12987 return libbpf_err_ptr(-EINVAL); 12988 } 12989 12990 link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0); 12991 link_create_opts.tcx.relative_fd = relative_fd; 12992 link_create_opts.tcx.relative_id = relative_id; 12993 link_create_opts.flags = OPTS_GET(opts, flags, 0); 12994 12995 /* target_fd/target_ifindex use the same field in LINK_CREATE */ 12996 return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts); 12997 } 12998 12999 struct bpf_link * 13000 bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex, 13001 const struct bpf_netkit_opts *opts) 13002 { 13003 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); 13004 __u32 relative_id; 13005 int relative_fd; 13006 13007 if (!OPTS_VALID(opts, bpf_netkit_opts)) 13008 return libbpf_err_ptr(-EINVAL); 13009 13010 relative_id = OPTS_GET(opts, relative_id, 0); 13011 relative_fd = OPTS_GET(opts, relative_fd, 0); 13012 13013 /* validate we don't have unexpected combinations of non-zero fields */ 13014 if (!ifindex) { 13015 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n", 13016 prog->name); 13017 return libbpf_err_ptr(-EINVAL); 13018 } 13019 if (relative_fd && relative_id) { 13020 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", 13021 prog->name); 13022 return libbpf_err_ptr(-EINVAL); 13023 } 13024 13025 link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0); 13026 link_create_opts.netkit.relative_fd = relative_fd; 13027 link_create_opts.netkit.relative_id = relative_id; 13028 link_create_opts.flags = OPTS_GET(opts, flags, 0); 13029 13030 return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts); 13031 } 13032 13033 struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, 13034 int target_fd, 13035 const char *attach_func_name) 13036 { 13037 int btf_id; 13038 13039 if (!!target_fd != !!attach_func_name) { 13040 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n", 13041 prog->name); 13042 return libbpf_err_ptr(-EINVAL); 13043 } 13044 13045 if (prog->type != BPF_PROG_TYPE_EXT) { 13046 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n", 13047 prog->name); 13048 return libbpf_err_ptr(-EINVAL); 13049 } 13050 13051 if (target_fd) { 13052 LIBBPF_OPTS(bpf_link_create_opts, target_opts); 13053 13054 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd, prog->obj->token_fd); 13055 if (btf_id < 0) 13056 return libbpf_err_ptr(btf_id); 13057 13058 target_opts.target_btf_id = btf_id; 13059 13060 return bpf_program_attach_fd(prog, target_fd, "freplace", 13061 &target_opts); 13062 } else { 13063 /* no target, so use raw_tracepoint_open for compatibility 13064 * with old kernels 13065 */ 13066 return bpf_program__attach_trace(prog); 13067 } 13068 } 13069 13070 struct bpf_link * 13071 bpf_program__attach_iter(const struct bpf_program *prog, 13072 const struct bpf_iter_attach_opts *opts) 13073 { 13074 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); 13075 struct bpf_link *link; 13076 int prog_fd, link_fd; 13077 __u32 target_fd = 0; 13078 13079 if (!OPTS_VALID(opts, bpf_iter_attach_opts)) 13080 return libbpf_err_ptr(-EINVAL); 13081 13082 link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0); 13083 link_create_opts.iter_info_len = OPTS_GET(opts, 
link_info_len, 0); 13084 13085 prog_fd = bpf_program__fd(prog); 13086 if (prog_fd < 0) { 13087 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 13088 return libbpf_err_ptr(-EINVAL); 13089 } 13090 13091 link = calloc(1, sizeof(*link)); 13092 if (!link) 13093 return libbpf_err_ptr(-ENOMEM); 13094 link->detach = &bpf_link__detach_fd; 13095 13096 link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER, 13097 &link_create_opts); 13098 if (link_fd < 0) { 13099 link_fd = -errno; 13100 free(link); 13101 pr_warn("prog '%s': failed to attach to iterator: %s\n", 13102 prog->name, errstr(link_fd)); 13103 return libbpf_err_ptr(link_fd); 13104 } 13105 link->fd = link_fd; 13106 return link; 13107 } 13108 13109 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link) 13110 { 13111 *link = bpf_program__attach_iter(prog, NULL); 13112 return libbpf_get_error(*link); 13113 } 13114 13115 struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog, 13116 const struct bpf_netfilter_opts *opts) 13117 { 13118 LIBBPF_OPTS(bpf_link_create_opts, lopts); 13119 struct bpf_link *link; 13120 int prog_fd, link_fd; 13121 13122 if (!OPTS_VALID(opts, bpf_netfilter_opts)) 13123 return libbpf_err_ptr(-EINVAL); 13124 13125 prog_fd = bpf_program__fd(prog); 13126 if (prog_fd < 0) { 13127 pr_warn("prog '%s': can't attach before loaded\n", prog->name); 13128 return libbpf_err_ptr(-EINVAL); 13129 } 13130 13131 link = calloc(1, sizeof(*link)); 13132 if (!link) 13133 return libbpf_err_ptr(-ENOMEM); 13134 13135 link->detach = &bpf_link__detach_fd; 13136 13137 lopts.netfilter.pf = OPTS_GET(opts, pf, 0); 13138 lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0); 13139 lopts.netfilter.priority = OPTS_GET(opts, priority, 0); 13140 lopts.netfilter.flags = OPTS_GET(opts, flags, 0); 13141 13142 link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts); 13143 if (link_fd < 0) { 13144 link_fd = -errno; 13145 free(link); 13146 pr_warn("prog '%s': failed to attach to netfilter: %s\n", 13147 prog->name, errstr(link_fd)); 13148 return libbpf_err_ptr(link_fd); 13149 } 13150 link->fd = link_fd; 13151 13152 return link; 13153 } 13154 13155 struct bpf_link *bpf_program__attach(const struct bpf_program *prog) 13156 { 13157 struct bpf_link *link = NULL; 13158 int err; 13159 13160 if (!prog->sec_def || !prog->sec_def->prog_attach_fn) 13161 return libbpf_err_ptr(-EOPNOTSUPP); 13162 13163 if (bpf_program__fd(prog) < 0) { 13164 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n", 13165 prog->name); 13166 return libbpf_err_ptr(-EINVAL); 13167 } 13168 13169 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link); 13170 if (err) 13171 return libbpf_err_ptr(err); 13172 13173 /* When calling bpf_program__attach() explicitly, auto-attach support 13174 * is expected to work, so NULL returned link is considered an error. 13175 * This is different for skeleton's attach, see comment in 13176 * bpf_object__attach_skeleton(). 
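 *
 * A minimal illustrative call sequence (error handling via
 * libbpf_get_error(), as elsewhere in this file):
 *
 *   struct bpf_link *link;
 *
 *   link = bpf_program__attach(prog);
 *   err = libbpf_get_error(link);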
13177 */
13178 if (!link)
13179 return libbpf_err_ptr(-EOPNOTSUPP);
13180
13181 return link;
13182 }
13183
13184 struct bpf_link_struct_ops {
13185 struct bpf_link link;
13186 int map_fd;
13187 };
13188
13189 static int bpf_link__detach_struct_ops(struct bpf_link *link)
13190 {
13191 struct bpf_link_struct_ops *st_link;
13192 __u32 zero = 0;
13193
13194 st_link = container_of(link, struct bpf_link_struct_ops, link);
13195
13196 if (st_link->map_fd < 0)
13197 /* w/o a real link */
13198 return bpf_map_delete_elem(link->fd, &zero);
13199
13200 return close(link->fd);
13201 }
13202
13203 struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
13204 {
13205 struct bpf_link_struct_ops *link;
13206 __u32 zero = 0;
13207 int err, fd;
13208
13209 if (!bpf_map__is_struct_ops(map)) {
13210 pr_warn("map '%s': can't attach non-struct_ops map\n", map->name);
13211 return libbpf_err_ptr(-EINVAL);
13212 }
13213
13214 if (map->fd < 0) {
13215 pr_warn("map '%s': can't attach BPF map without FD (was it created?)\n", map->name);
13216 return libbpf_err_ptr(-EINVAL);
13217 }
13218
13219 link = calloc(1, sizeof(*link));
13220 if (!link)
13221 return libbpf_err_ptr(-ENOMEM);
13222
13223 /* kern_vdata should be prepared during the loading phase. */
13224 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
13225 /* It can be EBUSY if the map has been used to create or
13226 * update a link before. We don't allow updating the value of
13227 * a struct_ops once it is set. That ensures that the value
13228 * never changes. So, it is safe to skip EBUSY.
13229 */
13230 if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
13231 free(link);
13232 return libbpf_err_ptr(err);
13233 }
13234
13235 link->link.detach = bpf_link__detach_struct_ops;
13236
13237 if (!(map->def.map_flags & BPF_F_LINK)) {
13238 /* w/o a real link */
13239 link->link.fd = map->fd;
13240 link->map_fd = -1;
13241 return &link->link;
13242 }
13243
13244 fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
13245 if (fd < 0) {
13246 free(link);
13247 return libbpf_err_ptr(fd);
13248 }
13249
13250 link->link.fd = fd;
13251 link->map_fd = map->fd;
13252
13253 return &link->link;
13254 }
13255
13256 /*
13257 * Swap the struct_ops map backing a link with a new struct_ops map.
13258 */
13259 int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
13260 {
13261 struct bpf_link_struct_ops *st_ops_link;
13262 __u32 zero = 0;
13263 int err;
13264
13265 if (!bpf_map__is_struct_ops(map))
13266 return libbpf_err(-EINVAL);
13267
13268 if (map->fd < 0) {
13269 pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name);
13270 return libbpf_err(-EINVAL);
13271 }
13272
13273 st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
13274 /* Ensure the type of a link is correct */
13275 if (st_ops_link->map_fd < 0)
13276 return libbpf_err(-EINVAL);
13277
13278 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
13279 /* It can be EBUSY if the map has been used to create or
13280 * update a link before. We don't allow updating the value of
13281 * a struct_ops once it is set. That ensures that the value
13282 * never changes. So, it is safe to skip EBUSY.
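 *
 * Editor's note -- illustrative sketch only, not part of the original
 * source. When the map definition carries BPF_F_LINK, a caller can attach
 * once and later swap the link over to a different struct_ops map
 * (skeleton/map names are hypothetical):
 *
 *	struct bpf_link *link;
 *	int err;
 *
 *	link = bpf_map__attach_struct_ops(skel->maps.ops_v1);
 *	if (!link)
 *		return -errno;
 *	err = bpf_link__update_map(link, skel->maps.ops_v2);
 *	if (err)
 *		return err;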
13283 */
13284 if (err && err != -EBUSY)
13285 return err;
13286
13287 err = bpf_link_update(link->fd, map->fd, NULL);
13288 if (err < 0)
13289 return err;
13290
13291 st_ops_link->map_fd = map->fd;
13292
13293 return 0;
13294 }
13295
13296 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
13297 void *private_data);
13298
13299 static enum bpf_perf_event_ret
13300 perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
13301 void **copy_mem, size_t *copy_size,
13302 bpf_perf_event_print_t fn, void *private_data)
13303 {
13304 struct perf_event_mmap_page *header = mmap_mem;
13305 __u64 data_head = ring_buffer_read_head(header);
13306 __u64 data_tail = header->data_tail;
13307 void *base = ((__u8 *)header) + page_size;
13308 int ret = LIBBPF_PERF_EVENT_CONT;
13309 struct perf_event_header *ehdr;
13310 size_t ehdr_size;
13311
13312 while (data_head != data_tail) {
13313 ehdr = base + (data_tail & (mmap_size - 1));
13314 ehdr_size = ehdr->size;
13315
13316 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
13317 void *copy_start = ehdr;
13318 size_t len_first = base + mmap_size - copy_start;
13319 size_t len_secnd = ehdr_size - len_first;
13320
13321 if (*copy_size < ehdr_size) {
13322 free(*copy_mem);
13323 *copy_mem = malloc(ehdr_size);
13324 if (!*copy_mem) {
13325 *copy_size = 0;
13326 ret = LIBBPF_PERF_EVENT_ERROR;
13327 break;
13328 }
13329 *copy_size = ehdr_size;
13330 }
13331
13332 memcpy(*copy_mem, copy_start, len_first);
13333 memcpy(*copy_mem + len_first, base, len_secnd);
13334 ehdr = *copy_mem;
13335 }
13336
13337 ret = fn(ehdr, private_data);
13338 data_tail += ehdr_size;
13339 if (ret != LIBBPF_PERF_EVENT_CONT)
13340 break;
13341 }
13342
13343 ring_buffer_write_tail(header, data_tail);
13344 return libbpf_err(ret);
13345 }
13346
13347 struct perf_buffer;
13348
13349 struct perf_buffer_params {
13350 struct perf_event_attr *attr;
13351 /* if event_cb is specified, it takes precedence */
13352 perf_buffer_event_fn event_cb;
13353 /* sample_cb and lost_cb are higher-level common-case callbacks */
13354 perf_buffer_sample_fn sample_cb;
13355 perf_buffer_lost_fn lost_cb;
13356 void *ctx;
13357 int cpu_cnt;
13358 int *cpus;
13359 int *map_keys;
13360 };
13361
13362 struct perf_cpu_buf {
13363 struct perf_buffer *pb;
13364 void *base; /* mmap()'ed memory */
13365 void *buf; /* for reconstructing segmented data */
13366 size_t buf_size;
13367 int fd;
13368 int cpu;
13369 int map_key;
13370 };
13371
13372 struct perf_buffer {
13373 perf_buffer_event_fn event_cb;
13374 perf_buffer_sample_fn sample_cb;
13375 perf_buffer_lost_fn lost_cb;
13376 void *ctx; /* passed into callbacks */
13377
13378 size_t page_size;
13379 size_t mmap_size;
13380 struct perf_cpu_buf **cpu_bufs;
13381 struct epoll_event *events;
13382 int cpu_cnt; /* number of allocated CPU buffers */
13383 int epoll_fd; /* epoll instance FD */
13384 int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
13385 };
13386
13387 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
13388 struct perf_cpu_buf *cpu_buf)
13389 {
13390 if (!cpu_buf)
13391 return;
13392 if (cpu_buf->base &&
13393 munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
13394 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
13395 if (cpu_buf->fd >= 0) {
13396 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
13397 close(cpu_buf->fd);
13398 }
13399 free(cpu_buf->buf);
13400 free(cpu_buf);
13401 }
13402
13403 void perf_buffer__free(struct perf_buffer *pb)
13404 {
13405 int i;
13406
13407 if
(IS_ERR_OR_NULL(pb)) 13408 return; 13409 if (pb->cpu_bufs) { 13410 for (i = 0; i < pb->cpu_cnt; i++) { 13411 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i]; 13412 13413 if (!cpu_buf) 13414 continue; 13415 13416 bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key); 13417 perf_buffer__free_cpu_buf(pb, cpu_buf); 13418 } 13419 free(pb->cpu_bufs); 13420 } 13421 if (pb->epoll_fd >= 0) 13422 close(pb->epoll_fd); 13423 free(pb->events); 13424 free(pb); 13425 } 13426 13427 static struct perf_cpu_buf * 13428 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, 13429 int cpu, int map_key) 13430 { 13431 struct perf_cpu_buf *cpu_buf; 13432 int err; 13433 13434 cpu_buf = calloc(1, sizeof(*cpu_buf)); 13435 if (!cpu_buf) 13436 return ERR_PTR(-ENOMEM); 13437 13438 cpu_buf->pb = pb; 13439 cpu_buf->cpu = cpu; 13440 cpu_buf->map_key = map_key; 13441 13442 cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu, 13443 -1, PERF_FLAG_FD_CLOEXEC); 13444 if (cpu_buf->fd < 0) { 13445 err = -errno; 13446 pr_warn("failed to open perf buffer event on cpu #%d: %s\n", 13447 cpu, errstr(err)); 13448 goto error; 13449 } 13450 13451 cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size, 13452 PROT_READ | PROT_WRITE, MAP_SHARED, 13453 cpu_buf->fd, 0); 13454 if (cpu_buf->base == MAP_FAILED) { 13455 cpu_buf->base = NULL; 13456 err = -errno; 13457 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n", 13458 cpu, errstr(err)); 13459 goto error; 13460 } 13461 13462 if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) { 13463 err = -errno; 13464 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n", 13465 cpu, errstr(err)); 13466 goto error; 13467 } 13468 13469 return cpu_buf; 13470 13471 error: 13472 perf_buffer__free_cpu_buf(pb, cpu_buf); 13473 return (struct perf_cpu_buf *)ERR_PTR(err); 13474 } 13475 13476 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, 13477 struct perf_buffer_params *p); 13478 13479 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, 13480 perf_buffer_sample_fn sample_cb, 13481 perf_buffer_lost_fn lost_cb, 13482 void *ctx, 13483 const struct perf_buffer_opts *opts) 13484 { 13485 const size_t attr_sz = sizeof(struct perf_event_attr); 13486 struct perf_buffer_params p = {}; 13487 struct perf_event_attr attr; 13488 __u32 sample_period; 13489 13490 if (!OPTS_VALID(opts, perf_buffer_opts)) 13491 return libbpf_err_ptr(-EINVAL); 13492 13493 sample_period = OPTS_GET(opts, sample_period, 1); 13494 if (!sample_period) 13495 sample_period = 1; 13496 13497 memset(&attr, 0, attr_sz); 13498 attr.size = attr_sz; 13499 attr.config = PERF_COUNT_SW_BPF_OUTPUT; 13500 attr.type = PERF_TYPE_SOFTWARE; 13501 attr.sample_type = PERF_SAMPLE_RAW; 13502 attr.wakeup_events = sample_period; 13503 13504 p.attr = &attr; 13505 p.sample_cb = sample_cb; 13506 p.lost_cb = lost_cb; 13507 p.ctx = ctx; 13508 13509 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); 13510 } 13511 13512 struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt, 13513 struct perf_event_attr *attr, 13514 perf_buffer_event_fn event_cb, void *ctx, 13515 const struct perf_buffer_raw_opts *opts) 13516 { 13517 struct perf_buffer_params p = {}; 13518 13519 if (!attr) 13520 return libbpf_err_ptr(-EINVAL); 13521 13522 if (!OPTS_VALID(opts, perf_buffer_raw_opts)) 13523 return libbpf_err_ptr(-EINVAL); 13524 13525 p.attr = attr; 13526 p.event_cb = event_cb; 13527 p.ctx = ctx; 13528 p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0); 13529 p.cpus = OPTS_GET(opts, cpus, NULL); 13530 
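/*
 * Editor's note: illustrative sketch only, not part of the original source.
 * The cpu_cnt/cpus/map_keys options restrict the buffer to specific CPUs and
 * pick which PERF_EVENT_ARRAY slots they occupy; the common case goes through
 * perf_buffer__new() instead, roughly (callback and map names hypothetical):
 *
 *	static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 *	{
 *		... consume one raw sample ...
 *	}
 *
 *	pb = perf_buffer__new(bpf_map__fd(map), 8, on_sample, NULL, NULL, NULL);
 *	if (!pb)
 *		return -errno;
 *	while (!exiting)
 *		perf_buffer__poll(pb, 100);
 *	perf_buffer__free(pb);
 *
 * where "map" is a BPF_MAP_TYPE_PERF_EVENT_ARRAY map and 8 is the per-CPU
 * ring size in pages (must be a power of two).
 */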
p.map_keys = OPTS_GET(opts, map_keys, NULL); 13531 13532 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); 13533 } 13534 13535 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, 13536 struct perf_buffer_params *p) 13537 { 13538 const char *online_cpus_file = "/sys/devices/system/cpu/online"; 13539 struct bpf_map_info map; 13540 struct perf_buffer *pb; 13541 bool *online = NULL; 13542 __u32 map_info_len; 13543 int err, i, j, n; 13544 13545 if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) { 13546 pr_warn("page count should be power of two, but is %zu\n", 13547 page_cnt); 13548 return ERR_PTR(-EINVAL); 13549 } 13550 13551 /* best-effort sanity checks */ 13552 memset(&map, 0, sizeof(map)); 13553 map_info_len = sizeof(map); 13554 err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len); 13555 if (err) { 13556 err = -errno; 13557 /* if BPF_OBJ_GET_INFO_BY_FD is supported, will return 13558 * -EBADFD, -EFAULT, or -E2BIG on real error 13559 */ 13560 if (err != -EINVAL) { 13561 pr_warn("failed to get map info for map FD %d: %s\n", 13562 map_fd, errstr(err)); 13563 return ERR_PTR(err); 13564 } 13565 pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n", 13566 map_fd); 13567 } else { 13568 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { 13569 pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", 13570 map.name); 13571 return ERR_PTR(-EINVAL); 13572 } 13573 } 13574 13575 pb = calloc(1, sizeof(*pb)); 13576 if (!pb) 13577 return ERR_PTR(-ENOMEM); 13578 13579 pb->event_cb = p->event_cb; 13580 pb->sample_cb = p->sample_cb; 13581 pb->lost_cb = p->lost_cb; 13582 pb->ctx = p->ctx; 13583 13584 pb->page_size = getpagesize(); 13585 pb->mmap_size = pb->page_size * page_cnt; 13586 pb->map_fd = map_fd; 13587 13588 pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); 13589 if (pb->epoll_fd < 0) { 13590 err = -errno; 13591 pr_warn("failed to create epoll instance: %s\n", 13592 errstr(err)); 13593 goto error; 13594 } 13595 13596 if (p->cpu_cnt > 0) { 13597 pb->cpu_cnt = p->cpu_cnt; 13598 } else { 13599 pb->cpu_cnt = libbpf_num_possible_cpus(); 13600 if (pb->cpu_cnt < 0) { 13601 err = pb->cpu_cnt; 13602 goto error; 13603 } 13604 if (map.max_entries && map.max_entries < pb->cpu_cnt) 13605 pb->cpu_cnt = map.max_entries; 13606 } 13607 13608 pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events)); 13609 if (!pb->events) { 13610 err = -ENOMEM; 13611 pr_warn("failed to allocate events: out of memory\n"); 13612 goto error; 13613 } 13614 pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs)); 13615 if (!pb->cpu_bufs) { 13616 err = -ENOMEM; 13617 pr_warn("failed to allocate buffers: out of memory\n"); 13618 goto error; 13619 } 13620 13621 err = parse_cpu_mask_file(online_cpus_file, &online, &n); 13622 if (err) { 13623 pr_warn("failed to get online CPU mask: %s\n", errstr(err)); 13624 goto error; 13625 } 13626 13627 for (i = 0, j = 0; i < pb->cpu_cnt; i++) { 13628 struct perf_cpu_buf *cpu_buf; 13629 int cpu, map_key; 13630 13631 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i; 13632 map_key = p->cpu_cnt > 0 ? 
p->map_keys[i] : i;
13633
13634 /* in case the user didn't explicitly request particular CPUs to
13635 * be attached to, skip offline/not present CPUs
13636 */
13637 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
13638 continue;
13639
13640 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
13641 if (IS_ERR(cpu_buf)) {
13642 err = PTR_ERR(cpu_buf);
13643 goto error;
13644 }
13645
13646 pb->cpu_bufs[j] = cpu_buf;
13647
13648 err = bpf_map_update_elem(pb->map_fd, &map_key,
13649 &cpu_buf->fd, 0);
13650 if (err) {
13651 err = -errno;
13652 pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
13653 cpu, map_key, cpu_buf->fd,
13654 errstr(err));
13655 goto error;
13656 }
13657
13658 pb->events[j].events = EPOLLIN;
13659 pb->events[j].data.ptr = cpu_buf;
13660 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
13661 &pb->events[j]) < 0) {
13662 err = -errno;
13663 pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
13664 cpu, cpu_buf->fd,
13665 errstr(err));
13666 goto error;
13667 }
13668 j++;
13669 }
13670 pb->cpu_cnt = j;
13671 free(online);
13672
13673 return pb;
13674
13675 error:
13676 free(online);
13677 if (pb)
13678 perf_buffer__free(pb);
13679 return ERR_PTR(err);
13680 }
13681
13682 struct perf_sample_raw {
13683 struct perf_event_header header;
13684 uint32_t size;
13685 char data[];
13686 };
13687
13688 struct perf_sample_lost {
13689 struct perf_event_header header;
13690 uint64_t id;
13691 uint64_t lost;
13692 uint64_t sample_id;
13693 };
13694
13695 static enum bpf_perf_event_ret
13696 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
13697 {
13698 struct perf_cpu_buf *cpu_buf = ctx;
13699 struct perf_buffer *pb = cpu_buf->pb;
13700 void *data = e;
13701
13702 /* user wants full control over parsing perf event */
13703 if (pb->event_cb)
13704 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
13705
13706 switch (e->type) {
13707 case PERF_RECORD_SAMPLE: {
13708 struct perf_sample_raw *s = data;
13709
13710 if (pb->sample_cb)
13711 pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
13712 break;
13713 }
13714 case PERF_RECORD_LOST: {
13715 struct perf_sample_lost *s = data;
13716
13717 if (pb->lost_cb)
13718 pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
13719 break;
13720 }
13721 default:
13722 pr_warn("unknown perf sample type %d\n", e->type);
13723 return LIBBPF_PERF_EVENT_ERROR;
13724 }
13725 return LIBBPF_PERF_EVENT_CONT;
13726 }
13727
13728 static int perf_buffer__process_records(struct perf_buffer *pb,
13729 struct perf_cpu_buf *cpu_buf)
13730 {
13731 enum bpf_perf_event_ret ret;
13732
13733 ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
13734 pb->page_size, &cpu_buf->buf,
13735 &cpu_buf->buf_size,
13736 perf_buffer__process_record, cpu_buf);
13737 if (ret != LIBBPF_PERF_EVENT_CONT)
13738 return ret;
13739 return 0;
13740 }
13741
13742 int perf_buffer__epoll_fd(const struct perf_buffer *pb)
13743 {
13744 return pb->epoll_fd;
13745 }
13746
13747 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
13748 {
13749 int i, cnt, err;
13750
13751 cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
13752 if (cnt < 0)
13753 return -errno;
13754
13755 for (i = 0; i < cnt; i++) {
13756 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
13757
13758 err = perf_buffer__process_records(pb, cpu_buf);
13759 if (err) {
13760 pr_warn("error while processing records: %s\n", errstr(err));
13761 return libbpf_err(err);
13762 }
13763 }
13764 return cnt;
13765 }
13766
13767 /* Return number of PERF_EVENT_ARRAY
map slots set up by this perf_buffer 13768 * manager. 13769 */ 13770 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb) 13771 { 13772 return pb->cpu_cnt; 13773 } 13774 13775 /* 13776 * Return perf_event FD of a ring buffer in *buf_idx* slot of 13777 * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using 13778 * select()/poll()/epoll() Linux syscalls. 13779 */ 13780 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx) 13781 { 13782 struct perf_cpu_buf *cpu_buf; 13783 13784 if (buf_idx >= pb->cpu_cnt) 13785 return libbpf_err(-EINVAL); 13786 13787 cpu_buf = pb->cpu_bufs[buf_idx]; 13788 if (!cpu_buf) 13789 return libbpf_err(-ENOENT); 13790 13791 return cpu_buf->fd; 13792 } 13793 13794 int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size) 13795 { 13796 struct perf_cpu_buf *cpu_buf; 13797 13798 if (buf_idx >= pb->cpu_cnt) 13799 return libbpf_err(-EINVAL); 13800 13801 cpu_buf = pb->cpu_bufs[buf_idx]; 13802 if (!cpu_buf) 13803 return libbpf_err(-ENOENT); 13804 13805 *buf = cpu_buf->base; 13806 *buf_size = pb->mmap_size; 13807 return 0; 13808 } 13809 13810 /* 13811 * Consume data from perf ring buffer corresponding to slot *buf_idx* in 13812 * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to 13813 * consume, do nothing and return success. 13814 * Returns: 13815 * - 0 on success; 13816 * - <0 on failure. 13817 */ 13818 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx) 13819 { 13820 struct perf_cpu_buf *cpu_buf; 13821 13822 if (buf_idx >= pb->cpu_cnt) 13823 return libbpf_err(-EINVAL); 13824 13825 cpu_buf = pb->cpu_bufs[buf_idx]; 13826 if (!cpu_buf) 13827 return libbpf_err(-ENOENT); 13828 13829 return perf_buffer__process_records(pb, cpu_buf); 13830 } 13831 13832 int perf_buffer__consume(struct perf_buffer *pb) 13833 { 13834 int i, err; 13835 13836 for (i = 0; i < pb->cpu_cnt; i++) { 13837 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i]; 13838 13839 if (!cpu_buf) 13840 continue; 13841 13842 err = perf_buffer__process_records(pb, cpu_buf); 13843 if (err) { 13844 pr_warn("perf_buffer: failed to process records in buffer #%d: %s\n", 13845 i, errstr(err)); 13846 return libbpf_err(err); 13847 } 13848 } 13849 return 0; 13850 } 13851 13852 int bpf_program__set_attach_target(struct bpf_program *prog, 13853 int attach_prog_fd, 13854 const char *attach_func_name) 13855 { 13856 int btf_obj_fd = 0, btf_id = 0, err; 13857 13858 if (!prog || attach_prog_fd < 0) 13859 return libbpf_err(-EINVAL); 13860 13861 if (prog->obj->state >= OBJ_LOADED) 13862 return libbpf_err(-EINVAL); 13863 13864 if (attach_prog_fd && !attach_func_name) { 13865 /* remember attach_prog_fd and let bpf_program__load() find 13866 * BTF ID during the program load 13867 */ 13868 prog->attach_prog_fd = attach_prog_fd; 13869 return 0; 13870 } 13871 13872 if (attach_prog_fd) { 13873 btf_id = libbpf_find_prog_btf_id(attach_func_name, 13874 attach_prog_fd, prog->obj->token_fd); 13875 if (btf_id < 0) 13876 return libbpf_err(btf_id); 13877 } else { 13878 if (!attach_func_name) 13879 return libbpf_err(-EINVAL); 13880 13881 /* load btf_vmlinux, if not yet */ 13882 err = bpf_object__load_vmlinux_btf(prog->obj, true); 13883 if (err) 13884 return libbpf_err(err); 13885 err = find_kernel_btf_id(prog->obj, attach_func_name, 13886 prog->expected_attach_type, 13887 &btf_obj_fd, &btf_id); 13888 if (err) 13889 return libbpf_err(err); 13890 } 13891 13892 prog->attach_btf_id = btf_id; 13893 prog->attach_btf_obj_fd = btf_obj_fd; 13894 
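/*
 * Editor's note: illustrative sketch only, not part of the original source.
 * bpf_program__set_attach_target() is typically called between open and
 * load to retarget a fentry/fexit/freplace program, e.g. (object and
 * program names hypothetical):
 *
 *	prog = bpf_object__find_program_by_name(obj, "entry_probe");
 *	err = bpf_program__set_attach_target(prog, 0, "tcp_v4_connect");
 *	if (err)
 *		return err;
 *	err = bpf_object__load(obj);
 *
 * An attach_prog_fd of 0 resolves the name against kernel (vmlinux/module)
 * BTF; a non-zero FD resolves it against that BPF program's BTF instead.
 */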
prog->attach_prog_fd = attach_prog_fd; 13895 return 0; 13896 } 13897 13898 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz) 13899 { 13900 int err = 0, n, len, start, end = -1; 13901 bool *tmp; 13902 13903 *mask = NULL; 13904 *mask_sz = 0; 13905 13906 /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ 13907 while (*s) { 13908 if (*s == ',' || *s == '\n') { 13909 s++; 13910 continue; 13911 } 13912 n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len); 13913 if (n <= 0 || n > 2) { 13914 pr_warn("Failed to get CPU range %s: %d\n", s, n); 13915 err = -EINVAL; 13916 goto cleanup; 13917 } else if (n == 1) { 13918 end = start; 13919 } 13920 if (start < 0 || start > end) { 13921 pr_warn("Invalid CPU range [%d,%d] in %s\n", 13922 start, end, s); 13923 err = -EINVAL; 13924 goto cleanup; 13925 } 13926 tmp = realloc(*mask, end + 1); 13927 if (!tmp) { 13928 err = -ENOMEM; 13929 goto cleanup; 13930 } 13931 *mask = tmp; 13932 memset(tmp + *mask_sz, 0, start - *mask_sz); 13933 memset(tmp + start, 1, end - start + 1); 13934 *mask_sz = end + 1; 13935 s += len; 13936 } 13937 if (!*mask_sz) { 13938 pr_warn("Empty CPU range\n"); 13939 return -EINVAL; 13940 } 13941 return 0; 13942 cleanup: 13943 free(*mask); 13944 *mask = NULL; 13945 return err; 13946 } 13947 13948 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) 13949 { 13950 int fd, err = 0, len; 13951 char buf[128]; 13952 13953 fd = open(fcpu, O_RDONLY | O_CLOEXEC); 13954 if (fd < 0) { 13955 err = -errno; 13956 pr_warn("Failed to open cpu mask file %s: %s\n", fcpu, errstr(err)); 13957 return err; 13958 } 13959 len = read(fd, buf, sizeof(buf)); 13960 close(fd); 13961 if (len <= 0) { 13962 err = len ? -errno : -EINVAL; 13963 pr_warn("Failed to read cpu mask from %s: %s\n", fcpu, errstr(err)); 13964 return err; 13965 } 13966 if (len >= sizeof(buf)) { 13967 pr_warn("CPU mask is too big in file %s\n", fcpu); 13968 return -E2BIG; 13969 } 13970 buf[len] = '\0'; 13971 13972 return parse_cpu_mask_str(buf, mask, mask_sz); 13973 } 13974 13975 int libbpf_num_possible_cpus(void) 13976 { 13977 static const char *fcpu = "/sys/devices/system/cpu/possible"; 13978 static int cpus; 13979 int err, n, i, tmp_cpus; 13980 bool *mask; 13981 13982 tmp_cpus = READ_ONCE(cpus); 13983 if (tmp_cpus > 0) 13984 return tmp_cpus; 13985 13986 err = parse_cpu_mask_file(fcpu, &mask, &n); 13987 if (err) 13988 return libbpf_err(err); 13989 13990 tmp_cpus = 0; 13991 for (i = 0; i < n; i++) { 13992 if (mask[i]) 13993 tmp_cpus++; 13994 } 13995 free(mask); 13996 13997 WRITE_ONCE(cpus, tmp_cpus); 13998 return tmp_cpus; 13999 } 14000 14001 static int populate_skeleton_maps(const struct bpf_object *obj, 14002 struct bpf_map_skeleton *maps, 14003 size_t map_cnt, size_t map_skel_sz) 14004 { 14005 int i; 14006 14007 for (i = 0; i < map_cnt; i++) { 14008 struct bpf_map_skeleton *map_skel = (void *)maps + i * map_skel_sz; 14009 struct bpf_map **map = map_skel->map; 14010 const char *name = map_skel->name; 14011 void **mmaped = map_skel->mmaped; 14012 14013 *map = bpf_object__find_map_by_name(obj, name); 14014 if (!*map) { 14015 pr_warn("failed to find skeleton map '%s'\n", name); 14016 return -ESRCH; 14017 } 14018 14019 /* externs shouldn't be pre-setup from user code */ 14020 if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG) 14021 *mmaped = (*map)->mmaped; 14022 } 14023 return 0; 14024 } 14025 14026 static int populate_skeleton_progs(const struct bpf_object *obj, 14027 struct bpf_prog_skeleton *progs, 14028 size_t prog_cnt, size_t prog_skel_sz) 14029 { 
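	/*
	 * Editor's note (descriptive comment, not in the original source):
	 * resolve each generated skeleton prog slot by name against the
	 * freshly opened object, so that skel->progs.<name> pointers are
	 * valid before load/attach.
	 */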
14030 int i;
14031
14032 for (i = 0; i < prog_cnt; i++) {
14033 struct bpf_prog_skeleton *prog_skel = (void *)progs + i * prog_skel_sz;
14034 struct bpf_program **prog = prog_skel->prog;
14035 const char *name = prog_skel->name;
14036
14037 *prog = bpf_object__find_program_by_name(obj, name);
14038 if (!*prog) {
14039 pr_warn("failed to find skeleton program '%s'\n", name);
14040 return -ESRCH;
14041 }
14042 }
14043 return 0;
14044 }
14045
14046 int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
14047 const struct bpf_object_open_opts *opts)
14048 {
14049 struct bpf_object *obj;
14050 int err;
14051
14052 obj = bpf_object_open(NULL, s->data, s->data_sz, s->name, opts);
14053 if (IS_ERR(obj)) {
14054 err = PTR_ERR(obj);
14055 pr_warn("failed to initialize skeleton BPF object '%s': %s\n",
14056 s->name, errstr(err));
14057 return libbpf_err(err);
14058 }
14059
14060 *s->obj = obj;
14061 err = populate_skeleton_maps(obj, s->maps, s->map_cnt, s->map_skel_sz);
14062 if (err) {
14063 pr_warn("failed to populate skeleton maps for '%s': %s\n", s->name, errstr(err));
14064 return libbpf_err(err);
14065 }
14066
14067 err = populate_skeleton_progs(obj, s->progs, s->prog_cnt, s->prog_skel_sz);
14068 if (err) {
14069 pr_warn("failed to populate skeleton progs for '%s': %s\n", s->name, errstr(err));
14070 return libbpf_err(err);
14071 }
14072
14073 return 0;
14074 }
14075
14076 int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
14077 {
14078 int err, len, var_idx, i;
14079 const char *var_name;
14080 const struct bpf_map *map;
14081 struct btf *btf;
14082 __u32 map_type_id;
14083 const struct btf_type *map_type, *var_type;
14084 const struct bpf_var_skeleton *var_skel;
14085 struct btf_var_secinfo *var;
14086
14087 if (!s->obj)
14088 return libbpf_err(-EINVAL);
14089
14090 btf = bpf_object__btf(s->obj);
14091 if (!btf) {
14092 pr_warn("subskeletons require BTF at runtime (object %s)\n",
14093 bpf_object__name(s->obj));
14094 return libbpf_err(-errno);
14095 }
14096
14097 err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt, s->map_skel_sz);
14098 if (err) {
14099 pr_warn("failed to populate subskeleton maps: %s\n", errstr(err));
14100 return libbpf_err(err);
14101 }
14102
14103 err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt, s->prog_skel_sz);
14104 if (err) {
14105 pr_warn("failed to populate subskeleton progs: %s\n", errstr(err));
14106 return libbpf_err(err);
14107 }
14108
14109 for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
14110 var_skel = (void *)s->vars + var_idx * s->var_skel_sz;
14111 map = *var_skel->map;
14112 map_type_id = bpf_map__btf_value_type_id(map);
14113 map_type = btf__type_by_id(btf, map_type_id);
14114
14115 if (!btf_is_datasec(map_type)) {
14116 pr_warn("type for map '%1$s' is not a datasec: %2$s\n",
14117 bpf_map__name(map),
14118 __btf_kind_str(btf_kind(map_type)));
14119 return libbpf_err(-EINVAL);
14120 }
14121
14122 len = btf_vlen(map_type);
14123 var = btf_var_secinfos(map_type);
14124 for (i = 0; i < len; i++, var++) {
14125 var_type = btf__type_by_id(btf, var->type);
14126 var_name = btf__name_by_offset(btf, var_type->name_off);
14127 if (strcmp(var_name, var_skel->name) == 0) {
14128 *var_skel->addr = map->mmaped + var->offset;
14129 break;
14130 }
14131 }
14132 }
14133 return 0;
14134 }
14135
14136 void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
14137 {
14138 if (!s)
14139 return;
14140 free(s->maps);
14141 free(s->progs);
14142 free(s->vars);
14143 free(s);
14144 }
14145
14146 int
bpf_object__load_skeleton(struct bpf_object_skeleton *s) 14147 { 14148 int i, err; 14149 14150 err = bpf_object__load(*s->obj); 14151 if (err) { 14152 pr_warn("failed to load BPF skeleton '%s': %s\n", s->name, errstr(err)); 14153 return libbpf_err(err); 14154 } 14155 14156 for (i = 0; i < s->map_cnt; i++) { 14157 struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; 14158 struct bpf_map *map = *map_skel->map; 14159 14160 if (!map_skel->mmaped) 14161 continue; 14162 14163 *map_skel->mmaped = map->mmaped; 14164 } 14165 14166 return 0; 14167 } 14168 14169 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) 14170 { 14171 int i, err; 14172 14173 for (i = 0; i < s->prog_cnt; i++) { 14174 struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz; 14175 struct bpf_program *prog = *prog_skel->prog; 14176 struct bpf_link **link = prog_skel->link; 14177 14178 if (!prog->autoload || !prog->autoattach) 14179 continue; 14180 14181 /* auto-attaching not supported for this program */ 14182 if (!prog->sec_def || !prog->sec_def->prog_attach_fn) 14183 continue; 14184 14185 /* if user already set the link manually, don't attempt auto-attach */ 14186 if (*link) 14187 continue; 14188 14189 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link); 14190 if (err) { 14191 pr_warn("prog '%s': failed to auto-attach: %s\n", 14192 bpf_program__name(prog), errstr(err)); 14193 return libbpf_err(err); 14194 } 14195 14196 /* It's possible that for some SEC() definitions auto-attach 14197 * is supported in some cases (e.g., if definition completely 14198 * specifies target information), but is not in other cases. 14199 * SEC("uprobe") is one such case. If user specified target 14200 * binary and function name, such BPF program can be 14201 * auto-attached. But if not, it shouldn't trigger skeleton's 14202 * attach to fail. It should just be skipped. 14203 * attach_fn signals such case with returning 0 (no error) and 14204 * setting link to NULL. 
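 *
 * Editor's note -- illustrative sketch only, not part of the original
 * source. A caller can detect such skipped programs after <name>__attach()
 * and attach them manually, e.g. for a SEC("uprobe") program without
 * target info (skeleton/program names and offset are hypothetical):
 *
 *	if (!skel->links.my_uprobe) {
 *		skel->links.my_uprobe =
 *			bpf_program__attach_uprobe(skel->progs.my_uprobe,
 *						   false, -1, "/bin/bash",
 *						   func_off);
 *		if (!skel->links.my_uprobe)
 *			return -errno;
 *	}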
14205 */ 14206 } 14207 14208 14209 for (i = 0; i < s->map_cnt; i++) { 14210 struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; 14211 struct bpf_map *map = *map_skel->map; 14212 struct bpf_link **link; 14213 14214 if (!map->autocreate || !map->autoattach) 14215 continue; 14216 14217 /* only struct_ops maps can be attached */ 14218 if (!bpf_map__is_struct_ops(map)) 14219 continue; 14220 14221 /* skeleton is created with earlier version of bpftool, notify user */ 14222 if (s->map_skel_sz < offsetofend(struct bpf_map_skeleton, link)) { 14223 pr_warn("map '%s': BPF skeleton version is old, skipping map auto-attachment...\n", 14224 bpf_map__name(map)); 14225 continue; 14226 } 14227 14228 link = map_skel->link; 14229 if (!link) { 14230 pr_warn("map '%s': BPF map skeleton link is uninitialized\n", 14231 bpf_map__name(map)); 14232 continue; 14233 } 14234 14235 if (*link) 14236 continue; 14237 14238 *link = bpf_map__attach_struct_ops(map); 14239 if (!*link) { 14240 err = -errno; 14241 pr_warn("map '%s': failed to auto-attach: %s\n", 14242 bpf_map__name(map), errstr(err)); 14243 return libbpf_err(err); 14244 } 14245 } 14246 14247 return 0; 14248 } 14249 14250 void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) 14251 { 14252 int i; 14253 14254 for (i = 0; i < s->prog_cnt; i++) { 14255 struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz; 14256 struct bpf_link **link = prog_skel->link; 14257 14258 bpf_link__destroy(*link); 14259 *link = NULL; 14260 } 14261 14262 if (s->map_skel_sz < sizeof(struct bpf_map_skeleton)) 14263 return; 14264 14265 for (i = 0; i < s->map_cnt; i++) { 14266 struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; 14267 struct bpf_link **link = map_skel->link; 14268 14269 if (link) { 14270 bpf_link__destroy(*link); 14271 *link = NULL; 14272 } 14273 } 14274 } 14275 14276 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) 14277 { 14278 if (!s) 14279 return; 14280 14281 bpf_object__detach_skeleton(s); 14282 if (s->obj) 14283 bpf_object__close(*s->obj); 14284 free(s->maps); 14285 free(s->progs); 14286 free(s); 14287 } 14288 14289 static inline __u32 ror32(__u32 v, int bits) 14290 { 14291 return (v >> bits) | (v << (32 - bits)); 14292 } 14293 14294 #define SHA256_BLOCK_LENGTH 64 14295 #define Ch(x, y, z) (((x) & (y)) ^ (~(x) & (z))) 14296 #define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) 14297 #define Sigma_0(x) (ror32((x), 2) ^ ror32((x), 13) ^ ror32((x), 22)) 14298 #define Sigma_1(x) (ror32((x), 6) ^ ror32((x), 11) ^ ror32((x), 25)) 14299 #define sigma_0(x) (ror32((x), 7) ^ ror32((x), 18) ^ ((x) >> 3)) 14300 #define sigma_1(x) (ror32((x), 17) ^ ror32((x), 19) ^ ((x) >> 10)) 14301 14302 static const __u32 sha256_K[64] = { 14303 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 14304 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 14305 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 14306 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 14307 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 14308 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 14309 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, 14310 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 14311 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 14312 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 
0x8cc70208, 14313 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, 14314 }; 14315 14316 #define SHA256_ROUND(i, a, b, c, d, e, f, g, h) \ 14317 { \ 14318 __u32 tmp = h + Sigma_1(e) + Ch(e, f, g) + sha256_K[i] + w[i]; \ 14319 d += tmp; \ 14320 h = tmp + Sigma_0(a) + Maj(a, b, c); \ 14321 } 14322 14323 static void sha256_blocks(__u32 state[8], const __u8 *data, size_t nblocks) 14324 { 14325 while (nblocks--) { 14326 __u32 a = state[0]; 14327 __u32 b = state[1]; 14328 __u32 c = state[2]; 14329 __u32 d = state[3]; 14330 __u32 e = state[4]; 14331 __u32 f = state[5]; 14332 __u32 g = state[6]; 14333 __u32 h = state[7]; 14334 __u32 w[64]; 14335 int i; 14336 14337 for (i = 0; i < 16; i++) 14338 w[i] = get_unaligned_be32(&data[4 * i]); 14339 for (; i < ARRAY_SIZE(w); i++) 14340 w[i] = sigma_1(w[i - 2]) + w[i - 7] + 14341 sigma_0(w[i - 15]) + w[i - 16]; 14342 for (i = 0; i < ARRAY_SIZE(w); i += 8) { 14343 SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h); 14344 SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g); 14345 SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f); 14346 SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e); 14347 SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d); 14348 SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c); 14349 SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b); 14350 SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a); 14351 } 14352 state[0] += a; 14353 state[1] += b; 14354 state[2] += c; 14355 state[3] += d; 14356 state[4] += e; 14357 state[5] += f; 14358 state[6] += g; 14359 state[7] += h; 14360 data += SHA256_BLOCK_LENGTH; 14361 } 14362 } 14363 14364 void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]) 14365 { 14366 __u32 state[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 14367 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 }; 14368 const __be64 bitcount = cpu_to_be64((__u64)len * 8); 14369 __u8 final_data[2 * SHA256_BLOCK_LENGTH] = { 0 }; 14370 size_t final_len = len % SHA256_BLOCK_LENGTH; 14371 int i; 14372 14373 sha256_blocks(state, data, len / SHA256_BLOCK_LENGTH); 14374 14375 memcpy(final_data, data + len - final_len, final_len); 14376 final_data[final_len] = 0x80; 14377 final_len = round_up(final_len + 9, SHA256_BLOCK_LENGTH); 14378 memcpy(&final_data[final_len - 8], &bitcount, 8); 14379 14380 sha256_blocks(state, final_data, final_len / SHA256_BLOCK_LENGTH); 14381 14382 for (i = 0; i < ARRAY_SIZE(state); i++) 14383 put_unaligned_be32(state[i], &out[4 * i]); 14384 } 14385
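/*
 * Editor's note: illustrative sketch only, not part of the original source.
 * libbpf_sha256() above is libbpf's internal one-shot SHA-256 helper; usage
 * is simply:
 *
 *	__u8 digest[SHA256_DIGEST_LENGTH];
 *
 *	libbpf_sha256(buf, buf_sz, digest);
 *
 * The implementation processes all full 64-byte blocks first, then builds
 * the final one or two padded blocks (0x80 terminator plus the big-endian
 * bit count in the last 8 bytes), matching FIPS 180-4.
 */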