1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3 /*
4 * Common eBPF ELF object loading operations.
5 *
6 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
7 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
8 * Copyright (C) 2015 Huawei Inc.
9 * Copyright (C) 2017 Nicira, Inc.
10 * Copyright (C) 2019 Isovalent, Inc.
11 */
12
13 #ifndef _GNU_SOURCE
14 #define _GNU_SOURCE
15 #endif
16 #include <stdlib.h>
17 #include <stdio.h>
18 #include <stdarg.h>
19 #include <libgen.h>
20 #include <inttypes.h>
21 #include <limits.h>
22 #include <string.h>
23 #include <unistd.h>
24 #include <endian.h>
25 #include <fcntl.h>
26 #include <errno.h>
27 #include <ctype.h>
28 #include <asm/unistd.h>
29 #include <linux/err.h>
30 #include <linux/kernel.h>
31 #include <linux/bpf.h>
32 #include <linux/btf.h>
33 #include <linux/filter.h>
34 #include <linux/limits.h>
35 #include <linux/perf_event.h>
36 #include <linux/bpf_perf_event.h>
37 #include <linux/ring_buffer.h>
38 #include <sys/epoll.h>
39 #include <sys/ioctl.h>
40 #include <sys/mman.h>
41 #include <sys/stat.h>
42 #include <sys/types.h>
43 #include <sys/vfs.h>
44 #include <sys/utsname.h>
45 #include <sys/resource.h>
46 #include <libelf.h>
47 #include <gelf.h>
48 #include <zlib.h>
49
50 #include "libbpf.h"
51 #include "bpf.h"
52 #include "btf.h"
53 #include "libbpf_internal.h"
54 #include "hashmap.h"
55 #include "bpf_gen_internal.h"
56 #include "zip.h"
57
58 #ifndef BPF_FS_MAGIC
59 #define BPF_FS_MAGIC 0xcafe4a11
60 #endif
61
62 #define MAX_EVENT_NAME_LEN 64
63
64 #define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"
65
66 #define BPF_INSN_SZ (sizeof(struct bpf_insn))
67
68 /* vfprintf() in __base_pr() uses a nonliteral format string. It may break
69  * compilation if the user enables the corresponding warning. Disable it explicitly.
70 */
71 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
72
73 #define __printf(a, b) __attribute__((format(printf, a, b)))
74
75 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
76 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
77 static int map_set_def_max_entries(struct bpf_map *map);
78
79 static const char * const attach_type_name[] = {
80 [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress",
81 [BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress",
82 [BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create",
83 [BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release",
84 [BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops",
85 [BPF_CGROUP_DEVICE] = "cgroup_device",
86 [BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind",
87 [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind",
88 [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect",
89 [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect",
90 [BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect",
91 [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind",
92 [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind",
93 [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername",
94 [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername",
95 [BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername",
96 [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname",
97 [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname",
98 [BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname",
99 [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg",
100 [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg",
101 [BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg",
102 [BPF_CGROUP_SYSCTL] = "cgroup_sysctl",
103 [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg",
104 [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg",
105 [BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg",
106 [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt",
107 [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt",
108 [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
109 [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
110 [BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
111 [BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
112 [BPF_LIRC_MODE2] = "lirc_mode2",
113 [BPF_FLOW_DISSECTOR] = "flow_dissector",
114 [BPF_TRACE_RAW_TP] = "trace_raw_tp",
115 [BPF_TRACE_FENTRY] = "trace_fentry",
116 [BPF_TRACE_FEXIT] = "trace_fexit",
117 [BPF_MODIFY_RETURN] = "modify_return",
118 [BPF_LSM_MAC] = "lsm_mac",
119 [BPF_LSM_CGROUP] = "lsm_cgroup",
120 [BPF_SK_LOOKUP] = "sk_lookup",
121 [BPF_TRACE_ITER] = "trace_iter",
122 [BPF_XDP_DEVMAP] = "xdp_devmap",
123 [BPF_XDP_CPUMAP] = "xdp_cpumap",
124 [BPF_XDP] = "xdp",
125 [BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select",
126 [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate",
127 [BPF_PERF_EVENT] = "perf_event",
128 [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
129 [BPF_STRUCT_OPS] = "struct_ops",
130 [BPF_NETFILTER] = "netfilter",
131 [BPF_TCX_INGRESS] = "tcx_ingress",
132 [BPF_TCX_EGRESS] = "tcx_egress",
133 [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi",
134 [BPF_NETKIT_PRIMARY] = "netkit_primary",
135 [BPF_NETKIT_PEER] = "netkit_peer",
136 [BPF_TRACE_KPROBE_SESSION] = "trace_kprobe_session",
137 [BPF_TRACE_UPROBE_SESSION] = "trace_uprobe_session",
138 };
139
140 static const char * const link_type_name[] = {
141 [BPF_LINK_TYPE_UNSPEC] = "unspec",
142 [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
143 [BPF_LINK_TYPE_TRACING] = "tracing",
144 [BPF_LINK_TYPE_CGROUP] = "cgroup",
145 [BPF_LINK_TYPE_ITER] = "iter",
146 [BPF_LINK_TYPE_NETNS] = "netns",
147 [BPF_LINK_TYPE_XDP] = "xdp",
148 [BPF_LINK_TYPE_PERF_EVENT] = "perf_event",
149 [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
150 [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops",
151 [BPF_LINK_TYPE_NETFILTER] = "netfilter",
152 [BPF_LINK_TYPE_TCX] = "tcx",
153 [BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi",
154 [BPF_LINK_TYPE_NETKIT] = "netkit",
155 [BPF_LINK_TYPE_SOCKMAP] = "sockmap",
156 };
157
158 static const char * const map_type_name[] = {
159 [BPF_MAP_TYPE_UNSPEC] = "unspec",
160 [BPF_MAP_TYPE_HASH] = "hash",
161 [BPF_MAP_TYPE_ARRAY] = "array",
162 [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array",
163 [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array",
164 [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash",
165 [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array",
166 [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace",
167 [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array",
168 [BPF_MAP_TYPE_LRU_HASH] = "lru_hash",
169 [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash",
170 [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie",
171 [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
172 [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
173 [BPF_MAP_TYPE_DEVMAP] = "devmap",
174 [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash",
175 [BPF_MAP_TYPE_SOCKMAP] = "sockmap",
176 [BPF_MAP_TYPE_CPUMAP] = "cpumap",
177 [BPF_MAP_TYPE_XSKMAP] = "xskmap",
178 [BPF_MAP_TYPE_SOCKHASH] = "sockhash",
179 [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
180 [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
181 [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage",
182 [BPF_MAP_TYPE_QUEUE] = "queue",
183 [BPF_MAP_TYPE_STACK] = "stack",
184 [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
185 [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
186 [BPF_MAP_TYPE_RINGBUF] = "ringbuf",
187 [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
188 [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
189 [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
190 [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
191 [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage",
192 [BPF_MAP_TYPE_ARENA] = "arena",
193 [BPF_MAP_TYPE_INSN_ARRAY] = "insn_array",
194 };
195
196 static const char * const prog_type_name[] = {
197 [BPF_PROG_TYPE_UNSPEC] = "unspec",
198 [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
199 [BPF_PROG_TYPE_KPROBE] = "kprobe",
200 [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
201 [BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
202 [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
203 [BPF_PROG_TYPE_XDP] = "xdp",
204 [BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
205 [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
206 [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
207 [BPF_PROG_TYPE_LWT_IN] = "lwt_in",
208 [BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
209 [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
210 [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
211 [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
212 [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
213 [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
214 [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
215 [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
216 [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
217 [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
218 [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
219 [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
220 [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
221 [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
222 [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
223 [BPF_PROG_TYPE_TRACING] = "tracing",
224 [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
225 [BPF_PROG_TYPE_EXT] = "ext",
226 [BPF_PROG_TYPE_LSM] = "lsm",
227 [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
228 [BPF_PROG_TYPE_SYSCALL] = "syscall",
229 [BPF_PROG_TYPE_NETFILTER] = "netfilter",
230 };
231
232 static int __base_pr(enum libbpf_print_level level, const char *format,
233 va_list args)
234 {
235 const char *env_var = "LIBBPF_LOG_LEVEL";
236 static enum libbpf_print_level min_level = LIBBPF_INFO;
237 static bool initialized;
238
239 if (!initialized) {
240 char *verbosity;
241
242 initialized = true;
243 verbosity = getenv(env_var);
244 if (verbosity) {
245 if (strcasecmp(verbosity, "warn") == 0)
246 min_level = LIBBPF_WARN;
247 else if (strcasecmp(verbosity, "debug") == 0)
248 min_level = LIBBPF_DEBUG;
249 else if (strcasecmp(verbosity, "info") == 0)
250 min_level = LIBBPF_INFO;
251 else
252 fprintf(stderr, "libbpf: unrecognized '%s' envvar value: '%s', should be one of 'warn', 'debug', or 'info'.\n",
253 env_var, verbosity);
254 }
255 }
256
257 /* if too verbose, skip logging */
258 if (level > min_level)
259 return 0;
260
261 return vfprintf(stderr, format, args);
262 }
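/* Usage note (illustrative, not libbpf API): with the default printer above,
 * running an application as "LIBBPF_LOG_LEVEL=debug ./app" (hypothetical
 * binary name) sets min_level to LIBBPF_DEBUG, so debug-level messages are
 * printed without any code changes; "warn" and "info" work the same way.
 */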
263
264 static libbpf_print_fn_t __libbpf_pr = __base_pr;
265
266 libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
267 {
268 libbpf_print_fn_t old_print_fn;
269
270 old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED);
271
272 return old_print_fn;
273 }
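/* Caller-side sketch (hypothetical code, not part of libbpf itself): install
 * a custom printer that drops debug output and restore the old one later:
 *
 *	static int my_print(enum libbpf_print_level lvl, const char *fmt, va_list ap)
 *	{
 *		return lvl == LIBBPF_DEBUG ? 0 : vfprintf(stderr, fmt, ap);
 *	}
 *	...
 *	libbpf_print_fn_t old_fn = libbpf_set_print(my_print);
 *	...
 *	libbpf_set_print(old_fn);
 */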
274
275 __printf(2, 3)
276 void libbpf_print(enum libbpf_print_level level, const char *format, ...)
277 {
278 va_list args;
279 int old_errno;
280 libbpf_print_fn_t print_fn;
281
282 print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED);
283 if (!print_fn)
284 return;
285
286 old_errno = errno;
287
288 va_start(args, format);
289 print_fn(level, format, args);
290 va_end(args);
291
292 errno = old_errno;
293 }
294
295 static void pr_perm_msg(int err)
296 {
297 struct rlimit limit;
298 char buf[100];
299
300 if (err != -EPERM || geteuid() != 0)
301 return;
302
303 err = getrlimit(RLIMIT_MEMLOCK, &limit);
304 if (err)
305 return;
306
307 if (limit.rlim_cur == RLIM_INFINITY)
308 return;
309
310 if (limit.rlim_cur < 1024)
311 snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
312 else if (limit.rlim_cur < 1024*1024)
313 snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
314 else
315 snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));
316
317 pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
318 buf);
319 }
320
321 /* Copied from tools/perf/util/util.h */
322 #ifndef zfree
323 # define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
324 #endif
325
326 #ifndef zclose
327 # define zclose(fd) ({ \
328 int ___err = 0; \
329 if ((fd) >= 0) \
330 ___err = close((fd)); \
331 fd = -1; \
332 ___err; })
333 #endif
334
335 static inline __u64 ptr_to_u64(const void *ptr)
336 {
337 return (__u64) (unsigned long) ptr;
338 }
339
340 int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
341 {
342 /* as of v1.0 libbpf_set_strict_mode() is a no-op */
343 return 0;
344 }
345
346 __u32 libbpf_major_version(void)
347 {
348 return LIBBPF_MAJOR_VERSION;
349 }
350
351 __u32 libbpf_minor_version(void)
352 {
353 return LIBBPF_MINOR_VERSION;
354 }
355
356 const char *libbpf_version_string(void)
357 {
358 #define __S(X) #X
359 #define _S(X) __S(X)
360 return "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
361 #undef _S
362 #undef __S
363 }
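/* The _S()/__S() pair is the usual two-level stringification idiom: _S()
 * lets LIBBPF_MAJOR_VERSION/LIBBPF_MINOR_VERSION expand to their numeric
 * values before __S() stringizes them, yielding e.g. "v1.6" rather than
 * "vLIBBPF_MAJOR_VERSION.LIBBPF_MINOR_VERSION".
 */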
364
365 enum reloc_type {
366 RELO_LD64,
367 RELO_CALL,
368 RELO_DATA,
369 RELO_EXTERN_LD64,
370 RELO_EXTERN_CALL,
371 RELO_SUBPROG_ADDR,
372 RELO_CORE,
373 RELO_INSN_ARRAY,
374 };
375
376 struct reloc_desc {
377 enum reloc_type type;
378 int insn_idx;
379 union {
380 const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
381 struct {
382 int map_idx;
383 int sym_off;
384 /*
385  * The following two fields can be placed in a union, as the
386  * ext_idx field is used for extern symbols, and the
387  * sym_size field is used for jump tables, which are never
388  * extern.
389  */
390 union {
391 int ext_idx;
392 int sym_size;
393 };
394 };
395 };
396 };
397
398 /* stored as sec_def->cookie for all libbpf-supported SEC()s */
399 enum sec_def_flags {
400 SEC_NONE = 0,
401 /* expected_attach_type is optional, if kernel doesn't support that */
402 SEC_EXP_ATTACH_OPT = 1,
403 /* legacy, only used by libbpf_get_type_names() and
404 * libbpf_attach_type_by_name(), not used by libbpf itself at all.
405  * This used to be associated with cgroup (and a few other) BPF programs
406 * that were attachable through BPF_PROG_ATTACH command. Pretty
407 * meaningless nowadays, though.
408 */
409 SEC_ATTACHABLE = 2,
410 SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
411 /* attachment target is specified through BTF ID in either kernel or
412 * other BPF program's BTF object
413 */
414 SEC_ATTACH_BTF = 4,
415 /* BPF program type allows sleeping/blocking in kernel */
416 SEC_SLEEPABLE = 8,
417 /* BPF program supports non-linear XDP buffers */
418 SEC_XDP_FRAGS = 16,
419 /* Set up the proper attach type for USDT probes. */
420 SEC_USDT = 32,
421 };
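/* These flags are OR-ed together in a sec_def's cookie; for instance, a
 * sleepable fentry section (SEC("fentry.s/...")) would typically carry
 * SEC_ATTACH_BTF | SEC_SLEEPABLE, while SEC("xdp.frags") programs carry
 * SEC_XDP_FRAGS. (Illustrative combinations, not an exhaustive list.)
 */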
422
423 struct bpf_sec_def {
424 char *sec;
425 enum bpf_prog_type prog_type;
426 enum bpf_attach_type expected_attach_type;
427 long cookie;
428 int handler_id;
429
430 libbpf_prog_setup_fn_t prog_setup_fn;
431 libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
432 libbpf_prog_attach_fn_t prog_attach_fn;
433 };
434
435 struct bpf_light_subprog {
436 __u32 sec_insn_off;
437 __u32 sub_insn_off;
438 };
439
440 /*
441 * bpf_prog should be a better name but it has been used in
442 * linux/filter.h.
443 */
444 struct bpf_program {
445 char *name;
446 char *sec_name;
447 size_t sec_idx;
448 const struct bpf_sec_def *sec_def;
449 /* this program's instruction offset (in number of instructions)
450 * within its containing ELF section
451 */
452 size_t sec_insn_off;
453 /* number of original instructions in ELF section belonging to this
454  * program, not taking into account subprogram instructions possibly
455  * appended later during relocation
456 */
457 size_t sec_insn_cnt;
458 /* Offset (in number of instructions) of the start of instructions
459  * belonging to this BPF program within its containing main BPF
460  * program. For the entry-point (main) BPF program, this is always
461  * zero. For a sub-program, this gets reset before each of the main BPF
462  * programs is processed and relocated, and is used to determine
463  * whether the sub-program was already appended to the main program and,
464  * if yes, at which instruction offset.
465 */
466 size_t sub_insn_off;
467
468 /* instructions that belong to BPF program; insns[0] is located at
469 * sec_insn_off instruction within its ELF section in ELF file, so
470 * when mapping ELF file instruction index to the local instruction,
471 * one needs to subtract sec_insn_off; and vice versa.
472 */
473 struct bpf_insn *insns;
474 /* actual number of instructions in this BPF program's image; for
475 * entry-point BPF programs this includes the size of main program
476 * itself plus all the used sub-programs, appended at the end
477 */
478 size_t insns_cnt;
479
480 struct reloc_desc *reloc_desc;
481 int nr_reloc;
482
483 /* BPF verifier log settings */
484 char *log_buf;
485 size_t log_size;
486 __u32 log_level;
487
488 struct bpf_object *obj;
489
490 int fd;
491 bool autoload;
492 bool autoattach;
493 bool sym_global;
494 bool mark_btf_static;
495 enum bpf_prog_type type;
496 enum bpf_attach_type expected_attach_type;
497 int exception_cb_idx;
498
499 int prog_ifindex;
500 __u32 attach_btf_obj_fd;
501 __u32 attach_btf_id;
502 __u32 attach_prog_fd;
503
504 void *func_info;
505 __u32 func_info_rec_size;
506 __u32 func_info_cnt;
507
508 void *line_info;
509 __u32 line_info_rec_size;
510 __u32 line_info_cnt;
511 __u32 prog_flags;
512 __u8 hash[SHA256_DIGEST_LENGTH];
513
514 struct bpf_light_subprog *subprogs;
515 __u32 subprog_cnt;
516 };
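/* Illustrative example of the offsets above (hypothetical numbers): if a main
 * program occupies the first 100 insns of its section (sec_insn_off = 0,
 * sec_insn_cnt = 100) and relocation appends a 20-insn subprogram, that
 * subprogram's sub_insn_off becomes 100 and the main program's insns_cnt
 * grows to 120, while sec_insn_cnt still reflects only the original 100.
 */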
517
518 struct bpf_struct_ops {
519 struct bpf_program **progs;
520 __u32 *kern_func_off;
521 /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
522 void *data;
523 /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
524 * btf_vmlinux's format.
525 * struct bpf_struct_ops_tcp_congestion_ops {
526 * [... some other kernel fields ...]
527 * struct tcp_congestion_ops data;
528 * }
529  * kern_vdata's size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
530 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
531 * from "data".
532 */
533 void *kern_vdata;
534 __u32 type_id;
535 };
536
537 #define DATA_SEC ".data"
538 #define BSS_SEC ".bss"
539 #define RODATA_SEC ".rodata"
540 #define KCONFIG_SEC ".kconfig"
541 #define KSYMS_SEC ".ksyms"
542 #define STRUCT_OPS_SEC ".struct_ops"
543 #define STRUCT_OPS_LINK_SEC ".struct_ops.link"
544 #define ARENA_SEC ".addr_space.1"
545
546 enum libbpf_map_type {
547 LIBBPF_MAP_UNSPEC,
548 LIBBPF_MAP_DATA,
549 LIBBPF_MAP_BSS,
550 LIBBPF_MAP_RODATA,
551 LIBBPF_MAP_KCONFIG,
552 };
553
554 struct bpf_map_def {
555 unsigned int type;
556 unsigned int key_size;
557 unsigned int value_size;
558 unsigned int max_entries;
559 unsigned int map_flags;
560 };
561
562 struct bpf_map {
563 struct bpf_object *obj;
564 char *name;
565 /* real_name is defined for special internal maps (.rodata*,
566 * .data*, .bss, .kconfig) and preserves their original ELF section
567 * name. This is important to be able to find corresponding BTF
568 * DATASEC information.
569 */
570 char *real_name;
571 int fd;
572 int sec_idx;
573 size_t sec_offset;
574 int map_ifindex;
575 int inner_map_fd;
576 struct bpf_map_def def;
577 __u32 numa_node;
578 __u32 btf_var_idx;
579 int mod_btf_fd;
580 __u32 btf_key_type_id;
581 __u32 btf_value_type_id;
582 __u32 btf_vmlinux_value_type_id;
583 enum libbpf_map_type libbpf_type;
584 void *mmaped;
585 struct bpf_struct_ops *st_ops;
586 struct bpf_map *inner_map;
587 void **init_slots;
588 int init_slots_sz;
589 char *pin_path;
590 bool pinned;
591 bool reused;
592 bool autocreate;
593 bool autoattach;
594 __u64 map_extra;
595 struct bpf_program *excl_prog;
596 };
597
598 enum extern_type {
599 EXT_UNKNOWN,
600 EXT_KCFG,
601 EXT_KSYM,
602 };
603
604 enum kcfg_type {
605 KCFG_UNKNOWN,
606 KCFG_CHAR,
607 KCFG_BOOL,
608 KCFG_INT,
609 KCFG_TRISTATE,
610 KCFG_CHAR_ARR,
611 };
612
613 struct extern_desc {
614 enum extern_type type;
615 int sym_idx;
616 int btf_id;
617 int sec_btf_id;
618 char *name;
619 char *essent_name;
620 bool is_set;
621 bool is_weak;
622 union {
623 struct {
624 enum kcfg_type type;
625 int sz;
626 int align;
627 int data_off;
628 bool is_signed;
629 } kcfg;
630 struct {
631 unsigned long long addr;
632
633 /* target btf_id of the corresponding kernel var. */
634 int kernel_btf_obj_fd;
635 int kernel_btf_id;
636
637 /* local btf_id of the ksym extern's type. */
638 __u32 type_id;
639 /* BTF fd index to be patched in for insn->off, this is
640 * 0 for vmlinux BTF, index in obj->fd_array for module
641 * BTF
642 */
643 __s16 btf_fd_idx;
644 } ksym;
645 };
646 };
647
648 struct module_btf {
649 struct btf *btf;
650 char *name;
651 __u32 id;
652 int fd;
653 int fd_array_idx;
654 };
655
656 enum sec_type {
657 SEC_UNUSED = 0,
658 SEC_RELO,
659 SEC_BSS,
660 SEC_DATA,
661 SEC_RODATA,
662 SEC_ST_OPS,
663 };
664
665 struct elf_sec_desc {
666 enum sec_type sec_type;
667 Elf64_Shdr *shdr;
668 Elf_Data *data;
669 };
670
671 struct elf_state {
672 int fd;
673 const void *obj_buf;
674 size_t obj_buf_sz;
675 Elf *elf;
676 Elf64_Ehdr *ehdr;
677 Elf_Data *symbols;
678 Elf_Data *arena_data;
679 size_t shstrndx; /* section index for section name strings */
680 size_t strtabidx;
681 struct elf_sec_desc *secs;
682 size_t sec_cnt;
683 int btf_maps_shndx;
684 __u32 btf_maps_sec_btf_id;
685 int text_shndx;
686 int symbols_shndx;
687 bool has_st_ops;
688 int arena_data_shndx;
689 int jumptables_data_shndx;
690 };
691
692 struct usdt_manager;
693
694 enum bpf_object_state {
695 OBJ_OPEN,
696 OBJ_PREPARED,
697 OBJ_LOADED,
698 };
699
700 struct bpf_object {
701 char name[BPF_OBJ_NAME_LEN];
702 char license[64];
703 __u32 kern_version;
704
705 enum bpf_object_state state;
706 struct bpf_program *programs;
707 size_t nr_programs;
708 struct bpf_map *maps;
709 size_t nr_maps;
710 size_t maps_cap;
711
712 char *kconfig;
713 struct extern_desc *externs;
714 int nr_extern;
715 int kconfig_map_idx;
716
717 bool has_subcalls;
718 bool has_rodata;
719
720 struct bpf_gen *gen_loader;
721
722 /* Information used when doing ELF-related work. Only valid if efile.elf is not NULL */
723 struct elf_state efile;
724
725 unsigned char byteorder;
726
727 struct btf *btf;
728 struct btf_ext *btf_ext;
729
730 /* Parse and load BTF vmlinux if any of the programs in the object need
731 * it at load time.
732 */
733 struct btf *btf_vmlinux;
734 /* Path to the custom BTF to be used for BPF CO-RE relocations as an
735 * override for vmlinux BTF.
736 */
737 char *btf_custom_path;
738 /* vmlinux BTF override for CO-RE relocations */
739 struct btf *btf_vmlinux_override;
740 /* Lazily initialized kernel module BTFs */
741 struct module_btf *btf_modules;
742 bool btf_modules_loaded;
743 size_t btf_module_cnt;
744 size_t btf_module_cap;
745
746 /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
747 char *log_buf;
748 size_t log_size;
749 __u32 log_level;
750
751 int *fd_array;
752 size_t fd_array_cap;
753 size_t fd_array_cnt;
754
755 struct usdt_manager *usdt_man;
756
757 int arena_map_idx;
758 void *arena_data;
759 size_t arena_data_sz;
760
761 void *jumptables_data;
762 size_t jumptables_data_sz;
763
764 struct {
765 struct bpf_program *prog;
766 int sym_off;
767 int fd;
768 } *jumptable_maps;
769 size_t jumptable_map_cnt;
770
771 struct kern_feature_cache *feat_cache;
772 char *token_path;
773 int token_fd;
774
775 char path[];
776 };
777
778 static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
779 static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
780 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
781 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
782 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
783 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
784 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
785 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
786 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);
787
788 void bpf_program__unload(struct bpf_program *prog)
789 {
790 if (!prog)
791 return;
792
793 zclose(prog->fd);
794
795 zfree(&prog->func_info);
796 zfree(&prog->line_info);
797 zfree(&prog->subprogs);
798 }
799
800 static void bpf_program__exit(struct bpf_program *prog)
801 {
802 if (!prog)
803 return;
804
805 bpf_program__unload(prog);
806 zfree(&prog->name);
807 zfree(&prog->sec_name);
808 zfree(&prog->insns);
809 zfree(&prog->reloc_desc);
810
811 prog->nr_reloc = 0;
812 prog->insns_cnt = 0;
813 prog->sec_idx = -1;
814 }
815
816 static bool insn_is_subprog_call(const struct bpf_insn *insn)
817 {
818 return BPF_CLASS(insn->code) == BPF_JMP &&
819 BPF_OP(insn->code) == BPF_CALL &&
820 BPF_SRC(insn->code) == BPF_K &&
821 insn->src_reg == BPF_PSEUDO_CALL &&
822 insn->dst_reg == 0 &&
823 insn->off == 0;
824 }
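/* E.g., a bpf-to-bpf (subprog) call is encoded as BPF_JMP|BPF_CALL with
 * src_reg == BPF_PSEUDO_CALL and imm holding the instruction delta to the
 * callee, whereas a helper call uses src_reg == 0 and imm == helper ID.
 */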
825
826 static bool is_call_insn(const struct bpf_insn *insn)
827 {
828 return insn->code == (BPF_JMP | BPF_CALL);
829 }
830
831 static bool insn_is_pseudo_func(struct bpf_insn *insn)
832 {
833 return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
834 }
835
836 static int
837 bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
838 const char *name, size_t sec_idx, const char *sec_name,
839 size_t sec_off, void *insn_data, size_t insn_data_sz)
840 {
841 if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
842 pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
843 sec_name, name, sec_off, insn_data_sz);
844 return -EINVAL;
845 }
846
847 memset(prog, 0, sizeof(*prog));
848 prog->obj = obj;
849
850 prog->sec_idx = sec_idx;
851 prog->sec_insn_off = sec_off / BPF_INSN_SZ;
852 prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
853 /* insns_cnt can later be increased by appending used subprograms */
854 prog->insns_cnt = prog->sec_insn_cnt;
855
856 prog->type = BPF_PROG_TYPE_UNSPEC;
857 prog->fd = -1;
858 prog->exception_cb_idx = -1;
859
860 /* libbpf's convention for SEC("?abc...") is that it's just like
861 * SEC("abc...") but the corresponding bpf_program starts out with
862 * autoload set to false.
863 */
864 if (sec_name[0] == '?') {
865 prog->autoload = false;
866 /* from now on forget there was ? in section name */
867 sec_name++;
868 } else {
869 prog->autoload = true;
870 }
871
872 prog->autoattach = true;
873
874 /* inherit object's log_level */
875 prog->log_level = obj->log_level;
876
877 prog->sec_name = strdup(sec_name);
878 if (!prog->sec_name)
879 goto errout;
880
881 prog->name = strdup(name);
882 if (!prog->name)
883 goto errout;
884
885 prog->insns = malloc(insn_data_sz);
886 if (!prog->insns)
887 goto errout;
888 memcpy(prog->insns, insn_data, insn_data_sz);
889
890 return 0;
891 errout:
892 pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
893 bpf_program__exit(prog);
894 return -ENOMEM;
895 }
896
897 static int
898 bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
899 const char *sec_name, int sec_idx)
900 {
901 Elf_Data *symbols = obj->efile.symbols;
902 struct bpf_program *prog, *progs;
903 void *data = sec_data->d_buf;
904 size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
905 int nr_progs, err, i;
906 const char *name;
907 Elf64_Sym *sym;
908
909 progs = obj->programs;
910 nr_progs = obj->nr_programs;
911 nr_syms = symbols->d_size / sizeof(Elf64_Sym);
912
913 for (i = 0; i < nr_syms; i++) {
914 sym = elf_sym_by_idx(obj, i);
915
916 if (sym->st_shndx != sec_idx)
917 continue;
918 if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
919 continue;
920
921 prog_sz = sym->st_size;
922 sec_off = sym->st_value;
923
924 name = elf_sym_str(obj, sym->st_name);
925 if (!name) {
926 pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
927 sec_name, sec_off);
928 return -LIBBPF_ERRNO__FORMAT;
929 }
930
931 if (sec_off + prog_sz > sec_sz || sec_off + prog_sz < sec_off) {
932 pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
933 sec_name, sec_off);
934 return -LIBBPF_ERRNO__FORMAT;
935 }
936
937 if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
938 pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
939 return -ENOTSUP;
940 }
941
942 pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
943 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
944
945 progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
946 if (!progs) {
947 /*
948  * In this case the original obj->programs
949  * is still valid, so no special treatment is needed in
950  * bpf_object__close().
951 */
952 pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
953 sec_name, name);
954 return -ENOMEM;
955 }
956 obj->programs = progs;
957
958 prog = &progs[nr_progs];
959
960 err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
961 sec_off, data + sec_off, prog_sz);
962 if (err)
963 return err;
964
965 if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL)
966 prog->sym_global = true;
967
968 /* if function is a global/weak symbol, but has restricted
969 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
970 * as static to enable more permissive BPF verification mode
971 * with more outside context available to BPF verifier
972 */
973 if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
974 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
975 prog->mark_btf_static = true;
976
977 nr_progs++;
978 obj->nr_programs = nr_progs;
979 }
980
981 return 0;
982 }
983
984 static void bpf_object_bswap_progs(struct bpf_object *obj)
985 {
986 struct bpf_program *prog = obj->programs;
987 struct bpf_insn *insn;
988 int p, i;
989
990 for (p = 0; p < obj->nr_programs; p++, prog++) {
991 insn = prog->insns;
992 for (i = 0; i < prog->insns_cnt; i++, insn++)
993 bpf_insn_bswap(insn);
994 }
995 pr_debug("converted %zu BPF programs to native byte order\n", obj->nr_programs);
996 }
997
998 static const struct btf_member *
999 find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
1000 {
1001 struct btf_member *m;
1002 int i;
1003
1004 for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
1005 if (btf_member_bit_offset(t, i) == bit_offset)
1006 return m;
1007 }
1008
1009 return NULL;
1010 }
1011
1012 static const struct btf_member *
1013 find_member_by_name(const struct btf *btf, const struct btf_type *t,
1014 const char *name)
1015 {
1016 struct btf_member *m;
1017 int i;
1018
1019 for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
1020 if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
1021 return m;
1022 }
1023
1024 return NULL;
1025 }
1026
1027 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
1028 __u16 kind, struct btf **res_btf,
1029 struct module_btf **res_mod_btf);
1030
1031 #define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
1032 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
1033 const char *name, __u32 kind);
1034
1035 static int
1036 find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
1037 struct module_btf **mod_btf,
1038 const struct btf_type **type, __u32 *type_id,
1039 const struct btf_type **vtype, __u32 *vtype_id,
1040 const struct btf_member **data_member)
1041 {
1042 const struct btf_type *kern_type, *kern_vtype;
1043 const struct btf_member *kern_data_member;
1044 struct btf *btf = NULL;
1045 __s32 kern_vtype_id, kern_type_id;
1046 char tname[192], stname[256];
1047 __u32 i;
1048
1049 snprintf(tname, sizeof(tname), "%.*s",
1050 (int)bpf_core_essential_name_len(tname_raw), tname_raw);
1051
1052 snprintf(stname, sizeof(stname), "%s%s", STRUCT_OPS_VALUE_PREFIX, tname);
1053
1054 /* Look for the corresponding "map_value" type that will be used
1055 * in map_update(BPF_MAP_TYPE_STRUCT_OPS) first, figure out the btf
1056 * and the mod_btf.
1057 * For example, find "struct bpf_struct_ops_tcp_congestion_ops".
1058 */
1059 kern_vtype_id = find_ksym_btf_id(obj, stname, BTF_KIND_STRUCT, &btf, mod_btf);
1060 if (kern_vtype_id < 0) {
1061 pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", stname);
1062 return kern_vtype_id;
1063 }
1064 kern_vtype = btf__type_by_id(btf, kern_vtype_id);
1065
1066 kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
1067 if (kern_type_id < 0) {
1068 pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", tname);
1069 return kern_type_id;
1070 }
1071 kern_type = btf__type_by_id(btf, kern_type_id);
1072
1073 /* Find "struct tcp_congestion_ops" from
1074 * struct bpf_struct_ops_tcp_congestion_ops {
1075 * [ ... ]
1076 * struct tcp_congestion_ops data;
1077 * }
1078 */
1079 kern_data_member = btf_members(kern_vtype);
1080 for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
1081 if (kern_data_member->type == kern_type_id)
1082 break;
1083 }
1084 if (i == btf_vlen(kern_vtype)) {
1085 pr_warn("struct_ops init_kern: struct %s data is not found in struct %s\n",
1086 tname, stname);
1087 return -EINVAL;
1088 }
1089
1090 *type = kern_type;
1091 *type_id = kern_type_id;
1092 *vtype = kern_vtype;
1093 *vtype_id = kern_vtype_id;
1094 *data_member = kern_data_member;
1095
1096 return 0;
1097 }
1098
1099 static bool bpf_map__is_struct_ops(const struct bpf_map *map)
1100 {
1101 return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
1102 }
1103
1104 static bool is_valid_st_ops_program(struct bpf_object *obj,
1105 const struct bpf_program *prog)
1106 {
1107 int i;
1108
1109 for (i = 0; i < obj->nr_programs; i++) {
1110 if (&obj->programs[i] == prog)
1111 return prog->type == BPF_PROG_TYPE_STRUCT_OPS;
1112 }
1113
1114 return false;
1115 }
1116
1117 /* For each struct_ops program P, referenced from some struct_ops map M,
1118 * enable P.autoload if there are Ms for which M.autocreate is true,
1119 * disable P.autoload if for all Ms M.autocreate is false.
1120 * Don't change P.autoload for programs that are not referenced from any maps.
1121 */
1122 static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj)
1123 {
1124 struct bpf_program *prog, *slot_prog;
1125 struct bpf_map *map;
1126 int i, j, k, vlen;
1127
1128 for (i = 0; i < obj->nr_programs; ++i) {
1129 int should_load = false;
1130 int use_cnt = 0;
1131
1132 prog = &obj->programs[i];
1133 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
1134 continue;
1135
1136 for (j = 0; j < obj->nr_maps; ++j) {
1137 const struct btf_type *type;
1138
1139 map = &obj->maps[j];
1140 if (!bpf_map__is_struct_ops(map))
1141 continue;
1142
1143 type = btf__type_by_id(obj->btf, map->st_ops->type_id);
1144 vlen = btf_vlen(type);
1145 for (k = 0; k < vlen; ++k) {
1146 slot_prog = map->st_ops->progs[k];
1147 if (prog != slot_prog)
1148 continue;
1149
1150 use_cnt++;
1151 if (map->autocreate)
1152 should_load = true;
1153 }
1154 }
1155 if (use_cnt)
1156 prog->autoload = should_load;
1157 }
1158
1159 return 0;
1160 }
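/* Example (hypothetical setup): if prog P appears in map M1 with
 * M1.autocreate == true and in map M2 with M2.autocreate == false, P keeps
 * autoload == true; if both maps had autocreate == false, P.autoload would be
 * flipped to false. A program referenced by no struct_ops map is untouched.
 */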
1161
1162 /* Init the map's fields that depend on kern_btf */
1163 static int bpf_map__init_kern_struct_ops(struct bpf_map *map)
1164 {
1165 const struct btf_member *member, *kern_member, *kern_data_member;
1166 const struct btf_type *type, *kern_type, *kern_vtype;
1167 __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
1168 struct bpf_object *obj = map->obj;
1169 const struct btf *btf = obj->btf;
1170 struct bpf_struct_ops *st_ops;
1171 const struct btf *kern_btf;
1172 struct module_btf *mod_btf = NULL;
1173 void *data, *kern_data;
1174 const char *tname;
1175 int err;
1176
1177 st_ops = map->st_ops;
1178 type = btf__type_by_id(btf, st_ops->type_id);
1179 tname = btf__name_by_offset(btf, type->name_off);
1180 err = find_struct_ops_kern_types(obj, tname, &mod_btf,
1181 &kern_type, &kern_type_id,
1182 &kern_vtype, &kern_vtype_id,
1183 &kern_data_member);
1184 if (err)
1185 return err;
1186
1187 kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux;
1188
1189 pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
1190 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);
1191
1192 map->mod_btf_fd = mod_btf ? mod_btf->fd : -1;
1193 map->def.value_size = kern_vtype->size;
1194 map->btf_vmlinux_value_type_id = kern_vtype_id;
1195
1196 st_ops->kern_vdata = calloc(1, kern_vtype->size);
1197 if (!st_ops->kern_vdata)
1198 return -ENOMEM;
1199
1200 data = st_ops->data;
1201 kern_data_off = kern_data_member->offset / 8;
1202 kern_data = st_ops->kern_vdata + kern_data_off;
1203
1204 member = btf_members(type);
1205 for (i = 0; i < btf_vlen(type); i++, member++) {
1206 const struct btf_type *mtype, *kern_mtype;
1207 __u32 mtype_id, kern_mtype_id;
1208 void *mdata, *kern_mdata;
1209 struct bpf_program *prog;
1210 __s64 msize, kern_msize;
1211 __u32 moff, kern_moff;
1212 __u32 kern_member_idx;
1213 const char *mname;
1214
1215 mname = btf__name_by_offset(btf, member->name_off);
1216 moff = member->offset / 8;
1217 mdata = data + moff;
1218 msize = btf__resolve_size(btf, member->type);
1219 if (msize < 0) {
1220 pr_warn("struct_ops init_kern %s: failed to resolve the size of member %s\n",
1221 map->name, mname);
1222 return msize;
1223 }
1224
1225 kern_member = find_member_by_name(kern_btf, kern_type, mname);
1226 if (!kern_member) {
1227 if (!libbpf_is_mem_zeroed(mdata, msize)) {
1228 pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
1229 map->name, mname);
1230 return -ENOTSUP;
1231 }
1232
1233 if (st_ops->progs[i]) {
1234 /* If we had declaratively set struct_ops callback, we need to
1235 * force its autoload to false, because it doesn't have
1236 * a chance of succeeding from POV of the current struct_ops map.
1237 * If this program is still referenced somewhere else, though,
1238 * then bpf_object_adjust_struct_ops_autoload() will update its
1239 * autoload accordingly.
1240 */
1241 st_ops->progs[i]->autoload = false;
1242 st_ops->progs[i] = NULL;
1243 }
1244
1245 /* Skip all-zero/NULL fields if they are not present in the kernel BTF */
1246 pr_info("struct_ops %s: member %s not found in kernel, skipping it as it's set to zero\n",
1247 map->name, mname);
1248 continue;
1249 }
1250
1251 kern_member_idx = kern_member - btf_members(kern_type);
1252 if (btf_member_bitfield_size(type, i) ||
1253 btf_member_bitfield_size(kern_type, kern_member_idx)) {
1254 pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
1255 map->name, mname);
1256 return -ENOTSUP;
1257 }
1258
1259 kern_moff = kern_member->offset / 8;
1260 kern_mdata = kern_data + kern_moff;
1261
1262 mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
1263 kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
1264 &kern_mtype_id);
1265 if (BTF_INFO_KIND(mtype->info) !=
1266 BTF_INFO_KIND(kern_mtype->info)) {
1267 pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
1268 map->name, mname, BTF_INFO_KIND(mtype->info),
1269 BTF_INFO_KIND(kern_mtype->info));
1270 return -ENOTSUP;
1271 }
1272
1273 if (btf_is_ptr(mtype)) {
1274 prog = *(void **)mdata;
1275 /* just like for the !kern_member case above, reset the declaratively
1276  * set (at compile time) program's autoload to false,
1277  * if the user replaced it with another program or NULL
1278 */
1279 if (st_ops->progs[i] && st_ops->progs[i] != prog)
1280 st_ops->progs[i]->autoload = false;
1281
1282 /* Update the value from the shadow type */
1283 st_ops->progs[i] = prog;
1284 if (!prog)
1285 continue;
1286
1287 if (!is_valid_st_ops_program(obj, prog)) {
1288 pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n",
1289 map->name, mname);
1290 return -ENOTSUP;
1291 }
1292
1293 kern_mtype = skip_mods_and_typedefs(kern_btf,
1294 kern_mtype->type,
1295 &kern_mtype_id);
1296
1297 /* mtype->type must be a func_proto which was
1298 * guaranteed in bpf_object__collect_st_ops_relos(),
1299 * so only check kern_mtype for func_proto here.
1300 */
1301 if (!btf_is_func_proto(kern_mtype)) {
1302 pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
1303 map->name, mname);
1304 return -ENOTSUP;
1305 }
1306
1307 if (mod_btf)
1308 prog->attach_btf_obj_fd = mod_btf->fd;
1309
1310 /* if we haven't yet processed this BPF program, record proper
1311 * attach_btf_id and member_idx
1312 */
1313 if (!prog->attach_btf_id) {
1314 prog->attach_btf_id = kern_type_id;
1315 prog->expected_attach_type = kern_member_idx;
1316 }
1317
1318 /* struct_ops BPF prog can be re-used between multiple
1319 * .struct_ops & .struct_ops.link as long as it's the
1320 * same struct_ops struct definition and the same
1321 * function pointer field
1322 */
1323 if (prog->attach_btf_id != kern_type_id) {
1324 pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n",
1325 map->name, mname, prog->name, prog->sec_name, prog->type,
1326 prog->attach_btf_id, kern_type_id);
1327 return -EINVAL;
1328 }
1329 if (prog->expected_attach_type != kern_member_idx) {
1330 pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n",
1331 map->name, mname, prog->name, prog->sec_name, prog->type,
1332 prog->expected_attach_type, kern_member_idx);
1333 return -EINVAL;
1334 }
1335
1336 st_ops->kern_func_off[i] = kern_data_off + kern_moff;
1337
1338 pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
1339 map->name, mname, prog->name, moff,
1340 kern_moff);
1341
1342 continue;
1343 }
1344
1345 kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
1346 if (kern_msize < 0 || msize != kern_msize) {
1347 pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
1348 map->name, mname, (ssize_t)msize,
1349 (ssize_t)kern_msize);
1350 return -ENOTSUP;
1351 }
1352
1353 pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
1354 map->name, mname, (unsigned int)msize,
1355 moff, kern_moff);
1356 memcpy(kern_mdata, mdata, msize);
1357 }
1358
1359 return 0;
1360 }
1361
1362 static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
1363 {
1364 struct bpf_map *map;
1365 size_t i;
1366 int err;
1367
1368 for (i = 0; i < obj->nr_maps; i++) {
1369 map = &obj->maps[i];
1370
1371 if (!bpf_map__is_struct_ops(map))
1372 continue;
1373
1374 if (!map->autocreate)
1375 continue;
1376
1377 err = bpf_map__init_kern_struct_ops(map);
1378 if (err)
1379 return err;
1380 }
1381
1382 return 0;
1383 }
1384
1385 static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
1386 int shndx, Elf_Data *data)
1387 {
1388 const struct btf_type *type, *datasec;
1389 const struct btf_var_secinfo *vsi;
1390 struct bpf_struct_ops *st_ops;
1391 const char *tname, *var_name;
1392 __s32 type_id, datasec_id;
1393 const struct btf *btf;
1394 struct bpf_map *map;
1395 __u32 i;
1396
1397 if (shndx == -1)
1398 return 0;
1399
1400 btf = obj->btf;
1401 datasec_id = btf__find_by_name_kind(btf, sec_name,
1402 BTF_KIND_DATASEC);
1403 if (datasec_id < 0) {
1404 pr_warn("struct_ops init: DATASEC %s not found\n",
1405 sec_name);
1406 return -EINVAL;
1407 }
1408
1409 datasec = btf__type_by_id(btf, datasec_id);
1410 vsi = btf_var_secinfos(datasec);
1411 for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
1412 type = btf__type_by_id(obj->btf, vsi->type);
1413 var_name = btf__name_by_offset(obj->btf, type->name_off);
1414
1415 type_id = btf__resolve_type(obj->btf, vsi->type);
1416 if (type_id < 0) {
1417 pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
1418 vsi->type, sec_name);
1419 return -EINVAL;
1420 }
1421
1422 type = btf__type_by_id(obj->btf, type_id);
1423 tname = btf__name_by_offset(obj->btf, type->name_off);
1424 if (!tname[0]) {
1425 pr_warn("struct_ops init: anonymous type is not supported\n");
1426 return -ENOTSUP;
1427 }
1428 if (!btf_is_struct(type)) {
1429 pr_warn("struct_ops init: %s is not a struct\n", tname);
1430 return -EINVAL;
1431 }
1432
1433 map = bpf_object__add_map(obj);
1434 if (IS_ERR(map))
1435 return PTR_ERR(map);
1436
1437 map->sec_idx = shndx;
1438 map->sec_offset = vsi->offset;
1439 map->name = strdup(var_name);
1440 if (!map->name)
1441 return -ENOMEM;
1442 map->btf_value_type_id = type_id;
1443
1444 /* Follow same convention as for programs autoload:
1445 * SEC("?.struct_ops") means map is not created by default.
1446 */
1447 if (sec_name[0] == '?') {
1448 map->autocreate = false;
1449 /* from now on forget there was ? in section name */
1450 sec_name++;
1451 }
1452
1453 map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
1454 map->def.key_size = sizeof(int);
1455 map->def.value_size = type->size;
1456 map->def.max_entries = 1;
1457 map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0;
1458 map->autoattach = true;
1459
1460 map->st_ops = calloc(1, sizeof(*map->st_ops));
1461 if (!map->st_ops)
1462 return -ENOMEM;
1463 st_ops = map->st_ops;
1464 st_ops->data = malloc(type->size);
1465 st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
1466 st_ops->kern_func_off = malloc(btf_vlen(type) *
1467 sizeof(*st_ops->kern_func_off));
1468 if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
1469 return -ENOMEM;
1470
1471 if (vsi->offset + type->size > data->d_size) {
1472 pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
1473 var_name, sec_name);
1474 return -EINVAL;
1475 }
1476
1477 memcpy(st_ops->data,
1478 data->d_buf + vsi->offset,
1479 type->size);
1480 st_ops->type_id = type_id;
1481
1482 pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
1483 tname, type_id, var_name, vsi->offset);
1484 }
1485
1486 return 0;
1487 }
1488
1489 static int bpf_object_init_struct_ops(struct bpf_object *obj)
1490 {
1491 const char *sec_name;
1492 int sec_idx, err;
1493
1494 for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; ++sec_idx) {
1495 struct elf_sec_desc *desc = &obj->efile.secs[sec_idx];
1496
1497 if (desc->sec_type != SEC_ST_OPS)
1498 continue;
1499
1500 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1501 if (!sec_name)
1502 return -LIBBPF_ERRNO__FORMAT;
1503
1504 err = init_struct_ops_maps(obj, sec_name, sec_idx, desc->data);
1505 if (err)
1506 return err;
1507 }
1508
1509 return 0;
1510 }
1511
1512 static struct bpf_object *bpf_object__new(const char *path,
1513 const void *obj_buf,
1514 size_t obj_buf_sz,
1515 const char *obj_name)
1516 {
1517 struct bpf_object *obj;
1518 char *end;
1519
1520 obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
1521 if (!obj) {
1522 pr_warn("alloc memory failed for %s\n", path);
1523 return ERR_PTR(-ENOMEM);
1524 }
1525
1526 strcpy(obj->path, path);
1527 if (obj_name) {
1528 libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
1529 } else {
1530 /* Using basename() GNU version which doesn't modify arg. */
1531 libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
1532 end = strchr(obj->name, '.');
1533 if (end)
1534 *end = 0;
1535 }
1536
1537 obj->efile.fd = -1;
1538 /*
1539  * The caller of this function should also call
1540  * bpf_object__elf_finish() after data collection to return
1541  * obj_buf to the user. Otherwise, we would have to duplicate the buffer
1542  * to avoid the user freeing it before ELF processing is finished.
1543 */
1544 obj->efile.obj_buf = obj_buf;
1545 obj->efile.obj_buf_sz = obj_buf_sz;
1546 obj->efile.btf_maps_shndx = -1;
1547 obj->kconfig_map_idx = -1;
1548 obj->arena_map_idx = -1;
1549
1550 obj->kern_version = get_kernel_version();
1551 obj->state = OBJ_OPEN;
1552
1553 return obj;
1554 }
1555
1556 static void bpf_object__elf_finish(struct bpf_object *obj)
1557 {
1558 if (!obj->efile.elf)
1559 return;
1560
1561 elf_end(obj->efile.elf);
1562 obj->efile.elf = NULL;
1563 obj->efile.ehdr = NULL;
1564 obj->efile.symbols = NULL;
1565 obj->efile.arena_data = NULL;
1566
1567 zfree(&obj->efile.secs);
1568 obj->efile.sec_cnt = 0;
1569 zclose(obj->efile.fd);
1570 obj->efile.obj_buf = NULL;
1571 obj->efile.obj_buf_sz = 0;
1572 }
1573
1574 static int bpf_object__elf_init(struct bpf_object *obj)
1575 {
1576 Elf64_Ehdr *ehdr;
1577 int err = 0;
1578 Elf *elf;
1579
1580 if (obj->efile.elf) {
1581 pr_warn("elf: init internal error\n");
1582 return -LIBBPF_ERRNO__LIBELF;
1583 }
1584
1585 if (obj->efile.obj_buf_sz > 0) {
1586 /* obj_buf should have been validated by bpf_object__open_mem(). */
1587 elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
1588 } else {
1589 obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
1590 if (obj->efile.fd < 0) {
1591 err = -errno;
1592 pr_warn("elf: failed to open %s: %s\n", obj->path, errstr(err));
1593 return err;
1594 }
1595
1596 elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
1597 }
1598
1599 if (!elf) {
1600 pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
1601 err = -LIBBPF_ERRNO__LIBELF;
1602 goto errout;
1603 }
1604
1605 obj->efile.elf = elf;
1606
1607 if (elf_kind(elf) != ELF_K_ELF) {
1608 err = -LIBBPF_ERRNO__FORMAT;
1609 pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
1610 goto errout;
1611 }
1612
1613 if (gelf_getclass(elf) != ELFCLASS64) {
1614 err = -LIBBPF_ERRNO__FORMAT;
1615 pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
1616 goto errout;
1617 }
1618
1619 obj->efile.ehdr = ehdr = elf64_getehdr(elf);
1620 if (!obj->efile.ehdr) {
1621 pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
1622 err = -LIBBPF_ERRNO__FORMAT;
1623 goto errout;
1624 }
1625
1626 /* Validate ELF object endianness... */
1627 if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB &&
1628 ehdr->e_ident[EI_DATA] != ELFDATA2MSB) {
1629 err = -LIBBPF_ERRNO__ENDIAN;
1630 pr_warn("elf: '%s' has unknown byte order\n", obj->path);
1631 goto errout;
1632 }
1633 /* and save after bpf_object_open() frees ELF data */
1634 obj->byteorder = ehdr->e_ident[EI_DATA];
1635
1636 if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
1637 pr_warn("elf: failed to get section names section index for %s: %s\n",
1638 obj->path, elf_errmsg(-1));
1639 err = -LIBBPF_ERRNO__FORMAT;
1640 goto errout;
1641 }
1642
1643 /* ELF is corrupted/truncated, avoid calling elf_strptr. */
1644 if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
1645 pr_warn("elf: failed to get section names strings from %s: %s\n",
1646 obj->path, elf_errmsg(-1));
1647 err = -LIBBPF_ERRNO__FORMAT;
1648 goto errout;
1649 }
1650
1651 /* Old LLVM set e_machine to EM_NONE */
1652 if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
1653 pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
1654 err = -LIBBPF_ERRNO__FORMAT;
1655 goto errout;
1656 }
1657
1658 return 0;
1659 errout:
1660 bpf_object__elf_finish(obj);
1661 return err;
1662 }
1663
1664 static bool is_native_endianness(struct bpf_object *obj)
1665 {
1666 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1667 return obj->byteorder == ELFDATA2LSB;
1668 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
1669 return obj->byteorder == ELFDATA2MSB;
1670 #else
1671 # error "Unrecognized __BYTE_ORDER__"
1672 #endif
1673 }
1674
1675 static int
1676 bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
1677 {
1678 if (!data) {
1679 pr_warn("invalid license section in %s\n", obj->path);
1680 return -LIBBPF_ERRNO__FORMAT;
1681 }
1682 /* libbpf_strlcpy() only copies the first N - 1 bytes, so size + 1 won't
1683  * read past the allowed ELF data section buffer
1684 */
1685 libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
1686 pr_debug("license of %s is %s\n", obj->path, obj->license);
1687 return 0;
1688 }
1689
1690 static int
1691 bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
1692 {
1693 __u32 kver;
1694
1695 if (!data || size != sizeof(kver)) {
1696 pr_warn("invalid kver section in %s\n", obj->path);
1697 return -LIBBPF_ERRNO__FORMAT;
1698 }
1699 memcpy(&kver, data, sizeof(kver));
1700 obj->kern_version = kver;
1701 pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
1702 return 0;
1703 }
1704
1705 static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
1706 {
1707 if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
1708 type == BPF_MAP_TYPE_HASH_OF_MAPS)
1709 return true;
1710 return false;
1711 }
1712
1713 static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
1714 {
1715 Elf_Data *data;
1716 Elf_Scn *scn;
1717
1718 if (!name)
1719 return -EINVAL;
1720
1721 scn = elf_sec_by_name(obj, name);
1722 data = elf_sec_data(obj, scn);
1723 if (data) {
1724 *size = data->d_size;
1725 return 0; /* found it */
1726 }
1727
1728 return -ENOENT;
1729 }
1730
1731 static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name)
1732 {
1733 Elf_Data *symbols = obj->efile.symbols;
1734 const char *sname;
1735 size_t si;
1736
1737 for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
1738 Elf64_Sym *sym = elf_sym_by_idx(obj, si);
1739
1740 if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
1741 continue;
1742
1743 if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
1744 ELF64_ST_BIND(sym->st_info) != STB_WEAK)
1745 continue;
1746
1747 sname = elf_sym_str(obj, sym->st_name);
1748 if (!sname) {
1749 pr_warn("failed to get sym name string for var %s\n", name);
1750 return ERR_PTR(-EIO);
1751 }
1752 if (strcmp(name, sname) == 0)
1753 return sym;
1754 }
1755
1756 return ERR_PTR(-ENOENT);
1757 }
1758
1759 #ifndef MFD_CLOEXEC
1760 #define MFD_CLOEXEC 0x0001U
1761 #endif
1762 #ifndef MFD_NOEXEC_SEAL
1763 #define MFD_NOEXEC_SEAL 0x0008U
1764 #endif
1765
1766 static int create_placeholder_fd(void)
1767 {
1768 unsigned int flags = MFD_CLOEXEC | MFD_NOEXEC_SEAL;
1769 const char *name = "libbpf-placeholder-fd";
1770 int fd;
1771
1772 fd = ensure_good_fd(sys_memfd_create(name, flags));
1773 if (fd >= 0)
1774 return fd;
1775 else if (errno != EINVAL)
1776 return -errno;
1777
1778 /* Possibly running on kernel without MFD_NOEXEC_SEAL */
1779 fd = ensure_good_fd(sys_memfd_create(name, flags & ~MFD_NOEXEC_SEAL));
1780 if (fd < 0)
1781 return -errno;
1782 return fd;
1783 }
1784
1785 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
1786 {
1787 struct bpf_map *map;
1788 int err;
1789
1790 err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
1791 sizeof(*obj->maps), obj->nr_maps + 1);
1792 if (err)
1793 return ERR_PTR(err);
1794
1795 map = &obj->maps[obj->nr_maps++];
1796 map->obj = obj;
1797 /* Preallocate map FD without actually creating BPF map just yet.
1798 * These map FD "placeholders" will be reused later without changing
1799 * FD value when map is actually created in the kernel.
1800 *
1801 * This is useful to be able to perform BPF program relocations
1802 * without having to create BPF maps before that step. This allows us
1803 * to finalize and load BTF very late in BPF object's loading phase,
1804 * right before BPF maps have to be created and BPF programs have to
1805 * be loaded. By having these map FD placeholders we can perform all
1806 * the sanitizations, relocations, and any other adjustments before we
1807 * start creating actual BPF kernel objects (BTF, maps, progs).
1808 */
1809 map->fd = create_placeholder_fd();
1810 if (map->fd < 0)
1811 return ERR_PTR(map->fd);
1812 map->inner_map_fd = -1;
1813 map->autocreate = true;
1814
1815 return map;
1816 }
1817
1818 static size_t array_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
1819 {
1820 const long page_sz = sysconf(_SC_PAGE_SIZE);
1821 size_t map_sz;
1822
1823 map_sz = (size_t)roundup(value_sz, 8) * max_entries;
1824 map_sz = roundup(map_sz, page_sz);
1825 return map_sz;
1826 }
1827
1828 static size_t bpf_map_mmap_sz(const struct bpf_map *map)
1829 {
1830 const long page_sz = sysconf(_SC_PAGE_SIZE);
1831
1832 switch (map->def.type) {
1833 case BPF_MAP_TYPE_ARRAY:
1834 return array_map_mmap_sz(map->def.value_size, map->def.max_entries);
1835 case BPF_MAP_TYPE_ARENA:
1836 return page_sz * map->def.max_entries;
1837 default:
1838 return 0; /* not supported */
1839 }
1840 }
1841
1842 static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
1843 {
1844 void *mmaped;
1845
1846 if (!map->mmaped)
1847 return -EINVAL;
1848
1849 if (old_sz == new_sz)
1850 return 0;
1851
1852 mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1853 if (mmaped == MAP_FAILED)
1854 return -errno;
1855
1856 memcpy(mmaped, map->mmaped, min(old_sz, new_sz));
1857 munmap(map->mmaped, old_sz);
1858 map->mmaped = mmaped;
1859 return 0;
1860 }
1861
1862 static char *internal_map_name(struct bpf_object *obj, const char *real_name)
1863 {
1864 char map_name[BPF_OBJ_NAME_LEN], *p;
1865 int pfx_len, sfx_len = max((size_t)7, strlen(real_name));
1866
1867 /* This is one of the more confusing parts of libbpf for various
1868 * reasons, some of which are historical. The original idea for naming
1869 * internal names was to include as much of BPF object name prefix as
1870 * possible, so that it can be distinguished from similar internal
1871 * maps of a different BPF object.
1872 * As an example, let's say we have bpf_object named 'my_object_name'
1873 * and internal map corresponding to '.rodata' ELF section. The final
1874 * map name advertised to user and to the kernel will be
1875 * 'my_objec.rodata', taking first 8 characters of object name and
1876 * entire 7 characters of '.rodata'.
1877 * Somewhat confusingly, if internal map ELF section name is shorter
1878 * than 7 characters, e.g., '.bss', we still reserve 7 characters
1879 * for the suffix, even though we only have 4 actual characters, and
1880 * resulting map will be called 'my_objec.bss', not even using all 15
1881 * characters allowed by the kernel. Oh well, at least the truncated
1882 * object name is somewhat consistent in this case. But if the map
1883 * name is '.kconfig', we'll still have entirety of '.kconfig' added
1884 * (8 chars) and thus will be left with only first 7 characters of the
1885 * object name ('my_obje'). Happy guessing, user, that the final map
1886 * name will be "my_obje.kconfig".
1887 * Now, with libbpf starting to support arbitrarily named .rodata.*
1888 * and .data.* data sections, it's possible that ELF section name is
1889 * longer than allowed 15 chars, so we now need to be careful to take
1890 * only up to 15 first characters of ELF name, taking no BPF object
1891 * name characters at all. So '.rodata.abracadabra' will result in
1892 * '.rodata.abracad' kernel and user-visible name.
1893 * We need to keep this convoluted logic intact for .data, .bss and
1894 * .rodata maps, but for new custom .data.custom and .rodata.custom
1895 * maps we use their ELF names as is, not prepending bpf_object name
1896 * in front. We still need to truncate them to 15 characters for the
1897 * kernel. Full name can be recovered for such maps by using DATASEC
1898 * BTF type associated with such map's value type, though.
1899 */
1900 if (sfx_len >= BPF_OBJ_NAME_LEN)
1901 sfx_len = BPF_OBJ_NAME_LEN - 1;
1902
1903 /* if there are two or more dots in map name, it's a custom dot map */
1904 if (strchr(real_name + 1, '.') != NULL)
1905 pfx_len = 0;
1906 else
1907 pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));
1908
1909 snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
1910 sfx_len, real_name);
1911
1912 /* sanitize map name to characters allowed by kernel */
1913 for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
1914 if (!isalnum(*p) && *p != '_' && *p != '.')
1915 *p = '_';
1916
1917 return strdup(map_name);
1918 }
1919
1920 static int
1921 map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);
1922
1923 /* Internal BPF map is mmap()'able only if at least one of the corresponding
1924 * DATASEC's VARs is to be exposed through the BPF skeleton. I.e., it's a GLOBAL
1925 * variable and it's not marked as __hidden (which turns it into, effectively,
1926 * a STATIC variable).
1927 */
1928 static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
1929 {
1930 const struct btf_type *t, *vt;
1931 struct btf_var_secinfo *vsi;
1932 int i, n;
1933
1934 if (!map->btf_value_type_id)
1935 return false;
1936
1937 t = btf__type_by_id(obj->btf, map->btf_value_type_id);
1938 if (!btf_is_datasec(t))
1939 return false;
1940
1941 vsi = btf_var_secinfos(t);
1942 for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) {
1943 vt = btf__type_by_id(obj->btf, vsi->type);
1944 if (!btf_is_var(vt))
1945 continue;
1946
1947 if (btf_var(vt)->linkage != BTF_VAR_STATIC)
1948 return true;
1949 }
1950
1951 return false;
1952 }
1953
1954 static int
1955 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
1956 const char *real_name, int sec_idx, void *data, size_t data_sz)
1957 {
1958 struct bpf_map_def *def;
1959 struct bpf_map *map;
1960 size_t mmap_sz;
1961 int err;
1962
1963 map = bpf_object__add_map(obj);
1964 if (IS_ERR(map))
1965 return PTR_ERR(map);
1966
1967 map->libbpf_type = type;
1968 map->sec_idx = sec_idx;
1969 map->sec_offset = 0;
1970 map->real_name = strdup(real_name);
1971 map->name = internal_map_name(obj, real_name);
1972 if (!map->real_name || !map->name) {
1973 zfree(&map->real_name);
1974 zfree(&map->name);
1975 return -ENOMEM;
1976 }
1977
1978 def = &map->def;
1979 def->type = BPF_MAP_TYPE_ARRAY;
1980 def->key_size = sizeof(int);
1981 def->value_size = data_sz;
1982 def->max_entries = 1;
1983 def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
1984 ? BPF_F_RDONLY_PROG : 0;
1985
1986 /* failures are fine because of maps like .rodata.str1.1 */
1987 (void) map_fill_btf_type_info(obj, map);
1988
1989 if (map_is_mmapable(obj, map))
1990 def->map_flags |= BPF_F_MMAPABLE;
1991
1992 pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
1993 map->name, map->sec_idx, map->sec_offset, def->map_flags);
1994
1995 mmap_sz = bpf_map_mmap_sz(map);
1996 map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
1997 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1998 if (map->mmaped == MAP_FAILED) {
1999 err = -errno;
2000 map->mmaped = NULL;
2001 pr_warn("failed to alloc map '%s' content buffer: %s\n", map->name, errstr(err));
2002 zfree(&map->real_name);
2003 zfree(&map->name);
2004 return err;
2005 }
2006
2007 if (data)
2008 memcpy(map->mmaped, data, data_sz);
2009
2010 pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
2011 return 0;
2012 }
2013
2014 static int bpf_object__init_global_data_maps(struct bpf_object *obj)
2015 {
2016 struct elf_sec_desc *sec_desc;
2017 const char *sec_name;
2018 int err = 0, sec_idx;
2019
2020 /*
2021 * Populate obj->maps with libbpf internal maps.
2022 */
2023 for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
2024 sec_desc = &obj->efile.secs[sec_idx];
2025
2026 /* Skip recognized sections with size 0. */
2027 if (!sec_desc->data || sec_desc->data->d_size == 0)
2028 continue;
2029
2030 switch (sec_desc->sec_type) {
2031 case SEC_DATA:
2032 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
2033 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
2034 sec_name, sec_idx,
2035 sec_desc->data->d_buf,
2036 sec_desc->data->d_size);
2037 break;
2038 case SEC_RODATA:
2039 obj->has_rodata = true;
2040 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
2041 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
2042 sec_name, sec_idx,
2043 sec_desc->data->d_buf,
2044 sec_desc->data->d_size);
2045 break;
2046 case SEC_BSS:
2047 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
2048 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
2049 sec_name, sec_idx,
2050 NULL,
2051 sec_desc->data->d_size);
2052 break;
2053 default:
2054 /* skip */
2055 break;
2056 }
2057 if (err)
2058 return err;
2059 }
2060 return 0;
2061 }
2062
2063
2064 static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
2065 const void *name)
2066 {
2067 int i;
2068
2069 for (i = 0; i < obj->nr_extern; i++) {
2070 if (strcmp(obj->externs[i].name, name) == 0)
2071 return &obj->externs[i];
2072 }
2073 return NULL;
2074 }
2075
2076 static struct extern_desc *find_extern_by_name_with_len(const struct bpf_object *obj,
2077 const void *name, int len)
2078 {
2079 const char *ext_name;
2080 int i;
2081
2082 for (i = 0; i < obj->nr_extern; i++) {
2083 ext_name = obj->externs[i].name;
2084 if (strlen(ext_name) == len && strncmp(ext_name, name, len) == 0)
2085 return &obj->externs[i];
2086 }
2087 return NULL;
2088 }
2089
2090 static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
2091 char value)
2092 {
2093 switch (ext->kcfg.type) {
2094 case KCFG_BOOL:
2095 if (value == 'm') {
2096 pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
2097 ext->name, value);
2098 return -EINVAL;
2099 }
2100 *(bool *)ext_val = value == 'y' ? true : false;
2101 break;
2102 case KCFG_TRISTATE:
2103 if (value == 'y')
2104 *(enum libbpf_tristate *)ext_val = TRI_YES;
2105 else if (value == 'm')
2106 *(enum libbpf_tristate *)ext_val = TRI_MODULE;
2107 else /* value == 'n' */
2108 *(enum libbpf_tristate *)ext_val = TRI_NO;
2109 break;
2110 case KCFG_CHAR:
2111 *(char *)ext_val = value;
2112 break;
2113 case KCFG_UNKNOWN:
2114 case KCFG_INT:
2115 case KCFG_CHAR_ARR:
2116 default:
2117 pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
2118 ext->name, value);
2119 return -EINVAL;
2120 }
2121 ext->is_set = true;
2122 return 0;
2123 }
2124
2125 static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
2126 const char *value)
2127 {
2128 size_t len;
2129
2130 if (ext->kcfg.type != KCFG_CHAR_ARR) {
2131 pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
2132 ext->name, value);
2133 return -EINVAL;
2134 }
2135
2136 len = strlen(value);
2137 if (len < 2 || value[len - 1] != '"') {
2138 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
2139 ext->name, value);
2140 return -EINVAL;
2141 }
2142
2143 /* strip quotes */
2144 len -= 2;
2145 if (len >= ext->kcfg.sz) {
2146 pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n",
2147 ext->name, value, len, ext->kcfg.sz - 1);
2148 len = ext->kcfg.sz - 1;
2149 }
2150 memcpy(ext_val, value + 1, len);
2151 ext_val[len] = '\0';
2152 ext->is_set = true;
2153 return 0;
2154 }
2155
2156 static int parse_u64(const char *value, __u64 *res)
2157 {
2158 char *value_end;
2159 int err;
2160
2161 errno = 0;
2162 *res = strtoull(value, &value_end, 0);
2163 if (errno) {
2164 err = -errno;
2165 pr_warn("failed to parse '%s': %s\n", value, errstr(err));
2166 return err;
2167 }
2168 if (*value_end) {
2169 pr_warn("failed to parse '%s' as integer completely\n", value);
2170 return -EINVAL;
2171 }
2172 return 0;
2173 }
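
/*
 * Illustrative behavior (not exhaustive): because strtoull() is called with
 * base 0, "42", "0x2a" and "052" all parse to 42, while trailing garbage
 * such as "42abc" is rejected with -EINVAL via the *value_end check.
 */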
2174
2175 static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
2176 {
2177 int bit_sz = ext->kcfg.sz * 8;
2178
2179 if (ext->kcfg.sz == 8)
2180 return true;
2181
2182 /* Validate that value stored in u64 fits in integer of `ext->sz`
2183 * bytes size without any loss of information. If the target integer
2184 * is signed, we rely on the following limits of integer type of
2185 * Y bits and subsequent transformation:
2186 *
2187 * -2^(Y-1) <= X <= 2^(Y-1) - 1
2188 * 0 <= X + 2^(Y-1) <= 2^Y - 1
2189 * 0 <= X + 2^(Y-1) < 2^Y
2190 *
2191 * For unsigned target integer, check that all the (64 - Y) bits are
2192 * zero.
2193 */
2194 if (ext->kcfg.is_signed)
2195 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
2196 else
2197 return (v >> bit_sz) == 0;
2198 }
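
/*
 * Worked example (illustrative): for a signed 1-byte extern (bit_sz = 8),
 * the acceptable range is -128..127; adding 2^7 = 128 to the u64
 * representation maps that range onto 0..255, so the check
 * v + 128 < 256 accepts exactly those values. For an unsigned 1-byte
 * extern, v >> 8 == 0 accepts 0..255.
 */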
2199
2200 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
2201 __u64 value)
2202 {
2203 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR &&
2204 ext->kcfg.type != KCFG_BOOL) {
2205 pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
2206 ext->name, (unsigned long long)value);
2207 return -EINVAL;
2208 }
2209 if (ext->kcfg.type == KCFG_BOOL && value > 1) {
2210 pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
2211 ext->name, (unsigned long long)value);
2212 return -EINVAL;
2213
2214 }
2215 if (!is_kcfg_value_in_range(ext, value)) {
2216 pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
2217 ext->name, (unsigned long long)value, ext->kcfg.sz);
2218 return -ERANGE;
2219 }
2220 switch (ext->kcfg.sz) {
2221 case 1:
2222 *(__u8 *)ext_val = value;
2223 break;
2224 case 2:
2225 *(__u16 *)ext_val = value;
2226 break;
2227 case 4:
2228 *(__u32 *)ext_val = value;
2229 break;
2230 case 8:
2231 *(__u64 *)ext_val = value;
2232 break;
2233 default:
2234 return -EINVAL;
2235 }
2236 ext->is_set = true;
2237 return 0;
2238 }
2239
2240 static int bpf_object__process_kconfig_line(struct bpf_object *obj,
2241 char *buf, void *data)
2242 {
2243 struct extern_desc *ext;
2244 char *sep, *value;
2245 int len, err = 0;
2246 void *ext_val;
2247 __u64 num;
2248
2249 if (!str_has_pfx(buf, "CONFIG_"))
2250 return 0;
2251
2252 sep = strchr(buf, '=');
2253 if (!sep) {
2254 pr_warn("failed to parse '%s': no separator\n", buf);
2255 return -EINVAL;
2256 }
2257
2258 /* Trim ending '\n' */
2259 len = strlen(buf);
2260 if (buf[len - 1] == '\n')
2261 buf[len - 1] = '\0';
2262 /* Split on '=' and ensure that a value is present. */
2263 *sep = '\0';
2264 if (!sep[1]) {
2265 *sep = '=';
2266 pr_warn("failed to parse '%s': no value\n", buf);
2267 return -EINVAL;
2268 }
2269
2270 ext = find_extern_by_name(obj, buf);
2271 if (!ext || ext->is_set)
2272 return 0;
2273
2274 ext_val = data + ext->kcfg.data_off;
2275 value = sep + 1;
2276
2277 switch (*value) {
2278 case 'y': case 'n': case 'm':
2279 err = set_kcfg_value_tri(ext, ext_val, *value);
2280 break;
2281 case '"':
2282 err = set_kcfg_value_str(ext, ext_val, value);
2283 break;
2284 default:
2285 /* assume integer */
2286 err = parse_u64(value, &num);
2287 if (err) {
2288 pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value);
2289 return err;
2290 }
2291 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
2292 pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value);
2293 return -EINVAL;
2294 }
2295 err = set_kcfg_value_num(ext, ext_val, num);
2296 break;
2297 }
2298 if (err)
2299 return err;
2300 pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value);
2301 return 0;
2302 }
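
/*
 * For illustration, typical Kconfig lines and how they would be handled here
 * (assuming matching externs are declared in the BPF object):
 *
 *   CONFIG_BPF_SYSCALL=y             -> set_kcfg_value_tri() ('y'/'n'/'m')
 *   CONFIG_DEFAULT_HOSTNAME="(none)" -> set_kcfg_value_str() (quotes stripped)
 *   CONFIG_HZ=250                    -> parse_u64() + set_kcfg_value_num()
 *
 * Lines not starting with "CONFIG_", as well as externs that are unknown or
 * already set, are silently skipped.
 */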
2303
2304 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
2305 {
2306 char buf[PATH_MAX];
2307 struct utsname uts;
2308 int len, err = 0;
2309 gzFile file;
2310
2311 uname(&uts);
2312 len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
2313 if (len < 0)
2314 return -EINVAL;
2315 else if (len >= PATH_MAX)
2316 return -ENAMETOOLONG;
2317
2318 /* gzopen also accepts uncompressed files. */
2319 file = gzopen(buf, "re");
2320 if (!file)
2321 file = gzopen("/proc/config.gz", "re");
2322
2323 if (!file) {
2324 pr_warn("failed to open system Kconfig\n");
2325 return -ENOENT;
2326 }
2327
2328 while (gzgets(file, buf, sizeof(buf))) {
2329 err = bpf_object__process_kconfig_line(obj, buf, data);
2330 if (err) {
2331 pr_warn("error parsing system Kconfig line '%s': %s\n",
2332 buf, errstr(err));
2333 goto out;
2334 }
2335 }
2336
2337 out:
2338 gzclose(file);
2339 return err;
2340 }
2341
2342 static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
2343 const char *config, void *data)
2344 {
2345 char buf[PATH_MAX];
2346 int err = 0;
2347 FILE *file;
2348
2349 file = fmemopen((void *)config, strlen(config), "r");
2350 if (!file) {
2351 err = -errno;
2352 pr_warn("failed to open in-memory Kconfig: %s\n", errstr(err));
2353 return err;
2354 }
2355
2356 while (fgets(buf, sizeof(buf), file)) {
2357 err = bpf_object__process_kconfig_line(obj, buf, data);
2358 if (err) {
2359 pr_warn("error parsing in-memory Kconfig line '%s': %s\n",
2360 buf, errstr(err));
2361 break;
2362 }
2363 }
2364
2365 fclose(file);
2366 return err;
2367 }
2368
2369 static int bpf_object__init_kconfig_map(struct bpf_object *obj)
2370 {
2371 struct extern_desc *last_ext = NULL, *ext;
2372 size_t map_sz;
2373 int i, err;
2374
2375 for (i = 0; i < obj->nr_extern; i++) {
2376 ext = &obj->externs[i];
2377 if (ext->type == EXT_KCFG)
2378 last_ext = ext;
2379 }
2380
2381 if (!last_ext)
2382 return 0;
2383
2384 map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
2385 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
2386 ".kconfig", obj->efile.symbols_shndx,
2387 NULL, map_sz);
2388 if (err)
2389 return err;
2390
2391 obj->kconfig_map_idx = obj->nr_maps - 1;
2392
2393 return 0;
2394 }
2395
2396 const struct btf_type *
2397 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
2398 {
2399 const struct btf_type *t = btf__type_by_id(btf, id);
2400
2401 if (res_id)
2402 *res_id = id;
2403
2404 while (btf_is_mod(t) || btf_is_typedef(t)) {
2405 if (res_id)
2406 *res_id = t->type;
2407 t = btf__type_by_id(btf, t->type);
2408 }
2409
2410 return t;
2411 }
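
/*
 * Example (illustrative): for a hypothetical "typedef const volatile int
 * myint_t;", resolving the BTF id of myint_t walks through the TYPEDEF and
 * CONST/VOLATILE modifiers and returns the underlying INT type, with
 * *res_id (if provided) set to that INT type's id.
 */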
2412
2413 static const struct btf_type *
2414 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
2415 {
2416 const struct btf_type *t;
2417
2418 t = skip_mods_and_typedefs(btf, id, NULL);
2419 if (!btf_is_ptr(t))
2420 return NULL;
2421
2422 t = skip_mods_and_typedefs(btf, t->type, res_id);
2423
2424 return btf_is_func_proto(t) ? t : NULL;
2425 }
2426
2427 static const char *__btf_kind_str(__u16 kind)
2428 {
2429 switch (kind) {
2430 case BTF_KIND_UNKN: return "void";
2431 case BTF_KIND_INT: return "int";
2432 case BTF_KIND_PTR: return "ptr";
2433 case BTF_KIND_ARRAY: return "array";
2434 case BTF_KIND_STRUCT: return "struct";
2435 case BTF_KIND_UNION: return "union";
2436 case BTF_KIND_ENUM: return "enum";
2437 case BTF_KIND_FWD: return "fwd";
2438 case BTF_KIND_TYPEDEF: return "typedef";
2439 case BTF_KIND_VOLATILE: return "volatile";
2440 case BTF_KIND_CONST: return "const";
2441 case BTF_KIND_RESTRICT: return "restrict";
2442 case BTF_KIND_FUNC: return "func";
2443 case BTF_KIND_FUNC_PROTO: return "func_proto";
2444 case BTF_KIND_VAR: return "var";
2445 case BTF_KIND_DATASEC: return "datasec";
2446 case BTF_KIND_FLOAT: return "float";
2447 case BTF_KIND_DECL_TAG: return "decl_tag";
2448 case BTF_KIND_TYPE_TAG: return "type_tag";
2449 case BTF_KIND_ENUM64: return "enum64";
2450 default: return "unknown";
2451 }
2452 }
2453
2454 const char *btf_kind_str(const struct btf_type *t)
2455 {
2456 return __btf_kind_str(btf_kind(t));
2457 }
2458
2459 /*
2460 * Fetch integer attribute of BTF map definition. Such attributes are
2461 * represented using a pointer to an array, in which the array's dimensionality
2462 * encodes the specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
2463 * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
2464 * type definition, while using only sizeof(void *) space in ELF data section.
2465 */
2466 static bool get_map_field_int(const char *map_name, const struct btf *btf,
2467 const struct btf_member *m, __u32 *res)
2468 {
2469 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2470 const char *name = btf__name_by_offset(btf, m->name_off);
2471 const struct btf_array *arr_info;
2472 const struct btf_type *arr_t;
2473
2474 if (!btf_is_ptr(t)) {
2475 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
2476 map_name, name, btf_kind_str(t));
2477 return false;
2478 }
2479
2480 arr_t = btf__type_by_id(btf, t->type);
2481 if (!arr_t) {
2482 pr_warn("map '%s': attr '%s': type [%u] not found.\n",
2483 map_name, name, t->type);
2484 return false;
2485 }
2486 if (!btf_is_array(arr_t)) {
2487 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
2488 map_name, name, btf_kind_str(arr_t));
2489 return false;
2490 }
2491 arr_info = btf_array(arr_t);
2492 *res = arr_info->nelems;
2493 return true;
2494 }
2495
2496 static bool get_map_field_long(const char *map_name, const struct btf *btf,
2497 const struct btf_member *m, __u64 *res)
2498 {
2499 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2500 const char *name = btf__name_by_offset(btf, m->name_off);
2501
2502 if (btf_is_ptr(t)) {
2503 __u32 res32;
2504 bool ret;
2505
2506 ret = get_map_field_int(map_name, btf, m, &res32);
2507 if (ret)
2508 *res = (__u64)res32;
2509 return ret;
2510 }
2511
2512 if (!btf_is_enum(t) && !btf_is_enum64(t)) {
2513 pr_warn("map '%s': attr '%s': expected ENUM or ENUM64, got %s.\n",
2514 map_name, name, btf_kind_str(t));
2515 return false;
2516 }
2517
2518 if (btf_vlen(t) != 1) {
2519 pr_warn("map '%s': attr '%s': invalid __ulong\n",
2520 map_name, name);
2521 return false;
2522 }
2523
2524 if (btf_is_enum(t)) {
2525 const struct btf_enum *e = btf_enum(t);
2526
2527 *res = e->val;
2528 } else {
2529 const struct btf_enum64 *e = btf_enum64(t);
2530
2531 *res = btf_enum64_value(e);
2532 }
2533 return true;
2534 }
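
/*
 * Example (illustrative): a 64-bit attribute such as map_extra can be
 * declared with the __ulong() helper from bpf_helpers.h, which encodes the
 * value as a single-member anonymous enum (ENUM64 in BTF when the value does
 * not fit in 32 bits); plain __uint()-style PTR encodings are still accepted
 * via the get_map_field_int() fallback above.
 */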
2535
2536 static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
2537 {
2538 int len;
2539
2540 len = snprintf(buf, buf_sz, "%s/%s", path, name);
2541 if (len < 0)
2542 return -EINVAL;
2543 if (len >= buf_sz)
2544 return -ENAMETOOLONG;
2545
2546 return 0;
2547 }
2548
2549 static int build_map_pin_path(struct bpf_map *map, const char *path)
2550 {
2551 char buf[PATH_MAX];
2552 int err;
2553
2554 if (!path)
2555 path = BPF_FS_DEFAULT_PATH;
2556
2557 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
2558 if (err)
2559 return err;
2560
2561 return bpf_map__set_pin_path(map, buf);
2562 }
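
/*
 * Example (illustrative, hypothetical map name): with no pin_root_path
 * override, a map named "my_map" that requested LIBBPF_PIN_BY_NAME would get
 * the pin path "/sys/fs/bpf/my_map".
 */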
2563
2564 /* should match definition in bpf_helpers.h */
2565 enum libbpf_pin_type {
2566 LIBBPF_PIN_NONE,
2567 /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
2568 LIBBPF_PIN_BY_NAME,
2569 };
2570
2571 int parse_btf_map_def(const char *map_name, struct btf *btf,
2572 const struct btf_type *def_t, bool strict,
2573 struct btf_map_def *map_def, struct btf_map_def *inner_def)
2574 {
2575 const struct btf_type *t;
2576 const struct btf_member *m;
2577 bool is_inner = inner_def == NULL;
2578 int vlen, i;
2579
2580 vlen = btf_vlen(def_t);
2581 m = btf_members(def_t);
2582 for (i = 0; i < vlen; i++, m++) {
2583 const char *name = btf__name_by_offset(btf, m->name_off);
2584
2585 if (!name) {
2586 pr_warn("map '%s': invalid field #%d.\n", map_name, i);
2587 return -EINVAL;
2588 }
2589 if (strcmp(name, "type") == 0) {
2590 if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
2591 return -EINVAL;
2592 map_def->parts |= MAP_DEF_MAP_TYPE;
2593 } else if (strcmp(name, "max_entries") == 0) {
2594 if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
2595 return -EINVAL;
2596 map_def->parts |= MAP_DEF_MAX_ENTRIES;
2597 } else if (strcmp(name, "map_flags") == 0) {
2598 if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
2599 return -EINVAL;
2600 map_def->parts |= MAP_DEF_MAP_FLAGS;
2601 } else if (strcmp(name, "numa_node") == 0) {
2602 if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
2603 return -EINVAL;
2604 map_def->parts |= MAP_DEF_NUMA_NODE;
2605 } else if (strcmp(name, "key_size") == 0) {
2606 __u32 sz;
2607
2608 if (!get_map_field_int(map_name, btf, m, &sz))
2609 return -EINVAL;
2610 if (map_def->key_size && map_def->key_size != sz) {
2611 pr_warn("map '%s': conflicting key size %u != %u.\n",
2612 map_name, map_def->key_size, sz);
2613 return -EINVAL;
2614 }
2615 map_def->key_size = sz;
2616 map_def->parts |= MAP_DEF_KEY_SIZE;
2617 } else if (strcmp(name, "key") == 0) {
2618 __s64 sz;
2619
2620 t = btf__type_by_id(btf, m->type);
2621 if (!t) {
2622 pr_warn("map '%s': key type [%d] not found.\n",
2623 map_name, m->type);
2624 return -EINVAL;
2625 }
2626 if (!btf_is_ptr(t)) {
2627 pr_warn("map '%s': key spec is not PTR: %s.\n",
2628 map_name, btf_kind_str(t));
2629 return -EINVAL;
2630 }
2631 sz = btf__resolve_size(btf, t->type);
2632 if (sz < 0) {
2633 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2634 map_name, t->type, (ssize_t)sz);
2635 return sz;
2636 }
2637 if (map_def->key_size && map_def->key_size != sz) {
2638 pr_warn("map '%s': conflicting key size %u != %zd.\n",
2639 map_name, map_def->key_size, (ssize_t)sz);
2640 return -EINVAL;
2641 }
2642 map_def->key_size = sz;
2643 map_def->key_type_id = t->type;
2644 map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
2645 } else if (strcmp(name, "value_size") == 0) {
2646 __u32 sz;
2647
2648 if (!get_map_field_int(map_name, btf, m, &sz))
2649 return -EINVAL;
2650 if (map_def->value_size && map_def->value_size != sz) {
2651 pr_warn("map '%s': conflicting value size %u != %u.\n",
2652 map_name, map_def->value_size, sz);
2653 return -EINVAL;
2654 }
2655 map_def->value_size = sz;
2656 map_def->parts |= MAP_DEF_VALUE_SIZE;
2657 } else if (strcmp(name, "value") == 0) {
2658 __s64 sz;
2659
2660 t = btf__type_by_id(btf, m->type);
2661 if (!t) {
2662 pr_warn("map '%s': value type [%d] not found.\n",
2663 map_name, m->type);
2664 return -EINVAL;
2665 }
2666 if (!btf_is_ptr(t)) {
2667 pr_warn("map '%s': value spec is not PTR: %s.\n",
2668 map_name, btf_kind_str(t));
2669 return -EINVAL;
2670 }
2671 sz = btf__resolve_size(btf, t->type);
2672 if (sz < 0) {
2673 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2674 map_name, t->type, (ssize_t)sz);
2675 return sz;
2676 }
2677 if (map_def->value_size && map_def->value_size != sz) {
2678 pr_warn("map '%s': conflicting value size %u != %zd.\n",
2679 map_name, map_def->value_size, (ssize_t)sz);
2680 return -EINVAL;
2681 }
2682 map_def->value_size = sz;
2683 map_def->value_type_id = t->type;
2684 map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
2685 }
2686 else if (strcmp(name, "values") == 0) {
2687 bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type);
2688 bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY;
2689 const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value";
2690 char inner_map_name[128];
2691 int err;
2692
2693 if (is_inner) {
2694 pr_warn("map '%s': multi-level inner maps not supported.\n",
2695 map_name);
2696 return -ENOTSUP;
2697 }
2698 if (i != vlen - 1) {
2699 pr_warn("map '%s': '%s' member should be last.\n",
2700 map_name, name);
2701 return -EINVAL;
2702 }
2703 if (!is_map_in_map && !is_prog_array) {
2704 pr_warn("map '%s': should be map-in-map or prog-array.\n",
2705 map_name);
2706 return -ENOTSUP;
2707 }
2708 if (map_def->value_size && map_def->value_size != 4) {
2709 pr_warn("map '%s': conflicting value size %u != 4.\n",
2710 map_name, map_def->value_size);
2711 return -EINVAL;
2712 }
2713 map_def->value_size = 4;
2714 t = btf__type_by_id(btf, m->type);
2715 if (!t) {
2716 pr_warn("map '%s': %s type [%d] not found.\n",
2717 map_name, desc, m->type);
2718 return -EINVAL;
2719 }
2720 if (!btf_is_array(t) || btf_array(t)->nelems) {
2721 pr_warn("map '%s': %s spec is not a zero-sized array.\n",
2722 map_name, desc);
2723 return -EINVAL;
2724 }
2725 t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
2726 if (!btf_is_ptr(t)) {
2727 pr_warn("map '%s': %s def is of unexpected kind %s.\n",
2728 map_name, desc, btf_kind_str(t));
2729 return -EINVAL;
2730 }
2731 t = skip_mods_and_typedefs(btf, t->type, NULL);
2732 if (is_prog_array) {
2733 if (!btf_is_func_proto(t)) {
2734 pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n",
2735 map_name, btf_kind_str(t));
2736 return -EINVAL;
2737 }
2738 continue;
2739 }
2740 if (!btf_is_struct(t)) {
2741 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2742 map_name, btf_kind_str(t));
2743 return -EINVAL;
2744 }
2745
2746 snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
2747 err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
2748 if (err)
2749 return err;
2750
2751 map_def->parts |= MAP_DEF_INNER_MAP;
2752 } else if (strcmp(name, "pinning") == 0) {
2753 __u32 val;
2754
2755 if (is_inner) {
2756 pr_warn("map '%s': inner def can't be pinned.\n", map_name);
2757 return -EINVAL;
2758 }
2759 if (!get_map_field_int(map_name, btf, m, &val))
2760 return -EINVAL;
2761 if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
2762 pr_warn("map '%s': invalid pinning value %u.\n",
2763 map_name, val);
2764 return -EINVAL;
2765 }
2766 map_def->pinning = val;
2767 map_def->parts |= MAP_DEF_PINNING;
2768 } else if (strcmp(name, "map_extra") == 0) {
2769 __u64 map_extra;
2770
2771 if (!get_map_field_long(map_name, btf, m, &map_extra))
2772 return -EINVAL;
2773 map_def->map_extra = map_extra;
2774 map_def->parts |= MAP_DEF_MAP_EXTRA;
2775 } else {
2776 if (strict) {
2777 pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
2778 return -ENOTSUP;
2779 }
2780 pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
2781 }
2782 }
2783
2784 if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
2785 pr_warn("map '%s': map type isn't specified.\n", map_name);
2786 return -EINVAL;
2787 }
2788
2789 return 0;
2790 }
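
/*
 * For illustration, a minimal BPF-side map definition (a sketch using the
 * __uint()/__type() helpers from bpf_helpers.h; the map and value type names
 * are hypothetical) that exercises several of the fields parsed above:
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_HASH);
 *		__uint(max_entries, 1024);
 *		__type(key, __u32);
 *		__type(value, struct my_value);
 *		__uint(pinning, LIBBPF_PIN_BY_NAME);
 *	} my_map SEC(".maps");
 *
 * "key"/"value" contribute both sizes and BTF type ids, while "type",
 * "max_entries" and "pinning" are plain integer attributes.
 */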
2791
2792 static size_t adjust_ringbuf_sz(size_t sz)
2793 {
2794 __u32 page_sz = sysconf(_SC_PAGE_SIZE);
2795 __u32 mul;
2796
2797 /* if user forgot to set any size, make sure they see error */
2798 if (sz == 0)
2799 return 0;
2800 /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
2801 * a power-of-2 multiple of kernel's page size. If user diligently
2802 * satisfied these conditions, pass the size through.
2803 */
2804 if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
2805 return sz;
2806
2807 /* Otherwise find closest (page_sz * power_of_2) product bigger than
2808 * user-set size to satisfy both user size request and kernel
2809 * requirements and substitute correct max_entries for map creation.
2810 */
2811 for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
2812 if (mul * page_sz > sz)
2813 return mul * page_sz;
2814 }
2815
2816 /* if it's impossible to satisfy the conditions (i.e., user size is
2817 * very close to UINT_MAX but is not a power-of-2 multiple of
2818 * page_size) then just return original size and let kernel reject it
2819 */
2820 return sz;
2821 }
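
/*
 * Worked example (illustrative, assuming a 4096-byte page size): a
 * user-requested size of 16384 (4 pages, a power-of-2 multiple) is passed
 * through unchanged, while 20000 is rounded up to the next power-of-2
 * multiple of the page size, 32768.
 */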
2822
2823 static bool map_is_ringbuf(const struct bpf_map *map)
2824 {
2825 return map->def.type == BPF_MAP_TYPE_RINGBUF ||
2826 map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
2827 }
2828
2829 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
2830 {
2831 map->def.type = def->map_type;
2832 map->def.key_size = def->key_size;
2833 map->def.value_size = def->value_size;
2834 map->def.max_entries = def->max_entries;
2835 map->def.map_flags = def->map_flags;
2836 map->map_extra = def->map_extra;
2837
2838 map->numa_node = def->numa_node;
2839 map->btf_key_type_id = def->key_type_id;
2840 map->btf_value_type_id = def->value_type_id;
2841
2842 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
2843 if (map_is_ringbuf(map))
2844 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
2845
2846 if (def->parts & MAP_DEF_MAP_TYPE)
2847 pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
2848
2849 if (def->parts & MAP_DEF_KEY_TYPE)
2850 pr_debug("map '%s': found key [%u], sz = %u.\n",
2851 map->name, def->key_type_id, def->key_size);
2852 else if (def->parts & MAP_DEF_KEY_SIZE)
2853 pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
2854
2855 if (def->parts & MAP_DEF_VALUE_TYPE)
2856 pr_debug("map '%s': found value [%u], sz = %u.\n",
2857 map->name, def->value_type_id, def->value_size);
2858 else if (def->parts & MAP_DEF_VALUE_SIZE)
2859 pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
2860
2861 if (def->parts & MAP_DEF_MAX_ENTRIES)
2862 pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
2863 if (def->parts & MAP_DEF_MAP_FLAGS)
2864 pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags);
2865 if (def->parts & MAP_DEF_MAP_EXTRA)
2866 pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name,
2867 (unsigned long long)def->map_extra);
2868 if (def->parts & MAP_DEF_PINNING)
2869 pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
2870 if (def->parts & MAP_DEF_NUMA_NODE)
2871 pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
2872
2873 if (def->parts & MAP_DEF_INNER_MAP)
2874 pr_debug("map '%s': found inner map definition.\n", map->name);
2875 }
2876
2877 static const char *btf_var_linkage_str(__u32 linkage)
2878 {
2879 switch (linkage) {
2880 case BTF_VAR_STATIC: return "static";
2881 case BTF_VAR_GLOBAL_ALLOCATED: return "global";
2882 case BTF_VAR_GLOBAL_EXTERN: return "extern";
2883 default: return "unknown";
2884 }
2885 }
2886
2887 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2888 const struct btf_type *sec,
2889 int var_idx, int sec_idx,
2890 const Elf_Data *data, bool strict,
2891 const char *pin_root_path)
2892 {
2893 struct btf_map_def map_def = {}, inner_def = {};
2894 const struct btf_type *var, *def;
2895 const struct btf_var_secinfo *vi;
2896 const struct btf_var *var_extra;
2897 const char *map_name;
2898 struct bpf_map *map;
2899 int err;
2900
2901 vi = btf_var_secinfos(sec) + var_idx;
2902 var = btf__type_by_id(obj->btf, vi->type);
2903 var_extra = btf_var(var);
2904 map_name = btf__name_by_offset(obj->btf, var->name_off);
2905
2906 if (map_name == NULL || map_name[0] == '\0') {
2907 pr_warn("map #%d: empty name.\n", var_idx);
2908 return -EINVAL;
2909 }
2910 if ((__u64)vi->offset + vi->size > data->d_size) {
2911 pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2912 return -EINVAL;
2913 }
2914 if (!btf_is_var(var)) {
2915 pr_warn("map '%s': unexpected var kind %s.\n",
2916 map_name, btf_kind_str(var));
2917 return -EINVAL;
2918 }
2919 if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
2920 pr_warn("map '%s': unsupported map linkage %s.\n",
2921 map_name, btf_var_linkage_str(var_extra->linkage));
2922 return -EOPNOTSUPP;
2923 }
2924
2925 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2926 if (!btf_is_struct(def)) {
2927 pr_warn("map '%s': unexpected def kind %s.\n",
2928 map_name, btf_kind_str(var));
2929 return -EINVAL;
2930 }
2931 if (def->size > vi->size) {
2932 pr_warn("map '%s': invalid def size.\n", map_name);
2933 return -EINVAL;
2934 }
2935
2936 map = bpf_object__add_map(obj);
2937 if (IS_ERR(map))
2938 return PTR_ERR(map);
2939 map->name = strdup(map_name);
2940 if (!map->name) {
2941 pr_warn("map '%s': failed to alloc map name.\n", map_name);
2942 return -ENOMEM;
2943 }
2944 map->libbpf_type = LIBBPF_MAP_UNSPEC;
2945 map->def.type = BPF_MAP_TYPE_UNSPEC;
2946 map->sec_idx = sec_idx;
2947 map->sec_offset = vi->offset;
2948 map->btf_var_idx = var_idx;
2949 pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2950 map_name, map->sec_idx, map->sec_offset);
2951
2952 err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
2953 if (err)
2954 return err;
2955
2956 fill_map_from_def(map, &map_def);
2957
2958 if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
2959 err = build_map_pin_path(map, pin_root_path);
2960 if (err) {
2961 pr_warn("map '%s': couldn't build pin path.\n", map->name);
2962 return err;
2963 }
2964 }
2965
2966 if (map_def.parts & MAP_DEF_INNER_MAP) {
2967 map->inner_map = calloc(1, sizeof(*map->inner_map));
2968 if (!map->inner_map)
2969 return -ENOMEM;
2970 map->inner_map->fd = create_placeholder_fd();
2971 if (map->inner_map->fd < 0)
2972 return map->inner_map->fd;
2973 map->inner_map->sec_idx = sec_idx;
2974 map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
2975 if (!map->inner_map->name)
2976 return -ENOMEM;
2977 sprintf(map->inner_map->name, "%s.inner", map_name);
2978
2979 fill_map_from_def(map->inner_map, &inner_def);
2980 }
2981
2982 err = map_fill_btf_type_info(obj, map);
2983 if (err)
2984 return err;
2985
2986 return 0;
2987 }
2988
2989 static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map,
2990 const char *sec_name, int sec_idx,
2991 void *data, size_t data_sz)
2992 {
2993 const long page_sz = sysconf(_SC_PAGE_SIZE);
2994 size_t mmap_sz;
2995
2996 mmap_sz = bpf_map_mmap_sz(map);
2997 if (roundup(data_sz, page_sz) > mmap_sz) {
2998 pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n",
2999 sec_name, mmap_sz, data_sz);
3000 return -E2BIG;
3001 }
3002
3003 obj->arena_data = malloc(data_sz);
3004 if (!obj->arena_data)
3005 return -ENOMEM;
3006 memcpy(obj->arena_data, data, data_sz);
3007 obj->arena_data_sz = data_sz;
3008
3009 /* make bpf_map__init_value() work for ARENA maps */
3010 map->mmaped = obj->arena_data;
3011
3012 return 0;
3013 }
3014
3015 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
3016 const char *pin_root_path)
3017 {
3018 const struct btf_type *sec = NULL;
3019 int nr_types, i, vlen, err;
3020 const struct btf_type *t;
3021 const char *name;
3022 Elf_Data *data;
3023 Elf_Scn *scn;
3024
3025 if (obj->efile.btf_maps_shndx < 0)
3026 return 0;
3027
3028 scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
3029 data = elf_sec_data(obj, scn);
3030 if (!data) {
3031 pr_warn("elf: failed to get %s map definitions for %s\n",
3032 MAPS_ELF_SEC, obj->path);
3033 return -EINVAL;
3034 }
3035
3036 nr_types = btf__type_cnt(obj->btf);
3037 for (i = 1; i < nr_types; i++) {
3038 t = btf__type_by_id(obj->btf, i);
3039 if (!btf_is_datasec(t))
3040 continue;
3041 name = btf__name_by_offset(obj->btf, t->name_off);
3042 if (strcmp(name, MAPS_ELF_SEC) == 0) {
3043 sec = t;
3044 obj->efile.btf_maps_sec_btf_id = i;
3045 break;
3046 }
3047 }
3048
3049 if (!sec) {
3050 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
3051 return -ENOENT;
3052 }
3053
3054 vlen = btf_vlen(sec);
3055 for (i = 0; i < vlen; i++) {
3056 err = bpf_object__init_user_btf_map(obj, sec, i,
3057 obj->efile.btf_maps_shndx,
3058 data, strict,
3059 pin_root_path);
3060 if (err)
3061 return err;
3062 }
3063
3064 for (i = 0; i < obj->nr_maps; i++) {
3065 struct bpf_map *map = &obj->maps[i];
3066
3067 if (map->def.type != BPF_MAP_TYPE_ARENA)
3068 continue;
3069
3070 if (obj->arena_map_idx >= 0) {
3071 pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n",
3072 map->name, obj->maps[obj->arena_map_idx].name);
3073 return -EINVAL;
3074 }
3075 obj->arena_map_idx = i;
3076
3077 if (obj->efile.arena_data) {
3078 err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx,
3079 obj->efile.arena_data->d_buf,
3080 obj->efile.arena_data->d_size);
3081 if (err)
3082 return err;
3083 }
3084 }
3085 if (obj->efile.arena_data && obj->arena_map_idx < 0) {
3086 pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n",
3087 ARENA_SEC);
3088 return -ENOENT;
3089 }
3090
3091 return 0;
3092 }
3093
3094 static int bpf_object__init_maps(struct bpf_object *obj,
3095 const struct bpf_object_open_opts *opts)
3096 {
3097 const char *pin_root_path;
3098 bool strict;
3099 int err = 0;
3100
3101 strict = !OPTS_GET(opts, relaxed_maps, false);
3102 pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
3103
3104 err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
3105 err = err ?: bpf_object__init_global_data_maps(obj);
3106 err = err ?: bpf_object__init_kconfig_map(obj);
3107 err = err ?: bpf_object_init_struct_ops(obj);
3108
3109 return err;
3110 }
3111
3112 static bool section_have_execinstr(struct bpf_object *obj, int idx)
3113 {
3114 Elf64_Shdr *sh;
3115
3116 sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx));
3117 if (!sh)
3118 return false;
3119
3120 return sh->sh_flags & SHF_EXECINSTR;
3121 }
3122
3123 static bool starts_with_qmark(const char *s)
3124 {
3125 return s && s[0] == '?';
3126 }
3127
3128 static bool btf_needs_sanitization(struct bpf_object *obj)
3129 {
3130 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
3131 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
3132 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
3133 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
3134 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
3135 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
3136 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
3137 bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
3138
3139 return !has_func || !has_datasec || !has_func_global || !has_float ||
3140 !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec;
3141 }
3142
3143 static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
3144 {
3145 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
3146 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
3147 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
3148 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
3149 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
3150 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
3151 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
3152 bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
3153 int enum64_placeholder_id = 0;
3154 struct btf_type *t;
3155 int i, j, vlen;
3156
3157 for (i = 1; i < btf__type_cnt(btf); i++) {
3158 t = (struct btf_type *)btf__type_by_id(btf, i);
3159
3160 if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) {
3161 /* replace VAR/DECL_TAG with INT */
3162 t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
3163 /*
3164 * using size = 1 is the safest choice, 4 will be too
3165 * big and cause kernel BTF validation failure if
3166 * original variable took less than 4 bytes
3167 */
3168 t->size = 1;
3169 *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
3170 } else if (!has_datasec && btf_is_datasec(t)) {
3171 /* replace DATASEC with STRUCT */
3172 const struct btf_var_secinfo *v = btf_var_secinfos(t);
3173 struct btf_member *m = btf_members(t);
3174 struct btf_type *vt;
3175 char *name;
3176
3177 name = (char *)btf__name_by_offset(btf, t->name_off);
3178 while (*name) {
3179 if (*name == '.' || *name == '?')
3180 *name = '_';
3181 name++;
3182 }
3183
3184 vlen = btf_vlen(t);
3185 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
3186 for (j = 0; j < vlen; j++, v++, m++) {
3187 /* order of field assignments is important */
3188 m->offset = v->offset * 8;
3189 m->type = v->type;
3190 /* preserve variable name as member name */
3191 vt = (void *)btf__type_by_id(btf, v->type);
3192 m->name_off = vt->name_off;
3193 }
3194 } else if (!has_qmark_datasec && btf_is_datasec(t) &&
3195 starts_with_qmark(btf__name_by_offset(btf, t->name_off))) {
3196 /* replace '?' prefix with '_' for DATASEC names */
3197 char *name;
3198
3199 name = (char *)btf__name_by_offset(btf, t->name_off);
3200 if (name[0] == '?')
3201 name[0] = '_';
3202 } else if (!has_func && btf_is_func_proto(t)) {
3203 /* replace FUNC_PROTO with ENUM */
3204 vlen = btf_vlen(t);
3205 t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
3206 t->size = sizeof(__u32); /* kernel enforced */
3207 } else if (!has_func && btf_is_func(t)) {
3208 /* replace FUNC with TYPEDEF */
3209 t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
3210 } else if (!has_func_global && btf_is_func(t)) {
3211 /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
3212 t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
3213 } else if (!has_float && btf_is_float(t)) {
3214 /* replace FLOAT with an equally-sized empty STRUCT;
3215 * since C compilers do not accept e.g. "float" as a
3216 * valid struct name, make it anonymous
3217 */
3218 t->name_off = 0;
3219 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
3220 } else if (!has_type_tag && btf_is_type_tag(t)) {
3221 /* replace TYPE_TAG with a CONST */
3222 t->name_off = 0;
3223 t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
3224 } else if (!has_enum64 && btf_is_enum(t)) {
3225 /* clear the kflag */
3226 t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
3227 } else if (!has_enum64 && btf_is_enum64(t)) {
3228 /* replace ENUM64 with a union */
3229 struct btf_member *m;
3230
3231 if (enum64_placeholder_id == 0) {
3232 enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
3233 if (enum64_placeholder_id < 0)
3234 return enum64_placeholder_id;
3235
3236 t = (struct btf_type *)btf__type_by_id(btf, i);
3237 }
3238
3239 m = btf_members(t);
3240 vlen = btf_vlen(t);
3241 t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
3242 for (j = 0; j < vlen; j++, m++) {
3243 m->type = enum64_placeholder_id;
3244 m->offset = 0;
3245 }
3246 }
3247 }
3248
3249 return 0;
3250 }
3251
3252 static bool libbpf_needs_btf(const struct bpf_object *obj)
3253 {
3254 return obj->efile.btf_maps_shndx >= 0 ||
3255 obj->efile.has_st_ops ||
3256 obj->nr_extern > 0;
3257 }
3258
3259 static bool kernel_needs_btf(const struct bpf_object *obj)
3260 {
3261 return obj->efile.has_st_ops;
3262 }
3263
3264 static int bpf_object__init_btf(struct bpf_object *obj,
3265 Elf_Data *btf_data,
3266 Elf_Data *btf_ext_data)
3267 {
3268 int err = -ENOENT;
3269
3270 if (btf_data) {
3271 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
3272 err = libbpf_get_error(obj->btf);
3273 if (err) {
3274 obj->btf = NULL;
3275 pr_warn("Error loading ELF section %s: %s.\n", BTF_ELF_SEC, errstr(err));
3276 goto out;
3277 }
3278 /* enforce 8-byte pointers for BPF-targeted BTFs */
3279 btf__set_pointer_size(obj->btf, 8);
3280 }
3281 if (btf_ext_data) {
3282 struct btf_ext_info *ext_segs[3];
3283 int seg_num, sec_num;
3284
3285 if (!obj->btf) {
3286 pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
3287 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
3288 goto out;
3289 }
3290 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
3291 err = libbpf_get_error(obj->btf_ext);
3292 if (err) {
3293 pr_warn("Error loading ELF section %s: %s. Ignored and continue.\n",
3294 BTF_EXT_ELF_SEC, errstr(err));
3295 obj->btf_ext = NULL;
3296 goto out;
3297 }
3298
3299 /* setup .BTF.ext to ELF section mapping */
3300 ext_segs[0] = &obj->btf_ext->func_info;
3301 ext_segs[1] = &obj->btf_ext->line_info;
3302 ext_segs[2] = &obj->btf_ext->core_relo_info;
3303 for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
3304 struct btf_ext_info *seg = ext_segs[seg_num];
3305 const struct btf_ext_info_sec *sec;
3306 const char *sec_name;
3307 Elf_Scn *scn;
3308
3309 if (seg->sec_cnt == 0)
3310 continue;
3311
3312 seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
3313 if (!seg->sec_idxs) {
3314 err = -ENOMEM;
3315 goto out;
3316 }
3317
3318 sec_num = 0;
3319 for_each_btf_ext_sec(seg, sec) {
3320 /* preventively increment index to avoid doing
3321 * this before every continue below
3322 */
3323 sec_num++;
3324
3325 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
3326 if (str_is_empty(sec_name))
3327 continue;
3328 scn = elf_sec_by_name(obj, sec_name);
3329 if (!scn)
3330 continue;
3331
3332 seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
3333 }
3334 }
3335 }
3336 out:
3337 if (err && libbpf_needs_btf(obj)) {
3338 pr_warn("BTF is required, but is missing or corrupted.\n");
3339 return err;
3340 }
3341 return 0;
3342 }
3343
3344 static int compare_vsi_off(const void *_a, const void *_b)
3345 {
3346 const struct btf_var_secinfo *a = _a;
3347 const struct btf_var_secinfo *b = _b;
3348
3349 return a->offset - b->offset;
3350 }
3351
3352 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
3353 struct btf_type *t)
3354 {
3355 __u32 size = 0, i, vars = btf_vlen(t);
3356 const char *sec_name = btf__name_by_offset(btf, t->name_off);
3357 struct btf_var_secinfo *vsi;
3358 bool fixup_offsets = false;
3359 int err;
3360
3361 if (!sec_name) {
3362 pr_debug("No name found in string section for DATASEC kind.\n");
3363 return -ENOENT;
3364 }
3365
3366 /* Extern-backing datasecs (.ksyms, .kconfig) have their size and
3367 * variable offsets set at the previous step. Further, not every
3368 * extern BTF VAR has corresponding ELF symbol preserved, so we skip
3369 * all fixups altogether for such sections and go straight to sorting
3370 * VARs within their DATASEC.
3371 */
3372 if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0)
3373 goto sort_vars;
3374
3375 /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to
3376 * fix this up. But BPF static linker already fixes this up and fills
3377 * all the sizes and offsets during static linking. So this step has
3378 * to be optional. But the STV_HIDDEN handling is non-optional for any
3379 * non-extern DATASEC, so the variable fixup loop below handles both
3380 * functions at the same time, paying the cost of BTF VAR <-> ELF
3381 * symbol matching just once.
3382 */
3383 if (t->size == 0) {
3384 err = find_elf_sec_sz(obj, sec_name, &size);
3385 if (err || !size) {
3386 pr_debug("sec '%s': failed to determine size from ELF: size %u, err %s\n",
3387 sec_name, size, errstr(err));
3388 return -ENOENT;
3389 }
3390
3391 t->size = size;
3392 fixup_offsets = true;
3393 }
3394
3395 for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
3396 const struct btf_type *t_var;
3397 struct btf_var *var;
3398 const char *var_name;
3399 Elf64_Sym *sym;
3400
3401 t_var = btf__type_by_id(btf, vsi->type);
3402 if (!t_var || !btf_is_var(t_var)) {
3403 pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name);
3404 return -EINVAL;
3405 }
3406
3407 var = btf_var(t_var);
3408 if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN)
3409 continue;
3410
3411 var_name = btf__name_by_offset(btf, t_var->name_off);
3412 if (!var_name) {
3413 pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n",
3414 sec_name, i);
3415 return -ENOENT;
3416 }
3417
3418 sym = find_elf_var_sym(obj, var_name);
3419 if (IS_ERR(sym)) {
3420 pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n",
3421 sec_name, var_name);
3422 return -ENOENT;
3423 }
3424
3425 if (fixup_offsets)
3426 vsi->offset = sym->st_value;
3427
3428 /* if variable is a global/weak symbol, but has restricted
3429 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR
3430 * as static. This follows similar logic for functions (BPF
3431 * subprogs) and influences libbpf's further decisions about
3432 * whether to make global data BPF array maps as
3433 * BPF_F_MMAPABLE.
3434 */
3435 if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
3436 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)
3437 var->linkage = BTF_VAR_STATIC;
3438 }
3439
3440 sort_vars:
3441 qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
3442 return 0;
3443 }
3444
3445 static int bpf_object_fixup_btf(struct bpf_object *obj)
3446 {
3447 int i, n, err = 0;
3448
3449 if (!obj->btf)
3450 return 0;
3451
3452 n = btf__type_cnt(obj->btf);
3453 for (i = 1; i < n; i++) {
3454 struct btf_type *t = btf_type_by_id(obj->btf, i);
3455
3456 /* Loader needs to fix up some of the things compiler
3457 * couldn't get its hands on while emitting BTF. This
3458 * is section size and global variable offset. We use
3459 * the info from the ELF itself for this purpose.
3460 */
3461 if (btf_is_datasec(t)) {
3462 err = btf_fixup_datasec(obj, obj->btf, t);
3463 if (err)
3464 return err;
3465 }
3466 }
3467
3468 return 0;
3469 }
3470
3471 static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
3472 {
3473 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
3474 prog->type == BPF_PROG_TYPE_LSM)
3475 return true;
3476
3477 /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
3478 * also need vmlinux BTF
3479 */
3480 if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
3481 return true;
3482
3483 return false;
3484 }
3485
3486 static bool map_needs_vmlinux_btf(struct bpf_map *map)
3487 {
3488 return bpf_map__is_struct_ops(map);
3489 }
3490
3491 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
3492 {
3493 struct bpf_program *prog;
3494 struct bpf_map *map;
3495 int i;
3496
3497 /* CO-RE relocations need kernel BTF, but only when btf_custom_path
3498 * is not specified
3499 */
3500 if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
3501 return true;
3502
3503 /* Support for typed ksyms needs kernel BTF */
3504 for (i = 0; i < obj->nr_extern; i++) {
3505 const struct extern_desc *ext;
3506
3507 ext = &obj->externs[i];
3508 if (ext->type == EXT_KSYM && ext->ksym.type_id)
3509 return true;
3510 }
3511
3512 bpf_object__for_each_program(prog, obj) {
3513 if (!prog->autoload)
3514 continue;
3515 if (prog_needs_vmlinux_btf(prog))
3516 return true;
3517 }
3518
3519 bpf_object__for_each_map(map, obj) {
3520 if (map_needs_vmlinux_btf(map))
3521 return true;
3522 }
3523
3524 return false;
3525 }
3526
3527 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
3528 {
3529 int err;
3530
3531 /* btf_vmlinux could be loaded earlier */
3532 if (obj->btf_vmlinux || obj->gen_loader)
3533 return 0;
3534
3535 if (!force && !obj_needs_vmlinux_btf(obj))
3536 return 0;
3537
3538 obj->btf_vmlinux = btf__load_vmlinux_btf();
3539 err = libbpf_get_error(obj->btf_vmlinux);
3540 if (err) {
3541 pr_warn("Error loading vmlinux BTF: %s\n", errstr(err));
3542 obj->btf_vmlinux = NULL;
3543 return err;
3544 }
3545 return 0;
3546 }
3547
3548 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
3549 {
3550 struct btf *kern_btf = obj->btf;
3551 bool btf_mandatory, sanitize;
3552 int i, err = 0;
3553
3554 if (!obj->btf)
3555 return 0;
3556
3557 if (!kernel_supports(obj, FEAT_BTF)) {
3558 if (kernel_needs_btf(obj)) {
3559 err = -EOPNOTSUPP;
3560 goto report;
3561 }
3562 pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
3563 return 0;
3564 }
3565
3566 /* Even though some subprogs are global/weak, user might prefer more
3567 * permissive BPF verification process that BPF verifier performs for
3568 * static functions, taking into account more context from the caller
3569 * functions. In such case, they need to mark such subprogs with
3570 * __attribute__((visibility("hidden"))) and libbpf will adjust
3571 * corresponding FUNC BTF type to be marked as static and trigger more
3572 * involved BPF verification process.
3573 */
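/* Illustrative sketch (not part of this file): on the BPF side this is
 * usually requested with the __hidden macro from bpf_helpers.h, which
 * expands to __attribute__((visibility("hidden"))):
 *
 *	__hidden int scale(int x)
 *	{
 *		return x * 2;
 *	}
 *
 * The loop below then flips such a subprog's FUNC BTF linkage to static.
 */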
3574 for (i = 0; i < obj->nr_programs; i++) {
3575 struct bpf_program *prog = &obj->programs[i];
3576 struct btf_type *t;
3577 const char *name;
3578 int j, n;
3579
3580 if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
3581 continue;
3582
3583 n = btf__type_cnt(obj->btf);
3584 for (j = 1; j < n; j++) {
3585 t = btf_type_by_id(obj->btf, j);
3586 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
3587 continue;
3588
3589 name = btf__str_by_offset(obj->btf, t->name_off);
3590 if (strcmp(name, prog->name) != 0)
3591 continue;
3592
3593 t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
3594 break;
3595 }
3596 }
3597
3598 sanitize = btf_needs_sanitization(obj);
3599 if (sanitize) {
3600 const void *raw_data;
3601 __u32 sz;
3602
3603 /* clone BTF to sanitize a copy and leave the original intact */
3604 raw_data = btf__raw_data(obj->btf, &sz);
3605 kern_btf = btf__new(raw_data, sz);
3606 err = libbpf_get_error(kern_btf);
3607 if (err)
3608 return err;
3609
3610 /* enforce 8-byte pointers for BPF-targeted BTFs */
3611 btf__set_pointer_size(obj->btf, 8);
3612 err = bpf_object__sanitize_btf(obj, kern_btf);
3613 if (err)
3614 return err;
3615 }
3616
3617 if (obj->gen_loader) {
3618 __u32 raw_size = 0;
3619 const void *raw_data = btf__raw_data(kern_btf, &raw_size);
3620
3621 if (!raw_data)
3622 return -ENOMEM;
3623 bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
3624 /* Pretend to have valid FD to pass various fd >= 0 checks.
3625 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
3626 */
3627 btf__set_fd(kern_btf, 0);
3628 } else {
3629 /* currently BPF_BTF_LOAD only supports log_level 1 */
3630 err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
3631 obj->log_level ? 1 : 0, obj->token_fd);
3632 }
3633 if (sanitize) {
3634 if (!err) {
3635 /* move fd to libbpf's BTF */
3636 btf__set_fd(obj->btf, btf__fd(kern_btf));
3637 btf__set_fd(kern_btf, -1);
3638 }
3639 btf__free(kern_btf);
3640 }
3641 report:
3642 if (err) {
3643 btf_mandatory = kernel_needs_btf(obj);
3644 if (btf_mandatory) {
3645 pr_warn("Error loading .BTF into kernel: %s. BTF is mandatory, can't proceed.\n",
3646 errstr(err));
3647 } else {
3648 pr_info("Error loading .BTF into kernel: %s. BTF is optional, ignoring.\n",
3649 errstr(err));
3650 err = 0;
3651 }
3652 }
3653 return err;
3654 }
3655
3656 static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
3657 {
3658 const char *name;
3659
3660 name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
3661 if (!name) {
3662 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3663 off, obj->path, elf_errmsg(-1));
3664 return NULL;
3665 }
3666
3667 return name;
3668 }
3669
3670 static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
3671 {
3672 const char *name;
3673
3674 name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
3675 if (!name) {
3676 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3677 off, obj->path, elf_errmsg(-1));
3678 return NULL;
3679 }
3680
3681 return name;
3682 }
3683
3684 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
3685 {
3686 Elf_Scn *scn;
3687
3688 scn = elf_getscn(obj->efile.elf, idx);
3689 if (!scn) {
3690 pr_warn("elf: failed to get section(%zu) from %s: %s\n",
3691 idx, obj->path, elf_errmsg(-1));
3692 return NULL;
3693 }
3694 return scn;
3695 }
3696
3697 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
3698 {
3699 Elf_Scn *scn = NULL;
3700 Elf *elf = obj->efile.elf;
3701 const char *sec_name;
3702
3703 while ((scn = elf_nextscn(elf, scn)) != NULL) {
3704 sec_name = elf_sec_name(obj, scn);
3705 if (!sec_name)
3706 return NULL;
3707
3708 if (strcmp(sec_name, name) != 0)
3709 continue;
3710
3711 return scn;
3712 }
3713 return NULL;
3714 }
3715
3716 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn)
3717 {
3718 Elf64_Shdr *shdr;
3719
3720 if (!scn)
3721 return NULL;
3722
3723 shdr = elf64_getshdr(scn);
3724 if (!shdr) {
3725 pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
3726 elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3727 return NULL;
3728 }
3729
3730 return shdr;
3731 }
3732
3733 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
3734 {
3735 const char *name;
3736 Elf64_Shdr *sh;
3737
3738 if (!scn)
3739 return NULL;
3740
3741 sh = elf_sec_hdr(obj, scn);
3742 if (!sh)
3743 return NULL;
3744
3745 name = elf_sec_str(obj, sh->sh_name);
3746 if (!name) {
3747 pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
3748 elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3749 return NULL;
3750 }
3751
3752 return name;
3753 }
3754
3755 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
3756 {
3757 Elf_Data *data;
3758
3759 if (!scn)
3760 return NULL;
3761
3762 data = elf_getdata(scn, 0);
3763 if (!data) {
3764 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
3765 elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
3766 obj->path, elf_errmsg(-1));
3767 return NULL;
3768 }
3769
3770 return data;
3771 }
3772
3773 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx)
3774 {
3775 if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym))
3776 return NULL;
3777
3778 return (Elf64_Sym *)obj->efile.symbols->d_buf + idx;
3779 }
3780
3781 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx)
3782 {
3783 if (idx >= data->d_size / sizeof(Elf64_Rel))
3784 return NULL;
3785
3786 return (Elf64_Rel *)data->d_buf + idx;
3787 }
3788
3789 static bool is_sec_name_dwarf(const char *name)
3790 {
3791 /* approximation, but the actual list is too long */
3792 return str_has_pfx(name, ".debug_");
3793 }
3794
3795 static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name)
3796 {
3797 /* no special handling of .strtab */
3798 if (hdr->sh_type == SHT_STRTAB)
3799 return true;
3800
3801 /* ignore .llvm_addrsig section as well */
3802 if (hdr->sh_type == SHT_LLVM_ADDRSIG)
3803 return true;
3804
3805 /* no subprograms will lead to an empty .text section, ignore it */
3806 if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
3807 strcmp(name, ".text") == 0)
3808 return true;
3809
3810 /* DWARF sections */
3811 if (is_sec_name_dwarf(name))
3812 return true;
3813
3814 if (str_has_pfx(name, ".rel")) {
3815 name += sizeof(".rel") - 1;
3816 /* DWARF section relocations */
3817 if (is_sec_name_dwarf(name))
3818 return true;
3819
3820 /* .BTF and .BTF.ext don't need relocations */
3821 if (strcmp(name, BTF_ELF_SEC) == 0 ||
3822 strcmp(name, BTF_EXT_ELF_SEC) == 0)
3823 return true;
3824 }
3825
3826 return false;
3827 }
3828
3829 static int cmp_progs(const void *_a, const void *_b)
3830 {
3831 const struct bpf_program *a = _a;
3832 const struct bpf_program *b = _b;
3833
3834 if (a->sec_idx != b->sec_idx)
3835 return a->sec_idx < b->sec_idx ? -1 : 1;
3836
3837 /* sec_insn_off can't be the same within the section */
3838 return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
3839 }
3840
3841 static int bpf_object__elf_collect(struct bpf_object *obj)
3842 {
3843 struct elf_sec_desc *sec_desc;
3844 Elf *elf = obj->efile.elf;
3845 Elf_Data *btf_ext_data = NULL;
3846 Elf_Data *btf_data = NULL;
3847 int idx = 0, err = 0;
3848 const char *name;
3849 Elf_Data *data;
3850 Elf_Scn *scn;
3851 Elf64_Shdr *sh;
3852
3853 /* ELF section indices are 0-based, but sec #0 is special "invalid"
3854 * section. Since section count retrieved by elf_getshdrnum() does
3855 * include sec #0, it is already the necessary size of an array to keep
3856 * all the sections.
3857 */
3858 if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) {
3859 pr_warn("elf: failed to get the number of sections for %s: %s\n",
3860 obj->path, elf_errmsg(-1));
3861 return -LIBBPF_ERRNO__FORMAT;
3862 }
3863 obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
3864 if (!obj->efile.secs)
3865 return -ENOMEM;
3866
3867 /* a bunch of ELF parsing functionality depends on processing symbols,
3868 * so do the first pass and find the symbol table
3869 */
3870 scn = NULL;
3871 while ((scn = elf_nextscn(elf, scn)) != NULL) {
3872 sh = elf_sec_hdr(obj, scn);
3873 if (!sh)
3874 return -LIBBPF_ERRNO__FORMAT;
3875
3876 if (sh->sh_type == SHT_SYMTAB) {
3877 if (obj->efile.symbols) {
3878 pr_warn("elf: multiple symbol tables in %s\n", obj->path);
3879 return -LIBBPF_ERRNO__FORMAT;
3880 }
3881
3882 data = elf_sec_data(obj, scn);
3883 if (!data)
3884 return -LIBBPF_ERRNO__FORMAT;
3885
3886 idx = elf_ndxscn(scn);
3887
3888 obj->efile.symbols = data;
3889 obj->efile.symbols_shndx = idx;
3890 obj->efile.strtabidx = sh->sh_link;
3891 }
3892 }
3893
3894 if (!obj->efile.symbols) {
3895 pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
3896 obj->path);
3897 return -ENOENT;
3898 }
3899
3900 scn = NULL;
3901 while ((scn = elf_nextscn(elf, scn)) != NULL) {
3902 idx = elf_ndxscn(scn);
3903 sec_desc = &obj->efile.secs[idx];
3904
3905 sh = elf_sec_hdr(obj, scn);
3906 if (!sh)
3907 return -LIBBPF_ERRNO__FORMAT;
3908
3909 name = elf_sec_str(obj, sh->sh_name);
3910 if (!name)
3911 return -LIBBPF_ERRNO__FORMAT;
3912
3913 if (ignore_elf_section(sh, name))
3914 continue;
3915
3916 data = elf_sec_data(obj, scn);
3917 if (!data)
3918 return -LIBBPF_ERRNO__FORMAT;
3919
3920 pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
3921 idx, name, (unsigned long)data->d_size,
3922 (int)sh->sh_link, (unsigned long)sh->sh_flags,
3923 (int)sh->sh_type);
3924
3925 if (strcmp(name, "license") == 0) {
3926 err = bpf_object__init_license(obj, data->d_buf, data->d_size);
3927 if (err)
3928 return err;
3929 } else if (strcmp(name, "version") == 0) {
3930 err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
3931 if (err)
3932 return err;
3933 } else if (strcmp(name, "maps") == 0) {
3934 pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n");
3935 return -ENOTSUP;
3936 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
3937 obj->efile.btf_maps_shndx = idx;
3938 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
3939 if (sh->sh_type != SHT_PROGBITS)
3940 return -LIBBPF_ERRNO__FORMAT;
3941 btf_data = data;
3942 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
3943 if (sh->sh_type != SHT_PROGBITS)
3944 return -LIBBPF_ERRNO__FORMAT;
3945 btf_ext_data = data;
3946 } else if (sh->sh_type == SHT_SYMTAB) {
3947 /* already processed during the first pass above */
3948 } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) {
3949 if (sh->sh_flags & SHF_EXECINSTR) {
3950 if (strcmp(name, ".text") == 0)
3951 obj->efile.text_shndx = idx;
3952 err = bpf_object__add_programs(obj, data, name, idx);
3953 if (err)
3954 return err;
3955 } else if (strcmp(name, DATA_SEC) == 0 ||
3956 str_has_pfx(name, DATA_SEC ".")) {
3957 sec_desc->sec_type = SEC_DATA;
3958 sec_desc->shdr = sh;
3959 sec_desc->data = data;
3960 } else if (strcmp(name, RODATA_SEC) == 0 ||
3961 str_has_pfx(name, RODATA_SEC ".")) {
3962 sec_desc->sec_type = SEC_RODATA;
3963 sec_desc->shdr = sh;
3964 sec_desc->data = data;
3965 } else if (strcmp(name, STRUCT_OPS_SEC) == 0 ||
3966 strcmp(name, STRUCT_OPS_LINK_SEC) == 0 ||
3967 strcmp(name, "?" STRUCT_OPS_SEC) == 0 ||
3968 strcmp(name, "?" STRUCT_OPS_LINK_SEC) == 0) {
3969 sec_desc->sec_type = SEC_ST_OPS;
3970 sec_desc->shdr = sh;
3971 sec_desc->data = data;
3972 obj->efile.has_st_ops = true;
3973 } else if (strcmp(name, ARENA_SEC) == 0) {
3974 obj->efile.arena_data = data;
3975 obj->efile.arena_data_shndx = idx;
3976 } else if (strcmp(name, JUMPTABLES_SEC) == 0) {
3977 obj->jumptables_data = malloc(data->d_size);
3978 if (!obj->jumptables_data)
3979 return -ENOMEM;
3980 memcpy(obj->jumptables_data, data->d_buf, data->d_size);
3981 obj->jumptables_data_sz = data->d_size;
3982 obj->efile.jumptables_data_shndx = idx;
3983 } else {
3984 pr_info("elf: skipping unrecognized data section(%d) %s\n",
3985 idx, name);
3986 }
3987 } else if (sh->sh_type == SHT_REL) {
3988 int targ_sec_idx = sh->sh_info; /* points to other section */
3989
3990 if (sh->sh_entsize != sizeof(Elf64_Rel) ||
3991 targ_sec_idx >= obj->efile.sec_cnt)
3992 return -LIBBPF_ERRNO__FORMAT;
3993
3994 /* Only do relo for section with exec instructions */
3995 if (!section_have_execinstr(obj, targ_sec_idx) &&
3996 strcmp(name, ".rel" STRUCT_OPS_SEC) &&
3997 strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
3998 strcmp(name, ".rel?" STRUCT_OPS_SEC) &&
3999 strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) &&
4000 strcmp(name, ".rel" MAPS_ELF_SEC)) {
4001 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
4002 idx, name, targ_sec_idx,
4003 elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
4004 continue;
4005 }
4006
4007 sec_desc->sec_type = SEC_RELO;
4008 sec_desc->shdr = sh;
4009 sec_desc->data = data;
4010 } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 ||
4011 str_has_pfx(name, BSS_SEC "."))) {
4012 sec_desc->sec_type = SEC_BSS;
4013 sec_desc->shdr = sh;
4014 sec_desc->data = data;
4015 } else {
4016 pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
4017 (size_t)sh->sh_size);
4018 }
4019 }
4020
4021 if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
4022 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
4023 return -LIBBPF_ERRNO__FORMAT;
4024 }
4025
4026 /* change BPF program insns to native endianness for introspection */
4027 if (!is_native_endianness(obj))
4028 bpf_object_bswap_progs(obj);
4029
4030 /* sort BPF programs by section name and in-section instruction offset
4031 * for faster search
4032 */
4033 if (obj->nr_programs)
4034 qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
4035
4036 return bpf_object__init_btf(obj, btf_data, btf_ext_data);
4037 }
4038
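/* Illustrative sketch (assumed typical BPF-side declarations, using macros
 * from bpf_helpers.h):
 *
 *	extern int LINUX_KERNEL_VERSION __kconfig;
 *	extern void bpf_rcu_read_lock(void) __ksym;
 *
 * The compiler emits these as undefined (SHN_UNDEF), untyped (STT_NOTYPE)
 * global or weak symbols, which is exactly what the predicate below matches.
 */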
4039 static bool sym_is_extern(const Elf64_Sym *sym)
4040 {
4041 int bind = ELF64_ST_BIND(sym->st_info);
4042 /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
4043 return sym->st_shndx == SHN_UNDEF &&
4044 (bind == STB_GLOBAL || bind == STB_WEAK) &&
4045 ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE;
4046 }
4047
4048 static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
4049 {
4050 int bind = ELF64_ST_BIND(sym->st_info);
4051 int type = ELF64_ST_TYPE(sym->st_info);
4052
4053 /* in .text section */
4054 if (sym->st_shndx != text_shndx)
4055 return false;
4056
4057 /* local function */
4058 if (bind == STB_LOCAL && type == STT_SECTION)
4059 return true;
4060
4061 /* global function */
4062 return (bind == STB_GLOBAL || bind == STB_WEAK) && type == STT_FUNC;
4063 }
4064
4065 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
4066 {
4067 const struct btf_type *t;
4068 const char *tname;
4069 int i, n;
4070
4071 if (!btf)
4072 return -ESRCH;
4073
4074 n = btf__type_cnt(btf);
4075 for (i = 1; i < n; i++) {
4076 t = btf__type_by_id(btf, i);
4077
4078 if (!btf_is_var(t) && !btf_is_func(t))
4079 continue;
4080
4081 tname = btf__name_by_offset(btf, t->name_off);
4082 if (strcmp(tname, ext_name))
4083 continue;
4084
4085 if (btf_is_var(t) &&
4086 btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
4087 return -EINVAL;
4088
4089 if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
4090 return -EINVAL;
4091
4092 return i;
4093 }
4094
4095 return -ENOENT;
4096 }
4097
4098 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) {
4099 const struct btf_var_secinfo *vs;
4100 const struct btf_type *t;
4101 int i, j, n;
4102
4103 if (!btf)
4104 return -ESRCH;
4105
4106 n = btf__type_cnt(btf);
4107 for (i = 1; i < n; i++) {
4108 t = btf__type_by_id(btf, i);
4109
4110 if (!btf_is_datasec(t))
4111 continue;
4112
4113 vs = btf_var_secinfos(t);
4114 for (j = 0; j < btf_vlen(t); j++, vs++) {
4115 if (vs->type == ext_btf_id)
4116 return i;
4117 }
4118 }
4119
4120 return -ENOENT;
4121 }
4122
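/* Summary of the mapping implemented below (derived from this function):
 *	bool                              -> KCFG_BOOL
 *	char                              -> KCFG_CHAR
 *	int of power-of-2 size <= 8 bytes -> KCFG_INT
 *	enum libbpf_tristate              -> KCFG_TRISTATE
 *	char[N]                           -> KCFG_CHAR_ARR
 * anything else is rejected as KCFG_UNKNOWN.
 */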
4123 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
4124 bool *is_signed)
4125 {
4126 const struct btf_type *t;
4127 const char *name;
4128
4129 t = skip_mods_and_typedefs(btf, id, NULL);
4130 name = btf__name_by_offset(btf, t->name_off);
4131
4132 if (is_signed)
4133 *is_signed = false;
4134 switch (btf_kind(t)) {
4135 case BTF_KIND_INT: {
4136 int enc = btf_int_encoding(t);
4137
4138 if (enc & BTF_INT_BOOL)
4139 return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
4140 if (is_signed)
4141 *is_signed = enc & BTF_INT_SIGNED;
4142 if (t->size == 1)
4143 return KCFG_CHAR;
4144 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
4145 return KCFG_UNKNOWN;
4146 return KCFG_INT;
4147 }
4148 case BTF_KIND_ENUM:
4149 if (t->size != 4)
4150 return KCFG_UNKNOWN;
4151 if (strcmp(name, "libbpf_tristate"))
4152 return KCFG_UNKNOWN;
4153 return KCFG_TRISTATE;
4154 case BTF_KIND_ENUM64:
4155 if (strcmp(name, "libbpf_tristate"))
4156 return KCFG_UNKNOWN;
4157 return KCFG_TRISTATE;
4158 case BTF_KIND_ARRAY:
4159 if (btf_array(t)->nelems == 0)
4160 return KCFG_UNKNOWN;
4161 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
4162 return KCFG_UNKNOWN;
4163 return KCFG_CHAR_ARR;
4164 default:
4165 return KCFG_UNKNOWN;
4166 }
4167 }
4168
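/* Worked example (hypothetical externs, for illustration only): kcfg externs
 * "long l" (align 8), "int i" (align 4) and "bool b" (align 1) sort as
 * l, i, b; the .kconfig offsets later assigned in
 * bpf_object__collect_externs() then become 0, 8 and 12, minimizing padding.
 */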
4169 static int cmp_externs(const void *_a, const void *_b)
4170 {
4171 const struct extern_desc *a = _a;
4172 const struct extern_desc *b = _b;
4173
4174 if (a->type != b->type)
4175 return a->type < b->type ? -1 : 1;
4176
4177 if (a->type == EXT_KCFG) {
4178 /* descending order by alignment requirements */
4179 if (a->kcfg.align != b->kcfg.align)
4180 return a->kcfg.align > b->kcfg.align ? -1 : 1;
4181 /* ascending order by size, within same alignment class */
4182 if (a->kcfg.sz != b->kcfg.sz)
4183 return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
4184 }
4185
4186 /* resolve ties by name */
4187 return strcmp(a->name, b->name);
4188 }
4189
4190 static int find_int_btf_id(const struct btf *btf)
4191 {
4192 const struct btf_type *t;
4193 int i, n;
4194
4195 n = btf__type_cnt(btf);
4196 for (i = 1; i < n; i++) {
4197 t = btf__type_by_id(btf, i);
4198
4199 if (btf_is_int(t) && btf_int_bits(t) == 32)
4200 return i;
4201 }
4202
4203 return 0;
4204 }
4205
4206 static int add_dummy_ksym_var(struct btf *btf)
4207 {
4208 int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
4209 const struct btf_var_secinfo *vs;
4210 const struct btf_type *sec;
4211
4212 if (!btf)
4213 return 0;
4214
4215 sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
4216 BTF_KIND_DATASEC);
4217 if (sec_btf_id < 0)
4218 return 0;
4219
4220 sec = btf__type_by_id(btf, sec_btf_id);
4221 vs = btf_var_secinfos(sec);
4222 for (i = 0; i < btf_vlen(sec); i++, vs++) {
4223 const struct btf_type *vt;
4224
4225 vt = btf__type_by_id(btf, vs->type);
4226 if (btf_is_func(vt))
4227 break;
4228 }
4229
4230 /* No func in ksyms sec. No need to add dummy var. */
4231 if (i == btf_vlen(sec))
4232 return 0;
4233
4234 int_btf_id = find_int_btf_id(btf);
4235 dummy_var_btf_id = btf__add_var(btf,
4236 "dummy_ksym",
4237 BTF_VAR_GLOBAL_ALLOCATED,
4238 int_btf_id);
4239 if (dummy_var_btf_id < 0)
4240 pr_warn("cannot create a dummy_ksym var\n");
4241
4242 return dummy_var_btf_id;
4243 }
4244
4245 static int bpf_object__collect_externs(struct bpf_object *obj)
4246 {
4247 struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
4248 const struct btf_type *t;
4249 struct extern_desc *ext;
4250 int i, n, off, dummy_var_btf_id;
4251 const char *ext_name, *sec_name;
4252 size_t ext_essent_len;
4253 Elf_Scn *scn;
4254 Elf64_Shdr *sh;
4255
4256 if (!obj->efile.symbols)
4257 return 0;
4258
4259 scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
4260 sh = elf_sec_hdr(obj, scn);
4261 if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))
4262 return -LIBBPF_ERRNO__FORMAT;
4263
4264 dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
4265 if (dummy_var_btf_id < 0)
4266 return dummy_var_btf_id;
4267
4268 n = sh->sh_size / sh->sh_entsize;
4269 pr_debug("looking for externs among %d symbols...\n", n);
4270
4271 for (i = 0; i < n; i++) {
4272 Elf64_Sym *sym = elf_sym_by_idx(obj, i);
4273
4274 if (!sym)
4275 return -LIBBPF_ERRNO__FORMAT;
4276 if (!sym_is_extern(sym))
4277 continue;
4278 ext_name = elf_sym_str(obj, sym->st_name);
4279 if (!ext_name || !ext_name[0])
4280 continue;
4281
4282 ext = obj->externs;
4283 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
4284 if (!ext)
4285 return -ENOMEM;
4286 obj->externs = ext;
4287 ext = &ext[obj->nr_extern];
4288 memset(ext, 0, sizeof(*ext));
4289 obj->nr_extern++;
4290
4291 ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
4292 if (ext->btf_id <= 0) {
4293 pr_warn("failed to find BTF for extern '%s': %d\n",
4294 ext_name, ext->btf_id);
4295 return ext->btf_id;
4296 }
4297 t = btf__type_by_id(obj->btf, ext->btf_id);
4298 ext->name = strdup(btf__name_by_offset(obj->btf, t->name_off));
4299 if (!ext->name)
4300 return -ENOMEM;
4301 ext->sym_idx = i;
4302 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
4303
4304 ext_essent_len = bpf_core_essential_name_len(ext->name);
4305 ext->essent_name = NULL;
4306 if (ext_essent_len != strlen(ext->name)) {
4307 ext->essent_name = strndup(ext->name, ext_essent_len);
4308 if (!ext->essent_name)
4309 return -ENOMEM;
4310 }
4311
4312 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
4313 if (ext->sec_btf_id <= 0) {
4314 pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
4315 ext_name, ext->btf_id, ext->sec_btf_id);
4316 return ext->sec_btf_id;
4317 }
4318 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
4319 sec_name = btf__name_by_offset(obj->btf, sec->name_off);
4320
4321 if (strcmp(sec_name, KCONFIG_SEC) == 0) {
4322 if (btf_is_func(t)) {
4323 pr_warn("extern function %s is unsupported under %s section\n",
4324 ext->name, KCONFIG_SEC);
4325 return -ENOTSUP;
4326 }
4327 kcfg_sec = sec;
4328 ext->type = EXT_KCFG;
4329 ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
4330 if (ext->kcfg.sz <= 0) {
4331 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
4332 ext_name, ext->kcfg.sz);
4333 return ext->kcfg.sz;
4334 }
4335 ext->kcfg.align = btf__align_of(obj->btf, t->type);
4336 if (ext->kcfg.align <= 0) {
4337 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
4338 ext_name, ext->kcfg.align);
4339 return -EINVAL;
4340 }
4341 ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
4342 &ext->kcfg.is_signed);
4343 if (ext->kcfg.type == KCFG_UNKNOWN) {
4344 pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
4345 return -ENOTSUP;
4346 }
4347 } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
4348 ksym_sec = sec;
4349 ext->type = EXT_KSYM;
4350 skip_mods_and_typedefs(obj->btf, t->type,
4351 &ext->ksym.type_id);
4352 } else {
4353 pr_warn("unrecognized extern section '%s'\n", sec_name);
4354 return -ENOTSUP;
4355 }
4356 }
4357 pr_debug("collected %d externs total\n", obj->nr_extern);
4358
4359 if (!obj->nr_extern)
4360 return 0;
4361
4362 /* sort externs by type, for kcfg ones also by (align, size, name) */
4363 qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
4364
4365 /* for .ksyms section, we need to turn all externs into allocated
4366 * variables in BTF to pass kernel verification; we do this by
4367 * pretending that each extern is an 8-byte variable
4368 */
4369 if (ksym_sec) {
4370 /* find existing 4-byte integer type in BTF to use for fake
4371 * extern variables in DATASEC
4372 */
4373 int int_btf_id = find_int_btf_id(obj->btf);
4374 /* For extern function, a dummy_var added earlier
4375 * will be used to replace the vs->type and
4376 * its name string will be used to refill
4377 * the missing param's name.
4378 */
4379 const struct btf_type *dummy_var;
4380
4381 dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
4382 for (i = 0; i < obj->nr_extern; i++) {
4383 ext = &obj->externs[i];
4384 if (ext->type != EXT_KSYM)
4385 continue;
4386 pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
4387 i, ext->sym_idx, ext->name);
4388 }
4389
4390 sec = ksym_sec;
4391 n = btf_vlen(sec);
4392 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
4393 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
4394 struct btf_type *vt;
4395
4396 vt = (void *)btf__type_by_id(obj->btf, vs->type);
4397 ext_name = btf__name_by_offset(obj->btf, vt->name_off);
4398 ext = find_extern_by_name(obj, ext_name);
4399 if (!ext) {
4400 pr_warn("failed to find extern definition for BTF %s '%s'\n",
4401 btf_kind_str(vt), ext_name);
4402 return -ESRCH;
4403 }
4404 if (btf_is_func(vt)) {
4405 const struct btf_type *func_proto;
4406 struct btf_param *param;
4407 int j;
4408
4409 func_proto = btf__type_by_id(obj->btf,
4410 vt->type);
4411 param = btf_params(func_proto);
4412 /* Reuse the dummy_var string if the
4413 * func proto does not have param name.
4414 */
4415 for (j = 0; j < btf_vlen(func_proto); j++)
4416 if (param[j].type && !param[j].name_off)
4417 param[j].name_off =
4418 dummy_var->name_off;
4419 vs->type = dummy_var_btf_id;
4420 vt->info &= ~0xffff;
4421 vt->info |= BTF_FUNC_GLOBAL;
4422 } else {
4423 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
4424 vt->type = int_btf_id;
4425 }
4426 vs->offset = off;
4427 vs->size = sizeof(int);
4428 }
4429 sec->size = off;
4430 }
4431
4432 if (kcfg_sec) {
4433 sec = kcfg_sec;
4434 /* for kcfg externs calculate their offsets within a .kconfig map */
4435 off = 0;
4436 for (i = 0; i < obj->nr_extern; i++) {
4437 ext = &obj->externs[i];
4438 if (ext->type != EXT_KCFG)
4439 continue;
4440
4441 ext->kcfg.data_off = roundup(off, ext->kcfg.align);
4442 off = ext->kcfg.data_off + ext->kcfg.sz;
4443 pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
4444 i, ext->sym_idx, ext->kcfg.data_off, ext->name);
4445 }
4446 sec->size = off;
4447 n = btf_vlen(sec);
4448 for (i = 0; i < n; i++) {
4449 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
4450
4451 t = btf__type_by_id(obj->btf, vs->type);
4452 ext_name = btf__name_by_offset(obj->btf, t->name_off);
4453 ext = find_extern_by_name(obj, ext_name);
4454 if (!ext) {
4455 pr_warn("failed to find extern definition for BTF var '%s'\n",
4456 ext_name);
4457 return -ESRCH;
4458 }
4459 btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
4460 vs->offset = ext->kcfg.data_off;
4461 }
4462 }
4463 return 0;
4464 }
4465
4466 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
4467 {
4468 return prog->sec_idx == obj->efile.text_shndx;
4469 }
4470
4471 struct bpf_program *
4472 bpf_object__find_program_by_name(const struct bpf_object *obj,
4473 const char *name)
4474 {
4475 struct bpf_program *prog;
4476
4477 bpf_object__for_each_program(prog, obj) {
4478 if (prog_is_subprog(obj, prog))
4479 continue;
4480 if (!strcmp(prog->name, name))
4481 return prog;
4482 }
4483 return errno = ENOENT, NULL;
4484 }
4485
4486 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
4487 int shndx)
4488 {
4489 switch (obj->efile.secs[shndx].sec_type) {
4490 case SEC_BSS:
4491 case SEC_DATA:
4492 case SEC_RODATA:
4493 return true;
4494 default:
4495 return false;
4496 }
4497 }
4498
4499 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
4500 int shndx)
4501 {
4502 return shndx == obj->efile.btf_maps_shndx;
4503 }
4504
4505 static enum libbpf_map_type
4506 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
4507 {
4508 if (shndx == obj->efile.symbols_shndx)
4509 return LIBBPF_MAP_KCONFIG;
4510
4511 switch (obj->efile.secs[shndx].sec_type) {
4512 case SEC_BSS:
4513 return LIBBPF_MAP_BSS;
4514 case SEC_DATA:
4515 return LIBBPF_MAP_DATA;
4516 case SEC_RODATA:
4517 return LIBBPF_MAP_RODATA;
4518 default:
4519 return LIBBPF_MAP_UNSPEC;
4520 }
4521 }
4522
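/* Note (added for clarity): BPF_LD | BPF_IMM | BPF_DW is the 16-byte
 * ld_imm64 instruction and spans two struct bpf_insn slots. When it carries
 * a map reference (BPF_PSEUDO_MAP_FD / BPF_PSEUDO_MAP_VALUE), the loop below
 * clears the imm field of both slots before hashing, so the digest does not
 * depend on the fd values patched in during relocation.
 */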
4523 static int bpf_prog_compute_hash(struct bpf_program *prog)
4524 {
4525 struct bpf_insn *purged;
4526 int i, err = 0;
4527
4528 purged = calloc(prog->insns_cnt, BPF_INSN_SZ);
4529 if (!purged)
4530 return -ENOMEM;
4531
4532 /* If relocations have been done, the map_fd needs to be
4533 * discarded for the digest calculation.
4534 */
4535 for (i = 0; i < prog->insns_cnt; i++) {
4536 purged[i] = prog->insns[i];
4537 if (purged[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
4538 (purged[i].src_reg == BPF_PSEUDO_MAP_FD ||
4539 purged[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
4540 purged[i].imm = 0;
4541 i++;
4542 if (i >= prog->insns_cnt ||
4543 prog->insns[i].code != 0 ||
4544 prog->insns[i].dst_reg != 0 ||
4545 prog->insns[i].src_reg != 0 ||
4546 prog->insns[i].off != 0) {
4547 err = -EINVAL;
4548 goto out;
4549 }
4550 purged[i] = prog->insns[i];
4551 purged[i].imm = 0;
4552 }
4553 }
4554 libbpf_sha256(purged, prog->insns_cnt * sizeof(struct bpf_insn),
4555 prog->hash);
4556 out:
4557 free(purged);
4558 return err;
4559 }
4560
4561 static int bpf_program__record_reloc(struct bpf_program *prog,
4562 struct reloc_desc *reloc_desc,
4563 __u32 insn_idx, const char *sym_name,
4564 const Elf64_Sym *sym, const Elf64_Rel *rel)
4565 {
4566 struct bpf_insn *insn = &prog->insns[insn_idx];
4567 size_t map_idx, nr_maps = prog->obj->nr_maps;
4568 struct bpf_object *obj = prog->obj;
4569 __u32 shdr_idx = sym->st_shndx;
4570 enum libbpf_map_type type;
4571 const char *sym_sec_name;
4572 struct bpf_map *map;
4573
4574 if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
4575 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
4576 prog->name, sym_name, insn_idx, insn->code);
4577 return -LIBBPF_ERRNO__RELOC;
4578 }
4579
4580 if (sym_is_extern(sym)) {
4581 int sym_idx = ELF64_R_SYM(rel->r_info);
4582 int i, n = obj->nr_extern;
4583 struct extern_desc *ext;
4584
4585 for (i = 0; i < n; i++) {
4586 ext = &obj->externs[i];
4587 if (ext->sym_idx == sym_idx)
4588 break;
4589 }
4590 if (i >= n) {
4591 pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
4592 prog->name, sym_name, sym_idx);
4593 return -LIBBPF_ERRNO__RELOC;
4594 }
4595 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
4596 prog->name, i, ext->name, ext->sym_idx, insn_idx);
4597 if (insn->code == (BPF_JMP | BPF_CALL))
4598 reloc_desc->type = RELO_EXTERN_CALL;
4599 else
4600 reloc_desc->type = RELO_EXTERN_LD64;
4601 reloc_desc->insn_idx = insn_idx;
4602 reloc_desc->ext_idx = i;
4603 return 0;
4604 }
4605
4606 /* sub-program call relocation */
4607 if (is_call_insn(insn)) {
4608 if (insn->src_reg != BPF_PSEUDO_CALL) {
4609 pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
4610 return -LIBBPF_ERRNO__RELOC;
4611 }
4612 /* text_shndx can be 0, if no default "main" program exists */
4613 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
4614 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4615 pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
4616 prog->name, sym_name, sym_sec_name);
4617 return -LIBBPF_ERRNO__RELOC;
4618 }
4619 if (sym->st_value % BPF_INSN_SZ) {
4620 pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
4621 prog->name, sym_name, (size_t)sym->st_value);
4622 return -LIBBPF_ERRNO__RELOC;
4623 }
4624 reloc_desc->type = RELO_CALL;
4625 reloc_desc->insn_idx = insn_idx;
4626 reloc_desc->sym_off = sym->st_value;
4627 return 0;
4628 }
4629
4630 if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
4631 pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
4632 prog->name, sym_name, shdr_idx);
4633 return -LIBBPF_ERRNO__RELOC;
4634 }
4635
4636 /* loading subprog addresses */
4637 if (sym_is_subprog(sym, obj->efile.text_shndx)) {
4638 /* global_func: sym->st_value = offset in the section, insn->imm = 0.
4639 * local_func: sym->st_value = 0, insn->imm = offset in the section.
4640 */
4641 if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
4642 pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
4643 prog->name, sym_name, (size_t)sym->st_value, insn->imm);
4644 return -LIBBPF_ERRNO__RELOC;
4645 }
4646
4647 reloc_desc->type = RELO_SUBPROG_ADDR;
4648 reloc_desc->insn_idx = insn_idx;
4649 reloc_desc->sym_off = sym->st_value;
4650 return 0;
4651 }
4652
4653 type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
4654 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4655
4656 /* arena data relocation */
4657 if (shdr_idx == obj->efile.arena_data_shndx) {
4658 if (obj->arena_map_idx < 0) {
4659 pr_warn("prog '%s': bad arena data relocation at insn %u, no arena maps defined\n",
4660 prog->name, insn_idx);
4661 return -LIBBPF_ERRNO__RELOC;
4662 }
4663 reloc_desc->type = RELO_DATA;
4664 reloc_desc->insn_idx = insn_idx;
4665 reloc_desc->map_idx = obj->arena_map_idx;
4666 reloc_desc->sym_off = sym->st_value;
4667
4668 map = &obj->maps[obj->arena_map_idx];
4669 pr_debug("prog '%s': found arena map %d (%s, sec %d, off %zu) for insn %u\n",
4670 prog->name, obj->arena_map_idx, map->name, map->sec_idx,
4671 map->sec_offset, insn_idx);
4672 return 0;
4673 }
4674
4675 /* jump table data relocation */
4676 if (shdr_idx == obj->efile.jumptables_data_shndx) {
4677 reloc_desc->type = RELO_INSN_ARRAY;
4678 reloc_desc->insn_idx = insn_idx;
4679 reloc_desc->map_idx = -1;
4680 reloc_desc->sym_off = sym->st_value;
4681 reloc_desc->sym_size = sym->st_size;
4682 return 0;
4683 }
4684
4685 /* generic map reference relocation */
4686 if (type == LIBBPF_MAP_UNSPEC) {
4687 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
4688 pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
4689 prog->name, sym_name, sym_sec_name);
4690 return -LIBBPF_ERRNO__RELOC;
4691 }
4692 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4693 map = &obj->maps[map_idx];
4694 if (map->libbpf_type != type ||
4695 map->sec_idx != sym->st_shndx ||
4696 map->sec_offset != sym->st_value)
4697 continue;
4698 pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
4699 prog->name, map_idx, map->name, map->sec_idx,
4700 map->sec_offset, insn_idx);
4701 break;
4702 }
4703 if (map_idx >= nr_maps) {
4704 pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
4705 prog->name, sym_sec_name, (size_t)sym->st_value);
4706 return -LIBBPF_ERRNO__RELOC;
4707 }
4708 reloc_desc->type = RELO_LD64;
4709 reloc_desc->insn_idx = insn_idx;
4710 reloc_desc->map_idx = map_idx;
4711 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
4712 return 0;
4713 }
4714
4715 /* global data map relocation */
4716 if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
4717 pr_warn("prog '%s': bad data relo against section '%s'\n",
4718 prog->name, sym_sec_name);
4719 return -LIBBPF_ERRNO__RELOC;
4720 }
4721 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4722 map = &obj->maps[map_idx];
4723 if (map->libbpf_type != type || map->sec_idx != sym->st_shndx)
4724 continue;
4725 pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
4726 prog->name, map_idx, map->name, map->sec_idx,
4727 map->sec_offset, insn_idx);
4728 break;
4729 }
4730 if (map_idx >= nr_maps) {
4731 pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
4732 prog->name, sym_sec_name);
4733 return -LIBBPF_ERRNO__RELOC;
4734 }
4735
4736 reloc_desc->type = RELO_DATA;
4737 reloc_desc->insn_idx = insn_idx;
4738 reloc_desc->map_idx = map_idx;
4739 reloc_desc->sym_off = sym->st_value;
4740 return 0;
4741 }
4742
4743 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
4744 {
4745 return insn_idx >= prog->sec_insn_off &&
4746 insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
4747 }
4748
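/* Note (added for clarity): obj->programs is kept sorted by
 * (sec_idx, sec_insn_off) via cmp_progs(), so this is an upper-bound binary
 * search for the last program starting at or before insn_idx; the final
 * prog_contains_insn() check rejects instructions that fall into a gap
 * (e.g. an overridden weak function).
 */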
4749 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
4750 size_t sec_idx, size_t insn_idx)
4751 {
4752 int l = 0, r = obj->nr_programs - 1, m;
4753 struct bpf_program *prog;
4754
4755 if (!obj->nr_programs)
4756 return NULL;
4757
4758 while (l < r) {
4759 m = l + (r - l + 1) / 2;
4760 prog = &obj->programs[m];
4761
4762 if (prog->sec_idx < sec_idx ||
4763 (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
4764 l = m;
4765 else
4766 r = m - 1;
4767 }
4768 /* matching program could be at index l, but it still might be the
4769 * wrong one, so we need to double check conditions for the last time
4770 */
4771 prog = &obj->programs[l];
4772 if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
4773 return prog;
4774 return NULL;
4775 }
4776
4777 static int
4778 bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
4779 {
4780 const char *relo_sec_name, *sec_name;
4781 size_t sec_idx = shdr->sh_info, sym_idx;
4782 struct bpf_program *prog;
4783 struct reloc_desc *relos;
4784 int err, i, nrels;
4785 const char *sym_name;
4786 __u32 insn_idx;
4787 Elf_Scn *scn;
4788 Elf_Data *scn_data;
4789 Elf64_Sym *sym;
4790 Elf64_Rel *rel;
4791
4792 if (sec_idx >= obj->efile.sec_cnt)
4793 return -EINVAL;
4794
4795 scn = elf_sec_by_idx(obj, sec_idx);
4796 scn_data = elf_sec_data(obj, scn);
4797 if (!scn_data)
4798 return -LIBBPF_ERRNO__FORMAT;
4799
4800 relo_sec_name = elf_sec_str(obj, shdr->sh_name);
4801 sec_name = elf_sec_name(obj, scn);
4802 if (!relo_sec_name || !sec_name)
4803 return -EINVAL;
4804
4805 pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
4806 relo_sec_name, sec_idx, sec_name);
4807 nrels = shdr->sh_size / shdr->sh_entsize;
4808
4809 for (i = 0; i < nrels; i++) {
4810 rel = elf_rel_by_idx(data, i);
4811 if (!rel) {
4812 pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
4813 return -LIBBPF_ERRNO__FORMAT;
4814 }
4815
4816 sym_idx = ELF64_R_SYM(rel->r_info);
4817 sym = elf_sym_by_idx(obj, sym_idx);
4818 if (!sym) {
4819 pr_warn("sec '%s': symbol #%zu not found for relo #%d\n",
4820 relo_sec_name, sym_idx, i);
4821 return -LIBBPF_ERRNO__FORMAT;
4822 }
4823
4824 if (sym->st_shndx >= obj->efile.sec_cnt) {
4825 pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n",
4826 relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);
4827 return -LIBBPF_ERRNO__FORMAT;
4828 }
4829
4830 if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
4831 pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
4832 relo_sec_name, (size_t)rel->r_offset, i);
4833 return -LIBBPF_ERRNO__FORMAT;
4834 }
4835
4836 insn_idx = rel->r_offset / BPF_INSN_SZ;
4837 /* relocations against static functions are recorded as
4838 * relocations against the section that contains a function;
4839 * in such case, symbol will be STT_SECTION and sym.st_name
4840 * will point to empty string (0), so fetch section name
4841 * instead
4842 */
4843 if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0)
4844 sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx));
4845 else
4846 sym_name = elf_sym_str(obj, sym->st_name);
4847 sym_name = sym_name ?: "<?";
4848
4849 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
4850 relo_sec_name, i, insn_idx, sym_name);
4851
4852 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
4853 if (!prog) {
4854 pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
4855 relo_sec_name, i, sec_name, insn_idx);
4856 continue;
4857 }
4858
4859 relos = libbpf_reallocarray(prog->reloc_desc,
4860 prog->nr_reloc + 1, sizeof(*relos));
4861 if (!relos)
4862 return -ENOMEM;
4863 prog->reloc_desc = relos;
4864
4865 /* adjust insn_idx to local BPF program frame of reference */
4866 insn_idx -= prog->sec_insn_off;
4867 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
4868 insn_idx, sym_name, sym, rel);
4869 if (err)
4870 return err;
4871
4872 prog->nr_reloc++;
4873 }
4874 return 0;
4875 }
4876
4877 static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
4878 {
4879 int id;
4880
4881 if (!obj->btf)
4882 return -ENOENT;
4883
4884 /* if it's BTF-defined map, we don't need to search for type IDs.
4885 * For struct_ops map, it does not need btf_key_type_id and
4886 * btf_value_type_id.
4887 */
4888 if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
4889 return 0;
4890
4891 /*
4892 * LLVM annotates global data differently in BTF, that is,
4893 * only as '.data', '.bss' or '.rodata'.
4894 */
4895 if (!bpf_map__is_internal(map))
4896 return -ENOENT;
4897
4898 id = btf__find_by_name(obj->btf, map->real_name);
4899 if (id < 0)
4900 return id;
4901
4902 map->btf_key_type_id = 0;
4903 map->btf_value_type_id = id;
4904 return 0;
4905 }
4906
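/* Fallback sketch (illustrative): on kernels without working
 * BPF_OBJ_GET_INFO_BY_FD, /proc/<pid>/fdinfo/<fd> of a BPF map contains
 * lines like:
 *
 *	map_type:	1
 *	key_size:	4
 *	value_size:	8
 *	max_entries:	1024
 *	map_flags:	0x0
 *
 * which the sscanf() loop below parses into struct bpf_map_info.
 */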
4907 static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
4908 {
4909 char file[PATH_MAX], buff[4096];
4910 FILE *fp;
4911 __u32 val;
4912 int err;
4913
4914 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
4915 memset(info, 0, sizeof(*info));
4916
4917 fp = fopen(file, "re");
4918 if (!fp) {
4919 err = -errno;
4920 pr_warn("failed to open %s: %s. No procfs support?\n", file,
4921 errstr(err));
4922 return err;
4923 }
4924
4925 while (fgets(buff, sizeof(buff), fp)) {
4926 if (sscanf(buff, "map_type:\t%u", &val) == 1)
4927 info->type = val;
4928 else if (sscanf(buff, "key_size:\t%u", &val) == 1)
4929 info->key_size = val;
4930 else if (sscanf(buff, "value_size:\t%u", &val) == 1)
4931 info->value_size = val;
4932 else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
4933 info->max_entries = val;
4934 else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
4935 info->map_flags = val;
4936 }
4937
4938 fclose(fp);
4939
4940 return 0;
4941 }
4942
4943 static bool map_is_created(const struct bpf_map *map)
4944 {
4945 return map->obj->state >= OBJ_PREPARED || map->reused;
4946 }
4947
4948 bool bpf_map__autocreate(const struct bpf_map *map)
4949 {
4950 return map->autocreate;
4951 }
4952
4953 int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
4954 {
4955 if (map_is_created(map))
4956 return libbpf_err(-EBUSY);
4957
4958 map->autocreate = autocreate;
4959 return 0;
4960 }
4961
4962 int bpf_map__set_autoattach(struct bpf_map *map, bool autoattach)
4963 {
4964 if (!bpf_map__is_struct_ops(map))
4965 return libbpf_err(-EINVAL);
4966
4967 map->autoattach = autoattach;
4968 return 0;
4969 }
4970
4971 bool bpf_map__autoattach(const struct bpf_map *map)
4972 {
4973 return map->autoattach;
4974 }
4975
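/* Usage sketch (illustrative; "/sys/fs/bpf/my_map" is a hypothetical pin
 * path). bpf_object__reuse_map() below does essentially this for pinned
 * maps:
 *
 *	int fd = bpf_obj_get("/sys/fs/bpf/my_map");
 *	if (fd >= 0)
 *		err = bpf_map__reuse_fd(map, fd);
 *
 * The map definition must be compatible with the existing map's parameters.
 */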
4976 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
4977 {
4978 struct bpf_map_info info;
4979 __u32 len = sizeof(info), name_len;
4980 int new_fd, err;
4981 char *new_name;
4982
4983 memset(&info, 0, len);
4984 err = bpf_map_get_info_by_fd(fd, &info, &len);
4985 if (err && errno == EINVAL)
4986 err = bpf_get_map_info_from_fdinfo(fd, &info);
4987 if (err)
4988 return libbpf_err(err);
4989
4990 name_len = strlen(info.name);
4991 if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
4992 new_name = strdup(map->name);
4993 else
4994 new_name = strdup(info.name);
4995
4996 if (!new_name)
4997 return libbpf_err(-errno);
4998
4999 /*
5000 * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set.
5001 * This is similar to what we do in ensure_good_fd(), but without
5002 * closing original FD.
5003 */
5004 new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
5005 if (new_fd < 0) {
5006 err = -errno;
5007 goto err_free_new_name;
5008 }
5009
5010 err = reuse_fd(map->fd, new_fd);
5011 if (err)
5012 goto err_free_new_name;
5013
5014 free(map->name);
5015
5016 map->name = new_name;
5017 map->def.type = info.type;
5018 map->def.key_size = info.key_size;
5019 map->def.value_size = info.value_size;
5020 map->def.max_entries = info.max_entries;
5021 map->def.map_flags = info.map_flags;
5022 map->btf_key_type_id = info.btf_key_type_id;
5023 map->btf_value_type_id = info.btf_value_type_id;
5024 map->reused = true;
5025 map->map_extra = info.map_extra;
5026
5027 return 0;
5028
5029 err_free_new_name:
5030 free(new_name);
5031 return libbpf_err(err);
5032 }
5033
5034 __u32 bpf_map__max_entries(const struct bpf_map *map)
5035 {
5036 return map->def.max_entries;
5037 }
5038
5039 struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
5040 {
5041 if (!bpf_map_type__is_map_in_map(map->def.type))
5042 return errno = EINVAL, NULL;
5043
5044 return map->inner_map;
5045 }
5046
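/* Usage sketch (illustrative; "my_ringbuf" is a hypothetical map name),
 * called after open but before load:
 *
 *	struct bpf_map *m = bpf_object__find_map_by_name(obj, "my_ringbuf");
 *	bpf_map__set_max_entries(m, 1 << 20);
 *	bpf_object__load(obj);
 *
 * For BPF_MAP_TYPE_RINGBUF maps the requested size is auto-adjusted below
 * to a valid page-size multiple.
 */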
5047 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
5048 {
5049 if (map_is_created(map))
5050 return libbpf_err(-EBUSY);
5051
5052 map->def.max_entries = max_entries;
5053
5054 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
5055 if (map_is_ringbuf(map))
5056 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
5057
5058 return 0;
5059 }
5060
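/* Usage sketch (illustrative, assuming the bpf_token_path open option): an
 * explicit token path is normally requested at open time, e.g.:
 *
 *	LIBBPF_OPTS(bpf_object_open_opts, opts, .bpf_token_path = "/sys/fs/bpf");
 *	obj = bpf_object__open_file("prog.bpf.o", &opts);
 *
 * Without it, the probe below against BPF_FS_DEFAULT_PATH is best-effort.
 */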
5061 static int bpf_object_prepare_token(struct bpf_object *obj)
5062 {
5063 const char *bpffs_path;
5064 int bpffs_fd = -1, token_fd, err;
5065 bool mandatory;
5066 enum libbpf_print_level level;
5067
5068 /* token is explicitly prevented */
5069 if (obj->token_path && obj->token_path[0] == '\0') {
5070 pr_debug("object '%s': token is prevented, skipping...\n", obj->name);
5071 return 0;
5072 }
5073
5074 mandatory = obj->token_path != NULL;
5075 level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG;
5076
5077 bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH;
5078 bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR);
5079 if (bpffs_fd < 0) {
5080 err = -errno;
5081 __pr(level, "object '%s': failed (%s) to open BPF FS mount at '%s'%s\n",
5082 obj->name, errstr(err), bpffs_path,
5083 mandatory ? "" : ", skipping optional step...");
5084 return mandatory ? err : 0;
5085 }
5086
5087 token_fd = bpf_token_create(bpffs_fd, 0);
5088 close(bpffs_fd);
5089 if (token_fd < 0) {
5090 if (!mandatory && token_fd == -ENOENT) {
5091 pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n",
5092 obj->name, bpffs_path);
5093 return 0;
5094 }
5095 __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n",
5096 obj->name, token_fd, bpffs_path,
5097 mandatory ? "" : ", skipping optional step...");
5098 return mandatory ? token_fd : 0;
5099 }
5100
5101 obj->feat_cache = calloc(1, sizeof(*obj->feat_cache));
5102 if (!obj->feat_cache) {
5103 close(token_fd);
5104 return -ENOMEM;
5105 }
5106
5107 obj->token_fd = token_fd;
5108 obj->feat_cache->token_fd = token_fd;
5109
5110 return 0;
5111 }
5112
5113 static int
5114 bpf_object__probe_loading(struct bpf_object *obj)
5115 {
5116 struct bpf_insn insns[] = {
5117 BPF_MOV64_IMM(BPF_REG_0, 0),
5118 BPF_EXIT_INSN(),
5119 };
5120 int ret, insn_cnt = ARRAY_SIZE(insns);
5121 LIBBPF_OPTS(bpf_prog_load_opts, opts,
5122 .token_fd = obj->token_fd,
5123 .prog_flags = obj->token_fd ? BPF_F_TOKEN_FD : 0,
5124 );
5125
5126 if (obj->gen_loader)
5127 return 0;
5128
5129 ret = bump_rlimit_memlock();
5130 if (ret)
5131 pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %s), you might need to do it explicitly!\n",
5132 errstr(ret));
5133
5134 /* make sure basic loading works */
5135 ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts);
5136 if (ret < 0)
5137 ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
5138 if (ret < 0) {
5139 ret = errno;
5140 pr_warn("Error in %s(): %s. Couldn't load trivial BPF program. Make sure your kernel supports BPF (CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is set to big enough value.\n",
5141 __func__, errstr(ret));
5142 return -ret;
5143 }
5144 close(ret);
5145
5146 return 0;
5147 }
5148
5149 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
5150 {
5151 if (obj->gen_loader)
5152 /* To generate loader program assume the latest kernel
5153 * to avoid doing extra prog_load, map_create syscalls.
5154 */
5155 return true;
5156
5157 if (obj->token_fd)
5158 return feat_supported(obj->feat_cache, feat_id);
5159
5160 return feat_supported(NULL, feat_id);
5161 }
5162
5163 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
5164 {
5165 struct bpf_map_info map_info;
5166 __u32 map_info_len = sizeof(map_info);
5167 int err;
5168
5169 memset(&map_info, 0, map_info_len);
5170 err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len);
5171 if (err && errno == EINVAL)
5172 err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
5173 if (err) {
5174 pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
5175 errstr(err));
5176 return false;
5177 }
5178
5179 /*
5180 * bpf_map_get_info_by_fd() for DEVMAP will always return flags with
5181 * BPF_F_RDONLY_PROG set, but it generally is not set at map creation time.
5182 * Thus, ignore the BPF_F_RDONLY_PROG flag in the flags returned from
5183 * bpf_map_get_info_by_fd() when checking for compatibility with an
5184 * existing DEVMAP.
5185 */
5186 if (map->def.type == BPF_MAP_TYPE_DEVMAP || map->def.type == BPF_MAP_TYPE_DEVMAP_HASH)
5187 map_info.map_flags &= ~BPF_F_RDONLY_PROG;
5188
5189 return (map_info.type == map->def.type &&
5190 map_info.key_size == map->def.key_size &&
5191 map_info.value_size == map->def.value_size &&
5192 map_info.max_entries == map->def.max_entries &&
5193 map_info.map_flags == map->def.map_flags &&
5194 map_info.map_extra == map->map_extra);
5195 }
5196
5197 static int
5198 bpf_object__reuse_map(struct bpf_map *map)
5199 {
5200 int err, pin_fd;
5201
5202 pin_fd = bpf_obj_get(map->pin_path);
5203 if (pin_fd < 0) {
5204 err = -errno;
5205 if (err == -ENOENT) {
5206 pr_debug("found no pinned map to reuse at '%s'\n",
5207 map->pin_path);
5208 return 0;
5209 }
5210
5211 pr_warn("couldn't retrieve pinned map '%s': %s\n",
5212 map->pin_path, errstr(err));
5213 return err;
5214 }
5215
5216 if (!map_is_reuse_compat(map, pin_fd)) {
5217 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
5218 map->pin_path);
5219 close(pin_fd);
5220 return -EINVAL;
5221 }
5222
5223 err = bpf_map__reuse_fd(map, pin_fd);
5224 close(pin_fd);
5225 if (err)
5226 return err;
5227
5228 map->pinned = true;
5229 pr_debug("reused pinned map at '%s'\n", map->pin_path);
5230
5231 return 0;
5232 }
5233
5234 static int
5235 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
5236 {
5237 enum libbpf_map_type map_type = map->libbpf_type;
5238 int err, zero = 0;
5239 size_t mmap_sz;
5240
5241 if (obj->gen_loader) {
5242 bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
5243 map->mmaped, map->def.value_size);
5244 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
5245 bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
5246 return 0;
5247 }
5248
5249 err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
5250 if (err) {
5251 err = -errno;
5252 pr_warn("map '%s': failed to set initial contents: %s\n",
5253 bpf_map__name(map), errstr(err));
5254 return err;
5255 }
5256
5257 /* Freeze .rodata and .kconfig map as read-only from syscall side. */
5258 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
5259 err = bpf_map_freeze(map->fd);
5260 if (err) {
5261 err = -errno;
5262 pr_warn("map '%s': failed to freeze as read-only: %s\n",
5263 bpf_map__name(map), errstr(err));
5264 return err;
5265 }
5266 }
5267
5268 /* Remap anonymous mmap()-ed "map initialization image" as
5269 * a BPF map-backed mmap()-ed memory, but preserving the same
5270 * memory address. This will cause kernel to change process'
5271 * page table to point to a different piece of kernel memory,
5272 * but from userspace point of view memory address (and its
5273 * contents, being identical at this point) will stay the
5274 * same. This mapping will be released by bpf_object__close()
5275 * as per normal clean up procedure.
5276 */
5277 mmap_sz = bpf_map_mmap_sz(map);
5278 if (map->def.map_flags & BPF_F_MMAPABLE) {
5279 void *mmaped;
5280 int prot;
5281
5282 if (map->def.map_flags & BPF_F_RDONLY_PROG)
5283 prot = PROT_READ;
5284 else
5285 prot = PROT_READ | PROT_WRITE;
5286 mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map->fd, 0);
5287 if (mmaped == MAP_FAILED) {
5288 err = -errno;
5289 pr_warn("map '%s': failed to re-mmap() contents: %s\n",
5290 bpf_map__name(map), errstr(err));
5291 return err;
5292 }
5293 map->mmaped = mmaped;
5294 } else if (map->mmaped) {
5295 munmap(map->mmaped, mmap_sz);
5296 map->mmaped = NULL;
5297 }
5298
5299 return 0;
5300 }
5301
5302 static void bpf_map__destroy(struct bpf_map *map);
5303
5304 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
5305 {
5306 LIBBPF_OPTS(bpf_map_create_opts, create_attr);
5307 struct bpf_map_def *def = &map->def;
5308 const char *map_name = NULL;
5309 int err = 0, map_fd;
5310
5311 if (kernel_supports(obj, FEAT_PROG_NAME))
5312 map_name = map->name;
5313 create_attr.map_ifindex = map->map_ifindex;
5314 create_attr.map_flags = def->map_flags;
5315 create_attr.numa_node = map->numa_node;
5316 create_attr.map_extra = map->map_extra;
5317 create_attr.token_fd = obj->token_fd;
5318 if (obj->token_fd)
5319 create_attr.map_flags |= BPF_F_TOKEN_FD;
5320 if (map->excl_prog) {
5321 err = bpf_prog_compute_hash(map->excl_prog);
5322 if (err)
5323 return err;
5324
5325 create_attr.excl_prog_hash = map->excl_prog->hash;
5326 create_attr.excl_prog_hash_size = SHA256_DIGEST_LENGTH;
5327 }
5328
5329 if (bpf_map__is_struct_ops(map)) {
5330 create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
5331 if (map->mod_btf_fd >= 0) {
5332 create_attr.value_type_btf_obj_fd = map->mod_btf_fd;
5333 create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD;
5334 }
5335 }
5336
5337 if (obj->btf && btf__fd(obj->btf) >= 0) {
5338 create_attr.btf_fd = btf__fd(obj->btf);
5339 create_attr.btf_key_type_id = map->btf_key_type_id;
5340 create_attr.btf_value_type_id = map->btf_value_type_id;
5341 }
5342
5343 if (bpf_map_type__is_map_in_map(def->type)) {
5344 if (map->inner_map) {
5345 err = map_set_def_max_entries(map->inner_map);
5346 if (err)
5347 return err;
5348 err = bpf_object__create_map(obj, map->inner_map, true);
5349 if (err) {
5350 pr_warn("map '%s': failed to create inner map: %s\n",
5351 map->name, errstr(err));
5352 return err;
5353 }
5354 map->inner_map_fd = map->inner_map->fd;
5355 }
5356 if (map->inner_map_fd >= 0)
5357 create_attr.inner_map_fd = map->inner_map_fd;
5358 }
5359
5360 switch (def->type) {
5361 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
5362 case BPF_MAP_TYPE_CGROUP_ARRAY:
5363 case BPF_MAP_TYPE_STACK_TRACE:
5364 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
5365 case BPF_MAP_TYPE_HASH_OF_MAPS:
5366 case BPF_MAP_TYPE_DEVMAP:
5367 case BPF_MAP_TYPE_DEVMAP_HASH:
5368 case BPF_MAP_TYPE_CPUMAP:
5369 case BPF_MAP_TYPE_XSKMAP:
5370 case BPF_MAP_TYPE_SOCKMAP:
5371 case BPF_MAP_TYPE_SOCKHASH:
5372 case BPF_MAP_TYPE_QUEUE:
5373 case BPF_MAP_TYPE_STACK:
5374 case BPF_MAP_TYPE_ARENA:
5375 create_attr.btf_fd = 0;
5376 create_attr.btf_key_type_id = 0;
5377 create_attr.btf_value_type_id = 0;
5378 map->btf_key_type_id = 0;
5379 map->btf_value_type_id = 0;
5380 break;
5381 case BPF_MAP_TYPE_STRUCT_OPS:
5382 create_attr.btf_value_type_id = 0;
5383 break;
5384 default:
5385 break;
5386 }
5387
5388 if (obj->gen_loader) {
5389 bpf_gen__map_create(obj->gen_loader, def->type, map_name,
5390 def->key_size, def->value_size, def->max_entries,
5391 &create_attr, is_inner ? -1 : map - obj->maps);
5392 /* We keep pretending we have a valid FD to pass various fd >= 0
5393 * checks by just keeping original placeholder FDs in place.
5394 * See bpf_object__add_map() comment.
5395 * This placeholder fd will not be used with any syscall and
5396 * will be reset to -1 eventually.
5397 */
5398 map_fd = map->fd;
5399 } else {
5400 map_fd = bpf_map_create(def->type, map_name,
5401 def->key_size, def->value_size,
5402 def->max_entries, &create_attr);
5403 }
5404 if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) {
5405 err = -errno;
5406 pr_warn("Error in bpf_create_map_xattr(%s): %s. Retrying without BTF.\n",
5407 map->name, errstr(err));
5408 create_attr.btf_fd = 0;
5409 create_attr.btf_key_type_id = 0;
5410 create_attr.btf_value_type_id = 0;
5411 map->btf_key_type_id = 0;
5412 map->btf_value_type_id = 0;
5413 map_fd = bpf_map_create(def->type, map_name,
5414 def->key_size, def->value_size,
5415 def->max_entries, &create_attr);
5416 }
5417
5418 if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
5419 if (obj->gen_loader)
5420 map->inner_map->fd = -1;
5421 bpf_map__destroy(map->inner_map);
5422 zfree(&map->inner_map);
5423 }
5424
5425 if (map_fd < 0)
5426 return map_fd;
5427
5428 /* obj->gen_loader case, prevent reuse_fd() from closing map_fd */
5429 if (map->fd == map_fd)
5430 return 0;
5431
5432 /* Keep placeholder FD value but now point it to the BPF map object.
5433 * This way everything that relied on this map's FD (e.g., relocated
5434 * ldimm64 instructions) will stay valid and won't need adjustments.
5435 * map->fd stays valid but now points to what map_fd points to.
5436 */
5437 return reuse_fd(map->fd, map_fd);
5438 }
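
/* Note on the placeholder-FD trick above: reuse_fd() (defined elsewhere in
 * libbpf) makes the pre-allocated placeholder descriptor refer to the newly
 * created map without changing its numeric value. A minimal sketch of the
 * idea, assuming dup3()-like semantics (the actual implementation may
 * differ):
 *
 *	// make old_fd refer to whatever new_fd refers to, then drop new_fd
 *	if (dup3(new_fd, old_fd, O_CLOEXEC) < 0)
 *		return -errno;
 *	close(new_fd);
 *
 * Because the number stays the same, instructions already relocated against
 * the placeholder FD (ldimm64 map loads) need no further fixups.
 */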
5439
5440 static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
5441 {
5442 const struct bpf_map *targ_map;
5443 unsigned int i;
5444 int fd, err = 0;
5445
5446 for (i = 0; i < map->init_slots_sz; i++) {
5447 if (!map->init_slots[i])
5448 continue;
5449
5450 targ_map = map->init_slots[i];
5451 fd = targ_map->fd;
5452
5453 if (obj->gen_loader) {
5454 bpf_gen__populate_outer_map(obj->gen_loader,
5455 map - obj->maps, i,
5456 targ_map - obj->maps);
5457 } else {
5458 err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5459 }
5460 if (err) {
5461 err = -errno;
5462 pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %s\n",
5463 map->name, i, targ_map->name, fd, errstr(err));
5464 return err;
5465 }
5466 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
5467 map->name, i, targ_map->name, fd);
5468 }
5469
5470 zfree(&map->init_slots);
5471 map->init_slots_sz = 0;
5472
5473 return 0;
5474 }
5475
5476 static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
5477 {
5478 const struct bpf_program *targ_prog;
5479 unsigned int i;
5480 int fd, err;
5481
5482 if (obj->gen_loader)
5483 return -ENOTSUP;
5484
5485 for (i = 0; i < map->init_slots_sz; i++) {
5486 if (!map->init_slots[i])
5487 continue;
5488
5489 targ_prog = map->init_slots[i];
5490 fd = bpf_program__fd(targ_prog);
5491
5492 err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5493 if (err) {
5494 err = -errno;
5495 pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %s\n",
5496 map->name, i, targ_prog->name, fd, errstr(err));
5497 return err;
5498 }
5499 pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
5500 map->name, i, targ_prog->name, fd);
5501 }
5502
5503 zfree(&map->init_slots);
5504 map->init_slots_sz = 0;
5505
5506 return 0;
5507 }
5508
5509 static int bpf_object_init_prog_arrays(struct bpf_object *obj)
5510 {
5511 struct bpf_map *map;
5512 int i, err;
5513
5514 for (i = 0; i < obj->nr_maps; i++) {
5515 map = &obj->maps[i];
5516
5517 if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY)
5518 continue;
5519
5520 err = init_prog_array_slots(obj, map);
5521 if (err < 0)
5522 return err;
5523 }
5524 return 0;
5525 }
5526
5527 static int map_set_def_max_entries(struct bpf_map *map)
5528 {
5529 if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) {
5530 int nr_cpus;
5531
5532 nr_cpus = libbpf_num_possible_cpus();
5533 if (nr_cpus < 0) {
5534 pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
5535 map->name, nr_cpus);
5536 return nr_cpus;
5537 }
5538 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
5539 map->def.max_entries = nr_cpus;
5540 }
5541
5542 return 0;
5543 }
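
/* Example of the default sizing above (hypothetical map definition): a
 * BPF_MAP_TYPE_PERF_EVENT_ARRAY declared with max_entries left at 0, e.g.
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
 *		__uint(key_size, sizeof(int));
 *		__uint(value_size, sizeof(int));
 *	} events SEC(".maps");
 *
 * ends up with max_entries == libbpf_num_possible_cpus(), i.e. one slot per
 * possible CPU, which is what per-CPU perf buffer setup expects.
 */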
5544
5545 static int
5546 bpf_object__create_maps(struct bpf_object *obj)
5547 {
5548 struct bpf_map *map;
5549 unsigned int i, j;
5550 int err;
5551 bool retried;
5552
5553 for (i = 0; i < obj->nr_maps; i++) {
5554 map = &obj->maps[i];
5555
5556 /* To support old kernels, we skip creating global data maps
5557 * (.rodata, .data, .kconfig, etc); later on, during program
5558 * loading, if we detect that at least one of the to-be-loaded
5559 * programs is referencing any global data map, we'll error
5560 * out with program name and relocation index logged.
5561 * This approach allows us to accommodate Clang emitting
5562 * unnecessary .rodata.str1.1 sections for string literals,
5563 * and it also allows CO-RE applications to use global
5564 * variables in some BPF programs, but not others.
5565 * If those global variable-using programs are not loaded at
5566 * runtime due to bpf_program__set_autoload(prog, false),
5567 * bpf_object loading will succeed just fine even on old
5568 * kernels.
5569 */
5570 if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA))
5571 map->autocreate = false;
5572
5573 if (!map->autocreate) {
5574 pr_debug("map '%s': skipped auto-creating...\n", map->name);
5575 continue;
5576 }
5577
5578 err = map_set_def_max_entries(map);
5579 if (err)
5580 goto err_out;
5581
5582 retried = false;
5583 retry:
5584 if (map->pin_path) {
5585 err = bpf_object__reuse_map(map);
5586 if (err) {
5587 pr_warn("map '%s': error reusing pinned map\n",
5588 map->name);
5589 goto err_out;
5590 }
5591 if (retried && map->fd < 0) {
5592 pr_warn("map '%s': cannot find pinned map\n",
5593 map->name);
5594 err = -ENOENT;
5595 goto err_out;
5596 }
5597 }
5598
5599 if (map->reused) {
5600 pr_debug("map '%s': skipping creation (preset fd=%d)\n",
5601 map->name, map->fd);
5602 } else {
5603 err = bpf_object__create_map(obj, map, false);
5604 if (err)
5605 goto err_out;
5606
5607 pr_debug("map '%s': created successfully, fd=%d\n",
5608 map->name, map->fd);
5609
5610 if (bpf_map__is_internal(map)) {
5611 err = bpf_object__populate_internal_map(obj, map);
5612 if (err < 0)
5613 goto err_out;
5614 } else if (map->def.type == BPF_MAP_TYPE_ARENA) {
5615 map->mmaped = mmap((void *)(long)map->map_extra,
5616 bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
5617 map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
5618 map->fd, 0);
5619 if (map->mmaped == MAP_FAILED) {
5620 err = -errno;
5621 map->mmaped = NULL;
5622 pr_warn("map '%s': failed to mmap arena: %s\n",
5623 map->name, errstr(err));
5624 return err;
5625 }
5626 if (obj->arena_data) {
5627 memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz);
5628 zfree(&obj->arena_data);
5629 }
5630 }
5631 if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
5632 err = init_map_in_map_slots(obj, map);
5633 if (err < 0)
5634 goto err_out;
5635 }
5636 }
5637
5638 if (map->pin_path && !map->pinned) {
5639 err = bpf_map__pin(map, NULL);
5640 if (err) {
5641 if (!retried && err == -EEXIST) {
5642 retried = true;
5643 goto retry;
5644 }
5645 pr_warn("map '%s': failed to auto-pin at '%s': %s\n",
5646 map->name, map->pin_path, errstr(err));
5647 goto err_out;
5648 }
5649 }
5650 }
5651
5652 return 0;
5653
5654 err_out:
5655 pr_warn("map '%s': failed to create: %s\n", map->name, errstr(err));
5656 pr_perm_msg(err);
5657 for (j = 0; j < i; j++)
5658 zclose(obj->maps[j].fd);
5659 return err;
5660 }
5661
5662 static bool bpf_core_is_flavor_sep(const char *s)
5663 {
5664 /* check X___Y name pattern, where X and Y are not underscores */
5665 return s[0] != '_' && /* X */
5666 s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */
5667 s[4] != '_'; /* Y */
5668 }
5669
5670 /* Given 'some_struct_name___with_flavor' return the length of a name prefix
5671 * before last triple underscore. Struct name part after last triple
5672 * underscore is ignored by BPF CO-RE relocation during relocation matching.
5673 */
5674 size_t bpf_core_essential_name_len(const char *name)
5675 {
5676 size_t n = strlen(name);
5677 int i;
5678
5679 for (i = n - 5; i >= 0; i--) {
5680 if (bpf_core_is_flavor_sep(name + i))
5681 return i + 1;
5682 }
5683 return n;
5684 }
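
/* Example: for a flavored local type name the flavor suffix is dropped when
 * matching against target BTF, e.g. (hypothetical type name)
 *
 *	bpf_core_essential_name_len("task_struct___my_flavor")
 *
 * returns strlen("task_struct"), so local 'struct task_struct___my_flavor'
 * is matched against the kernel's 'struct task_struct'.
 */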
5685
5686 void bpf_core_free_cands(struct bpf_core_cand_list *cands)
5687 {
5688 if (!cands)
5689 return;
5690
5691 free(cands->cands);
5692 free(cands);
5693 }
5694
5695 int bpf_core_add_cands(struct bpf_core_cand *local_cand,
5696 size_t local_essent_len,
5697 const struct btf *targ_btf,
5698 const char *targ_btf_name,
5699 int targ_start_id,
5700 struct bpf_core_cand_list *cands)
5701 {
5702 struct bpf_core_cand *new_cands, *cand;
5703 const struct btf_type *t, *local_t;
5704 const char *targ_name, *local_name;
5705 size_t targ_essent_len;
5706 int n, i;
5707
5708 local_t = btf__type_by_id(local_cand->btf, local_cand->id);
5709 local_name = btf__str_by_offset(local_cand->btf, local_t->name_off);
5710
5711 n = btf__type_cnt(targ_btf);
5712 for (i = targ_start_id; i < n; i++) {
5713 t = btf__type_by_id(targ_btf, i);
5714 if (!btf_kind_core_compat(t, local_t))
5715 continue;
5716
5717 targ_name = btf__name_by_offset(targ_btf, t->name_off);
5718 if (str_is_empty(targ_name))
5719 continue;
5720
5721 targ_essent_len = bpf_core_essential_name_len(targ_name);
5722 if (targ_essent_len != local_essent_len)
5723 continue;
5724
5725 if (strncmp(local_name, targ_name, local_essent_len) != 0)
5726 continue;
5727
5728 pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
5729 local_cand->id, btf_kind_str(local_t),
5730 local_name, i, btf_kind_str(t), targ_name,
5731 targ_btf_name);
5732 new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
5733 sizeof(*cands->cands));
5734 if (!new_cands)
5735 return -ENOMEM;
5736
5737 cand = &new_cands[cands->len];
5738 cand->btf = targ_btf;
5739 cand->id = i;
5740
5741 cands->cands = new_cands;
5742 cands->len++;
5743 }
5744 return 0;
5745 }
5746
5747 static int load_module_btfs(struct bpf_object *obj)
5748 {
5749 struct bpf_btf_info info;
5750 struct module_btf *mod_btf;
5751 struct btf *btf;
5752 char name[64];
5753 __u32 id = 0, len;
5754 int err, fd;
5755
5756 if (obj->btf_modules_loaded)
5757 return 0;
5758
5759 if (obj->gen_loader)
5760 return 0;
5761
5762 /* don't do this again, even if we find no module BTFs */
5763 obj->btf_modules_loaded = true;
5764
5765 /* kernel too old to support module BTFs */
5766 if (!kernel_supports(obj, FEAT_MODULE_BTF))
5767 return 0;
5768
5769 while (true) {
5770 err = bpf_btf_get_next_id(id, &id);
5771 if (err && errno == ENOENT)
5772 return 0;
5773 if (err && errno == EPERM) {
5774 pr_debug("skipping module BTFs loading, missing privileges\n");
5775 return 0;
5776 }
5777 if (err) {
5778 err = -errno;
5779 pr_warn("failed to iterate BTF objects: %s\n", errstr(err));
5780 return err;
5781 }
5782
5783 fd = bpf_btf_get_fd_by_id(id);
5784 if (fd < 0) {
5785 if (errno == ENOENT)
5786 continue; /* expected race: BTF was unloaded */
5787 err = -errno;
5788 pr_warn("failed to get BTF object #%d FD: %s\n", id, errstr(err));
5789 return err;
5790 }
5791
5792 len = sizeof(info);
5793 memset(&info, 0, sizeof(info));
5794 info.name = ptr_to_u64(name);
5795 info.name_len = sizeof(name);
5796
5797 err = bpf_btf_get_info_by_fd(fd, &info, &len);
5798 if (err) {
5799 err = -errno;
5800 pr_warn("failed to get BTF object #%d info: %s\n", id, errstr(err));
5801 goto err_out;
5802 }
5803
5804 /* ignore non-module BTFs */
5805 if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
5806 close(fd);
5807 continue;
5808 }
5809
5810 btf = btf_get_from_fd(fd, obj->btf_vmlinux);
5811 err = libbpf_get_error(btf);
5812 if (err) {
5813 pr_warn("failed to load module [%s]'s BTF object #%d: %s\n",
5814 name, id, errstr(err));
5815 goto err_out;
5816 }
5817
5818 err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
5819 sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
5820 if (err)
5821 goto err_out;
5822
5823 mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
5824
5825 mod_btf->btf = btf;
5826 mod_btf->id = id;
5827 mod_btf->fd = fd;
5828 mod_btf->name = strdup(name);
5829 if (!mod_btf->name) {
5830 err = -ENOMEM;
5831 goto err_out;
5832 }
5833 continue;
5834
5835 err_out:
5836 close(fd);
5837 return err;
5838 }
5839
5840 return 0;
5841 }
5842
5843 static struct bpf_core_cand_list *
5844 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
5845 {
5846 struct bpf_core_cand local_cand = {};
5847 struct bpf_core_cand_list *cands;
5848 const struct btf *main_btf;
5849 const struct btf_type *local_t;
5850 const char *local_name;
5851 size_t local_essent_len;
5852 int err, i;
5853
5854 local_cand.btf = local_btf;
5855 local_cand.id = local_type_id;
5856 local_t = btf__type_by_id(local_btf, local_type_id);
5857 if (!local_t)
5858 return ERR_PTR(-EINVAL);
5859
5860 local_name = btf__name_by_offset(local_btf, local_t->name_off);
5861 if (str_is_empty(local_name))
5862 return ERR_PTR(-EINVAL);
5863 local_essent_len = bpf_core_essential_name_len(local_name);
5864
5865 cands = calloc(1, sizeof(*cands));
5866 if (!cands)
5867 return ERR_PTR(-ENOMEM);
5868
5869 /* Attempt to find target candidates in vmlinux BTF first */
5870 main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
5871 err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
5872 if (err)
5873 goto err_out;
5874
5875 /* if vmlinux BTF has any candidate, don't go for module BTFs */
5876 if (cands->len)
5877 return cands;
5878
5879 /* if vmlinux BTF was overridden, don't attempt to load module BTFs */
5880 if (obj->btf_vmlinux_override)
5881 return cands;
5882
5883 /* now look through module BTFs, trying to still find candidates */
5884 err = load_module_btfs(obj);
5885 if (err)
5886 goto err_out;
5887
5888 for (i = 0; i < obj->btf_module_cnt; i++) {
5889 err = bpf_core_add_cands(&local_cand, local_essent_len,
5890 obj->btf_modules[i].btf,
5891 obj->btf_modules[i].name,
5892 btf__type_cnt(obj->btf_vmlinux),
5893 cands);
5894 if (err)
5895 goto err_out;
5896 }
5897
5898 return cands;
5899 err_out:
5900 bpf_core_free_cands(cands);
5901 return ERR_PTR(err);
5902 }
5903
5904 /* Check local and target types for compatibility. This check is used for
5905 * type-based CO-RE relocations and follows slightly different rules than
5906 * field-based relocations. This function assumes that root types were already
5907 * checked for name match. Beyond that initial root-level name check, names
5908 * are completely ignored. Compatibility rules are as follows:
5909 * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
5910 * kind should match for local and target types (i.e., STRUCT is not
5911 * compatible with UNION);
5912 * - for ENUMs, the size is ignored;
5913 * - for INT, size and signedness are ignored;
5914 * - for ARRAY, dimensionality is ignored, element types are checked for
5915 * compatibility recursively;
5916 * - CONST/VOLATILE/RESTRICT modifiers are ignored;
5917 * - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
5918 * - FUNC_PROTOs are compatible if they have compatible signature: same
5919 * number of input args and compatible return and argument types.
5920 * These rules are not set in stone and probably will be adjusted as we get
5921 * more experience with using BPF CO-RE relocations.
5922 */
5923 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
5924 const struct btf *targ_btf, __u32 targ_id)
5925 {
5926 return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
5927 }
5928
5929 int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
5930 const struct btf *targ_btf, __u32 targ_id)
5931 {
5932 return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
5933 }
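
/* Example of the compatibility rules above (hypothetical local definition):
 * a pruned local type such as
 *
 *	struct sk_buff { int len; };	// local, minimal definition
 *
 * is compatible with the full kernel 'struct sk_buff' for type-based
 * relocations: both are STRUCTs with the same essential name, and member
 * layout is irrelevant here (unlike for field-based relocations).
 */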
5934
5935 static size_t bpf_core_hash_fn(const long key, void *ctx)
5936 {
5937 return key;
5938 }
5939
5940 static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx)
5941 {
5942 return k1 == k2;
5943 }
5944
5945 static int record_relo_core(struct bpf_program *prog,
5946 const struct bpf_core_relo *core_relo, int insn_idx)
5947 {
5948 struct reloc_desc *relos, *relo;
5949
5950 relos = libbpf_reallocarray(prog->reloc_desc,
5951 prog->nr_reloc + 1, sizeof(*relos));
5952 if (!relos)
5953 return -ENOMEM;
5954 relo = &relos[prog->nr_reloc];
5955 relo->type = RELO_CORE;
5956 relo->insn_idx = insn_idx;
5957 relo->core_relo = core_relo;
5958 prog->reloc_desc = relos;
5959 prog->nr_reloc++;
5960 return 0;
5961 }
5962
5963 static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
5964 {
5965 struct reloc_desc *relo;
5966 int i;
5967
5968 for (i = 0; i < prog->nr_reloc; i++) {
5969 relo = &prog->reloc_desc[i];
5970 if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
5971 continue;
5972
5973 return relo->core_relo;
5974 }
5975
5976 return NULL;
5977 }
5978
5979 static int bpf_core_resolve_relo(struct bpf_program *prog,
5980 const struct bpf_core_relo *relo,
5981 int relo_idx,
5982 const struct btf *local_btf,
5983 struct hashmap *cand_cache,
5984 struct bpf_core_relo_res *targ_res)
5985 {
5986 struct bpf_core_spec specs_scratch[3] = {};
5987 struct bpf_core_cand_list *cands = NULL;
5988 const char *prog_name = prog->name;
5989 const struct btf_type *local_type;
5990 const char *local_name;
5991 __u32 local_id = relo->type_id;
5992 int err;
5993
5994 local_type = btf__type_by_id(local_btf, local_id);
5995 if (!local_type)
5996 return -EINVAL;
5997
5998 local_name = btf__name_by_offset(local_btf, local_type->name_off);
5999 if (!local_name)
6000 return -EINVAL;
6001
6002 if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
6003 !hashmap__find(cand_cache, local_id, &cands)) {
6004 cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
6005 if (IS_ERR(cands)) {
6006 pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
6007 prog_name, relo_idx, local_id, btf_kind_str(local_type),
6008 local_name, PTR_ERR(cands));
6009 return PTR_ERR(cands);
6010 }
6011 err = hashmap__set(cand_cache, local_id, cands, NULL, NULL);
6012 if (err) {
6013 bpf_core_free_cands(cands);
6014 return err;
6015 }
6016 }
6017
6018 return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch,
6019 targ_res);
6020 }
6021
6022 static int
6023 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
6024 {
6025 const struct btf_ext_info_sec *sec;
6026 struct bpf_core_relo_res targ_res;
6027 const struct bpf_core_relo *rec;
6028 const struct btf_ext_info *seg;
6029 struct hashmap_entry *entry;
6030 struct hashmap *cand_cache = NULL;
6031 struct bpf_program *prog;
6032 struct bpf_insn *insn;
6033 const char *sec_name;
6034 int i, err = 0, insn_idx, sec_idx, sec_num;
6035
6036 if (obj->btf_ext->core_relo_info.len == 0)
6037 return 0;
6038
6039 if (targ_btf_path) {
6040 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
6041 err = libbpf_get_error(obj->btf_vmlinux_override);
6042 if (err) {
6043 pr_warn("failed to parse target BTF: %s\n", errstr(err));
6044 return err;
6045 }
6046 }
6047
6048 cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
6049 if (IS_ERR(cand_cache)) {
6050 err = PTR_ERR(cand_cache);
6051 goto out;
6052 }
6053
6054 seg = &obj->btf_ext->core_relo_info;
6055 sec_num = 0;
6056 for_each_btf_ext_sec(seg, sec) {
6057 sec_idx = seg->sec_idxs[sec_num];
6058 sec_num++;
6059
6060 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
6061 if (str_is_empty(sec_name)) {
6062 err = -EINVAL;
6063 goto out;
6064 }
6065
6066 pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);
6067
6068 for_each_btf_ext_rec(seg, sec, i, rec) {
6069 if (rec->insn_off % BPF_INSN_SZ)
6070 return -EINVAL;
6071 insn_idx = rec->insn_off / BPF_INSN_SZ;
6072 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
6073 if (!prog) {
6074 /* When __weak subprog is "overridden" by another instance
6075 * of the subprog from a different object file, linker still
6076 * appends all the .BTF.ext info that used to belong to that
6077 * eliminated subprogram.
6078 * This is similar to what x86-64 linker does for relocations.
6079 * So ignore such relocations, just like we ignore
6080 * subprog instructions when discovering subprograms.
6081 */
6082 pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
6083 sec_name, i, insn_idx);
6084 continue;
6085 }
6086 /* no need to apply CO-RE relocation if the program is
6087 * not going to be loaded
6088 */
6089 if (!prog->autoload)
6090 continue;
6091
6092 /* adjust insn_idx from section frame of reference to the local
6093 * program's frame of reference; (sub-)program code is not yet
6094 * relocated, so it's enough to just subtract in-section offset
6095 */
6096 insn_idx = insn_idx - prog->sec_insn_off;
6097 if (insn_idx >= prog->insns_cnt)
6098 return -EINVAL;
6099 insn = &prog->insns[insn_idx];
6100
6101 err = record_relo_core(prog, rec, insn_idx);
6102 if (err) {
6103 pr_warn("prog '%s': relo #%d: failed to record relocation: %s\n",
6104 prog->name, i, errstr(err));
6105 goto out;
6106 }
6107
6108 if (prog->obj->gen_loader)
6109 continue;
6110
6111 err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
6112 if (err) {
6113 pr_warn("prog '%s': relo #%d: failed to relocate: %s\n",
6114 prog->name, i, errstr(err));
6115 goto out;
6116 }
6117
6118 err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res);
6119 if (err) {
6120 pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %s\n",
6121 prog->name, i, insn_idx, errstr(err));
6122 goto out;
6123 }
6124 }
6125 }
6126
6127 out:
6128 /* obj->btf_vmlinux and module BTFs are freed after object load */
6129 btf__free(obj->btf_vmlinux_override);
6130 obj->btf_vmlinux_override = NULL;
6131
6132 if (!IS_ERR_OR_NULL(cand_cache)) {
6133 hashmap__for_each_entry(cand_cache, entry, i) {
6134 bpf_core_free_cands(entry->pvalue);
6135 }
6136 hashmap__free(cand_cache);
6137 }
6138 return err;
6139 }
6140
6141 /* base map load ldimm64 special constant, used also for log fixup logic */
6142 #define POISON_LDIMM64_MAP_BASE 2001000000
6143 #define POISON_LDIMM64_MAP_PFX "200100"
6144
6145 static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx,
6146 int insn_idx, struct bpf_insn *insn,
6147 int map_idx, const struct bpf_map *map)
6148 {
6149 int i;
6150
6151 pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n",
6152 prog->name, relo_idx, insn_idx, map_idx, map->name);
6153
6154 /* we turn single ldimm64 into two identical invalid calls */
6155 for (i = 0; i < 2; i++) {
6156 insn->code = BPF_JMP | BPF_CALL;
6157 insn->dst_reg = 0;
6158 insn->src_reg = 0;
6159 insn->off = 0;
6160 /* if this instruction is reachable (not a dead code),
6161 * verifier will complain with something like:
6162 * invalid func unknown#2001000123
6163 * where lower 123 is map index into obj->maps[] array
6164 */
6165 insn->imm = POISON_LDIMM64_MAP_BASE + map_idx;
6166
6167 insn++;
6168 }
6169 }
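
/* Example of the poisoning above: if obj->maps[3] is a skipped global data
 * map and a loaded program still references it, its ldimm64 is rewritten so
 * that the verifier (if the insn is reachable) reports roughly
 *
 *	invalid func unknown#2001000003
 *
 * and libbpf's log fixup logic can map "2001000003" back to map index 3 and
 * print a friendlier diagnostic naming the map.
 */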
6170
6171 /* unresolved kfunc call special constant, used also for log fixup logic */
6172 #define POISON_CALL_KFUNC_BASE 2002000000
6173 #define POISON_CALL_KFUNC_PFX "2002"
6174
6175 static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
6176 int insn_idx, struct bpf_insn *insn,
6177 int ext_idx, const struct extern_desc *ext)
6178 {
6179 pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n",
6180 prog->name, relo_idx, insn_idx, ext->name);
6181
6182 /* we turn kfunc call into invalid helper call with identifiable constant */
6183 insn->code = BPF_JMP | BPF_CALL;
6184 insn->dst_reg = 0;
6185 insn->src_reg = 0;
6186 insn->off = 0;
6187 /* if this instruction is reachable (not a dead code),
6188 * verifier will complain with something like:
6189 * invalid func unknown#2002000123
6190 * where lower 123 is extern index into obj->externs[] array
6191 */
6192 insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
6193 }
6194
6195 static int find_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off)
6196 {
6197 size_t i;
6198
6199 for (i = 0; i < obj->jumptable_map_cnt; i++) {
6200 /*
6201 * It might happen that the same offset is used by two different
6202 * programs (as jump tables can be identical). However, different
6203 * programs should get different maps.
6204 */
6205 if (obj->jumptable_maps[i].sym_off == sym_off &&
6206 obj->jumptable_maps[i].prog == prog)
6207 return obj->jumptable_maps[i].fd;
6208 }
6209
6210 return -ENOENT;
6211 }
6212
6213 static int add_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off, int map_fd)
6214 {
6215 size_t cnt = obj->jumptable_map_cnt;
6216 size_t size = sizeof(obj->jumptable_maps[0]);
6217 void *tmp;
6218
6219 tmp = libbpf_reallocarray(obj->jumptable_maps, cnt + 1, size);
6220 if (!tmp)
6221 return -ENOMEM;
6222
6223 obj->jumptable_maps = tmp;
6224 obj->jumptable_maps[cnt].prog = prog;
6225 obj->jumptable_maps[cnt].sym_off = sym_off;
6226 obj->jumptable_maps[cnt].fd = map_fd;
6227 obj->jumptable_map_cnt++;
6228
6229 return 0;
6230 }
6231
6232 static int find_subprog_idx(struct bpf_program *prog, int insn_idx)
6233 {
6234 int i;
6235
6236 for (i = prog->subprog_cnt - 1; i >= 0; i--) {
6237 if (insn_idx >= prog->subprogs[i].sub_insn_off)
6238 return i;
6239 }
6240
6241 return -1;
6242 }
6243
6244 static int create_jt_map(struct bpf_object *obj, struct bpf_program *prog, struct reloc_desc *relo)
6245 {
6246 const __u32 jt_entry_size = 8;
6247 int sym_off = relo->sym_off;
6248 int jt_size = relo->sym_size;
6249 __u32 max_entries = jt_size / jt_entry_size;
6250 __u32 value_size = sizeof(struct bpf_insn_array_value);
6251 struct bpf_insn_array_value val = {};
6252 int subprog_idx;
6253 int map_fd, err;
6254 __u64 insn_off;
6255 __u64 *jt;
6256 __u32 i;
6257
6258 map_fd = find_jt_map(obj, prog, sym_off);
6259 if (map_fd >= 0)
6260 return map_fd;
6261
6262 if (sym_off % jt_entry_size) {
6263 pr_warn("map '.jumptables': jumptable start %d should be multiple of %u\n",
6264 sym_off, jt_entry_size);
6265 return -EINVAL;
6266 }
6267
6268 if (jt_size % jt_entry_size) {
6269 pr_warn("map '.jumptables': jumptable size %d should be multiple of %u\n",
6270 jt_size, jt_entry_size);
6271 return -EINVAL;
6272 }
6273
6274 map_fd = bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, ".jumptables",
6275 4, value_size, max_entries, NULL);
6276 if (map_fd < 0)
6277 return map_fd;
6278
6279 if (!obj->jumptables_data) {
6280 pr_warn("map '.jumptables': ELF file is missing jump table data\n");
6281 err = -EINVAL;
6282 goto err_close;
6283 }
6284 if (sym_off + jt_size > obj->jumptables_data_sz) {
6285 pr_warn("map '.jumptables': jumptables_data size is %zd, trying to access %d\n",
6286 obj->jumptables_data_sz, sym_off + jt_size);
6287 err = -EINVAL;
6288 goto err_close;
6289 }
6290
6291 subprog_idx = -1; /* main program */
6292 if (relo->insn_idx < 0 || relo->insn_idx >= prog->insns_cnt) {
6293 pr_warn("map '.jumptables': invalid instruction index %d\n", relo->insn_idx);
6294 err = -EINVAL;
6295 goto err_close;
6296 }
6297 if (prog->subprogs)
6298 subprog_idx = find_subprog_idx(prog, relo->insn_idx);
6299
6300 jt = (__u64 *)(obj->jumptables_data + sym_off);
6301 for (i = 0; i < max_entries; i++) {
6302 /*
6303 * The offset should be made relative to the beginning of
6304 * the main function, not the subfunction.
6305 */
6306 insn_off = jt[i] / sizeof(struct bpf_insn);
6307 if (subprog_idx >= 0) {
6308 insn_off -= prog->subprogs[subprog_idx].sec_insn_off;
6309 insn_off += prog->subprogs[subprog_idx].sub_insn_off;
6310 } else {
6311 insn_off -= prog->sec_insn_off;
6312 }
6313
6314 /*
6315 * LLVM-generated jump tables contain u64 records; however, the
6316 * values should fit in u32.
6317 */
6318 if (insn_off > UINT32_MAX) {
6319 pr_warn("map '.jumptables': invalid jump table value 0x%llx at offset %d\n",
6320 (long long)jt[i], sym_off + i * jt_entry_size);
6321 err = -EINVAL;
6322 goto err_close;
6323 }
6324
6325 val.orig_off = insn_off;
6326 err = bpf_map_update_elem(map_fd, &i, &val, 0);
6327 if (err)
6328 goto err_close;
6329 }
6330
6331 err = bpf_map_freeze(map_fd);
6332 if (err)
6333 goto err_close;
6334
6335 err = add_jt_map(obj, prog, sym_off, map_fd);
6336 if (err)
6337 goto err_close;
6338
6339 return map_fd;
6340
6341 err_close:
6342 close(map_fd);
6343 return err;
6344 }
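
/* Worked example of the offset math above (all numbers hypothetical):
 * suppose jt[i] == 512 bytes, i.e. insn_off = 512 / 8 = 64 in section
 * terms. If the jump table belongs to a subprog with sec_insn_off == 50
 * and sub_insn_off == 200, the stored value becomes
 *
 *	insn_off = 64 - 50 + 200 = 214
 *
 * i.e. an instruction index relative to the start of the final, stitched
 * main program, which is what goes into orig_off of the insn array entry.
 */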
6345
6346 /* Relocate data references within program code:
6347 * - map references;
6348 * - global variable references;
6349 * - extern references.
6350 */
6351 static int
6352 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
6353 {
6354 int i;
6355
6356 for (i = 0; i < prog->nr_reloc; i++) {
6357 struct reloc_desc *relo = &prog->reloc_desc[i];
6358 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6359 const struct bpf_map *map;
6360 struct extern_desc *ext;
6361
6362 switch (relo->type) {
6363 case RELO_LD64:
6364 map = &obj->maps[relo->map_idx];
6365 if (obj->gen_loader) {
6366 insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
6367 insn[0].imm = relo->map_idx;
6368 } else if (map->autocreate) {
6369 insn[0].src_reg = BPF_PSEUDO_MAP_FD;
6370 insn[0].imm = map->fd;
6371 } else {
6372 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
6373 relo->map_idx, map);
6374 }
6375 break;
6376 case RELO_DATA:
6377 map = &obj->maps[relo->map_idx];
6378 insn[1].imm = insn[0].imm + relo->sym_off;
6379 if (obj->gen_loader) {
6380 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
6381 insn[0].imm = relo->map_idx;
6382 } else if (map->autocreate) {
6383 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6384 insn[0].imm = map->fd;
6385 } else {
6386 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
6387 relo->map_idx, map);
6388 }
6389 break;
6390 case RELO_EXTERN_LD64:
6391 ext = &obj->externs[relo->ext_idx];
6392 if (ext->type == EXT_KCFG) {
6393 if (obj->gen_loader) {
6394 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
6395 insn[0].imm = obj->kconfig_map_idx;
6396 } else {
6397 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6398 insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
6399 }
6400 insn[1].imm = ext->kcfg.data_off;
6401 } else /* EXT_KSYM */ {
6402 if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
6403 insn[0].src_reg = BPF_PSEUDO_BTF_ID;
6404 insn[0].imm = ext->ksym.kernel_btf_id;
6405 insn[1].imm = ext->ksym.kernel_btf_obj_fd;
6406 } else { /* typeless ksyms or unresolved typed ksyms */
6407 insn[0].imm = (__u32)ext->ksym.addr;
6408 insn[1].imm = ext->ksym.addr >> 32;
6409 }
6410 }
6411 break;
6412 case RELO_EXTERN_CALL:
6413 ext = &obj->externs[relo->ext_idx];
6414 insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
6415 if (ext->is_set) {
6416 insn[0].imm = ext->ksym.kernel_btf_id;
6417 insn[0].off = ext->ksym.btf_fd_idx;
6418 } else { /* unresolved weak kfunc call */
6419 poison_kfunc_call(prog, i, relo->insn_idx, insn,
6420 relo->ext_idx, ext);
6421 }
6422 break;
6423 case RELO_SUBPROG_ADDR:
6424 if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
6425 pr_warn("prog '%s': relo #%d: bad insn\n",
6426 prog->name, i);
6427 return -EINVAL;
6428 }
6429 /* handled already */
6430 break;
6431 case RELO_CALL:
6432 /* handled already */
6433 break;
6434 case RELO_CORE:
6435 /* will be handled by bpf_program_record_relos() */
6436 break;
6437 case RELO_INSN_ARRAY: {
6438 int map_fd;
6439
6440 map_fd = create_jt_map(obj, prog, relo);
6441 if (map_fd < 0) {
6442 pr_warn("prog '%s': relo #%d: can't create jump table: sym_off %u\n",
6443 prog->name, i, relo->sym_off);
6444 return map_fd;
6445 }
6446 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6447 insn->imm = map_fd;
6448 insn->off = 0;
6449 }
6450 break;
6451 default:
6452 pr_warn("prog '%s': relo #%d: bad relo type %d\n",
6453 prog->name, i, relo->type);
6454 return -EINVAL;
6455 }
6456 }
6457
6458 return 0;
6459 }
6460
6461 static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
6462 const struct bpf_program *prog,
6463 const struct btf_ext_info *ext_info,
6464 void **prog_info, __u32 *prog_rec_cnt,
6465 __u32 *prog_rec_sz)
6466 {
6467 void *copy_start = NULL, *copy_end = NULL;
6468 void *rec, *rec_end, *new_prog_info;
6469 const struct btf_ext_info_sec *sec;
6470 size_t old_sz, new_sz;
6471 int i, sec_num, sec_idx, off_adj;
6472
6473 sec_num = 0;
6474 for_each_btf_ext_sec(ext_info, sec) {
6475 sec_idx = ext_info->sec_idxs[sec_num];
6476 sec_num++;
6477 if (prog->sec_idx != sec_idx)
6478 continue;
6479
6480 for_each_btf_ext_rec(ext_info, sec, i, rec) {
6481 __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
6482
6483 if (insn_off < prog->sec_insn_off)
6484 continue;
6485 if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
6486 break;
6487
6488 if (!copy_start)
6489 copy_start = rec;
6490 copy_end = rec + ext_info->rec_size;
6491 }
6492
6493 if (!copy_start)
6494 return -ENOENT;
6495
6496 /* append func/line info of a given (sub-)program to the main
6497 * program func/line info
6498 */
6499 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
6500 new_sz = old_sz + (copy_end - copy_start);
6501 new_prog_info = realloc(*prog_info, new_sz);
6502 if (!new_prog_info)
6503 return -ENOMEM;
6504 *prog_info = new_prog_info;
6505 *prog_rec_cnt = new_sz / ext_info->rec_size;
6506 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
6507
6508 /* Kernel instruction offsets are in units of 8-byte
6509 * instructions, while .BTF.ext instruction offsets generated
6510 * by Clang are in units of bytes. So convert Clang offsets
6511 * into kernel offsets and adjust offset according to program
6512 * relocated position.
6513 */
6514 off_adj = prog->sub_insn_off - prog->sec_insn_off;
6515 rec = new_prog_info + old_sz;
6516 rec_end = new_prog_info + new_sz;
6517 for (; rec < rec_end; rec += ext_info->rec_size) {
6518 __u32 *insn_off = rec;
6519
6520 *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
6521 }
6522 *prog_rec_sz = ext_info->rec_size;
6523 return 0;
6524 }
6525
6526 return -ENOENT;
6527 }
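
/* Worked example of the offset conversion above (hypothetical numbers):
 * a .BTF.ext line info record with a byte offset of 1040 in a section where
 * the subprog starts at sec_insn_off == 100 and was appended at
 * sub_insn_off == 300 is rewritten as
 *
 *	insn_off = 1040 / 8 + (300 - 100) = 130 + 200 = 330
 *
 * i.e. an instruction index within the final main program, as the kernel
 * expects.
 */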
6528
6529 static int
6530 reloc_prog_func_and_line_info(const struct bpf_object *obj,
6531 struct bpf_program *main_prog,
6532 const struct bpf_program *prog)
6533 {
6534 int err;
6535
6536 /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
6537 * support func/line info
6538 */
6539 if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
6540 return 0;
6541
6542 /* only attempt func info relocation if main program's func_info
6543 * relocation was successful
6544 */
6545 if (main_prog != prog && !main_prog->func_info)
6546 goto line_info;
6547
6548 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
6549 &main_prog->func_info,
6550 &main_prog->func_info_cnt,
6551 &main_prog->func_info_rec_size);
6552 if (err) {
6553 if (err != -ENOENT) {
6554 pr_warn("prog '%s': error relocating .BTF.ext function info: %s\n",
6555 prog->name, errstr(err));
6556 return err;
6557 }
6558 if (main_prog->func_info) {
6559 /*
6560 * Some info has already been found, but the last
6561 * btf_ext reloc had a problem. We must error out.
6562 */
6563 pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
6564 return err;
6565 }
6566 /* We had a problem loading the very first info. Ignore the rest. */
6567 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
6568 prog->name);
6569 }
6570
6571 line_info:
6572 /* don't relocate line info if main program's relocation failed */
6573 if (main_prog != prog && !main_prog->line_info)
6574 return 0;
6575
6576 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
6577 &main_prog->line_info,
6578 &main_prog->line_info_cnt,
6579 &main_prog->line_info_rec_size);
6580 if (err) {
6581 if (err != -ENOENT) {
6582 pr_warn("prog '%s': error relocating .BTF.ext line info: %s\n",
6583 prog->name, errstr(err));
6584 return err;
6585 }
6586 if (main_prog->line_info) {
6587 /*
6588 * Some info has already been found, but the last
6589 * btf_ext reloc had a problem. We must error out.
6590 */
6591 pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
6592 return err;
6593 }
6594 /* We had a problem loading the very first info. Ignore the rest. */
6595 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
6596 prog->name);
6597 }
6598 return 0;
6599 }
6600
6601 static int cmp_relo_by_insn_idx(const void *key, const void *elem)
6602 {
6603 size_t insn_idx = *(const size_t *)key;
6604 const struct reloc_desc *relo = elem;
6605
6606 if (insn_idx == relo->insn_idx)
6607 return 0;
6608 return insn_idx < relo->insn_idx ? -1 : 1;
6609 }
6610
6611 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
6612 {
6613 if (!prog->nr_reloc)
6614 return NULL;
6615 return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
6616 sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
6617 }
6618
6619 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
6620 {
6621 int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
6622 struct reloc_desc *relos;
6623 int i;
6624
6625 if (main_prog == subprog)
6626 return 0;
6627 relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
6628 /* if new count is zero, reallocarray can return a valid NULL result;
6629 * in this case the previous pointer will be freed, so we *have to*
6630 * reassign old pointer to the new value (even if it's NULL)
6631 */
6632 if (!relos && new_cnt)
6633 return -ENOMEM;
6634 if (subprog->nr_reloc)
6635 memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
6636 sizeof(*relos) * subprog->nr_reloc);
6637
6638 for (i = main_prog->nr_reloc; i < new_cnt; i++)
6639 relos[i].insn_idx += subprog->sub_insn_off;
6640 /* After insn_idx adjustment the 'relos' array is still sorted
6641 * by insn_idx and doesn't break bsearch.
6642 */
6643 main_prog->reloc_desc = relos;
6644 main_prog->nr_reloc = new_cnt;
6645 return 0;
6646 }
6647
6648 static int save_subprog_offsets(struct bpf_program *main_prog, struct bpf_program *subprog)
6649 {
6650 size_t size = sizeof(main_prog->subprogs[0]);
6651 int cnt = main_prog->subprog_cnt;
6652 void *tmp;
6653
6654 tmp = libbpf_reallocarray(main_prog->subprogs, cnt + 1, size);
6655 if (!tmp)
6656 return -ENOMEM;
6657
6658 main_prog->subprogs = tmp;
6659 main_prog->subprogs[cnt].sec_insn_off = subprog->sec_insn_off;
6660 main_prog->subprogs[cnt].sub_insn_off = subprog->sub_insn_off;
6661 main_prog->subprog_cnt++;
6662
6663 return 0;
6664 }
6665
6666 static int
6667 bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog,
6668 struct bpf_program *subprog)
6669 {
6670 struct bpf_insn *insns;
6671 size_t new_cnt;
6672 int err;
6673
6674 subprog->sub_insn_off = main_prog->insns_cnt;
6675
6676 new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
6677 insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
6678 if (!insns) {
6679 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
6680 return -ENOMEM;
6681 }
6682 main_prog->insns = insns;
6683 main_prog->insns_cnt = new_cnt;
6684
6685 memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
6686 subprog->insns_cnt * sizeof(*insns));
6687
6688 pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
6689 main_prog->name, subprog->insns_cnt, subprog->name);
6690
6691 /* The subprog insns are now appended. Append its relos too. */
6692 err = append_subprog_relos(main_prog, subprog);
6693 if (err)
6694 return err;
6695
6696 err = save_subprog_offsets(main_prog, subprog);
6697 if (err) {
6698 pr_warn("prog '%s': failed to add subprog offsets: %s\n",
6699 main_prog->name, errstr(err));
6700 return err;
6701 }
6702
6703 return 0;
6704 }
6705
6706 static int
6707 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
6708 struct bpf_program *prog)
6709 {
6710 size_t sub_insn_idx, insn_idx;
6711 struct bpf_program *subprog;
6712 struct reloc_desc *relo;
6713 struct bpf_insn *insn;
6714 int err;
6715
6716 err = reloc_prog_func_and_line_info(obj, main_prog, prog);
6717 if (err)
6718 return err;
6719
6720 for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
6721 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6722 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
6723 continue;
6724
6725 relo = find_prog_insn_relo(prog, insn_idx);
6726 if (relo && relo->type == RELO_EXTERN_CALL)
6727 /* kfunc relocations will be handled later
6728 * in bpf_object__relocate_data()
6729 */
6730 continue;
6731 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
6732 pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
6733 prog->name, insn_idx, relo->type);
6734 return -LIBBPF_ERRNO__RELOC;
6735 }
6736 if (relo) {
6737 /* sub-program instruction index is a combination of
6738 * an offset of a symbol pointed to by relocation and
6739 * call instruction's imm field; for global functions,
6740 * call always has imm = -1, but for static functions
6741 * relocation is against STT_SECTION and insn->imm
6742 * points to a start of a static function
6743 *
6744 * for subprog addr relocation, the relo->sym_off + insn->imm is
6745 * the byte offset in the corresponding section.
6746 */
6747 if (relo->type == RELO_CALL)
6748 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6749 else
6750 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
6751 } else if (insn_is_pseudo_func(insn)) {
6752 /*
6753 * RELO_SUBPROG_ADDR relo is always emitted even if both
6754 * functions are in the same section, so it shouldn't reach here.
6755 */
6756 pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
6757 prog->name, insn_idx);
6758 return -LIBBPF_ERRNO__RELOC;
6759 } else {
6760 /* if subprogram call is to a static function within
6761 * the same ELF section, there won't be any relocation
6762 * emitted, but it also means there is no additional
6763 * offset necessary, insns->imm is relative to
6764 * instruction's original position within the section
6765 */
6766 sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6767 }
6768
6769 /* we enforce that sub-programs should be in .text section */
6770 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6771 if (!subprog) {
6772 pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6773 prog->name);
6774 return -LIBBPF_ERRNO__RELOC;
6775 }
6776
6777 /* if it's the first call instruction calling into this
6778 * subprogram (meaning this subprog hasn't been processed
6779 * yet) within the context of current main program:
6780 * - append it at the end of main program's instruction block;
6781 * - process it recursively, while current program is put on hold;
6782 * - if that subprogram calls some other not yet processed
6783 * subprogram, same thing will happen recursively until
6784 * there are no more unprocessed subprograms left to append
6785 * and relocate.
6786 */
6787 if (subprog->sub_insn_off == 0) {
6788 err = bpf_object__append_subprog_code(obj, main_prog, subprog);
6789 if (err)
6790 return err;
6791 err = bpf_object__reloc_code(obj, main_prog, subprog);
6792 if (err)
6793 return err;
6794 }
6795
6796 /* main_prog->insns memory could have been re-allocated, so
6797 * calculate pointer again
6798 */
6799 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6800 /* calculate correct instruction position within current main
6801 * prog; each main prog can have a different set of
6802 * subprograms appended (potentially in different order as
6803 * well), so position of any subprog can be different for
6804 * different main programs
6805 */
6806 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6807
6808 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6809 prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6810 }
6811
6812 return 0;
6813 }
6814
6815 /*
6816 * Relocate sub-program calls.
6817 *
6818 * Algorithm operates as follows. Each entry-point BPF program (referred to as
6819 * main prog) is processed separately. For each subprog (non-entry functions,
6820 * that can be called from either entry progs or other subprogs) gets their
6821 * sub_insn_off reset to zero. This serves as indicator that this subprogram
6822 * hasn't been yet appended and relocated within current main prog. Once its
6823 * relocated, sub_insn_off will point at the position within current main prog
6824 * where given subprog was appended. This will further be used to relocate all
6825 * the call instructions jumping into this subprog.
6826 *
6827 * We start with main program and process all call instructions. If the call
6828 * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6829 * is zero), subprog instructions are appended at the end of main program's
6830 * instruction array. Then main program is "put on hold" while we recursively
6831 * process newly appended subprogram. If that subprogram calls into another
6832 * subprogram that hasn't been appended, new subprogram is appended again to
6833 * the *main* prog's instructions (subprog's instructions are always left
6834 * untouched, as they need to be in unmodified state for subsequent main progs
6835 * and subprog instructions are always sent only as part of a main prog) and
6836 * the process continues recursively. Once all the subprogs called from a main
6837 * prog or any of its subprogs are appended (and relocated), all their
6838 * positions within finalized instructions array are known, so it's easy to
6839 * rewrite call instructions with correct relative offsets, corresponding to
6840 * desired target subprog.
6841 *
6842 * It's important to realize that some subprogs might not be called from some
6843 * main prog or any of its called/used subprogs. Those will keep their
6844 * subprog->sub_insn_off as zero at all times and won't be appended to current
6845 * main prog and won't be relocated within the context of current main prog.
6846 * They might still be used from other main progs later.
6847 *
6848 * Visually this process can be shown as below. Suppose we have two main
6849 * programs mainA and mainB and BPF object contains three subprogs: subA,
6850 * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6851 * subC both call subB:
6852 *
6853 * +--------+ +-------+
6854 * | v v |
6855 * +--+---+ +--+-+-+ +---+--+
6856 * | subA | | subB | | subC |
6857 * +--+---+ +------+ +---+--+
6858 * ^ ^
6859 * | |
6860 * +---+-------+ +------+----+
6861 * | mainA | | mainB |
6862 * +-----------+ +-----------+
6863 *
6864 * We'll start relocating mainA, will find subA, append it and start
6865 * processing sub A recursively:
6866 *
6867 * +-----------+------+
6868 * | mainA | subA |
6869 * +-----------+------+
6870 *
6871 * At this point we notice that subB is used from subA, so we append it and
6872 * relocate (there are no further subcalls from subB):
6873 *
6874 * +-----------+------+------+
6875 * | mainA | subA | subB |
6876 * +-----------+------+------+
6877 *
6878 * At this point, we relocate subA calls, then go one level up and finish with
6879 * relocating mainA calls. mainA is done.
6880 *
6881 * For mainB process is similar but results in different order. We start with
6882 * mainB and skip subA and subB, as mainB never calls them (at least
6883 * directly), but we see subC is needed, so we append and start processing it:
6884 *
6885 * +-----------+------+
6886 * | mainB | subC |
6887 * +-----------+------+
6888 * Now we see subC needs subB, so we go back to it, append and relocate it:
6889 *
6890 * +-----------+------+------+
6891 * | mainB | subC | subB |
6892 * +-----------+------+------+
6893 *
6894 * At this point we unwind recursion, relocate calls in subC, then in mainB.
6895 */
6896 static int
6897 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6898 {
6899 struct bpf_program *subprog;
6900 int i, err;
6901
6902 /* mark all subprogs as not relocated (yet) within the context of
6903 * current main program
6904 */
6905 for (i = 0; i < obj->nr_programs; i++) {
6906 subprog = &obj->programs[i];
6907 if (!prog_is_subprog(obj, subprog))
6908 continue;
6909
6910 subprog->sub_insn_off = 0;
6911 }
6912
6913 err = bpf_object__reloc_code(obj, prog, prog);
6914 if (err)
6915 return err;
6916
6917 return 0;
6918 }
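
/* Worked example of the call fixup performed during the recursion above
 * (hypothetical sizes): say mainA has 100 insns and subA is appended at
 * sub_insn_off == 100. A call to subA at instruction #10 of mainA
 * (prog->sub_insn_off == 0) gets
 *
 *	insn->imm = 100 - (0 + 10) - 1 = 89
 *
 * i.e. a relative jump of +89 instructions, which lands exactly on the
 * first instruction of subA (10 + 89 + 1 == 100).
 */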
6919
6920 static void
6921 bpf_object__free_relocs(struct bpf_object *obj)
6922 {
6923 struct bpf_program *prog;
6924 int i;
6925
6926 /* free up relocation descriptors */
6927 for (i = 0; i < obj->nr_programs; i++) {
6928 prog = &obj->programs[i];
6929 zfree(&prog->reloc_desc);
6930 prog->nr_reloc = 0;
6931 }
6932 }
6933
6934 static int cmp_relocs(const void *_a, const void *_b)
6935 {
6936 const struct reloc_desc *a = _a;
6937 const struct reloc_desc *b = _b;
6938
6939 if (a->insn_idx != b->insn_idx)
6940 return a->insn_idx < b->insn_idx ? -1 : 1;
6941
6942 /* no two relocations should have the same insn_idx, but ... */
6943 if (a->type != b->type)
6944 return a->type < b->type ? -1 : 1;
6945
6946 return 0;
6947 }
6948
6949 static void bpf_object__sort_relos(struct bpf_object *obj)
6950 {
6951 int i;
6952
6953 for (i = 0; i < obj->nr_programs; i++) {
6954 struct bpf_program *p = &obj->programs[i];
6955
6956 if (!p->nr_reloc)
6957 continue;
6958
6959 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
6960 }
6961 }
6962
6963 static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog)
6964 {
6965 const char *str = "exception_callback:";
6966 size_t pfx_len = strlen(str);
6967 int i, j, n;
6968
6969 if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG))
6970 return 0;
6971
6972 n = btf__type_cnt(obj->btf);
6973 for (i = 1; i < n; i++) {
6974 const char *name;
6975 struct btf_type *t;
6976
6977 t = btf_type_by_id(obj->btf, i);
6978 if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1)
6979 continue;
6980
6981 name = btf__str_by_offset(obj->btf, t->name_off);
6982 if (strncmp(name, str, pfx_len) != 0)
6983 continue;
6984
6985 t = btf_type_by_id(obj->btf, t->type);
6986 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
6987 pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n",
6988 prog->name);
6989 return -EINVAL;
6990 }
6991 if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0)
6992 continue;
6993 /* Multiple callbacks are specified for the same prog;
6994 * the verifier will eventually return an error for this
6995 * case, hence simply skip appending a subprog.
6996 */
6997 if (prog->exception_cb_idx >= 0) {
6998 prog->exception_cb_idx = -1;
6999 break;
7000 }
7001
7002 name += pfx_len;
7003 if (str_is_empty(name)) {
7004 pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n",
7005 prog->name);
7006 return -EINVAL;
7007 }
7008
7009 for (j = 0; j < obj->nr_programs; j++) {
7010 struct bpf_program *subprog = &obj->programs[j];
7011
7012 if (!prog_is_subprog(obj, subprog))
7013 continue;
7014 if (strcmp(name, subprog->name) != 0)
7015 continue;
7016 /* Enforce non-hidden, as from the verifier's point of
7017 * view it expects global functions, whereas
7018 * mark_btf_static() fixes up linkage as static.
7019 */
7020 if (!subprog->sym_global || subprog->mark_btf_static) {
7021 pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n",
7022 prog->name, subprog->name);
7023 return -EINVAL;
7024 }
7025 /* Let's see if we already saw a static exception callback with the same name */
7026 if (prog->exception_cb_idx >= 0) {
7027 pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n",
7028 prog->name, subprog->name);
7029 return -EINVAL;
7030 }
7031 prog->exception_cb_idx = j;
7032 break;
7033 }
7034
7035 if (prog->exception_cb_idx >= 0)
7036 continue;
7037
7038 pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name);
7039 return -ENOENT;
7040 }
7041
7042 return 0;
7043 }
7044
7045 static struct {
7046 enum bpf_prog_type prog_type;
7047 const char *ctx_name;
7048 } global_ctx_map[] = {
7049 { BPF_PROG_TYPE_CGROUP_DEVICE, "bpf_cgroup_dev_ctx" },
7050 { BPF_PROG_TYPE_CGROUP_SKB, "__sk_buff" },
7051 { BPF_PROG_TYPE_CGROUP_SOCK, "bpf_sock" },
7052 { BPF_PROG_TYPE_CGROUP_SOCK_ADDR, "bpf_sock_addr" },
7053 { BPF_PROG_TYPE_CGROUP_SOCKOPT, "bpf_sockopt" },
7054 { BPF_PROG_TYPE_CGROUP_SYSCTL, "bpf_sysctl" },
7055 { BPF_PROG_TYPE_FLOW_DISSECTOR, "__sk_buff" },
7056 { BPF_PROG_TYPE_KPROBE, "bpf_user_pt_regs_t" },
7057 { BPF_PROG_TYPE_LWT_IN, "__sk_buff" },
7058 { BPF_PROG_TYPE_LWT_OUT, "__sk_buff" },
7059 { BPF_PROG_TYPE_LWT_SEG6LOCAL, "__sk_buff" },
7060 { BPF_PROG_TYPE_LWT_XMIT, "__sk_buff" },
7061 { BPF_PROG_TYPE_NETFILTER, "bpf_nf_ctx" },
7062 { BPF_PROG_TYPE_PERF_EVENT, "bpf_perf_event_data" },
7063 { BPF_PROG_TYPE_RAW_TRACEPOINT, "bpf_raw_tracepoint_args" },
7064 { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" },
7065 { BPF_PROG_TYPE_SCHED_ACT, "__sk_buff" },
7066 { BPF_PROG_TYPE_SCHED_CLS, "__sk_buff" },
7067 { BPF_PROG_TYPE_SK_LOOKUP, "bpf_sk_lookup" },
7068 { BPF_PROG_TYPE_SK_MSG, "sk_msg_md" },
7069 { BPF_PROG_TYPE_SK_REUSEPORT, "sk_reuseport_md" },
7070 { BPF_PROG_TYPE_SK_SKB, "__sk_buff" },
7071 { BPF_PROG_TYPE_SOCK_OPS, "bpf_sock_ops" },
7072 { BPF_PROG_TYPE_SOCKET_FILTER, "__sk_buff" },
7073 { BPF_PROG_TYPE_XDP, "xdp_md" },
7074 /* all other program types don't have "named" context structs */
7075 };
7076
7077 /* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef,
7078 * for below __builtin_types_compatible_p() checks;
7079 * with this approach we don't need any extra arch-specific #ifdef guards
7080 */
7081 struct pt_regs;
7082 struct user_pt_regs;
7083 struct user_regs_struct;
7084
7085 static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog,
7086 const char *subprog_name, int arg_idx,
7087 int arg_type_id, const char *ctx_name)
7088 {
7089 const struct btf_type *t;
7090 const char *tname;
7091
7092 /* check if existing parameter already matches verifier expectations */
7093 t = skip_mods_and_typedefs(btf, arg_type_id, NULL);
7094 if (!btf_is_ptr(t))
7095 goto out_warn;
7096
7097 /* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe
7098 * and perf_event programs, so check this case early on and forget
7099 * about it for subsequent checks
7100 */
7101 while (btf_is_mod(t))
7102 t = btf__type_by_id(btf, t->type);
7103 if (btf_is_typedef(t) &&
7104 (prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) {
7105 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
7106 if (strcmp(tname, "bpf_user_pt_regs_t") == 0)
7107 return false; /* canonical type for kprobe/perf_event */
7108 }
7109
7110 /* now we can ignore typedefs moving forward */
7111 t = skip_mods_and_typedefs(btf, t->type, NULL);
7112
7113 /* if it's `void *`, definitely fix up BTF info */
7114 if (btf_is_void(t))
7115 return true;
7116
7117 /* if it's already proper canonical type, no need to fix up */
7118 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
7119 if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0)
7120 return false;
7121
7122 /* special cases */
7123 switch (prog->type) {
7124 case BPF_PROG_TYPE_KPROBE:
7125 /* `struct pt_regs *` is expected, but we need to fix up */
7126 if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
7127 return true;
7128 break;
7129 case BPF_PROG_TYPE_PERF_EVENT:
7130 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) &&
7131 btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
7132 return true;
7133 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) &&
7134 btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0)
7135 return true;
7136 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) &&
7137 btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0)
7138 return true;
7139 break;
7140 case BPF_PROG_TYPE_RAW_TRACEPOINT:
7141 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
7142 /* allow u64* as ctx */
7143 if (btf_is_int(t) && t->size == 8)
7144 return true;
7145 break;
7146 default:
7147 break;
7148 }
7149
7150 out_warn:
7151 pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n",
7152 prog->name, subprog_name, arg_idx, ctx_name);
7153 return false;
7154 }
7155
7156 static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog)
7157 {
7158 int fn_id, fn_proto_id, ret_type_id, orig_proto_id;
7159 int i, err, arg_cnt, fn_name_off, linkage;
7160 struct btf_type *fn_t, *fn_proto_t, *t;
7161 struct btf_param *p;
7162
7163 /* caller already validated FUNC -> FUNC_PROTO validity */
7164 fn_t = btf_type_by_id(btf, orig_fn_id);
7165 fn_proto_t = btf_type_by_id(btf, fn_t->type);
7166
7167 /* Note that each btf__add_xxx() operation invalidates
7168 * all btf_type and string pointers, so we need to be
7169 * very careful when cloning BTF types. BTF type
7170 * pointers have to be always refetched. And to avoid
7171 * problems with invalidated string pointers, we
7172 * add empty strings initially, then just fix up
7173 * name_off offsets in place. Offsets are stable for
7174 * existing strings, so that works out.
7175 */
7176 fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */
7177 linkage = btf_func_linkage(fn_t);
7178 orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */
7179 ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */
7180 arg_cnt = btf_vlen(fn_proto_t);
7181
7182 /* clone FUNC_PROTO and its params */
7183 fn_proto_id = btf__add_func_proto(btf, ret_type_id);
7184 if (fn_proto_id < 0)
7185 return -EINVAL;
7186
7187 for (i = 0; i < arg_cnt; i++) {
7188 int name_off;
7189
7190 /* copy original parameter data */
7191 t = btf_type_by_id(btf, orig_proto_id);
7192 p = &btf_params(t)[i];
7193 name_off = p->name_off;
7194
7195 err = btf__add_func_param(btf, "", p->type);
7196 if (err)
7197 return err;
7198
7199 fn_proto_t = btf_type_by_id(btf, fn_proto_id);
7200 p = &btf_params(fn_proto_t)[i];
7201 p->name_off = name_off; /* use remembered str offset */
7202 }
7203
7204 /* clone FUNC now, btf__add_func() enforces non-empty name, so use
7205 * entry program's name as a placeholder, which we replace immediately
7206 * with original name_off
7207 */
7208 fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id);
7209 if (fn_id < 0)
7210 return -EINVAL;
7211
7212 fn_t = btf_type_by_id(btf, fn_id);
7213 fn_t->name_off = fn_name_off; /* reuse original string */
7214
7215 return fn_id;
7216 }
7217
7218 /* Check if main program or global subprog's function prototype has `arg:ctx`
7219 * argument tags, and, if necessary, substitute correct type to match what BPF
7220 * verifier would expect, taking into account specific program type. This
7221 * allows supporting the __arg_ctx tag transparently on old kernels that don't
7222 * yet have native support for it in the verifier, making the user's life much
7223 * easier.
7224 */
7225 static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog)
7226 {
7227 const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name;
7228 struct bpf_func_info_min *func_rec;
7229 struct btf_type *fn_t, *fn_proto_t;
7230 struct btf *btf = obj->btf;
7231 const struct btf_type *t;
7232 struct btf_param *p;
7233 int ptr_id = 0, struct_id, tag_id, orig_fn_id;
7234 int i, n, arg_idx, arg_cnt, err, rec_idx;
7235 int *orig_ids;
7236
7237 /* no .BTF.ext, no problem */
7238 if (!obj->btf_ext || !prog->func_info)
7239 return 0;
7240
7241 /* don't do any fix ups if kernel natively supports __arg_ctx */
7242 if (kernel_supports(obj, FEAT_ARG_CTX_TAG))
7243 return 0;
7244
7245 /* some BPF program types just don't have named context structs, so
7246 * this fallback mechanism doesn't work for them
7247 */
7248 for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) {
7249 if (global_ctx_map[i].prog_type != prog->type)
7250 continue;
7251 ctx_name = global_ctx_map[i].ctx_name;
7252 break;
7253 }
7254 if (!ctx_name)
7255 return 0;
7256
7257 /* remember original func BTF IDs to detect if we already cloned them */
7258 orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids));
7259 if (!orig_ids)
7260 return -ENOMEM;
7261 for (i = 0; i < prog->func_info_cnt; i++) {
7262 func_rec = prog->func_info + prog->func_info_rec_size * i;
7263 orig_ids[i] = func_rec->type_id;
7264 }
7265
7266 /* go through each DECL_TAG with "arg:ctx" and see if it points to one
7267 * of our subprogs; if yes and subprog is global and needs adjustment,
7268 * clone and adjust FUNC -> FUNC_PROTO combo
7269 */
7270 for (i = 1, n = btf__type_cnt(btf); i < n; i++) {
7271 /* only DECL_TAG with "arg:ctx" value are interesting */
7272 t = btf__type_by_id(btf, i);
7273 if (!btf_is_decl_tag(t))
7274 continue;
7275 if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0)
7276 continue;
7277
7278 /* only global funcs need adjustment, if at all */
7279 orig_fn_id = t->type;
7280 fn_t = btf_type_by_id(btf, orig_fn_id);
7281 if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL)
7282 continue;
7283
7284 /* sanity check FUNC -> FUNC_PROTO chain, just in case */
7285 fn_proto_t = btf_type_by_id(btf, fn_t->type);
7286 if (!fn_proto_t || !btf_is_func_proto(fn_proto_t))
7287 continue;
7288
7289 /* find corresponding func_info record */
7290 func_rec = NULL;
7291 for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) {
7292 if (orig_ids[rec_idx] == t->type) {
7293 func_rec = prog->func_info + prog->func_info_rec_size * rec_idx;
7294 break;
7295 }
7296 }
7297 /* current main program doesn't call into this subprog */
7298 if (!func_rec)
7299 continue;
7300
7301 /* some more sanity checking of DECL_TAG */
7302 arg_cnt = btf_vlen(fn_proto_t);
7303 arg_idx = btf_decl_tag(t)->component_idx;
7304 if (arg_idx < 0 || arg_idx >= arg_cnt)
7305 continue;
7306
7307 /* check if we should fix up argument type */
7308 p = &btf_params(fn_proto_t)[arg_idx];
7309 fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>";
7310 if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name))
7311 continue;
7312
7313 /* clone fn/fn_proto, unless we already did it for another arg */
7314 if (func_rec->type_id == orig_fn_id) {
7315 int fn_id;
7316
7317 fn_id = clone_func_btf_info(btf, orig_fn_id, prog);
7318 if (fn_id < 0) {
7319 err = fn_id;
7320 goto err_out;
7321 }
7322
7323 /* point func_info record to a cloned FUNC type */
7324 func_rec->type_id = fn_id;
7325 }
7326
7327 /* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument;
7328 * we do it just once per main BPF program, as all global
7329 * funcs share the same program type, so we need only one PTR ->
7330 * STRUCT type chain
7331 */
7332 if (ptr_id == 0) {
7333 struct_id = btf__add_struct(btf, ctx_name, 0);
7334 ptr_id = btf__add_ptr(btf, struct_id);
7335 if (ptr_id < 0 || struct_id < 0) {
7336 err = -EINVAL;
7337 goto err_out;
7338 }
7339 }
7340
7341 /* for completeness, clone DECL_TAG and point it to cloned param */
7342 tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx);
7343 if (tag_id < 0) {
7344 err = -EINVAL;
7345 goto err_out;
7346 }
7347
7348 /* all the BTF manipulations invalidated pointers, refetch them */
7349 fn_t = btf_type_by_id(btf, func_rec->type_id);
7350 fn_proto_t = btf_type_by_id(btf, fn_t->type);
7351
7352 /* fix up type ID pointed to by param */
7353 p = &btf_params(fn_proto_t)[arg_idx];
7354 p->type = ptr_id;
7355 }
7356
7357 free(orig_ids);
7358 return 0;
7359 err_out:
7360 free(orig_ids);
7361 return err;
7362 }
7363
7364 static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
7365 {
7366 struct bpf_program *prog;
7367 size_t i, j;
7368 int err;
7369
7370 if (obj->btf_ext) {
7371 err = bpf_object__relocate_core(obj, targ_btf_path);
7372 if (err) {
7373 pr_warn("failed to perform CO-RE relocations: %s\n",
7374 errstr(err));
7375 return err;
7376 }
7377 bpf_object__sort_relos(obj);
7378 }
7379
7380 /* Before relocating calls, pre-process relocations and mark
7381 * the few ld_imm64 instructions that point to subprogs.
7382 * Otherwise bpf_object__reloc_code() would later have to consider
7383 * all ld_imm64 insns as relocation candidates. That would
7384 * slow down relocation, since the number of find_prog_insn_relo()
7385 * calls would grow and most of them would fail to find a relo.
7386 */
7387 for (i = 0; i < obj->nr_programs; i++) {
7388 prog = &obj->programs[i];
7389 for (j = 0; j < prog->nr_reloc; j++) {
7390 struct reloc_desc *relo = &prog->reloc_desc[j];
7391 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
7392
7393 /* mark the insn, so it's recognized by insn_is_pseudo_func() */
7394 if (relo->type == RELO_SUBPROG_ADDR)
7395 insn[0].src_reg = BPF_PSEUDO_FUNC;
7396 }
7397 }
7398
7399 /* relocate subprogram calls and append used subprograms to main
7400 * programs; each copy of subprogram code needs to be relocated
7401 * differently for each main program, because its code location might
7402 * have changed.
7403 * Append subprog relos to main programs to allow data relos to be
7404 * processed after text is completely relocated.
7405 */
7406 for (i = 0; i < obj->nr_programs; i++) {
7407 prog = &obj->programs[i];
7408 /* sub-program's sub-calls are relocated within the context of
7409 * its main program only
7410 */
7411 if (prog_is_subprog(obj, prog))
7412 continue;
7413 if (!prog->autoload)
7414 continue;
7415
7416 err = bpf_object__relocate_calls(obj, prog);
7417 if (err) {
7418 pr_warn("prog '%s': failed to relocate calls: %s\n",
7419 prog->name, errstr(err));
7420 return err;
7421 }
7422
7423 err = bpf_prog_assign_exc_cb(obj, prog);
7424 if (err)
7425 return err;
7426 /* Now, also append exception callback if it has not been done already. */
7427 if (prog->exception_cb_idx >= 0) {
7428 struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx];
7429
7430 /* Calling the exception callback directly is disallowed; the
7431 * verifier will reject it later. In case it was processed already,
7432 * we can skip this step, otherwise for all other valid cases we
7433 * have to append exception callback now.
7434 */
7435 if (subprog->sub_insn_off == 0) {
7436 err = bpf_object__append_subprog_code(obj, prog, subprog);
7437 if (err)
7438 return err;
7439 err = bpf_object__reloc_code(obj, prog, subprog);
7440 if (err)
7441 return err;
7442 }
7443 }
7444 }
7445 for (i = 0; i < obj->nr_programs; i++) {
7446 prog = &obj->programs[i];
7447 if (prog_is_subprog(obj, prog))
7448 continue;
7449 if (!prog->autoload)
7450 continue;
7451
7452 /* Process data relos for main programs */
7453 err = bpf_object__relocate_data(obj, prog);
7454 if (err) {
7455 pr_warn("prog '%s': failed to relocate data references: %s\n",
7456 prog->name, errstr(err));
7457 return err;
7458 }
7459
7460 /* Fix up .BTF.ext information, if necessary */
7461 err = bpf_program_fixup_func_info(obj, prog);
7462 if (err) {
7463 pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %s\n",
7464 prog->name, errstr(err));
7465 return err;
7466 }
7467 }
7468
7469 return 0;
7470 }
7471
7472 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
7473 Elf64_Shdr *shdr, Elf_Data *data);
7474
7475 static int bpf_object__collect_map_relos(struct bpf_object *obj,
7476 Elf64_Shdr *shdr, Elf_Data *data)
7477 {
7478 const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
7479 int i, j, nrels, new_sz;
7480 const struct btf_var_secinfo *vi = NULL;
7481 const struct btf_type *sec, *var, *def;
7482 struct bpf_map *map = NULL, *targ_map = NULL;
7483 struct bpf_program *targ_prog = NULL;
7484 bool is_prog_array, is_map_in_map;
7485 const struct btf_member *member;
7486 const char *name, *mname, *type;
7487 unsigned int moff;
7488 Elf64_Sym *sym;
7489 Elf64_Rel *rel;
7490 void *tmp;
7491
7492 if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
7493 return -EINVAL;
7494 sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
7495 if (!sec)
7496 return -EINVAL;
7497
7498 nrels = shdr->sh_size / shdr->sh_entsize;
7499 for (i = 0; i < nrels; i++) {
7500 rel = elf_rel_by_idx(data, i);
7501 if (!rel) {
7502 pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
7503 return -LIBBPF_ERRNO__FORMAT;
7504 }
7505
7506 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
7507 if (!sym) {
7508 pr_warn(".maps relo #%d: symbol %zx not found\n",
7509 i, (size_t)ELF64_R_SYM(rel->r_info));
7510 return -LIBBPF_ERRNO__FORMAT;
7511 }
7512 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
7513
7514 pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
7515 i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
7516 (size_t)rel->r_offset, sym->st_name, name);
7517
7518 for (j = 0; j < obj->nr_maps; j++) {
7519 map = &obj->maps[j];
7520 if (map->sec_idx != obj->efile.btf_maps_shndx)
7521 continue;
7522
7523 vi = btf_var_secinfos(sec) + map->btf_var_idx;
7524 if (vi->offset <= rel->r_offset &&
7525 rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size)
7526 break;
7527 }
7528 if (j == obj->nr_maps) {
7529 pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n",
7530 i, name, (size_t)rel->r_offset);
7531 return -EINVAL;
7532 }
7533
7534 is_map_in_map = bpf_map_type__is_map_in_map(map->def.type);
7535 is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY;
7536 type = is_map_in_map ? "map" : "prog";
7537 if (is_map_in_map) {
7538 if (sym->st_shndx != obj->efile.btf_maps_shndx) {
7539 pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
7540 i, name);
7541 return -LIBBPF_ERRNO__RELOC;
7542 }
7543 if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
7544 map->def.key_size != sizeof(int)) {
7545 pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
7546 i, map->name, sizeof(int));
7547 return -EINVAL;
7548 }
7549 targ_map = bpf_object__find_map_by_name(obj, name);
7550 if (!targ_map) {
7551 pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n",
7552 i, name);
7553 return -ESRCH;
7554 }
7555 } else if (is_prog_array) {
7556 targ_prog = bpf_object__find_program_by_name(obj, name);
7557 if (!targ_prog) {
7558 pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n",
7559 i, name);
7560 return -ESRCH;
7561 }
7562 if (targ_prog->sec_idx != sym->st_shndx ||
7563 targ_prog->sec_insn_off * 8 != sym->st_value ||
7564 prog_is_subprog(obj, targ_prog)) {
7565 pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n",
7566 i, name);
7567 return -LIBBPF_ERRNO__RELOC;
7568 }
7569 } else {
7570 return -EINVAL;
7571 }
7572
7573 var = btf__type_by_id(obj->btf, vi->type);
7574 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
7575 if (btf_vlen(def) == 0)
7576 return -EINVAL;
7577 member = btf_members(def) + btf_vlen(def) - 1;
7578 mname = btf__name_by_offset(obj->btf, member->name_off);
7579 if (strcmp(mname, "values"))
7580 return -EINVAL;
7581
7582 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
7583 if (rel->r_offset - vi->offset < moff)
7584 return -EINVAL;
7585
7586 moff = rel->r_offset - vi->offset - moff;
7587 /* here we use BPF pointer size, which is always 64 bit, as we
7588 * are parsing an ELF that was built for the BPF target
7589 */
7590 if (moff % bpf_ptr_sz)
7591 return -EINVAL;
7592 moff /= bpf_ptr_sz;
7593 if (moff >= map->init_slots_sz) {
7594 new_sz = moff + 1;
7595 tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
7596 if (!tmp)
7597 return -ENOMEM;
7598 map->init_slots = tmp;
7599 memset(map->init_slots + map->init_slots_sz, 0,
7600 (new_sz - map->init_slots_sz) * host_ptr_sz);
7601 map->init_slots_sz = new_sz;
7602 }
7603 map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog;
7604
7605 pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n",
7606 i, map->name, moff, type, name);
7607 }
7608
7609 return 0;
7610 }
7611
7612 static int bpf_object__collect_relos(struct bpf_object *obj)
7613 {
7614 int i, err;
7615
7616 for (i = 0; i < obj->efile.sec_cnt; i++) {
7617 struct elf_sec_desc *sec_desc = &obj->efile.secs[i];
7618 Elf64_Shdr *shdr;
7619 Elf_Data *data;
7620 int idx;
7621
7622 if (sec_desc->sec_type != SEC_RELO)
7623 continue;
7624
7625 shdr = sec_desc->shdr;
7626 data = sec_desc->data;
7627 idx = shdr->sh_info;
7628
7629 if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) {
7630 pr_warn("internal error at %d\n", __LINE__);
7631 return -LIBBPF_ERRNO__INTERNAL;
7632 }
7633
7634 if (obj->efile.secs[idx].sec_type == SEC_ST_OPS)
7635 err = bpf_object__collect_st_ops_relos(obj, shdr, data);
7636 else if (idx == obj->efile.btf_maps_shndx)
7637 err = bpf_object__collect_map_relos(obj, shdr, data);
7638 else
7639 err = bpf_object__collect_prog_relos(obj, shdr, data);
7640 if (err)
7641 return err;
7642 }
7643
7644 bpf_object__sort_relos(obj);
7645 return 0;
7646 }
7647
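/* A helper call is a BPF_JMP|BPF_CALL instruction with BPF_K source and both
 * src_reg and dst_reg zeroed; in that case insn->imm holds the helper's ID.
 */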
7648 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
7649 {
7650 if (BPF_CLASS(insn->code) == BPF_JMP &&
7651 BPF_OP(insn->code) == BPF_CALL &&
7652 BPF_SRC(insn->code) == BPF_K &&
7653 insn->src_reg == 0 &&
7654 insn->dst_reg == 0) {
7655 *func_id = insn->imm;
7656 return true;
7657 }
7658 return false;
7659 }
7660
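/* Rewrite helper calls that the running kernel doesn't support into older,
 * compatible ones (e.g. bpf_probe_read_kernel() -> bpf_probe_read()).
 */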
7661 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
7662 {
7663 struct bpf_insn *insn = prog->insns;
7664 enum bpf_func_id func_id;
7665 int i;
7666
7667 if (obj->gen_loader)
7668 return 0;
7669
7670 for (i = 0; i < prog->insns_cnt; i++, insn++) {
7671 if (!insn_is_helper_call(insn, &func_id))
7672 continue;
7673
7674 /* on kernels that don't yet support
7675 * bpf_probe_read_{kernel,user}[_str] helpers, fall back
7676 * to bpf_probe_read() which works well for old kernels
7677 */
7678 switch (func_id) {
7679 case BPF_FUNC_probe_read_kernel:
7680 case BPF_FUNC_probe_read_user:
7681 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
7682 insn->imm = BPF_FUNC_probe_read;
7683 break;
7684 case BPF_FUNC_probe_read_kernel_str:
7685 case BPF_FUNC_probe_read_user_str:
7686 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
7687 insn->imm = BPF_FUNC_probe_read_str;
7688 break;
7689 default:
7690 break;
7691 }
7692 }
7693 return 0;
7694 }
7695
7696 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
7697 int *btf_obj_fd, int *btf_type_id);
7698
7699 /* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */
7700 static int libbpf_prepare_prog_load(struct bpf_program *prog,
7701 struct bpf_prog_load_opts *opts, long cookie)
7702 {
7703 enum sec_def_flags def = cookie;
7704
7705 /* old kernels might not support specifying expected_attach_type */
7706 if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
7707 opts->expected_attach_type = 0;
7708
7709 if (def & SEC_SLEEPABLE)
7710 opts->prog_flags |= BPF_F_SLEEPABLE;
7711
7712 if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
7713 opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
7714
7715 /* special check for usdt to use uprobe_multi link */
7716 if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) {
7717 /* for BPF_TRACE_UPROBE_MULTI, the user might want to query expected_attach_type
7718 * from prog, while the value passed to the kernel comes from opts, so we
7719 * update both.
7720 */
7721 prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI;
7722 opts->expected_attach_type = BPF_TRACE_UPROBE_MULTI;
7723 }
7724
7725 if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
7726 int btf_obj_fd = 0, btf_type_id = 0, err;
7727 const char *attach_name;
7728
7729 attach_name = strchr(prog->sec_name, '/');
7730 if (!attach_name) {
7731 /* if BPF program is annotated with just SEC("fentry")
7732 * (or similar) without declaratively specifying
7733 * target, then it is expected that target will be
7734 * specified with bpf_program__set_attach_target() at
7735 * runtime before BPF object load step. If not, then
7736 * there is nothing to load into the kernel as BPF
7737 * verifier won't be able to validate BPF program
7738 * correctness anyways.
7739 */
7740 pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
7741 prog->name);
7742 return -EINVAL;
7743 }
7744 attach_name++; /* skip over / */
7745
7746 err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
7747 if (err)
7748 return err;
7749
7750 /* cache resolved BTF FD and BTF type ID in the prog */
7751 prog->attach_btf_obj_fd = btf_obj_fd;
7752 prog->attach_btf_id = btf_type_id;
7753
7754 /* but by now libbpf common logic is not utilizing
7755 * prog->attach_btf_obj_fd/prog->attach_btf_id anymore because
7756 * this callback is called after opts were populated by
7757 * libbpf, so this callback has to update opts explicitly here
7758 */
7759 opts->attach_btf_obj_fd = btf_obj_fd;
7760 opts->attach_btf_id = btf_type_id;
7761 }
7762 return 0;
7763 }
7764
7765 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
7766
7767 static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
7768 struct bpf_insn *insns, int insns_cnt,
7769 const char *license, __u32 kern_version, int *prog_fd)
7770 {
7771 LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
7772 const char *prog_name = NULL;
7773 size_t log_buf_size = 0;
7774 char *log_buf = NULL, *tmp;
7775 bool own_log_buf = true;
7776 __u32 log_level = prog->log_level;
7777 int ret, err;
7778
7779 /* Be more helpful by rejecting early the programs that are guaranteed to
7780 * fail validation, with a more meaningful and actionable error message.
7781 */
7782 switch (prog->type) {
7783 case BPF_PROG_TYPE_UNSPEC:
7784 /*
7785 * The program type must be set. Most likely we couldn't find a proper
7786 * section definition at load time, and thus we didn't infer the type.
7787 */
7788 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
7789 prog->name, prog->sec_name);
7790 return -EINVAL;
7791 case BPF_PROG_TYPE_STRUCT_OPS:
7792 if (prog->attach_btf_id == 0) {
7793 pr_warn("prog '%s': SEC(\"struct_ops\") program isn't referenced anywhere, did you forget to use it?\n",
7794 prog->name);
7795 return -EINVAL;
7796 }
7797 break;
7798 default:
7799 break;
7800 }
7801
7802 if (!insns || !insns_cnt)
7803 return -EINVAL;
7804
7805 if (kernel_supports(obj, FEAT_PROG_NAME))
7806 prog_name = prog->name;
7807 load_attr.attach_prog_fd = prog->attach_prog_fd;
7808 load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
7809 load_attr.attach_btf_id = prog->attach_btf_id;
7810 load_attr.kern_version = kern_version;
7811 load_attr.prog_ifindex = prog->prog_ifindex;
7812 load_attr.expected_attach_type = prog->expected_attach_type;
7813
7814 /* specify func_info/line_info only if kernel supports them */
7815 if (obj->btf && btf__fd(obj->btf) >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) {
7816 load_attr.prog_btf_fd = btf__fd(obj->btf);
7817 load_attr.func_info = prog->func_info;
7818 load_attr.func_info_rec_size = prog->func_info_rec_size;
7819 load_attr.func_info_cnt = prog->func_info_cnt;
7820 load_attr.line_info = prog->line_info;
7821 load_attr.line_info_rec_size = prog->line_info_rec_size;
7822 load_attr.line_info_cnt = prog->line_info_cnt;
7823 }
7824 load_attr.log_level = log_level;
7825 load_attr.prog_flags = prog->prog_flags;
7826 load_attr.fd_array = obj->fd_array;
7827
7828 load_attr.token_fd = obj->token_fd;
7829 if (obj->token_fd)
7830 load_attr.prog_flags |= BPF_F_TOKEN_FD;
7831
7832 /* adjust load_attr if sec_def provides custom preload callback */
7833 if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
7834 err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
7835 if (err < 0) {
7836 pr_warn("prog '%s': failed to prepare load attributes: %s\n",
7837 prog->name, errstr(err));
7838 return err;
7839 }
7840 insns = prog->insns;
7841 insns_cnt = prog->insns_cnt;
7842 }
7843
7844 if (obj->gen_loader) {
7845 bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name,
7846 license, insns, insns_cnt, &load_attr,
7847 prog - obj->programs);
7848 *prog_fd = -1;
7849 return 0;
7850 }
7851
7852 retry_load:
7853 /* if log_level is zero, we don't request logs initially even if
7854 * custom log_buf is specified; if the program load fails, then we'll
7855 * bump log_level to 1 and use either custom log_buf or we'll allocate
7856 * our own and retry the load to get details on what failed
7857 */
7858 if (log_level) {
7859 if (prog->log_buf) {
7860 log_buf = prog->log_buf;
7861 log_buf_size = prog->log_size;
7862 own_log_buf = false;
7863 } else if (obj->log_buf) {
7864 log_buf = obj->log_buf;
7865 log_buf_size = obj->log_size;
7866 own_log_buf = false;
7867 } else {
7868 log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2);
7869 tmp = realloc(log_buf, log_buf_size);
7870 if (!tmp) {
7871 ret = -ENOMEM;
7872 goto out;
7873 }
7874 log_buf = tmp;
7875 log_buf[0] = '\0';
7876 own_log_buf = true;
7877 }
7878 }
7879
7880 load_attr.log_buf = log_buf;
7881 load_attr.log_size = log_buf_size;
7882 load_attr.log_level = log_level;
7883
7884 ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);
7885 if (ret >= 0) {
7886 if (log_level && own_log_buf) {
7887 pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
7888 prog->name, log_buf);
7889 }
7890
7891 if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
7892 struct bpf_map *map;
7893 int i;
7894
7895 for (i = 0; i < obj->nr_maps; i++) {
7896 map = &prog->obj->maps[i];
7897 if (map->libbpf_type != LIBBPF_MAP_RODATA)
7898 continue;
7899
7900 if (bpf_prog_bind_map(ret, map->fd, NULL)) {
7901 pr_warn("prog '%s': failed to bind map '%s': %s\n",
7902 prog->name, map->real_name, errstr(errno));
7903 /* Don't fail hard if we can't bind .rodata. */
7904 }
7905 }
7906 }
7907
7908 *prog_fd = ret;
7909 ret = 0;
7910 goto out;
7911 }
7912
7913 if (log_level == 0) {
7914 log_level = 1;
7915 goto retry_load;
7916 }
7917 /* On ENOSPC, increase log buffer size and retry, unless custom
7918 * log_buf is specified.
7919 * Be careful to not overflow u32, though. Kernel's log buf size limit
7920 * isn't part of UAPI so it can always be bumped to full 4GB. So don't
7921 * multiply by 2 unless we are sure we'll fit within 32 bits.
7922 * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2).
7923 */
7924 if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2)
7925 goto retry_load;
7926
7927 ret = -errno;
7928
7929 /* post-process verifier log to improve error descriptions */
7930 fixup_verifier_log(prog, log_buf, log_buf_size);
7931
7932 pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, errstr(errno));
7933 pr_perm_msg(ret);
7934
7935 if (own_log_buf && log_buf && log_buf[0] != '\0') {
7936 pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
7937 prog->name, log_buf);
7938 }
7939
7940 out:
7941 if (own_log_buf)
7942 free(log_buf);
7943 return ret;
7944 }
7945
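/* Return the start of the log line preceding 'cur' within 'buf', or NULL if
 * 'cur' already points at the very beginning of the buffer.
 */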
7946 static char *find_prev_line(char *buf, char *cur)
7947 {
7948 char *p;
7949
7950 if (cur == buf) /* end of a log buf */
7951 return NULL;
7952
7953 p = cur - 1;
7954 while (p - 1 >= buf && *(p - 1) != '\n')
7955 p--;
7956
7957 return p;
7958 }
7959
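/* Replace the [orig, orig + orig_sz) portion of the verifier log with 'patch',
 * shifting the rest of the log left/right as needed and truncating the tail
 * if the patched log would overflow buf_sz.
 */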
7960 static void patch_log(char *buf, size_t buf_sz, size_t log_sz,
7961 char *orig, size_t orig_sz, const char *patch)
7962 {
7963 /* size of the remaining log content to the right from the to-be-replaced part */
7964 size_t rem_sz = (buf + log_sz) - (orig + orig_sz);
7965 size_t patch_sz = strlen(patch);
7966
7967 if (patch_sz != orig_sz) {
7968 /* If patch line(s) are longer than original piece of verifier log,
7969 * shift log contents by (patch_sz - orig_sz) bytes to the right
7970 * starting from after to-be-replaced part of the log.
7971 *
7972 * If patch line(s) are shorter than original piece of verifier log,
7973 * shift log contents by (orig_sz - patch_sz) bytes to the left
7974 * starting from after to-be-replaced part of the log
7975 *
7976 * We need to be careful about not overflowing available
7977 * buf_sz capacity. If that's the case, we'll truncate the end
7978 * of the original log, as necessary.
7979 */
7980 if (patch_sz > orig_sz) {
7981 if (orig + patch_sz >= buf + buf_sz) {
7982 /* patch is big enough to cover remaining space completely */
7983 patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1;
7984 rem_sz = 0;
7985 } else if (patch_sz - orig_sz > buf_sz - log_sz) {
7986 /* patch causes part of remaining log to be truncated */
7987 rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz);
7988 }
7989 }
7990 /* shift remaining log to the right by calculated amount */
7991 memmove(orig + patch_sz, orig + orig_sz, rem_sz);
7992 }
7993
7994 memcpy(orig, patch, patch_sz);
7995 }
7996
7997 static void fixup_log_failed_core_relo(struct bpf_program *prog,
7998 char *buf, size_t buf_sz, size_t log_sz,
7999 char *line1, char *line2, char *line3)
8000 {
8001 /* Expected log for failed and not properly guarded CO-RE relocation:
8002 * line1 -> 123: (85) call unknown#195896080
8003 * line2 -> invalid func unknown#195896080
8004 * line3 -> <anything else or end of buffer>
8005 *
8006 * "123" is the index of the instruction that was poisoned. We extract
8007 * instruction index to find corresponding CO-RE relocation and
8008 * replace this part of the log with more relevant information about
8009 * failed CO-RE relocation.
8010 */
8011 const struct bpf_core_relo *relo;
8012 struct bpf_core_spec spec;
8013 char patch[512], spec_buf[256];
8014 int insn_idx, err, spec_len;
8015
8016 if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1)
8017 return;
8018
8019 relo = find_relo_core(prog, insn_idx);
8020 if (!relo)
8021 return;
8022
8023 err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec);
8024 if (err)
8025 return;
8026
8027 spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec);
8028 snprintf(patch, sizeof(patch),
8029 "%d: <invalid CO-RE relocation>\n"
8030 "failed to resolve CO-RE relocation %s%s\n",
8031 insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : "");
8032
8033 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
8034 }
8035
8036 static void fixup_log_missing_map_load(struct bpf_program *prog,
8037 char *buf, size_t buf_sz, size_t log_sz,
8038 char *line1, char *line2, char *line3)
8039 {
8040 /* Expected log for failed and not properly guarded map reference:
8041 * line1 -> 123: (85) call unknown#2001000345
8042 * line2 -> invalid func unknown#2001000345
8043 * line3 -> <anything else or end of buffer>
8044 *
8045 * "123" is the index of the instruction that was poisoned.
8046 * "345" in "2001000345" is a map index in obj->maps to fetch map name.
8047 */
8048 struct bpf_object *obj = prog->obj;
8049 const struct bpf_map *map;
8050 int insn_idx, map_idx;
8051 char patch[128];
8052
8053 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2)
8054 return;
8055
8056 map_idx -= POISON_LDIMM64_MAP_BASE;
8057 if (map_idx < 0 || map_idx >= obj->nr_maps)
8058 return;
8059 map = &obj->maps[map_idx];
8060
8061 snprintf(patch, sizeof(patch),
8062 "%d: <invalid BPF map reference>\n"
8063 "BPF map '%s' is referenced but wasn't created\n",
8064 insn_idx, map->name);
8065
8066 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
8067 }
8068
8069 static void fixup_log_missing_kfunc_call(struct bpf_program *prog,
8070 char *buf, size_t buf_sz, size_t log_sz,
8071 char *line1, char *line2, char *line3)
8072 {
8073 /* Expected log for failed and not properly guarded kfunc call:
8074 * line1 -> 123: (85) call unknown#2002000345
8075 * line2 -> invalid func unknown#2002000345
8076 * line3 -> <anything else or end of buffer>
8077 *
8078 * "123" is the index of the instruction that was poisoned.
8079 * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name.
8080 */
8081 struct bpf_object *obj = prog->obj;
8082 const struct extern_desc *ext;
8083 int insn_idx, ext_idx;
8084 char patch[128];
8085
8086 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2)
8087 return;
8088
8089 ext_idx -= POISON_CALL_KFUNC_BASE;
8090 if (ext_idx < 0 || ext_idx >= obj->nr_extern)
8091 return;
8092 ext = &obj->externs[ext_idx];
8093
8094 snprintf(patch, sizeof(patch),
8095 "%d: <invalid kfunc call>\n"
8096 "kfunc '%s' is referenced but wasn't resolved\n",
8097 insn_idx, ext->name);
8098
8099 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
8100 }
8101
8102 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz)
8103 {
8104 /* look for familiar error patterns in last N lines of the log */
8105 const size_t max_last_line_cnt = 10;
8106 char *prev_line, *cur_line, *next_line;
8107 size_t log_sz;
8108 int i;
8109
8110 if (!buf)
8111 return;
8112
8113 log_sz = strlen(buf) + 1;
8114 next_line = buf + log_sz - 1;
8115
8116 for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) {
8117 cur_line = find_prev_line(buf, next_line);
8118 if (!cur_line)
8119 return;
8120
8121 if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) {
8122 prev_line = find_prev_line(buf, cur_line);
8123 if (!prev_line)
8124 continue;
8125
8126 /* failed CO-RE relocation case */
8127 fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz,
8128 prev_line, cur_line, next_line);
8129 return;
8130 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) {
8131 prev_line = find_prev_line(buf, cur_line);
8132 if (!prev_line)
8133 continue;
8134
8135 /* reference to uncreated BPF map */
8136 fixup_log_missing_map_load(prog, buf, buf_sz, log_sz,
8137 prev_line, cur_line, next_line);
8138 return;
8139 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) {
8140 prev_line = find_prev_line(buf, cur_line);
8141 if (!prev_line)
8142 continue;
8143
8144 /* reference to unresolved kfunc */
8145 fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz,
8146 prev_line, cur_line, next_line);
8147 return;
8148 }
8149 }
8150 }
8151
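/* In gen_loader (light skeleton) mode, record extern and CO-RE relocations so
 * that the generated loader program can replay them at load time.
 */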
8152 static int bpf_program_record_relos(struct bpf_program *prog)
8153 {
8154 struct bpf_object *obj = prog->obj;
8155 int i;
8156
8157 for (i = 0; i < prog->nr_reloc; i++) {
8158 struct reloc_desc *relo = &prog->reloc_desc[i];
8159 struct extern_desc *ext = &obj->externs[relo->ext_idx];
8160 int kind;
8161
8162 switch (relo->type) {
8163 case RELO_EXTERN_LD64:
8164 if (ext->type != EXT_KSYM)
8165 continue;
8166 kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ?
8167 BTF_KIND_VAR : BTF_KIND_FUNC;
8168 bpf_gen__record_extern(obj->gen_loader, ext->name,
8169 ext->is_weak, !ext->ksym.type_id,
8170 true, kind, relo->insn_idx);
8171 break;
8172 case RELO_EXTERN_CALL:
8173 bpf_gen__record_extern(obj->gen_loader, ext->name,
8174 ext->is_weak, false, false, BTF_KIND_FUNC,
8175 relo->insn_idx);
8176 break;
8177 case RELO_CORE: {
8178 struct bpf_core_relo cr = {
8179 .insn_off = relo->insn_idx * 8,
8180 .type_id = relo->core_relo->type_id,
8181 .access_str_off = relo->core_relo->access_str_off,
8182 .kind = relo->core_relo->kind,
8183 };
8184
8185 bpf_gen__record_relo_core(obj->gen_loader, &cr);
8186 break;
8187 }
8188 default:
8189 continue;
8190 }
8191 }
8192 return 0;
8193 }
8194
8195 static int
8196 bpf_object__load_progs(struct bpf_object *obj, int log_level)
8197 {
8198 struct bpf_program *prog;
8199 size_t i;
8200 int err;
8201
8202 for (i = 0; i < obj->nr_programs; i++) {
8203 prog = &obj->programs[i];
8204 if (prog_is_subprog(obj, prog))
8205 continue;
8206 if (!prog->autoload) {
8207 pr_debug("prog '%s': skipped loading\n", prog->name);
8208 continue;
8209 }
8210 prog->log_level |= log_level;
8211
8212 if (obj->gen_loader)
8213 bpf_program_record_relos(prog);
8214
8215 err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
8216 obj->license, obj->kern_version, &prog->fd);
8217 if (err) {
8218 pr_warn("prog '%s': failed to load: %s\n", prog->name, errstr(err));
8219 return err;
8220 }
8221 }
8222
8223 bpf_object__free_relocs(obj);
8224 return 0;
8225 }
8226
8227 static int bpf_object_prepare_progs(struct bpf_object *obj)
8228 {
8229 struct bpf_program *prog;
8230 size_t i;
8231 int err;
8232
8233 for (i = 0; i < obj->nr_programs; i++) {
8234 prog = &obj->programs[i];
8235 err = bpf_object__sanitize_prog(obj, prog);
8236 if (err)
8237 return err;
8238 }
8239 return 0;
8240 }
8241
8242 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
8243
8244 static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
8245 {
8246 struct bpf_program *prog;
8247 int err;
8248
8249 bpf_object__for_each_program(prog, obj) {
8250 prog->sec_def = find_sec_def(prog->sec_name);
8251 if (!prog->sec_def) {
8252 /* couldn't guess, but user might manually specify */
8253 pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
8254 prog->name, prog->sec_name);
8255 continue;
8256 }
8257
8258 prog->type = prog->sec_def->prog_type;
8259 prog->expected_attach_type = prog->sec_def->expected_attach_type;
8260
8261 /* sec_def can have custom callback which should be called
8262 * after bpf_program is initialized to adjust its properties
8263 */
8264 if (prog->sec_def->prog_setup_fn) {
8265 err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie);
8266 if (err < 0) {
8267 pr_warn("prog '%s': failed to initialize: %s\n",
8268 prog->name, errstr(err));
8269 return err;
8270 }
8271 }
8272 }
8273
8274 return 0;
8275 }
8276
8277 static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
8278 const char *obj_name,
8279 const struct bpf_object_open_opts *opts)
8280 {
8281 const char *kconfig, *btf_tmp_path, *token_path;
8282 struct bpf_object *obj;
8283 int err;
8284 char *log_buf;
8285 size_t log_size;
8286 __u32 log_level;
8287
8288 if (obj_buf && !obj_name)
8289 return ERR_PTR(-EINVAL);
8290
8291 if (elf_version(EV_CURRENT) == EV_NONE) {
8292 pr_warn("failed to init libelf for %s\n",
8293 path ? : "(mem buf)");
8294 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
8295 }
8296
8297 if (!OPTS_VALID(opts, bpf_object_open_opts))
8298 return ERR_PTR(-EINVAL);
8299
8300 obj_name = OPTS_GET(opts, object_name, NULL) ?: obj_name;
8301 if (obj_buf) {
8302 path = obj_name;
8303 pr_debug("loading object '%s' from buffer\n", obj_name);
8304 } else {
8305 pr_debug("loading object from %s\n", path);
8306 }
8307
8308 log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
8309 log_size = OPTS_GET(opts, kernel_log_size, 0);
8310 log_level = OPTS_GET(opts, kernel_log_level, 0);
8311 if (log_size > UINT_MAX)
8312 return ERR_PTR(-EINVAL);
8313 if (log_size && !log_buf)
8314 return ERR_PTR(-EINVAL);
8315
8316 token_path = OPTS_GET(opts, bpf_token_path, NULL);
8317 /* if user didn't specify bpf_token_path explicitly, check if
8318 * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path
8319 * option
8320 */
8321 if (!token_path)
8322 token_path = getenv("LIBBPF_BPF_TOKEN_PATH");
8323 if (token_path && strlen(token_path) >= PATH_MAX)
8324 return ERR_PTR(-ENAMETOOLONG);
8325
8326 obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
8327 if (IS_ERR(obj))
8328 return obj;
8329
8330 obj->log_buf = log_buf;
8331 obj->log_size = log_size;
8332 obj->log_level = log_level;
8333
8334 if (token_path) {
8335 obj->token_path = strdup(token_path);
8336 if (!obj->token_path) {
8337 err = -ENOMEM;
8338 goto out;
8339 }
8340 }
8341
8342 btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
8343 if (btf_tmp_path) {
8344 if (strlen(btf_tmp_path) >= PATH_MAX) {
8345 err = -ENAMETOOLONG;
8346 goto out;
8347 }
8348 obj->btf_custom_path = strdup(btf_tmp_path);
8349 if (!obj->btf_custom_path) {
8350 err = -ENOMEM;
8351 goto out;
8352 }
8353 }
8354
8355 kconfig = OPTS_GET(opts, kconfig, NULL);
8356 if (kconfig) {
8357 obj->kconfig = strdup(kconfig);
8358 if (!obj->kconfig) {
8359 err = -ENOMEM;
8360 goto out;
8361 }
8362 }
8363
8364 err = bpf_object__elf_init(obj);
8365 err = err ? : bpf_object__elf_collect(obj);
8366 err = err ? : bpf_object__collect_externs(obj);
8367 err = err ? : bpf_object_fixup_btf(obj);
8368 err = err ? : bpf_object__init_maps(obj, opts);
8369 err = err ? : bpf_object_init_progs(obj, opts);
8370 err = err ? : bpf_object__collect_relos(obj);
8371 if (err)
8372 goto out;
8373
8374 bpf_object__elf_finish(obj);
8375
8376 return obj;
8377 out:
8378 bpf_object__close(obj);
8379 return ERR_PTR(err);
8380 }
8381
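/* Typical usage (illustrative sketch):
 *
 *	LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_level = 1);
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);
 *
 *	if (!obj)
 *		... handle error reported via errno ...
 */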
8382 struct bpf_object *
8383 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
8384 {
8385 if (!path)
8386 return libbpf_err_ptr(-EINVAL);
8387
8388 return libbpf_ptr(bpf_object_open(path, NULL, 0, NULL, opts));
8389 }
8390
8391 struct bpf_object *bpf_object__open(const char *path)
8392 {
8393 return bpf_object__open_file(path, NULL);
8394 }
8395
8396 struct bpf_object *
8397 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
8398 const struct bpf_object_open_opts *opts)
8399 {
8400 char tmp_name[64];
8401
8402 if (!obj_buf || obj_buf_sz == 0)
8403 return libbpf_err_ptr(-EINVAL);
8404
8405 /* create a (quite useless) default "name" for this memory buffer object */
8406 snprintf(tmp_name, sizeof(tmp_name), "%lx-%zx", (unsigned long)obj_buf, obj_buf_sz);
8407
8408 return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, tmp_name, opts));
8409 }
8410
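/* Close all map FDs, free struct_ops kernel shadow data, and unload all
 * programs belonging to the object.
 */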
8411 static int bpf_object_unload(struct bpf_object *obj)
8412 {
8413 size_t i;
8414
8415 if (!obj)
8416 return libbpf_err(-EINVAL);
8417
8418 for (i = 0; i < obj->nr_maps; i++) {
8419 zclose(obj->maps[i].fd);
8420 if (obj->maps[i].st_ops)
8421 zfree(&obj->maps[i].st_ops->kern_vdata);
8422 }
8423
8424 for (i = 0; i < obj->nr_programs; i++)
8425 bpf_program__unload(&obj->programs[i]);
8426
8427 return 0;
8428 }
8429
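/* Drop BPF_F_MMAPABLE from internal maps if the kernel doesn't support
 * mmap()-able array maps.
 */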
8430 static int bpf_object__sanitize_maps(struct bpf_object *obj)
8431 {
8432 struct bpf_map *m;
8433
8434 bpf_object__for_each_map(m, obj) {
8435 if (!bpf_map__is_internal(m))
8436 continue;
8437 if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
8438 m->def.map_flags &= ~BPF_F_MMAPABLE;
8439 }
8440
8441 return 0;
8442 }
8443
8444 typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type,
8445 const char *sym_name, void *ctx);
8446
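/* Parse /proc/kallsyms and invoke 'cb' for each symbol; a non-zero return
 * from the callback stops iteration and is propagated to the caller.
 */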
8447 static int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
8448 {
8449 char sym_type, sym_name[500];
8450 unsigned long long sym_addr;
8451 int ret, err = 0;
8452 FILE *f;
8453
8454 f = fopen("/proc/kallsyms", "re");
8455 if (!f) {
8456 err = -errno;
8457 pr_warn("failed to open /proc/kallsyms: %s\n", errstr(err));
8458 return err;
8459 }
8460
8461 while (true) {
8462 ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
8463 &sym_addr, &sym_type, sym_name);
8464 if (ret == EOF && feof(f))
8465 break;
8466 if (ret != 3) {
8467 pr_warn("failed to read kallsyms entry: %d\n", ret);
8468 err = -EINVAL;
8469 break;
8470 }
8471
8472 err = cb(sym_addr, sym_type, sym_name, ctx);
8473 if (err)
8474 break;
8475 }
8476
8477 fclose(f);
8478 return err;
8479 }
8480
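/* kallsyms callback: match each kernel symbol against unresolved ksym externs
 * and record its address. LLVM-suffixed local data symbols
 * ("<name>.llvm.<hash>") are matched by their base name.
 */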
8481 static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
8482 const char *sym_name, void *ctx)
8483 {
8484 struct bpf_object *obj = ctx;
8485 const struct btf_type *t;
8486 struct extern_desc *ext;
8487 const char *res;
8488
8489 res = strstr(sym_name, ".llvm.");
8490 if (sym_type == 'd' && res)
8491 ext = find_extern_by_name_with_len(obj, sym_name, res - sym_name);
8492 else
8493 ext = find_extern_by_name(obj, sym_name);
8494 if (!ext || ext->type != EXT_KSYM)
8495 return 0;
8496
8497 t = btf__type_by_id(obj->btf, ext->btf_id);
8498 if (!btf_is_var(t))
8499 return 0;
8500
8501 if (ext->is_set && ext->ksym.addr != sym_addr) {
8502 pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n",
8503 sym_name, ext->ksym.addr, sym_addr);
8504 return -EINVAL;
8505 }
8506 if (!ext->is_set) {
8507 ext->is_set = true;
8508 ext->ksym.addr = sym_addr;
8509 pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr);
8510 }
8511 return 0;
8512 }
8513
8514 static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
8515 {
8516 return libbpf_kallsyms_parse(kallsyms_cb, obj);
8517 }
8518
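/* Look up a ksym's BTF type ID by name and kind, first in vmlinux BTF, then
 * in each kernel module's BTF. On success the type ID is returned and
 * *res_btf/*res_mod_btf are set (mod_btf stays NULL for vmlinux).
 */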
8519 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
8520 __u16 kind, struct btf **res_btf,
8521 struct module_btf **res_mod_btf)
8522 {
8523 struct module_btf *mod_btf;
8524 struct btf *btf;
8525 int i, id, err;
8526
8527 btf = obj->btf_vmlinux;
8528 mod_btf = NULL;
8529 id = btf__find_by_name_kind(btf, ksym_name, kind);
8530
8531 if (id == -ENOENT) {
8532 err = load_module_btfs(obj);
8533 if (err)
8534 return err;
8535
8536 for (i = 0; i < obj->btf_module_cnt; i++) {
8537 /* we assume module_btf's BTF FD is always >0 */
8538 mod_btf = &obj->btf_modules[i];
8539 btf = mod_btf->btf;
8540 id = btf__find_by_name_kind_own(btf, ksym_name, kind);
8541 if (id != -ENOENT)
8542 break;
8543 }
8544 }
8545 if (id <= 0)
8546 return -ESRCH;
8547
8548 *res_btf = btf;
8549 *res_mod_btf = mod_btf;
8550 return id;
8551 }
8552
8553 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
8554 struct extern_desc *ext)
8555 {
8556 const struct btf_type *targ_var, *targ_type;
8557 __u32 targ_type_id, local_type_id;
8558 struct module_btf *mod_btf = NULL;
8559 const char *targ_var_name;
8560 struct btf *btf = NULL;
8561 int id, err;
8562
8563 id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
8564 if (id < 0) {
8565 if (id == -ESRCH && ext->is_weak)
8566 return 0;
8567 pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
8568 ext->name);
8569 return id;
8570 }
8571
8572 /* find local type_id */
8573 local_type_id = ext->ksym.type_id;
8574
8575 /* find target type_id */
8576 targ_var = btf__type_by_id(btf, id);
8577 targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
8578 targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
8579
8580 err = bpf_core_types_are_compat(obj->btf, local_type_id,
8581 btf, targ_type_id);
8582 if (err <= 0) {
8583 const struct btf_type *local_type;
8584 const char *targ_name, *local_name;
8585
8586 local_type = btf__type_by_id(obj->btf, local_type_id);
8587 local_name = btf__name_by_offset(obj->btf, local_type->name_off);
8588 targ_name = btf__name_by_offset(btf, targ_type->name_off);
8589
8590 pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
8591 ext->name, local_type_id,
8592 btf_kind_str(local_type), local_name, targ_type_id,
8593 btf_kind_str(targ_type), targ_name);
8594 return -EINVAL;
8595 }
8596
8597 ext->is_set = true;
8598 ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
8599 ext->ksym.kernel_btf_id = id;
8600 pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
8601 ext->name, id, btf_kind_str(targ_var), targ_var_name);
8602
8603 return 0;
8604 }
8605
8606 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
8607 struct extern_desc *ext)
8608 {
8609 int local_func_proto_id, kfunc_proto_id, kfunc_id;
8610 struct module_btf *mod_btf = NULL;
8611 const struct btf_type *kern_func;
8612 struct btf *kern_btf = NULL;
8613 int ret;
8614
8615 local_func_proto_id = ext->ksym.type_id;
8616
8617 kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf,
8618 &mod_btf);
8619 if (kfunc_id < 0) {
8620 if (kfunc_id == -ESRCH && ext->is_weak)
8621 return 0;
8622 pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
8623 ext->name);
8624 return kfunc_id;
8625 }
8626
8627 kern_func = btf__type_by_id(kern_btf, kfunc_id);
8628 kfunc_proto_id = kern_func->type;
8629
8630 ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
8631 kern_btf, kfunc_proto_id);
8632 if (ret <= 0) {
8633 if (ext->is_weak)
8634 return 0;
8635
8636 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n",
8637 ext->name, local_func_proto_id,
8638 mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id);
8639 return -EINVAL;
8640 }
8641
8642 /* set index for module BTF fd in fd_array, if unset */
8643 if (mod_btf && !mod_btf->fd_array_idx) {
8644 /* insn->off is s16 */
8645 if (obj->fd_array_cnt == INT16_MAX) {
8646 pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n",
8647 ext->name, mod_btf->fd_array_idx);
8648 return -E2BIG;
8649 }
8650 /* Cannot use index 0 for module BTF fd */
8651 if (!obj->fd_array_cnt)
8652 obj->fd_array_cnt = 1;
8653
8654 ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int),
8655 obj->fd_array_cnt + 1);
8656 if (ret)
8657 return ret;
8658 mod_btf->fd_array_idx = obj->fd_array_cnt;
8659 /* we assume module BTF FD is always >0 */
8660 obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd;
8661 }
8662
8663 ext->is_set = true;
8664 ext->ksym.kernel_btf_id = kfunc_id;
8665 ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
8666 /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data()
8667 * populates FD into ld_imm64 insn when it's used to point to kfunc.
8668 * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call.
8669 * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64.
8670 */
8671 ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
8672 pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n",
8673 ext->name, mod_btf ? mod_btf->name : "vmlinux", kfunc_id);
8674
8675 return 0;
8676 }
8677
8678 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
8679 {
8680 const struct btf_type *t;
8681 struct extern_desc *ext;
8682 int i, err;
8683
8684 for (i = 0; i < obj->nr_extern; i++) {
8685 ext = &obj->externs[i];
8686 if (ext->type != EXT_KSYM || !ext->ksym.type_id)
8687 continue;
8688
8689 if (obj->gen_loader) {
8690 ext->is_set = true;
8691 ext->ksym.kernel_btf_obj_fd = 0;
8692 ext->ksym.kernel_btf_id = 0;
8693 continue;
8694 }
8695 t = btf__type_by_id(obj->btf, ext->btf_id);
8696 if (btf_is_var(t))
8697 err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
8698 else
8699 err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
8700 if (err)
8701 return err;
8702 }
8703 return 0;
8704 }
8705
8706 static int bpf_object__resolve_externs(struct bpf_object *obj,
8707 const char *extra_kconfig)
8708 {
8709 bool need_config = false, need_kallsyms = false;
8710 bool need_vmlinux_btf = false;
8711 struct extern_desc *ext;
8712 void *kcfg_data = NULL;
8713 int err, i;
8714
8715 if (obj->nr_extern == 0)
8716 return 0;
8717
8718 if (obj->kconfig_map_idx >= 0)
8719 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
8720
8721 for (i = 0; i < obj->nr_extern; i++) {
8722 ext = &obj->externs[i];
8723
8724 if (ext->type == EXT_KSYM) {
8725 if (ext->ksym.type_id)
8726 need_vmlinux_btf = true;
8727 else
8728 need_kallsyms = true;
8729 continue;
8730 } else if (ext->type == EXT_KCFG) {
8731 void *ext_ptr = kcfg_data + ext->kcfg.data_off;
8732 __u64 value = 0;
8733
8734 /* Kconfig externs need actual /proc/config.gz */
8735 if (str_has_pfx(ext->name, "CONFIG_")) {
8736 need_config = true;
8737 continue;
8738 }
8739
8740 /* Virtual kcfg externs are handled specially by libbpf */
8741 if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
8742 value = get_kernel_version();
8743 if (!value) {
8744 pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name);
8745 return -EINVAL;
8746 }
8747 } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) {
8748 value = kernel_supports(obj, FEAT_BPF_COOKIE);
8749 } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
8750 value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER);
8751 } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) {
8752 /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed
8753 * __kconfig externs, where LINUX_ ones are virtual and filled out
8754 * specially by libbpf (their values don't come from Kconfig).
8755 * If LINUX_xxx variable is not recognized by libbpf, but is marked
8756 * __weak, it defaults to zero value, just like for CONFIG_xxx
8757 * externs.
8758 */
8759 pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name);
8760 return -EINVAL;
8761 }
8762
8763 err = set_kcfg_value_num(ext, ext_ptr, value);
8764 if (err)
8765 return err;
8766 pr_debug("extern (kcfg) '%s': set to 0x%llx\n",
8767 ext->name, (long long)value);
8768 } else {
8769 pr_warn("extern '%s': unrecognized extern kind\n", ext->name);
8770 return -EINVAL;
8771 }
8772 }
8773 if (need_config && extra_kconfig) {
8774 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
8775 if (err)
8776 return -EINVAL;
8777 need_config = false;
8778 for (i = 0; i < obj->nr_extern; i++) {
8779 ext = &obj->externs[i];
8780 if (ext->type == EXT_KCFG && !ext->is_set) {
8781 need_config = true;
8782 break;
8783 }
8784 }
8785 }
8786 if (need_config) {
8787 err = bpf_object__read_kconfig_file(obj, kcfg_data);
8788 if (err)
8789 return -EINVAL;
8790 }
8791 if (need_kallsyms) {
8792 err = bpf_object__read_kallsyms_file(obj);
8793 if (err)
8794 return -EINVAL;
8795 }
8796 if (need_vmlinux_btf) {
8797 err = bpf_object__resolve_ksyms_btf_id(obj);
8798 if (err)
8799 return -EINVAL;
8800 }
8801 for (i = 0; i < obj->nr_extern; i++) {
8802 ext = &obj->externs[i];
8803
8804 if (!ext->is_set && !ext->is_weak) {
8805 pr_warn("extern '%s' (strong): not resolved\n", ext->name);
8806 return -ESRCH;
8807 } else if (!ext->is_set) {
8808 pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
8809 ext->name);
8810 }
8811 }
8812
8813 return 0;
8814 }
8815
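/* Fill in the kernel-side vdata image of a struct_ops map: for every member
 * that has an associated BPF program, write that program's FD at the member's
 * kernel offset so the subsequent map update registers the programs.
 */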
8816 static void bpf_map_prepare_vdata(const struct bpf_map *map)
8817 {
8818 const struct btf_type *type;
8819 struct bpf_struct_ops *st_ops;
8820 __u32 i;
8821
8822 st_ops = map->st_ops;
8823 type = btf__type_by_id(map->obj->btf, st_ops->type_id);
8824 for (i = 0; i < btf_vlen(type); i++) {
8825 struct bpf_program *prog = st_ops->progs[i];
8826 void *kern_data;
8827 int prog_fd;
8828
8829 if (!prog)
8830 continue;
8831
8832 prog_fd = bpf_program__fd(prog);
8833 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
8834 *(unsigned long *)kern_data = prog_fd;
8835 }
8836 }
8837
8838 static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
8839 {
8840 struct bpf_map *map;
8841 int i;
8842
8843 for (i = 0; i < obj->nr_maps; i++) {
8844 map = &obj->maps[i];
8845
8846 if (!bpf_map__is_struct_ops(map))
8847 continue;
8848
8849 if (!map->autocreate)
8850 continue;
8851
8852 bpf_map_prepare_vdata(map);
8853 }
8854
8855 return 0;
8856 }
8857
8858 static void bpf_object_unpin(struct bpf_object *obj)
8859 {
8860 int i;
8861
8862 /* unpin any maps that were auto-pinned during load */
8863 for (i = 0; i < obj->nr_maps; i++)
8864 if (obj->maps[i].pinned && !obj->maps[i].reused)
8865 bpf_map__unpin(&obj->maps[i], NULL);
8866 }
8867
8868 static void bpf_object_post_load_cleanup(struct bpf_object *obj)
8869 {
8870 int i;
8871
8872 /* clean up fd_array */
8873 zfree(&obj->fd_array);
8874
8875 /* clean up module BTFs */
8876 for (i = 0; i < obj->btf_module_cnt; i++) {
8877 close(obj->btf_modules[i].fd);
8878 btf__free(obj->btf_modules[i].btf);
8879 free(obj->btf_modules[i].name);
8880 }
8881 obj->btf_module_cnt = 0;
8882 zfree(&obj->btf_modules);
8883
8884 /* clean up vmlinux BTF */
8885 btf__free(obj->btf_vmlinux);
8886 obj->btf_vmlinux = NULL;
8887 }
8888
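/* Split open/prepare/load usage sketch (file name is illustrative, error
 * handling omitted):
 *
 *   struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *
 *   bpf_object__prepare(obj);   // create maps, perform relocations
 *   bpf_object__load(obj);      // load programs into the kernel
 *   ...
 *   bpf_object__close(obj);
 */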
8889 static int bpf_object_prepare(struct bpf_object *obj, const char *target_btf_path)
8890 {
8891 int err;
8892
8893 if (obj->state >= OBJ_PREPARED) {
8894 pr_warn("object '%s': prepare can't be attempted twice\n", obj->name);
8895 return -EINVAL;
8896 }
8897
8898 err = bpf_object_prepare_token(obj);
8899 err = err ? : bpf_object__probe_loading(obj);
8900 err = err ? : bpf_object__load_vmlinux_btf(obj, false);
8901 err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
8902 err = err ? : bpf_object__sanitize_maps(obj);
8903 err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
8904 err = err ? : bpf_object_adjust_struct_ops_autoload(obj);
8905 err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
8906 err = err ? : bpf_object__sanitize_and_load_btf(obj);
8907 err = err ? : bpf_object__create_maps(obj);
8908 err = err ? : bpf_object_prepare_progs(obj);
8909
8910 if (err) {
8911 bpf_object_unpin(obj);
8912 bpf_object_unload(obj);
8913 obj->state = OBJ_LOADED;
8914 return err;
8915 }
8916
8917 obj->state = OBJ_PREPARED;
8918 return 0;
8919 }
8920
8921 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
8922 {
8923 int err;
8924
8925 if (!obj)
8926 return libbpf_err(-EINVAL);
8927
8928 if (obj->state >= OBJ_LOADED) {
8929 pr_warn("object '%s': load can't be attempted twice\n", obj->name);
8930 return libbpf_err(-EINVAL);
8931 }
8932
8933 /* Disallow loading programs of non-native endianness into the kernel,
8934 * but permit cross-endian creation of "light skeleton".
8935 */
8936 if (obj->gen_loader) {
8937 bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
8938 } else if (!is_native_endianness(obj)) {
8939 pr_warn("object '%s': loading non-native endianness is unsupported\n", obj->name);
8940 return libbpf_err(-LIBBPF_ERRNO__ENDIAN);
8941 }
8942
8943 if (obj->state < OBJ_PREPARED) {
8944 err = bpf_object_prepare(obj, target_btf_path);
8945 if (err)
8946 return libbpf_err(err);
8947 }
8948 err = bpf_object__load_progs(obj, extra_log_level);
8949 err = err ? : bpf_object_init_prog_arrays(obj);
8950 err = err ? : bpf_object_prepare_struct_ops(obj);
8951
8952 if (obj->gen_loader) {
8953 /* reset FDs */
8954 if (obj->btf)
8955 btf__set_fd(obj->btf, -1);
8956 if (!err)
8957 err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
8958 }
8959
8960 bpf_object_post_load_cleanup(obj);
8961 obj->state = OBJ_LOADED; /* doesn't matter if successfully or not */
8962
8963 if (err) {
8964 bpf_object_unpin(obj);
8965 bpf_object_unload(obj);
8966 pr_warn("failed to load object '%s'\n", obj->path);
8967 return libbpf_err(err);
8968 }
8969
8970 return 0;
8971 }
8972
8973 int bpf_object__prepare(struct bpf_object *obj)
8974 {
8975 return libbpf_err(bpf_object_prepare(obj, NULL));
8976 }
8977
8978 int bpf_object__load(struct bpf_object *obj)
8979 {
8980 return bpf_object_load(obj, 0, NULL);
8981 }
8982
8983 static int make_parent_dir(const char *path)
8984 {
8985 char *dname, *dir;
8986 int err = 0;
8987
8988 dname = strdup(path);
8989 if (dname == NULL)
8990 return -ENOMEM;
8991
8992 dir = dirname(dname);
8993 if (mkdir(dir, 0700) && errno != EEXIST)
8994 err = -errno;
8995
8996 free(dname);
8997 if (err) {
8998 pr_warn("failed to mkdir %s: %s\n", path, errstr(err));
8999 }
9000 return err;
9001 }
9002
9003 static int check_path(const char *path)
9004 {
9005 struct statfs st_fs;
9006 char *dname, *dir;
9007 int err = 0;
9008
9009 if (path == NULL)
9010 return -EINVAL;
9011
9012 dname = strdup(path);
9013 if (dname == NULL)
9014 return -ENOMEM;
9015
9016 dir = dirname(dname);
9017 if (statfs(dir, &st_fs)) {
9018 pr_warn("failed to statfs %s: %s\n", dir, errstr(errno));
9019 err = -errno;
9020 }
9021 free(dname);
9022
9023 if (!err && st_fs.f_type != BPF_FS_MAGIC) {
9024 pr_warn("specified path %s is not on BPF FS\n", path);
9025 err = -EINVAL;
9026 }
9027
9028 return err;
9029 }
9030
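/* Pinning usage sketch (path is illustrative and must live on a bpffs mount):
 *
 *   err = bpf_program__pin(prog, "/sys/fs/bpf/my_prog");
 *   ...
 *   err = bpf_program__unpin(prog, "/sys/fs/bpf/my_prog");
 */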
9031 int bpf_program__pin(struct bpf_program *prog, const char *path)
9032 {
9033 int err;
9034
9035 if (prog->fd < 0) {
9036 pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
9037 return libbpf_err(-EINVAL);
9038 }
9039
9040 err = make_parent_dir(path);
9041 if (err)
9042 return libbpf_err(err);
9043
9044 err = check_path(path);
9045 if (err)
9046 return libbpf_err(err);
9047
9048 if (bpf_obj_pin(prog->fd, path)) {
9049 err = -errno;
9050 pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, errstr(err));
9051 return libbpf_err(err);
9052 }
9053
9054 pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
9055 return 0;
9056 }
9057
9058 int bpf_program__unpin(struct bpf_program *prog, const char *path)
9059 {
9060 int err;
9061
9062 if (prog->fd < 0) {
9063 pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
9064 return libbpf_err(-EINVAL);
9065 }
9066
9067 err = check_path(path);
9068 if (err)
9069 return libbpf_err(err);
9070
9071 err = unlink(path);
9072 if (err)
9073 return libbpf_err(-errno);
9074
9075 pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
9076 return 0;
9077 }
9078
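/* A map's pin path can come either from the explicit 'path' argument or from
 * a previously recorded map->pin_path (e.g. set with bpf_map__set_pin_path()
 * or via the LIBBPF_PIN_BY_NAME map definition attribute). A conflicting path
 * is rejected, and re-pinning an already pinned map at the same path is a
 * no-op.
 */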
9079 int bpf_map__pin(struct bpf_map *map, const char *path)
9080 {
9081 int err;
9082
9083 if (map == NULL) {
9084 pr_warn("invalid map pointer\n");
9085 return libbpf_err(-EINVAL);
9086 }
9087
9088 if (map->fd < 0) {
9089 pr_warn("map '%s': can't pin BPF map without FD (was it created?)\n", map->name);
9090 return libbpf_err(-EINVAL);
9091 }
9092
9093 if (map->pin_path) {
9094 if (path && strcmp(path, map->pin_path)) {
9095 pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
9096 bpf_map__name(map), map->pin_path, path);
9097 return libbpf_err(-EINVAL);
9098 } else if (map->pinned) {
9099 pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
9100 bpf_map__name(map), map->pin_path);
9101 return 0;
9102 }
9103 } else {
9104 if (!path) {
9105 pr_warn("missing a path to pin map '%s' at\n",
9106 bpf_map__name(map));
9107 return libbpf_err(-EINVAL);
9108 } else if (map->pinned) {
9109 pr_warn("map '%s' already pinned\n", bpf_map__name(map));
9110 return libbpf_err(-EEXIST);
9111 }
9112
9113 map->pin_path = strdup(path);
9114 if (!map->pin_path) {
9115 err = -errno;
9116 goto out_err;
9117 }
9118 }
9119
9120 err = make_parent_dir(map->pin_path);
9121 if (err)
9122 return libbpf_err(err);
9123
9124 err = check_path(map->pin_path);
9125 if (err)
9126 return libbpf_err(err);
9127
9128 if (bpf_obj_pin(map->fd, map->pin_path)) {
9129 err = -errno;
9130 goto out_err;
9131 }
9132
9133 map->pinned = true;
9134 pr_debug("pinned map '%s'\n", map->pin_path);
9135
9136 return 0;
9137
9138 out_err:
9139 pr_warn("failed to pin map: %s\n", errstr(err));
9140 return libbpf_err(err);
9141 }
9142
9143 int bpf_map__unpin(struct bpf_map *map, const char *path)
9144 {
9145 int err;
9146
9147 if (map == NULL) {
9148 pr_warn("invalid map pointer\n");
9149 return libbpf_err(-EINVAL);
9150 }
9151
9152 if (map->pin_path) {
9153 if (path && strcmp(path, map->pin_path)) {
9154 pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
9155 bpf_map__name(map), map->pin_path, path);
9156 return libbpf_err(-EINVAL);
9157 }
9158 path = map->pin_path;
9159 } else if (!path) {
9160 pr_warn("no path to unpin map '%s' from\n",
9161 bpf_map__name(map));
9162 return libbpf_err(-EINVAL);
9163 }
9164
9165 err = check_path(path);
9166 if (err)
9167 return libbpf_err(err);
9168
9169 err = unlink(path);
9170 if (err != 0)
9171 return libbpf_err(-errno);
9172
9173 map->pinned = false;
9174 pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
9175
9176 return 0;
9177 }
9178
9179 int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
9180 {
9181 char *new = NULL;
9182
9183 if (path) {
9184 new = strdup(path);
9185 if (!new)
9186 return libbpf_err(-errno);
9187 }
9188
9189 free(map->pin_path);
9190 map->pin_path = new;
9191 return 0;
9192 }
9193
9194 __alias(bpf_map__pin_path)
9195 const char *bpf_map__get_pin_path(const struct bpf_map *map);
9196
9197 const char *bpf_map__pin_path(const struct bpf_map *map)
9198 {
9199 return map->pin_path;
9200 }
9201
9202 bool bpf_map__is_pinned(const struct bpf_map *map)
9203 {
9204 return map->pinned;
9205 }
9206
9207 static void sanitize_pin_path(char *s)
9208 {
9209 /* bpffs disallows periods in path names */
9210 while (*s) {
9211 if (*s == '.')
9212 *s = '_';
9213 s++;
9214 }
9215 }
9216
9217 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
9218 {
9219 struct bpf_map *map;
9220 int err;
9221
9222 if (!obj)
9223 return libbpf_err(-ENOENT);
9224
9225 if (obj->state < OBJ_PREPARED) {
9226 pr_warn("object not yet loaded; load it first\n");
9227 return libbpf_err(-ENOENT);
9228 }
9229
9230 bpf_object__for_each_map(map, obj) {
9231 char *pin_path = NULL;
9232 char buf[PATH_MAX];
9233
9234 if (!map->autocreate)
9235 continue;
9236
9237 if (path) {
9238 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
9239 if (err)
9240 goto err_unpin_maps;
9241 sanitize_pin_path(buf);
9242 pin_path = buf;
9243 } else if (!map->pin_path) {
9244 continue;
9245 }
9246
9247 err = bpf_map__pin(map, pin_path);
9248 if (err)
9249 goto err_unpin_maps;
9250 }
9251
9252 return 0;
9253
9254 err_unpin_maps:
9255 while ((map = bpf_object__prev_map(obj, map))) {
9256 if (!map->pin_path)
9257 continue;
9258
9259 bpf_map__unpin(map, NULL);
9260 }
9261
9262 return libbpf_err(err);
9263 }
9264
9265 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
9266 {
9267 struct bpf_map *map;
9268 int err;
9269
9270 if (!obj)
9271 return libbpf_err(-ENOENT);
9272
9273 bpf_object__for_each_map(map, obj) {
9274 char *pin_path = NULL;
9275 char buf[PATH_MAX];
9276
9277 if (path) {
9278 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
9279 if (err)
9280 return libbpf_err(err);
9281 sanitize_pin_path(buf);
9282 pin_path = buf;
9283 } else if (!map->pin_path) {
9284 continue;
9285 }
9286
9287 err = bpf_map__unpin(map, pin_path);
9288 if (err)
9289 return libbpf_err(err);
9290 }
9291
9292 return 0;
9293 }
9294
9295 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
9296 {
9297 struct bpf_program *prog;
9298 char buf[PATH_MAX];
9299 int err;
9300
9301 if (!obj)
9302 return libbpf_err(-ENOENT);
9303
9304 if (obj->state < OBJ_LOADED) {
9305 pr_warn("object not yet loaded; load it first\n");
9306 return libbpf_err(-ENOENT);
9307 }
9308
9309 bpf_object__for_each_program(prog, obj) {
9310 err = pathname_concat(buf, sizeof(buf), path, prog->name);
9311 if (err)
9312 goto err_unpin_programs;
9313
9314 err = bpf_program__pin(prog, buf);
9315 if (err)
9316 goto err_unpin_programs;
9317 }
9318
9319 return 0;
9320
9321 err_unpin_programs:
9322 while ((prog = bpf_object__prev_program(obj, prog))) {
9323 if (pathname_concat(buf, sizeof(buf), path, prog->name))
9324 continue;
9325
9326 bpf_program__unpin(prog, buf);
9327 }
9328
9329 return libbpf_err(err);
9330 }
9331
9332 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
9333 {
9334 struct bpf_program *prog;
9335 int err;
9336
9337 if (!obj)
9338 return libbpf_err(-ENOENT);
9339
9340 bpf_object__for_each_program(prog, obj) {
9341 char buf[PATH_MAX];
9342
9343 err = pathname_concat(buf, sizeof(buf), path, prog->name);
9344 if (err)
9345 return libbpf_err(err);
9346
9347 err = bpf_program__unpin(prog, buf);
9348 if (err)
9349 return libbpf_err(err);
9350 }
9351
9352 return 0;
9353 }
9354
9355 int bpf_object__pin(struct bpf_object *obj, const char *path)
9356 {
9357 int err;
9358
9359 err = bpf_object__pin_maps(obj, path);
9360 if (err)
9361 return libbpf_err(err);
9362
9363 err = bpf_object__pin_programs(obj, path);
9364 if (err) {
9365 bpf_object__unpin_maps(obj, path);
9366 return libbpf_err(err);
9367 }
9368
9369 return 0;
9370 }
9371
9372 int bpf_object__unpin(struct bpf_object *obj, const char *path)
9373 {
9374 int err;
9375
9376 err = bpf_object__unpin_programs(obj, path);
9377 if (err)
9378 return libbpf_err(err);
9379
9380 err = bpf_object__unpin_maps(obj, path);
9381 if (err)
9382 return libbpf_err(err);
9383
9384 return 0;
9385 }
9386
9387 static void bpf_map__destroy(struct bpf_map *map)
9388 {
9389 if (map->inner_map) {
9390 bpf_map__destroy(map->inner_map);
9391 zfree(&map->inner_map);
9392 }
9393
9394 zfree(&map->init_slots);
9395 map->init_slots_sz = 0;
9396
9397 if (map->mmaped && map->mmaped != map->obj->arena_data)
9398 munmap(map->mmaped, bpf_map_mmap_sz(map));
9399 map->mmaped = NULL;
9400
9401 if (map->st_ops) {
9402 zfree(&map->st_ops->data);
9403 zfree(&map->st_ops->progs);
9404 zfree(&map->st_ops->kern_func_off);
9405 zfree(&map->st_ops);
9406 }
9407
9408 zfree(&map->name);
9409 zfree(&map->real_name);
9410 zfree(&map->pin_path);
9411
9412 if (map->fd >= 0)
9413 zclose(map->fd);
9414 }
9415
9416 void bpf_object__close(struct bpf_object *obj)
9417 {
9418 size_t i;
9419
9420 if (IS_ERR_OR_NULL(obj))
9421 return;
9422
9423 /*
9424 * If the user called bpf_object__prepare() without ever getting to
9425 * bpf_object__load(), we need to clean up the state that is normally
9426 * cleaned up at the end of the loading step.
9427 */
9428 bpf_object_post_load_cleanup(obj);
9429
9430 usdt_manager_free(obj->usdt_man);
9431 obj->usdt_man = NULL;
9432
9433 bpf_gen__free(obj->gen_loader);
9434 bpf_object__elf_finish(obj);
9435 bpf_object_unload(obj);
9436 btf__free(obj->btf);
9437 btf__free(obj->btf_vmlinux);
9438 btf_ext__free(obj->btf_ext);
9439
9440 for (i = 0; i < obj->nr_maps; i++)
9441 bpf_map__destroy(&obj->maps[i]);
9442
9443 zfree(&obj->btf_custom_path);
9444 zfree(&obj->kconfig);
9445
9446 for (i = 0; i < obj->nr_extern; i++) {
9447 zfree(&obj->externs[i].name);
9448 zfree(&obj->externs[i].essent_name);
9449 }
9450
9451 zfree(&obj->externs);
9452 obj->nr_extern = 0;
9453
9454 zfree(&obj->maps);
9455 obj->nr_maps = 0;
9456
9457 if (obj->programs && obj->nr_programs) {
9458 for (i = 0; i < obj->nr_programs; i++)
9459 bpf_program__exit(&obj->programs[i]);
9460 }
9461 zfree(&obj->programs);
9462
9463 zfree(&obj->feat_cache);
9464 zfree(&obj->token_path);
9465 if (obj->token_fd > 0)
9466 close(obj->token_fd);
9467
9468 zfree(&obj->arena_data);
9469
9470 zfree(&obj->jumptables_data);
9471 obj->jumptables_data_sz = 0;
9472
9473 for (i = 0; i < obj->jumptable_map_cnt; i++)
9474 close(obj->jumptable_maps[i].fd);
9475 zfree(&obj->jumptable_maps);
9476
9477 free(obj);
9478 }
9479
9480 const char *bpf_object__name(const struct bpf_object *obj)
9481 {
9482 return obj ? obj->name : libbpf_err_ptr(-EINVAL);
9483 }
9484
9485 unsigned int bpf_object__kversion(const struct bpf_object *obj)
9486 {
9487 return obj ? obj->kern_version : 0;
9488 }
9489
9490 int bpf_object__token_fd(const struct bpf_object *obj)
9491 {
9492 return obj->token_fd ?: -1;
9493 }
9494
9495 struct btf *bpf_object__btf(const struct bpf_object *obj)
9496 {
9497 return obj ? obj->btf : NULL;
9498 }
9499
9500 int bpf_object__btf_fd(const struct bpf_object *obj)
9501 {
9502 return obj->btf ? btf__fd(obj->btf) : -1;
9503 }
9504
9505 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
9506 {
9507 if (obj->state >= OBJ_LOADED)
9508 return libbpf_err(-EINVAL);
9509
9510 obj->kern_version = kern_version;
9511
9512 return 0;
9513 }
9514
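/* Switch the object into "light skeleton" generation mode: once a gen_loader
 * is attached, a subsequent bpf_object__load() records map creation and
 * program loading steps into the gen_loader_opts buffers instead of issuing
 * the corresponding syscalls against the running kernel.
 */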
9515 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
9516 {
9517 struct bpf_gen *gen;
9518
9519 if (!opts)
9520 return libbpf_err(-EFAULT);
9521 if (!OPTS_VALID(opts, gen_loader_opts))
9522 return libbpf_err(-EINVAL);
9523 gen = calloc(1, sizeof(*gen));
9524 if (!gen)
9525 return libbpf_err(-ENOMEM);
9526 gen->opts = opts;
9527 gen->swapped_endian = !is_native_endianness(obj);
9528 obj->gen_loader = gen;
9529 return 0;
9530 }
9531
9532 static struct bpf_program *
9533 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
9534 bool forward)
9535 {
9536 size_t nr_programs = obj->nr_programs;
9537 ssize_t idx;
9538
9539 if (!nr_programs)
9540 return NULL;
9541
9542 if (!p)
9543 /* Iter from the beginning */
9544 return forward ? &obj->programs[0] :
9545 &obj->programs[nr_programs - 1];
9546
9547 if (p->obj != obj) {
9548 pr_warn("error: program handler doesn't match object\n");
9549 return errno = EINVAL, NULL;
9550 }
9551
9552 idx = (p - obj->programs) + (forward ? 1 : -1);
9553 if (idx >= obj->nr_programs || idx < 0)
9554 return NULL;
9555 return &obj->programs[idx];
9556 }
9557
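/* Iteration usage sketch: the bpf_object__for_each_program() macro from
 * libbpf.h is built on top of the accessors below and visits only entry-point
 * programs, skipping subprograms:
 *
 *   struct bpf_program *prog;
 *
 *   bpf_object__for_each_program(prog, obj)
 *           printf("prog: %s\n", bpf_program__name(prog));
 */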
9558 struct bpf_program *
9559 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
9560 {
9561 struct bpf_program *prog = prev;
9562
9563 do {
9564 prog = __bpf_program__iter(prog, obj, true);
9565 } while (prog && prog_is_subprog(obj, prog));
9566
9567 return prog;
9568 }
9569
9570 struct bpf_program *
9571 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
9572 {
9573 struct bpf_program *prog = next;
9574
9575 do {
9576 prog = __bpf_program__iter(prog, obj, false);
9577 } while (prog && prog_is_subprog(obj, prog));
9578
9579 return prog;
9580 }
9581
9582 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
9583 {
9584 prog->prog_ifindex = ifindex;
9585 }
9586
9587 const char *bpf_program__name(const struct bpf_program *prog)
9588 {
9589 return prog->name;
9590 }
9591
9592 const char *bpf_program__section_name(const struct bpf_program *prog)
9593 {
9594 return prog->sec_name;
9595 }
9596
9597 bool bpf_program__autoload(const struct bpf_program *prog)
9598 {
9599 return prog->autoload;
9600 }
9601
9602 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
9603 {
9604 if (prog->obj->state >= OBJ_LOADED)
9605 return libbpf_err(-EINVAL);
9606
9607 prog->autoload = autoload;
9608 return 0;
9609 }
9610
9611 bool bpf_program__autoattach(const struct bpf_program *prog)
9612 {
9613 return prog->autoattach;
9614 }
9615
9616 void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach)
9617 {
9618 prog->autoattach = autoattach;
9619 }
9620
9621 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
9622 {
9623 return prog->insns;
9624 }
9625
9626 size_t bpf_program__insn_cnt(const struct bpf_program *prog)
9627 {
9628 return prog->insns_cnt;
9629 }
9630
9631 int bpf_program__set_insns(struct bpf_program *prog,
9632 struct bpf_insn *new_insns, size_t new_insn_cnt)
9633 {
9634 struct bpf_insn *insns;
9635
9636 if (prog->obj->state >= OBJ_LOADED)
9637 return libbpf_err(-EBUSY);
9638
9639 insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
9640 /* NULL is a valid return from reallocarray if the new count is zero */
9641 if (!insns && new_insn_cnt) {
9642 pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
9643 return libbpf_err(-ENOMEM);
9644 }
9645 memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
9646
9647 prog->insns = insns;
9648 prog->insns_cnt = new_insn_cnt;
9649 return 0;
9650 }
9651
9652 int bpf_program__fd(const struct bpf_program *prog)
9653 {
9654 if (!prog)
9655 return libbpf_err(-EINVAL);
9656
9657 if (prog->fd < 0)
9658 return libbpf_err(-ENOENT);
9659
9660 return prog->fd;
9661 }
9662
9663 __alias(bpf_program__type)
9664 enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
9665
9666 enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
9667 {
9668 return prog->type;
9669 }
9670
9671 static size_t custom_sec_def_cnt;
9672 static struct bpf_sec_def *custom_sec_defs;
9673 static struct bpf_sec_def custom_fallback_def;
9674 static bool has_custom_fallback_def;
9675 static int last_custom_sec_def_handler_id;
9676
9677 int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
9678 {
9679 if (prog->obj->state >= OBJ_LOADED)
9680 return libbpf_err(-EBUSY);
9681
9682 /* if type is not changed, do nothing */
9683 if (prog->type == type)
9684 return 0;
9685
9686 prog->type = type;
9687
9688 /* If a program type was changed, we need to reset associated SEC()
9689 * handler, as it will be invalid now. The only exception is a generic
9690 * fallback handler, which by definition is program type-agnostic and
9691 * is a catch-all custom handler, optionally set by the application,
9692 * so should be able to handle any type of BPF program.
9693 */
9694 if (prog->sec_def != &custom_fallback_def)
9695 prog->sec_def = NULL;
9696 return 0;
9697 }
9698
9699 __alias(bpf_program__expected_attach_type)
9700 enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
9701
9702 enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
9703 {
9704 return prog->expected_attach_type;
9705 }
9706
9707 int bpf_program__set_expected_attach_type(struct bpf_program *prog,
9708 enum bpf_attach_type type)
9709 {
9710 if (prog->obj->state >= OBJ_LOADED)
9711 return libbpf_err(-EBUSY);
9712
9713 prog->expected_attach_type = type;
9714 return 0;
9715 }
9716
9717 __u32 bpf_program__flags(const struct bpf_program *prog)
9718 {
9719 return prog->prog_flags;
9720 }
9721
9722 int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
9723 {
9724 if (prog->obj->state >= OBJ_LOADED)
9725 return libbpf_err(-EBUSY);
9726
9727 prog->prog_flags = flags;
9728 return 0;
9729 }
9730
9731 __u32 bpf_program__log_level(const struct bpf_program *prog)
9732 {
9733 return prog->log_level;
9734 }
9735
9736 int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
9737 {
9738 if (prog->obj->state >= OBJ_LOADED)
9739 return libbpf_err(-EBUSY);
9740
9741 prog->log_level = log_level;
9742 return 0;
9743 }
9744
9745 const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
9746 {
9747 *log_size = prog->log_size;
9748 return prog->log_buf;
9749 }
9750
9751 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
9752 {
9753 if (log_size && !log_buf)
9754 return libbpf_err(-EINVAL);
9755 if (prog->log_size > UINT_MAX)
9756 return libbpf_err(-EINVAL);
9757 if (prog->obj->state >= OBJ_LOADED)
9758 return libbpf_err(-EBUSY);
9759
9760 prog->log_buf = log_buf;
9761 prog->log_size = log_size;
9762 return 0;
9763 }
9764
9765 struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog)
9766 {
9767 if (prog->func_info_rec_size != sizeof(struct bpf_func_info))
9768 return libbpf_err_ptr(-EOPNOTSUPP);
9769 return prog->func_info;
9770 }
9771
9772 __u32 bpf_program__func_info_cnt(const struct bpf_program *prog)
9773 {
9774 return prog->func_info_cnt;
9775 }
9776
9777 struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog)
9778 {
9779 if (prog->line_info_rec_size != sizeof(struct bpf_line_info))
9780 return libbpf_err_ptr(-EOPNOTSUPP);
9781 return prog->line_info;
9782 }
9783
9784 __u32 bpf_program__line_info_cnt(const struct bpf_program *prog)
9785 {
9786 return prog->line_info_cnt;
9787 }
9788
9789 #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \
9790 .sec = (char *)sec_pfx, \
9791 .prog_type = BPF_PROG_TYPE_##ptype, \
9792 .expected_attach_type = atype, \
9793 .cookie = (long)(flags), \
9794 .prog_prepare_load_fn = libbpf_prepare_prog_load, \
9795 __VA_ARGS__ \
9796 }
9797
9798 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9799 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9800 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9801 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9802 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9803 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9804 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9805 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9806 static int attach_kprobe_session(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9807 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9808 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9809 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9810
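/* SEC() name conventions used in the table below: a trailing '+' allows an
 * optional "/extras" suffix (e.g. SEC("kprobe/sys_nanosleep")), names without
 * '+' must match exactly, ".s" variants mark sleepable programs, and the
 * SEC_ATTACHABLE* cookies control how expected_attach_type is handled.
 */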
9811 static const struct bpf_sec_def section_defs[] = {
9812 SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE),
9813 SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
9814 SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
9815 SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
9816 SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
9817 SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
9818 SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
9819 SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
9820 SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
9821 SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
9822 SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
9823 SEC_DEF("kprobe.session+", KPROBE, BPF_TRACE_KPROBE_SESSION, SEC_NONE, attach_kprobe_session),
9824 SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
9825 SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
9826 SEC_DEF("uprobe.session+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_NONE, attach_uprobe_multi),
9827 SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
9828 SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
9829 SEC_DEF("uprobe.session.s+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_SLEEPABLE, attach_uprobe_multi),
9830 SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
9831 SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
9832 SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt),
9833 SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt),
9834 SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */
9835 SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */
9836 SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE),
9837 SEC_DEF("tcx/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE),
9838 SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9839 SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9840 SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9841 SEC_DEF("netkit/primary", SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE),
9842 SEC_DEF("netkit/peer", SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE),
9843 SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp),
9844 SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp),
9845 SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
9846 SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
9847 SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
9848 SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
9849 SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
9850 SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
9851 SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
9852 SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
9853 SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9854 SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9855 SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9856 SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace),
9857 SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
9858 SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
9859 SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
9860 SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
9861 SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
9862 SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE),
9863 SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
9864 SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
9865 SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
9866 SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
9867 SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS),
9868 SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
9869 SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE),
9870 SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE),
9871 SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE),
9872 SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE),
9873 SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE),
9874 SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
9875 SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
9876 SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
9877 SEC_DEF("sk_skb/verdict", SK_SKB, BPF_SK_SKB_VERDICT, SEC_ATTACHABLE_OPT),
9878 SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE),
9879 SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
9880 SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
9881 SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
9882 SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
9883 SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
9884 SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE),
9885 SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
9886 SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
9887 SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
9888 SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
9889 SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
9890 SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
9891 SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
9892 SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
9893 SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
9894 SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE),
9895 SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
9896 SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
9897 SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE),
9898 SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
9899 SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
9900 SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE),
9901 SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
9902 SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
9903 SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE),
9904 SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
9905 SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
9906 SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE),
9907 SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
9908 SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
9909 SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
9910 SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
9911 SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE),
9912 SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE),
9913 SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
9914 SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE),
9915 };
9916
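/* Custom section handler usage sketch (my_attach_fn is a user-supplied
 * callback shown for illustration only):
 *
 *   LIBBPF_OPTS(libbpf_prog_handler_opts, opts, .prog_attach_fn = my_attach_fn);
 *   int id = libbpf_register_prog_handler("abc+", BPF_PROG_TYPE_KPROBE, 0, &opts);
 *   ...
 *   libbpf_unregister_prog_handler(id);
 */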
9917 int libbpf_register_prog_handler(const char *sec,
9918 enum bpf_prog_type prog_type,
9919 enum bpf_attach_type exp_attach_type,
9920 const struct libbpf_prog_handler_opts *opts)
9921 {
9922 struct bpf_sec_def *sec_def;
9923
9924 if (!OPTS_VALID(opts, libbpf_prog_handler_opts))
9925 return libbpf_err(-EINVAL);
9926
9927 if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */
9928 return libbpf_err(-E2BIG);
9929
9930 if (sec) {
9931 sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1,
9932 sizeof(*sec_def));
9933 if (!sec_def)
9934 return libbpf_err(-ENOMEM);
9935
9936 custom_sec_defs = sec_def;
9937 sec_def = &custom_sec_defs[custom_sec_def_cnt];
9938 } else {
9939 if (has_custom_fallback_def)
9940 return libbpf_err(-EBUSY);
9941
9942 sec_def = &custom_fallback_def;
9943 }
9944
9945 sec_def->sec = sec ? strdup(sec) : NULL;
9946 if (sec && !sec_def->sec)
9947 return libbpf_err(-ENOMEM);
9948
9949 sec_def->prog_type = prog_type;
9950 sec_def->expected_attach_type = exp_attach_type;
9951 sec_def->cookie = OPTS_GET(opts, cookie, 0);
9952
9953 sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL);
9954 sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL);
9955 sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL);
9956
9957 sec_def->handler_id = ++last_custom_sec_def_handler_id;
9958
9959 if (sec)
9960 custom_sec_def_cnt++;
9961 else
9962 has_custom_fallback_def = true;
9963
9964 return sec_def->handler_id;
9965 }
9966
9967 int libbpf_unregister_prog_handler(int handler_id)
9968 {
9969 struct bpf_sec_def *sec_defs;
9970 int i;
9971
9972 if (handler_id <= 0)
9973 return libbpf_err(-EINVAL);
9974
9975 if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) {
9976 memset(&custom_fallback_def, 0, sizeof(custom_fallback_def));
9977 has_custom_fallback_def = false;
9978 return 0;
9979 }
9980
9981 for (i = 0; i < custom_sec_def_cnt; i++) {
9982 if (custom_sec_defs[i].handler_id == handler_id)
9983 break;
9984 }
9985
9986 if (i == custom_sec_def_cnt)
9987 return libbpf_err(-ENOENT);
9988
9989 free(custom_sec_defs[i].sec);
9990 for (i = i + 1; i < custom_sec_def_cnt; i++)
9991 custom_sec_defs[i - 1] = custom_sec_defs[i];
9992 custom_sec_def_cnt--;
9993
9994 /* try to shrink the array, but it's ok if we couldn't */
9995 sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
9996 /* if new count is zero, reallocarray can return a valid NULL result;
9997 * in this case the previous pointer will be freed, so we *have to*
9998 * reassign old pointer to the new value (even if it's NULL)
9999 */
10000 if (sec_defs || custom_sec_def_cnt == 0)
10001 custom_sec_defs = sec_defs;
10002
10003 return 0;
10004 }
10005
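/* Matching examples: a "kprobe+" definition matches both SEC("kprobe") and
 * SEC("kprobe/sys_nanosleep") but not SEC("kprobex"), while definitions
 * without a trailing '+' or '/' (e.g. "xdp") require an exact SEC() name.
 */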
10006 static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
10007 {
10008 size_t len = strlen(sec_def->sec);
10009
10010 /* "type/" always has to have proper SEC("type/extras") form */
10011 if (sec_def->sec[len - 1] == '/') {
10012 if (str_has_pfx(sec_name, sec_def->sec))
10013 return true;
10014 return false;
10015 }
10016
10017 /* "type+" means it can be either exact SEC("type") or
10018 * well-formed SEC("type/extras") with proper '/' separator
10019 */
10020 if (sec_def->sec[len - 1] == '+') {
10021 len--;
10022 /* not even a prefix */
10023 if (strncmp(sec_name, sec_def->sec, len) != 0)
10024 return false;
10025 /* exact match or has '/' separator */
10026 if (sec_name[len] == '\0' || sec_name[len] == '/')
10027 return true;
10028 return false;
10029 }
10030
10031 return strcmp(sec_name, sec_def->sec) == 0;
10032 }
10033
10034 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
10035 {
10036 const struct bpf_sec_def *sec_def;
10037 int i, n;
10038
10039 n = custom_sec_def_cnt;
10040 for (i = 0; i < n; i++) {
10041 sec_def = &custom_sec_defs[i];
10042 if (sec_def_matches(sec_def, sec_name))
10043 return sec_def;
10044 }
10045
10046 n = ARRAY_SIZE(section_defs);
10047 for (i = 0; i < n; i++) {
10048 sec_def = &section_defs[i];
10049 if (sec_def_matches(sec_def, sec_name))
10050 return sec_def;
10051 }
10052
10053 if (has_custom_fallback_def)
10054 return &custom_fallback_def;
10055
10056 return NULL;
10057 }
10058
10059 #define MAX_TYPE_NAME_SIZE 32
10060
10061 static char *libbpf_get_type_names(bool attach_type)
10062 {
10063 int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
10064 char *buf;
10065
10066 buf = malloc(len);
10067 if (!buf)
10068 return NULL;
10069
10070 buf[0] = '\0';
10071 /* Forge string buf with all available names */
10072 for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
10073 const struct bpf_sec_def *sec_def = &section_defs[i];
10074
10075 if (attach_type) {
10076 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
10077 continue;
10078
10079 if (!(sec_def->cookie & SEC_ATTACHABLE))
10080 continue;
10081 }
10082
10083 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
10084 free(buf);
10085 return NULL;
10086 }
10087 strcat(buf, " ");
10088 strcat(buf, section_defs[i].sec);
10089 }
10090
10091 return buf;
10092 }
10093
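/* Usage sketch: derive program and attach types from an ELF section name:
 *
 *   enum bpf_prog_type prog_type;
 *   enum bpf_attach_type attach_type;
 *   int err = libbpf_prog_type_by_name("cgroup/connect4", &prog_type, &attach_type);
 */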
10094 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
10095 enum bpf_attach_type *expected_attach_type)
10096 {
10097 const struct bpf_sec_def *sec_def;
10098 char *type_names;
10099
10100 if (!name)
10101 return libbpf_err(-EINVAL);
10102
10103 sec_def = find_sec_def(name);
10104 if (sec_def) {
10105 *prog_type = sec_def->prog_type;
10106 *expected_attach_type = sec_def->expected_attach_type;
10107 return 0;
10108 }
10109
10110 pr_debug("failed to guess program type from ELF section '%s'\n", name);
10111 type_names = libbpf_get_type_names(false);
10112 if (type_names != NULL) {
10113 pr_debug("supported section(type) names are:%s\n", type_names);
10114 free(type_names);
10115 }
10116
10117 return libbpf_err(-ESRCH);
10118 }
10119
10120 const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
10121 {
10122 if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
10123 return NULL;
10124
10125 return attach_type_name[t];
10126 }
10127
10128 const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
10129 {
10130 if (t < 0 || t >= ARRAY_SIZE(link_type_name))
10131 return NULL;
10132
10133 return link_type_name[t];
10134 }
10135
10136 const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
10137 {
10138 if (t < 0 || t >= ARRAY_SIZE(map_type_name))
10139 return NULL;
10140
10141 return map_type_name[t];
10142 }
10143
10144 const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
10145 {
10146 if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
10147 return NULL;
10148
10149 return prog_type_name[t];
10150 }
10151
10152 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
10153 int sec_idx,
10154 size_t offset)
10155 {
10156 struct bpf_map *map;
10157 size_t i;
10158
10159 for (i = 0; i < obj->nr_maps; i++) {
10160 map = &obj->maps[i];
10161 if (!bpf_map__is_struct_ops(map))
10162 continue;
10163 if (map->sec_idx == sec_idx &&
10164 map->sec_offset <= offset &&
10165 offset - map->sec_offset < map->def.value_size)
10166 return map;
10167 }
10168
10169 return NULL;
10170 }
10171
10172 /* Collect the reloc from ELF, populate the st_ops->progs[], and update
10173 * st_ops->data for shadow type.
10174 */
10175 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
10176 Elf64_Shdr *shdr, Elf_Data *data)
10177 {
10178 const struct btf_type *type;
10179 const struct btf_member *member;
10180 struct bpf_struct_ops *st_ops;
10181 struct bpf_program *prog;
10182 unsigned int shdr_idx;
10183 const struct btf *btf;
10184 struct bpf_map *map;
10185 unsigned int moff, insn_idx;
10186 const char *name;
10187 __u32 member_idx;
10188 Elf64_Sym *sym;
10189 Elf64_Rel *rel;
10190 int i, nrels;
10191
10192 btf = obj->btf;
10193 nrels = shdr->sh_size / shdr->sh_entsize;
10194 for (i = 0; i < nrels; i++) {
10195 rel = elf_rel_by_idx(data, i);
10196 if (!rel) {
10197 pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
10198 return -LIBBPF_ERRNO__FORMAT;
10199 }
10200
10201 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
10202 if (!sym) {
10203 pr_warn("struct_ops reloc: symbol %zx not found\n",
10204 (size_t)ELF64_R_SYM(rel->r_info));
10205 return -LIBBPF_ERRNO__FORMAT;
10206 }
10207
10208 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
10209 map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset);
10210 if (!map) {
10211 pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
10212 (size_t)rel->r_offset);
10213 return -EINVAL;
10214 }
10215
10216 moff = rel->r_offset - map->sec_offset;
10217 shdr_idx = sym->st_shndx;
10218 st_ops = map->st_ops;
10219 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
10220 map->name,
10221 (long long)(rel->r_info >> 32),
10222 (long long)sym->st_value,
10223 shdr_idx, (size_t)rel->r_offset,
10224 map->sec_offset, sym->st_name, name);
10225
10226 if (shdr_idx >= SHN_LORESERVE) {
10227 pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n",
10228 map->name, (size_t)rel->r_offset, shdr_idx);
10229 return -LIBBPF_ERRNO__RELOC;
10230 }
10231 if (sym->st_value % BPF_INSN_SZ) {
10232 pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
10233 map->name, (unsigned long long)sym->st_value);
10234 return -LIBBPF_ERRNO__FORMAT;
10235 }
10236 insn_idx = sym->st_value / BPF_INSN_SZ;
10237
10238 type = btf__type_by_id(btf, st_ops->type_id);
10239 member = find_member_by_offset(type, moff * 8);
10240 if (!member) {
10241 pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
10242 map->name, moff);
10243 return -EINVAL;
10244 }
10245 member_idx = member - btf_members(type);
10246 name = btf__name_by_offset(btf, member->name_off);
10247
10248 if (!resolve_func_ptr(btf, member->type, NULL)) {
10249 pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
10250 map->name, name);
10251 return -EINVAL;
10252 }
10253
10254 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
10255 if (!prog) {
10256 pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
10257 map->name, shdr_idx, name);
10258 return -EINVAL;
10259 }
10260
10261 /* prevent the use of BPF prog with invalid type */
10262 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
10263 pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
10264 map->name, prog->name);
10265 return -EINVAL;
10266 }
10267
10268 st_ops->progs[member_idx] = prog;
10269
10270 /* st_ops->data will be exposed to users, being returned by
10271 * bpf_map__initial_value() as a pointer to the shadow
10272 * type. All function pointers in the original struct type
10273 * should be converted to a pointer to struct bpf_program
10274 * in the shadow type.
10275 */
10276 *((struct bpf_program **)(st_ops->data + moff)) = prog;
10277 }
10278
10279 return 0;
10280 }
10281
10282 #define BTF_TRACE_PREFIX "btf_trace_"
10283 #define BTF_LSM_PREFIX "bpf_lsm_"
10284 #define BTF_ITER_PREFIX "bpf_iter_"
10285 #define BTF_MAX_NAME_SIZE 128
10286
10287 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
10288 const char **prefix, int *kind)
10289 {
10290 switch (attach_type) {
10291 case BPF_TRACE_RAW_TP:
10292 *prefix = BTF_TRACE_PREFIX;
10293 *kind = BTF_KIND_TYPEDEF;
10294 break;
10295 case BPF_LSM_MAC:
10296 case BPF_LSM_CGROUP:
10297 *prefix = BTF_LSM_PREFIX;
10298 *kind = BTF_KIND_FUNC;
10299 break;
10300 case BPF_TRACE_ITER:
10301 *prefix = BTF_ITER_PREFIX;
10302 *kind = BTF_KIND_FUNC;
10303 break;
10304 default:
10305 *prefix = "";
10306 *kind = BTF_KIND_FUNC;
10307 }
10308 }
10309
10310 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
10311 const char *name, __u32 kind)
10312 {
10313 char btf_type_name[BTF_MAX_NAME_SIZE];
10314 int ret;
10315
10316 ret = snprintf(btf_type_name, sizeof(btf_type_name),
10317 "%s%s", prefix, name);
10318 /* snprintf returns the number of characters written excluding the
10319 * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
10320 * indicates truncation.
10321 */
10322 if (ret < 0 || ret >= sizeof(btf_type_name))
10323 return -ENAMETOOLONG;
10324 return btf__find_by_name_kind(btf, btf_type_name, kind);
10325 }
10326
10327 static inline int find_attach_btf_id(struct btf *btf, const char *name,
10328 enum bpf_attach_type attach_type)
10329 {
10330 const char *prefix;
10331 int kind;
10332
10333 btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
10334 return find_btf_by_prefix_kind(btf, prefix, name, kind);
10335 }
10336
10337 int libbpf_find_vmlinux_btf_id(const char *name,
10338 enum bpf_attach_type attach_type)
10339 {
10340 struct btf *btf;
10341 int err;
10342
10343 btf = btf__load_vmlinux_btf();
10344 err = libbpf_get_error(btf);
10345 if (err) {
10346 pr_warn("vmlinux BTF is not found\n");
10347 return libbpf_err(err);
10348 }
10349
10350 err = find_attach_btf_id(btf, name, attach_type);
10351 if (err <= 0)
10352 pr_warn("%s is not found in vmlinux BTF\n", name);
10353
10354 btf__free(btf);
10355 return libbpf_err(err);
10356 }
10357
10358 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd, int token_fd)
10359 {
10360 struct bpf_prog_info info;
10361 __u32 info_len = sizeof(info);
10362 struct btf *btf;
10363 int err;
10364
10365 memset(&info, 0, info_len);
10366 err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len);
10367 if (err) {
10368 pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %s\n",
10369 attach_prog_fd, errstr(err));
10370 return err;
10371 }
10372
10373 err = -EINVAL;
10374 if (!info.btf_id) {
10375 pr_warn("The target program doesn't have BTF\n");
10376 goto out;
10377 }
10378 btf = btf_load_from_kernel(info.btf_id, NULL, token_fd);
10379 err = libbpf_get_error(btf);
10380 if (err) {
10381 pr_warn("Failed to get BTF %d of the program: %s\n", info.btf_id, errstr(err));
10382 goto out;
10383 }
10384 err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
10385 btf__free(btf);
10386 if (err <= 0) {
10387 pr_warn("%s is not found in prog's BTF\n", name);
10388 goto out;
10389 }
10390 out:
10391 return err;
10392 }
10393
10394 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
10395 enum bpf_attach_type attach_type,
10396 int *btf_obj_fd, int *btf_type_id)
10397 {
10398 int ret, i, mod_len = 0;
10399 const char *fn_name, *mod_name = NULL;
10400
10401 fn_name = strchr(attach_name, ':');
10402 if (fn_name) {
10403 mod_name = attach_name;
10404 mod_len = fn_name - mod_name;
10405 fn_name++;
10406 }
10407
10408 if (!mod_name || strncmp(mod_name, "vmlinux", mod_len) == 0) {
10409 ret = find_attach_btf_id(obj->btf_vmlinux,
10410 mod_name ? fn_name : attach_name,
10411 attach_type);
10412 if (ret > 0) {
10413 *btf_obj_fd = 0; /* vmlinux BTF */
10414 *btf_type_id = ret;
10415 return 0;
10416 }
10417 if (ret != -ENOENT)
10418 return ret;
10419 }
10420
10421 ret = load_module_btfs(obj);
10422 if (ret)
10423 return ret;
10424
10425 for (i = 0; i < obj->btf_module_cnt; i++) {
10426 const struct module_btf *mod = &obj->btf_modules[i];
10427
10428 if (mod_name && strncmp(mod->name, mod_name, mod_len) != 0)
10429 continue;
10430
10431 ret = find_attach_btf_id(mod->btf,
10432 mod_name ? fn_name : attach_name,
10433 attach_type);
10434 if (ret > 0) {
10435 *btf_obj_fd = mod->fd;
10436 *btf_type_id = ret;
10437 return 0;
10438 }
10439 if (ret == -ENOENT)
10440 continue;
10441
10442 return ret;
10443 }
10444
10445 return -ESRCH;
10446 }
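/* Attach name forms handled above, as a hedged sketch: a plain name like
 * "tcp_v4_connect" is first looked up in vmlinux BTF and then in each loaded
 * module's BTF, while a "module:function" form (e.g. the illustrative
 * "nf_conntrack:nf_ct_delete") restricts the search to the named module, or
 * to vmlinux BTF when the prefix is "vmlinux".
 */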
10447
10448 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
10449 int *btf_obj_fd, int *btf_type_id)
10450 {
10451 enum bpf_attach_type attach_type = prog->expected_attach_type;
10452 __u32 attach_prog_fd = prog->attach_prog_fd;
10453 int err = 0;
10454
10455 /* BPF program's BTF ID */
10456 if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) {
10457 if (!attach_prog_fd) {
10458 pr_warn("prog '%s': attach program FD is not set\n", prog->name);
10459 return -EINVAL;
10460 }
10461 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd, prog->obj->token_fd);
10462 if (err < 0) {
10463 pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %s\n",
10464 prog->name, attach_prog_fd, attach_name, errstr(err));
10465 return err;
10466 }
10467 *btf_obj_fd = 0;
10468 *btf_type_id = err;
10469 return 0;
10470 }
10471
10472 /* kernel/module BTF ID */
10473 if (prog->obj->gen_loader) {
10474 bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
10475 *btf_obj_fd = 0;
10476 *btf_type_id = 1;
10477 } else {
10478 err = find_kernel_btf_id(prog->obj, attach_name,
10479 attach_type, btf_obj_fd,
10480 btf_type_id);
10481 }
10482 if (err) {
10483 pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %s\n",
10484 prog->name, attach_name, errstr(err));
10485 return err;
10486 }
10487 return 0;
10488 }
10489
10490 int libbpf_attach_type_by_name(const char *name,
10491 enum bpf_attach_type *attach_type)
10492 {
10493 char *type_names;
10494 const struct bpf_sec_def *sec_def;
10495
10496 if (!name)
10497 return libbpf_err(-EINVAL);
10498
10499 sec_def = find_sec_def(name);
10500 if (!sec_def) {
10501 pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
10502 type_names = libbpf_get_type_names(true);
10503 if (type_names != NULL) {
10504 pr_debug("attachable section(type) names are:%s\n", type_names);
10505 free(type_names);
10506 }
10507
10508 return libbpf_err(-EINVAL);
10509 }
10510
10511 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
10512 return libbpf_err(-EINVAL);
10513 if (!(sec_def->cookie & SEC_ATTACHABLE))
10514 return libbpf_err(-EINVAL);
10515
10516 *attach_type = sec_def->expected_attach_type;
10517 return 0;
10518 }
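/* Minimal usage sketch (assuming the standard SEC() definitions shipped with
 * libbpf):
 *
 *	enum bpf_attach_type type;
 *	int err = libbpf_attach_type_by_name("cgroup_skb/ingress", &type);
 *
 * On success err is 0 and type is BPF_CGROUP_INET_INGRESS; for section names
 * that do not correspond to an attachable program type, -EINVAL is returned
 * and errno is set.
 */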
10519
10520 int bpf_map__fd(const struct bpf_map *map)
10521 {
10522 if (!map)
10523 return libbpf_err(-EINVAL);
10524 if (!map_is_created(map))
10525 return -1;
10526 return map->fd;
10527 }
10528
10529 static bool map_uses_real_name(const struct bpf_map *map)
10530 {
10531 /* Since libbpf started to support custom .data.* and .rodata.* maps,
10532 * their user-visible name differs from kernel-visible name. Users see
10533 * such map's corresponding ELF section name as a map name.
10534 * This check distinguishes .data/.rodata from .data.* and .rodata.*
10535 * maps to know which name has to be returned to the user.
10536 */
10537 if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
10538 return true;
10539 if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
10540 return true;
10541 return false;
10542 }
10543
10544 const char *bpf_map__name(const struct bpf_map *map)
10545 {
10546 if (!map)
10547 return NULL;
10548
10549 if (map_uses_real_name(map))
10550 return map->real_name;
10551
10552 return map->name;
10553 }
10554
10555 enum bpf_map_type bpf_map__type(const struct bpf_map *map)
10556 {
10557 return map->def.type;
10558 }
10559
10560 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
10561 {
10562 if (map_is_created(map))
10563 return libbpf_err(-EBUSY);
10564 map->def.type = type;
10565 return 0;
10566 }
10567
10568 __u32 bpf_map__map_flags(const struct bpf_map *map)
10569 {
10570 return map->def.map_flags;
10571 }
10572
10573 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
10574 {
10575 if (map_is_created(map))
10576 return libbpf_err(-EBUSY);
10577 map->def.map_flags = flags;
10578 return 0;
10579 }
10580
10581 __u64 bpf_map__map_extra(const struct bpf_map *map)
10582 {
10583 return map->map_extra;
10584 }
10585
10586 int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra)
10587 {
10588 if (map_is_created(map))
10589 return libbpf_err(-EBUSY);
10590 map->map_extra = map_extra;
10591 return 0;
10592 }
10593
10594 __u32 bpf_map__numa_node(const struct bpf_map *map)
10595 {
10596 return map->numa_node;
10597 }
10598
10599 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
10600 {
10601 if (map_is_created(map))
10602 return libbpf_err(-EBUSY);
10603 map->numa_node = numa_node;
10604 return 0;
10605 }
10606
10607 __u32 bpf_map__key_size(const struct bpf_map *map)
10608 {
10609 return map->def.key_size;
10610 }
10611
10612 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
10613 {
10614 if (map_is_created(map))
10615 return libbpf_err(-EBUSY);
10616 map->def.key_size = size;
10617 return 0;
10618 }
10619
10620 __u32 bpf_map__value_size(const struct bpf_map *map)
10621 {
10622 return map->def.value_size;
10623 }
10624
10625 static int map_btf_datasec_resize(struct bpf_map *map, __u32 size)
10626 {
10627 struct btf *btf;
10628 struct btf_type *datasec_type, *var_type;
10629 struct btf_var_secinfo *var;
10630 const struct btf_type *array_type;
10631 const struct btf_array *array;
10632 int vlen, element_sz, new_array_id;
10633 __u32 nr_elements;
10634
10635 /* check btf existence */
10636 btf = bpf_object__btf(map->obj);
10637 if (!btf)
10638 return -ENOENT;
10639
10640 /* verify map is datasec */
10641 datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map));
10642 if (!btf_is_datasec(datasec_type)) {
10643 pr_warn("map '%s': cannot be resized, map value type is not a datasec\n",
10644 bpf_map__name(map));
10645 return -EINVAL;
10646 }
10647
10648 /* verify datasec has at least one var */
10649 vlen = btf_vlen(datasec_type);
10650 if (vlen == 0) {
10651 pr_warn("map '%s': cannot be resized, map value datasec is empty\n",
10652 bpf_map__name(map));
10653 return -EINVAL;
10654 }
10655
10656 /* verify last var in the datasec is an array */
10657 var = &btf_var_secinfos(datasec_type)[vlen - 1];
10658 var_type = btf_type_by_id(btf, var->type);
10659 array_type = skip_mods_and_typedefs(btf, var_type->type, NULL);
10660 if (!btf_is_array(array_type)) {
10661 pr_warn("map '%s': cannot be resized, last var must be an array\n",
10662 bpf_map__name(map));
10663 return -EINVAL;
10664 }
10665
10666 /* verify request size aligns with array */
10667 array = btf_array(array_type);
10668 element_sz = btf__resolve_size(btf, array->type);
10669 if (element_sz <= 0 || (size - var->offset) % element_sz != 0) {
10670 pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n",
10671 bpf_map__name(map), element_sz, size);
10672 return -EINVAL;
10673 }
10674
10675 /* create a new array based on the existing array, but with new length */
10676 nr_elements = (size - var->offset) / element_sz;
10677 new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements);
10678 if (new_array_id < 0)
10679 return new_array_id;
10680
10681 /* adding a new btf type invalidates existing pointers to btf objects,
10682 * so refresh pointers before proceeding
10683 */
10684 datasec_type = btf_type_by_id(btf, map->btf_value_type_id);
10685 var = &btf_var_secinfos(datasec_type)[vlen - 1];
10686 var_type = btf_type_by_id(btf, var->type);
10687
10688 /* finally update btf info */
10689 datasec_type->size = size;
10690 var->size = size - var->offset;
10691 var_type->type = new_array_id;
10692
10693 return 0;
10694 }
10695
10696 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
10697 {
10698 if (map_is_created(map))
10699 return libbpf_err(-EBUSY);
10700
10701 if (map->mmaped) {
10702 size_t mmap_old_sz, mmap_new_sz;
10703 int err;
10704
10705 if (map->def.type != BPF_MAP_TYPE_ARRAY)
10706 return libbpf_err(-EOPNOTSUPP);
10707
10708 mmap_old_sz = bpf_map_mmap_sz(map);
10709 mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries);
10710 err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
10711 if (err) {
10712 pr_warn("map '%s': failed to resize memory-mapped region: %s\n",
10713 bpf_map__name(map), errstr(err));
10714 return libbpf_err(err);
10715 }
10716 err = map_btf_datasec_resize(map, size);
10717 if (err && err != -ENOENT) {
10718 pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %s\n",
10719 bpf_map__name(map), errstr(err));
10720 map->btf_value_type_id = 0;
10721 map->btf_key_type_id = 0;
10722 }
10723 }
10724
10725 map->def.value_size = size;
10726 return 0;
10727 }
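/* Usage sketch (map name and size are illustrative only): growing a
 * global-data array before the object is loaded. The BTF of the backing
 * datasec is adjusted by map_btf_datasec_resize() when the last variable in
 * the datasec is an array.
 *
 *	struct bpf_map *map = bpf_object__find_map_by_name(obj, ".data.my_storage");
 *
 *	if (map)
 *		err = bpf_map__set_value_size(map, 64 * 1024);
 */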
10728
10729 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
10730 {
10731 return map ? map->btf_key_type_id : 0;
10732 }
10733
10734 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
10735 {
10736 return map ? map->btf_value_type_id : 0;
10737 }
10738
10739 int bpf_map__set_initial_value(struct bpf_map *map,
10740 const void *data, size_t size)
10741 {
10742 size_t actual_sz;
10743
10744 if (map_is_created(map))
10745 return libbpf_err(-EBUSY);
10746
10747 if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
10748 return libbpf_err(-EINVAL);
10749
10750 if (map->def.type == BPF_MAP_TYPE_ARENA)
10751 actual_sz = map->obj->arena_data_sz;
10752 else
10753 actual_sz = map->def.value_size;
10754 if (size != actual_sz)
10755 return libbpf_err(-EINVAL);
10756
10757 memcpy(map->mmaped, data, size);
10758 return 0;
10759 }
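/* Usage sketch with a hypothetical config struct and skeleton: the size
 * passed in must match the map's value size exactly (or the arena data size
 * for BPF_MAP_TYPE_ARENA maps), and the call must happen before load:
 *
 *	struct my_cfg cfg = { .verbose = true };
 *
 *	err = bpf_map__set_initial_value(skel->maps.data, &cfg, sizeof(cfg));
 */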
10760
10761 void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
10762 {
10763 if (bpf_map__is_struct_ops(map)) {
10764 if (psize)
10765 *psize = map->def.value_size;
10766 return map->st_ops->data;
10767 }
10768
10769 if (!map->mmaped)
10770 return NULL;
10771
10772 if (map->def.type == BPF_MAP_TYPE_ARENA)
10773 *psize = map->obj->arena_data_sz;
10774 else
10775 *psize = map->def.value_size;
10776
10777 return map->mmaped;
10778 }
10779
10780 bool bpf_map__is_internal(const struct bpf_map *map)
10781 {
10782 return map->libbpf_type != LIBBPF_MAP_UNSPEC;
10783 }
10784
10785 __u32 bpf_map__ifindex(const struct bpf_map *map)
10786 {
10787 return map->map_ifindex;
10788 }
10789
10790 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
10791 {
10792 if (map_is_created(map))
10793 return libbpf_err(-EBUSY);
10794 map->map_ifindex = ifindex;
10795 return 0;
10796 }
10797
10798 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
10799 {
10800 if (!bpf_map_type__is_map_in_map(map->def.type)) {
10801 pr_warn("error: unsupported map type\n");
10802 return libbpf_err(-EINVAL);
10803 }
10804 if (map->inner_map_fd != -1) {
10805 pr_warn("error: inner_map_fd already specified\n");
10806 return libbpf_err(-EINVAL);
10807 }
10808 if (map->inner_map) {
10809 bpf_map__destroy(map->inner_map);
10810 zfree(&map->inner_map);
10811 }
10812 map->inner_map_fd = fd;
10813 return 0;
10814 }
10815
10816 int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog)
10817 {
10818 if (map_is_created(map)) {
10819 pr_warn("exclusive programs must be set before map creation\n");
10820 return libbpf_err(-EINVAL);
10821 }
10822
10823 if (map->obj != prog->obj) {
10824 pr_warn("excl_prog and map must be from the same bpf object\n");
10825 return libbpf_err(-EINVAL);
10826 }
10827
10828 map->excl_prog = prog;
10829 return 0;
10830 }
10831
10832 struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map)
10833 {
10834 return map->excl_prog;
10835 }
10836
10837 static struct bpf_map *
10838 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
10839 {
10840 ssize_t idx;
10841 struct bpf_map *s, *e;
10842
10843 if (!obj || !obj->maps)
10844 return errno = EINVAL, NULL;
10845
10846 s = obj->maps;
10847 e = obj->maps + obj->nr_maps;
10848
10849 if ((m < s) || (m >= e)) {
10850 pr_warn("error in %s: map handler doesn't belong to object\n",
10851 __func__);
10852 return errno = EINVAL, NULL;
10853 }
10854
10855 idx = (m - obj->maps) + i;
10856 if (idx >= obj->nr_maps || idx < 0)
10857 return NULL;
10858 return &obj->maps[idx];
10859 }
10860
10861 struct bpf_map *
10862 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
10863 {
10864 if (prev == NULL && obj != NULL)
10865 return obj->maps;
10866
10867 return __bpf_map__iter(prev, obj, 1);
10868 }
10869
10870 struct bpf_map *
10871 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
10872 {
10873 if (next == NULL && obj != NULL) {
10874 if (!obj->nr_maps)
10875 return NULL;
10876 return obj->maps + obj->nr_maps - 1;
10877 }
10878
10879 return __bpf_map__iter(next, obj, -1);
10880 }
10881
10882 struct bpf_map *
10883 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
10884 {
10885 struct bpf_map *pos;
10886
10887 bpf_object__for_each_map(pos, obj) {
10888 /* if it's a special internal map name (which always starts
10889 * with a dot) then check if that special name matches the
10890 * real map name (ELF section name)
10891 */
10892 if (name[0] == '.') {
10893 if (pos->real_name && strcmp(pos->real_name, name) == 0)
10894 return pos;
10895 continue;
10896 }
10897 /* otherwise map name has to be an exact match */
10898 if (map_uses_real_name(pos)) {
10899 if (strcmp(pos->real_name, name) == 0)
10900 return pos;
10901 continue;
10902 }
10903 if (strcmp(pos->name, name) == 0)
10904 return pos;
10905 }
10906 return errno = ENOENT, NULL;
10907 }
10908
10909 int
10910 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
10911 {
10912 return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
10913 }
10914
10915 static int validate_map_op(const struct bpf_map *map, size_t key_sz,
10916 size_t value_sz, bool check_value_sz)
10917 {
10918 if (!map_is_created(map)) /* map is not yet created */
10919 return -ENOENT;
10920
10921 if (map->def.key_size != key_sz) {
10922 pr_warn("map '%s': unexpected key size %zu provided, expected %u\n",
10923 map->name, key_sz, map->def.key_size);
10924 return -EINVAL;
10925 }
10926
10927 if (map->fd < 0) {
10928 pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name);
10929 return -EINVAL;
10930 }
10931
10932 if (!check_value_sz)
10933 return 0;
10934
10935 switch (map->def.type) {
10936 case BPF_MAP_TYPE_PERCPU_ARRAY:
10937 case BPF_MAP_TYPE_PERCPU_HASH:
10938 case BPF_MAP_TYPE_LRU_PERCPU_HASH:
10939 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: {
10940 int num_cpu = libbpf_num_possible_cpus();
10941 size_t elem_sz = roundup(map->def.value_size, 8);
10942
10943 if (value_sz != num_cpu * elem_sz) {
10944 pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
10945 map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
10946 return -EINVAL;
10947 }
10948 break;
10949 }
10950 default:
10951 if (map->def.value_size != value_sz) {
10952 pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
10953 map->name, value_sz, map->def.value_size);
10954 return -EINVAL;
10955 }
10956 break;
10957 }
10958 return 0;
10959 }
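/* Per-CPU sizing example implied by the check above: for a
 * BPF_MAP_TYPE_PERCPU_ARRAY with value_size == 4 on a system where
 * libbpf_num_possible_cpus() returns 8, each per-CPU slot is rounded up to
 * 8 bytes, so user-space buffers passed to the typed lookup/update helpers
 * must be 8 * 8 = 64 bytes long.
 */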
10960
10961 int bpf_map__lookup_elem(const struct bpf_map *map,
10962 const void *key, size_t key_sz,
10963 void *value, size_t value_sz, __u64 flags)
10964 {
10965 int err;
10966
10967 err = validate_map_op(map, key_sz, value_sz, true);
10968 if (err)
10969 return libbpf_err(err);
10970
10971 return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
10972 }
10973
10974 int bpf_map__update_elem(const struct bpf_map *map,
10975 const void *key, size_t key_sz,
10976 const void *value, size_t value_sz, __u64 flags)
10977 {
10978 int err;
10979
10980 err = validate_map_op(map, key_sz, value_sz, true);
10981 if (err)
10982 return libbpf_err(err);
10983
10984 return bpf_map_update_elem(map->fd, key, value, flags);
10985 }
10986
10987 int bpf_map__delete_elem(const struct bpf_map *map,
10988 const void *key, size_t key_sz, __u64 flags)
10989 {
10990 int err;
10991
10992 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
10993 if (err)
10994 return libbpf_err(err);
10995
10996 return bpf_map_delete_elem_flags(map->fd, key, flags);
10997 }
10998
10999 int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
11000 const void *key, size_t key_sz,
11001 void *value, size_t value_sz, __u64 flags)
11002 {
11003 int err;
11004
11005 err = validate_map_op(map, key_sz, value_sz, true);
11006 if (err)
11007 return libbpf_err(err);
11008
11009 return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags);
11010 }
11011
11012 int bpf_map__get_next_key(const struct bpf_map *map,
11013 const void *cur_key, void *next_key, size_t key_sz)
11014 {
11015 int err;
11016
11017 err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
11018 if (err)
11019 return libbpf_err(err);
11020
11021 return bpf_map_get_next_key(map->fd, cur_key, next_key);
11022 }
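/* Typed map API usage sketch (hypothetical key/value types, error handling
 * elided); passing the explicit sizes lets libbpf validate them against the
 * map definition before issuing the syscall:
 *
 *	__u32 key = 0;
 *	struct my_value val;
 *
 *	err = bpf_map__lookup_elem(map, &key, sizeof(key), &val, sizeof(val), 0);
 *	if (!err)
 *		err = bpf_map__update_elem(map, &key, sizeof(key), &val, sizeof(val), BPF_EXIST);
 */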
11023
11024 long libbpf_get_error(const void *ptr)
11025 {
11026 if (!IS_ERR_OR_NULL(ptr))
11027 return 0;
11028
11029 if (IS_ERR(ptr))
11030 errno = -PTR_ERR(ptr);
11031
11032 /* If ptr == NULL, then errno should be already set by the failing
11033 * API, because libbpf never returns NULL on success and it now always
11034 * sets errno on error. So no extra errno handling for ptr == NULL
11035 * case.
11036 */
11037 return -errno;
11038 }
11039
11040 /* Replace link's underlying BPF program with the new one */
11041 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
11042 {
11043 int ret;
11044 int prog_fd = bpf_program__fd(prog);
11045
11046 if (prog_fd < 0) {
11047 pr_warn("prog '%s': can't use BPF program without FD (was it loaded?)\n",
11048 prog->name);
11049 return libbpf_err(-EINVAL);
11050 }
11051
11052 ret = bpf_link_update(bpf_link__fd(link), prog_fd, NULL);
11053 return libbpf_err_errno(ret);
11054 }
11055
11056 /* Release "ownership" of underlying BPF resource (typically, BPF program
11057 * attached to some BPF hook, e.g., tracepoint, kprobe, etc). A disconnected
11058 * link, when destroyed through a bpf_link__destroy() call, won't attempt to
11059 * detach/unregister that BPF resource. This is useful in situations where,
11060 * say, attached BPF program has to outlive userspace program that attached it
11061 * in the system. Depending on type of BPF program, though, there might be
11062 * additional steps (like pinning BPF program in BPF FS) necessary to ensure
11063 * exit of userspace program doesn't trigger automatic detachment and clean up
11064 * inside the kernel.
11065 */
11066 void bpf_link__disconnect(struct bpf_link *link)
11067 {
11068 link->disconnected = true;
11069 }
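/* Usage sketch: keep a BPF program attached after the loader process exits by
 * pinning the link and then disconnecting it, so that bpf_link__destroy()
 * only frees the user-space object. The pin path below is only an example.
 *
 *	err = bpf_link__pin(link, "/sys/fs/bpf/my_link");
 *	if (!err)
 *		bpf_link__disconnect(link);
 *	bpf_link__destroy(link);
 */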
11070
11071 int bpf_link__destroy(struct bpf_link *link)
11072 {
11073 int err = 0;
11074
11075 if (IS_ERR_OR_NULL(link))
11076 return 0;
11077
11078 if (!link->disconnected && link->detach)
11079 err = link->detach(link);
11080 if (link->pin_path)
11081 free(link->pin_path);
11082 if (link->dealloc)
11083 link->dealloc(link);
11084 else
11085 free(link);
11086
11087 return libbpf_err(err);
11088 }
11089
11090 int bpf_link__fd(const struct bpf_link *link)
11091 {
11092 return link->fd;
11093 }
11094
11095 const char *bpf_link__pin_path(const struct bpf_link *link)
11096 {
11097 return link->pin_path;
11098 }
11099
11100 static int bpf_link__detach_fd(struct bpf_link *link)
11101 {
11102 return libbpf_err_errno(close(link->fd));
11103 }
11104
11105 struct bpf_link *bpf_link__open(const char *path)
11106 {
11107 struct bpf_link *link;
11108 int fd;
11109
11110 fd = bpf_obj_get(path);
11111 if (fd < 0) {
11112 fd = -errno;
11113 pr_warn("failed to open link at %s: %d\n", path, fd);
11114 return libbpf_err_ptr(fd);
11115 }
11116
11117 link = calloc(1, sizeof(*link));
11118 if (!link) {
11119 close(fd);
11120 return libbpf_err_ptr(-ENOMEM);
11121 }
11122 link->detach = &bpf_link__detach_fd;
11123 link->fd = fd;
11124
11125 link->pin_path = strdup(path);
11126 if (!link->pin_path) {
11127 bpf_link__destroy(link);
11128 return libbpf_err_ptr(-ENOMEM);
11129 }
11130
11131 return link;
11132 }
11133
11134 int bpf_link__detach(struct bpf_link *link)
11135 {
11136 return bpf_link_detach(link->fd) ? -errno : 0;
11137 }
11138
11139 int bpf_link__pin(struct bpf_link *link, const char *path)
11140 {
11141 int err;
11142
11143 if (link->pin_path)
11144 return libbpf_err(-EBUSY);
11145 err = make_parent_dir(path);
11146 if (err)
11147 return libbpf_err(err);
11148 err = check_path(path);
11149 if (err)
11150 return libbpf_err(err);
11151
11152 link->pin_path = strdup(path);
11153 if (!link->pin_path)
11154 return libbpf_err(-ENOMEM);
11155
11156 if (bpf_obj_pin(link->fd, link->pin_path)) {
11157 err = -errno;
11158 zfree(&link->pin_path);
11159 return libbpf_err(err);
11160 }
11161
11162 pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
11163 return 0;
11164 }
11165
11166 int bpf_link__unpin(struct bpf_link *link)
11167 {
11168 int err;
11169
11170 if (!link->pin_path)
11171 return libbpf_err(-EINVAL);
11172
11173 err = unlink(link->pin_path);
11174 if (err != 0)
11175 return -errno;
11176
11177 pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
11178 zfree(&link->pin_path);
11179 return 0;
11180 }
11181
11182 struct bpf_link_perf {
11183 struct bpf_link link;
11184 int perf_event_fd;
11185 /* legacy kprobe support: keep track of probe identifier and type */
11186 char *legacy_probe_name;
11187 bool legacy_is_kprobe;
11188 bool legacy_is_retprobe;
11189 };
11190
11191 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe);
11192 static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe);
11193
11194 static int bpf_link_perf_detach(struct bpf_link *link)
11195 {
11196 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
11197 int err = 0;
11198
11199 if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
11200 err = -errno;
11201
11202 if (perf_link->perf_event_fd != link->fd)
11203 close(perf_link->perf_event_fd);
11204 close(link->fd);
11205
11206 /* legacy uprobe/kprobe needs to be removed after perf event fd closure */
11207 if (perf_link->legacy_probe_name) {
11208 if (perf_link->legacy_is_kprobe) {
11209 err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
11210 perf_link->legacy_is_retprobe);
11211 } else {
11212 err = remove_uprobe_event_legacy(perf_link->legacy_probe_name,
11213 perf_link->legacy_is_retprobe);
11214 }
11215 }
11216
11217 return err;
11218 }
11219
11220 static void bpf_link_perf_dealloc(struct bpf_link *link)
11221 {
11222 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
11223
11224 free(perf_link->legacy_probe_name);
11225 free(perf_link);
11226 }
11227
11228 struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
11229 const struct bpf_perf_event_opts *opts)
11230 {
11231 struct bpf_link_perf *link;
11232 int prog_fd, link_fd = -1, err;
11233 bool force_ioctl_attach;
11234
11235 if (!OPTS_VALID(opts, bpf_perf_event_opts))
11236 return libbpf_err_ptr(-EINVAL);
11237
11238 if (pfd < 0) {
11239 pr_warn("prog '%s': invalid perf event FD %d\n",
11240 prog->name, pfd);
11241 return libbpf_err_ptr(-EINVAL);
11242 }
11243 prog_fd = bpf_program__fd(prog);
11244 if (prog_fd < 0) {
11245 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
11246 prog->name);
11247 return libbpf_err_ptr(-EINVAL);
11248 }
11249
11250 link = calloc(1, sizeof(*link));
11251 if (!link)
11252 return libbpf_err_ptr(-ENOMEM);
11253 link->link.detach = &bpf_link_perf_detach;
11254 link->link.dealloc = &bpf_link_perf_dealloc;
11255 link->perf_event_fd = pfd;
11256
11257 force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false);
11258 if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) {
11259 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
11260 .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
11261
11262 link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
11263 if (link_fd < 0) {
11264 err = -errno;
11265 pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %s\n",
11266 prog->name, pfd, errstr(err));
11267 goto err_out;
11268 }
11269 link->link.fd = link_fd;
11270 } else {
11271 if (OPTS_GET(opts, bpf_cookie, 0)) {
11272 pr_warn("prog '%s': user context value is not supported\n", prog->name);
11273 err = -EOPNOTSUPP;
11274 goto err_out;
11275 }
11276
11277 if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
11278 err = -errno;
11279 pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
11280 prog->name, pfd, errstr(err));
11281 if (err == -EPROTO)
11282 pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
11283 prog->name, pfd);
11284 goto err_out;
11285 }
11286 link->link.fd = pfd;
11287 }
11288
11289 if (!OPTS_GET(opts, dont_enable, false)) {
11290 if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
11291 err = -errno;
11292 pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
11293 prog->name, pfd, errstr(err));
11294 goto err_out;
11295 }
11296 }
11297
11298 return &link->link;
11299 err_out:
11300 if (link_fd >= 0)
11301 close(link_fd);
11302 free(link);
11303 return libbpf_err_ptr(err);
11304 }
11305
11306 struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
11307 {
11308 return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
11309 }
11310
11311 /*
11312 * this function is expected to parse an integer in the range of [0, 2^31-1]
11313 * from the given file using scanf format string fmt. If the actual parsed
11314 * value is negative, the result might be indistinguishable from an error
11315 */
11316 static int parse_uint_from_file(const char *file, const char *fmt)
11317 {
11318 int err, ret;
11319 FILE *f;
11320
11321 f = fopen(file, "re");
11322 if (!f) {
11323 err = -errno;
11324 pr_debug("failed to open '%s': %s\n", file, errstr(err));
11325 return err;
11326 }
11327 err = fscanf(f, fmt, &ret);
11328 if (err != 1) {
11329 err = err == EOF ? -EIO : -errno;
11330 pr_debug("failed to parse '%s': %s\n", file, errstr(err));
11331 fclose(f);
11332 return err;
11333 }
11334 fclose(f);
11335 return ret;
11336 }
11337
11338 static int determine_kprobe_perf_type(void)
11339 {
11340 const char *file = "/sys/bus/event_source/devices/kprobe/type";
11341
11342 return parse_uint_from_file(file, "%d\n");
11343 }
11344
11345 static int determine_uprobe_perf_type(void)
11346 {
11347 const char *file = "/sys/bus/event_source/devices/uprobe/type";
11348
11349 return parse_uint_from_file(file, "%d\n");
11350 }
11351
11352 static int determine_kprobe_retprobe_bit(void)
11353 {
11354 const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
11355
11356 return parse_uint_from_file(file, "config:%d\n");
11357 }
11358
11359 static int determine_uprobe_retprobe_bit(void)
11360 {
11361 const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
11362
11363 return parse_uint_from_file(file, "config:%d\n");
11364 }
11365
11366 #define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
11367 #define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
11368
11369 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
11370 uint64_t offset, int pid, size_t ref_ctr_off)
11371 {
11372 const size_t attr_sz = sizeof(struct perf_event_attr);
11373 struct perf_event_attr attr;
11374 int type, pfd;
11375
11376 if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
11377 return -EINVAL;
11378
11379 memset(&attr, 0, attr_sz);
11380
11381 type = uprobe ? determine_uprobe_perf_type()
11382 : determine_kprobe_perf_type();
11383 if (type < 0) {
11384 pr_warn("failed to determine %s perf type: %s\n",
11385 uprobe ? "uprobe" : "kprobe",
11386 errstr(type));
11387 return type;
11388 }
11389 if (retprobe) {
11390 int bit = uprobe ? determine_uprobe_retprobe_bit()
11391 : determine_kprobe_retprobe_bit();
11392
11393 if (bit < 0) {
11394 pr_warn("failed to determine %s retprobe bit: %s\n",
11395 uprobe ? "uprobe" : "kprobe",
11396 errstr(bit));
11397 return bit;
11398 }
11399 attr.config |= 1 << bit;
11400 }
11401 attr.size = attr_sz;
11402 attr.type = type;
11403 attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
11404 attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
11405 attr.config2 = offset; /* kprobe_addr or probe_offset */
11406
11407 /* pid filter is meaningful only for uprobes */
11408 pfd = syscall(__NR_perf_event_open, &attr,
11409 pid < 0 ? -1 : pid /* pid */,
11410 pid == -1 ? 0 : -1 /* cpu */,
11411 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
11412 return pfd >= 0 ? pfd : -errno;
11413 }
11414
11415 static int append_to_file(const char *file, const char *fmt, ...)
11416 {
11417 int fd, n, err = 0;
11418 va_list ap;
11419 char buf[1024];
11420
11421 va_start(ap, fmt);
11422 n = vsnprintf(buf, sizeof(buf), fmt, ap);
11423 va_end(ap);
11424
11425 if (n < 0 || n >= sizeof(buf))
11426 return -EINVAL;
11427
11428 fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
11429 if (fd < 0)
11430 return -errno;
11431
11432 if (write(fd, buf, n) < 0)
11433 err = -errno;
11434
11435 close(fd);
11436 return err;
11437 }
11438
11439 #define DEBUGFS "/sys/kernel/debug/tracing"
11440 #define TRACEFS "/sys/kernel/tracing"
11441
11442 static bool use_debugfs(void)
11443 {
11444 static int has_debugfs = -1;
11445
11446 if (has_debugfs < 0)
11447 has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0;
11448
11449 return has_debugfs == 1;
11450 }
11451
11452 static const char *tracefs_path(void)
11453 {
11454 return use_debugfs() ? DEBUGFS : TRACEFS;
11455 }
11456
11457 static const char *tracefs_kprobe_events(void)
11458 {
11459 return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events";
11460 }
11461
11462 static const char *tracefs_uprobe_events(void)
11463 {
11464 return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events";
11465 }
11466
11467 static const char *tracefs_available_filter_functions(void)
11468 {
11469 return use_debugfs() ? DEBUGFS"/available_filter_functions"
11470 : TRACEFS"/available_filter_functions";
11471 }
11472
11473 static const char *tracefs_available_filter_functions_addrs(void)
11474 {
11475 return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs"
11476 : TRACEFS"/available_filter_functions_addrs";
11477 }
11478
11479 static void gen_probe_legacy_event_name(char *buf, size_t buf_sz,
11480 const char *name, size_t offset)
11481 {
11482 static int index = 0;
11483 int i;
11484
11485 snprintf(buf, buf_sz, "libbpf_%u_%d_%s_0x%zx", getpid(),
11486 __sync_fetch_and_add(&index, 1), name, offset);
11487
11488 /* sanitize name in the probe name */
11489 for (i = 0; buf[i]; i++) {
11490 if (!isalnum(buf[i]))
11491 buf[i] = '_';
11492 }
11493 }
11494
11495 static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
11496 const char *kfunc_name, size_t offset)
11497 {
11498 return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx",
11499 retprobe ? 'r' : 'p',
11500 retprobe ? "kretprobes" : "kprobes",
11501 probe_name, kfunc_name, offset);
11502 }
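/* For reference, the line appended to kprobe_events by the helper above looks
 * roughly like this (probe name and symbol are illustrative):
 *
 *	p:kprobes/libbpf_1234_0_do_sys_open_0x0 do_sys_open+0x0
 *
 * and a kretprobe uses the 'r' prefix and the "kretprobes" group instead.
 */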
11503
11504 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
11505 {
11506 return append_to_file(tracefs_kprobe_events(), "-:%s/%s",
11507 retprobe ? "kretprobes" : "kprobes", probe_name);
11508 }
11509
11510 static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
11511 {
11512 char file[256];
11513
11514 snprintf(file, sizeof(file), "%s/events/%s/%s/id",
11515 tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name);
11516
11517 return parse_uint_from_file(file, "%d\n");
11518 }
11519
11520 static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
11521 const char *kfunc_name, size_t offset, int pid)
11522 {
11523 const size_t attr_sz = sizeof(struct perf_event_attr);
11524 struct perf_event_attr attr;
11525 int type, pfd, err;
11526
11527 err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset);
11528 if (err < 0) {
11529 pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n",
11530 kfunc_name, offset,
11531 errstr(err));
11532 return err;
11533 }
11534 type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
11535 if (type < 0) {
11536 err = type;
11537 pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
11538 kfunc_name, offset,
11539 errstr(err));
11540 goto err_clean_legacy;
11541 }
11542
11543 memset(&attr, 0, attr_sz);
11544 attr.size = attr_sz;
11545 attr.config = type;
11546 attr.type = PERF_TYPE_TRACEPOINT;
11547
11548 pfd = syscall(__NR_perf_event_open, &attr,
11549 pid < 0 ? -1 : pid, /* pid */
11550 pid == -1 ? 0 : -1, /* cpu */
11551 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
11552 if (pfd < 0) {
11553 err = -errno;
11554 pr_warn("legacy kprobe perf_event_open() failed: %s\n",
11555 errstr(err));
11556 goto err_clean_legacy;
11557 }
11558 return pfd;
11559
11560 err_clean_legacy:
11561 /* Clear the newly added legacy kprobe_event */
11562 remove_kprobe_event_legacy(probe_name, retprobe);
11563 return err;
11564 }
11565
11566 static const char *arch_specific_syscall_pfx(void)
11567 {
11568 #if defined(__x86_64__)
11569 return "x64";
11570 #elif defined(__i386__)
11571 return "ia32";
11572 #elif defined(__s390x__)
11573 return "s390x";
11574 #elif defined(__arm__)
11575 return "arm";
11576 #elif defined(__aarch64__)
11577 return "arm64";
11578 #elif defined(__mips__)
11579 return "mips";
11580 #elif defined(__riscv)
11581 return "riscv";
11582 #elif defined(__powerpc__)
11583 return "powerpc";
11584 #elif defined(__powerpc64__)
11585 return "powerpc64";
11586 #else
11587 return NULL;
11588 #endif
11589 }
11590
11591 int probe_kern_syscall_wrapper(int token_fd)
11592 {
11593 char syscall_name[64];
11594 const char *ksys_pfx;
11595
11596 ksys_pfx = arch_specific_syscall_pfx();
11597 if (!ksys_pfx)
11598 return 0;
11599
11600 snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);
11601
11602 if (determine_kprobe_perf_type() >= 0) {
11603 int pfd;
11604
11605 pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0);
11606 if (pfd >= 0)
11607 close(pfd);
11608
11609 return pfd >= 0 ? 1 : 0;
11610 } else { /* legacy mode */
11611 char probe_name[MAX_EVENT_NAME_LEN];
11612
11613 gen_probe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
11614 if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
11615 return 0;
11616
11617 (void)remove_kprobe_event_legacy(probe_name, false);
11618 return 1;
11619 }
11620 }
11621
11622 struct bpf_link *
11623 bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
11624 const char *func_name,
11625 const struct bpf_kprobe_opts *opts)
11626 {
11627 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
11628 enum probe_attach_mode attach_mode;
11629 char *legacy_probe = NULL;
11630 struct bpf_link *link;
11631 size_t offset;
11632 bool retprobe, legacy;
11633 int pfd, err;
11634
11635 if (!OPTS_VALID(opts, bpf_kprobe_opts))
11636 return libbpf_err_ptr(-EINVAL);
11637
11638 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
11639 retprobe = OPTS_GET(opts, retprobe, false);
11640 offset = OPTS_GET(opts, offset, 0);
11641 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11642
11643 legacy = determine_kprobe_perf_type() < 0;
11644 switch (attach_mode) {
11645 case PROBE_ATTACH_MODE_LEGACY:
11646 legacy = true;
11647 pe_opts.force_ioctl_attach = true;
11648 break;
11649 case PROBE_ATTACH_MODE_PERF:
11650 if (legacy)
11651 return libbpf_err_ptr(-ENOTSUP);
11652 pe_opts.force_ioctl_attach = true;
11653 break;
11654 case PROBE_ATTACH_MODE_LINK:
11655 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
11656 return libbpf_err_ptr(-ENOTSUP);
11657 break;
11658 case PROBE_ATTACH_MODE_DEFAULT:
11659 break;
11660 default:
11661 return libbpf_err_ptr(-EINVAL);
11662 }
11663
11664 if (!legacy) {
11665 pfd = perf_event_open_probe(false /* uprobe */, retprobe,
11666 func_name, offset,
11667 -1 /* pid */, 0 /* ref_ctr_off */);
11668 } else {
11669 char probe_name[MAX_EVENT_NAME_LEN];
11670
11671 gen_probe_legacy_event_name(probe_name, sizeof(probe_name),
11672 func_name, offset);
11673
11674 legacy_probe = strdup(probe_name);
11675 if (!legacy_probe)
11676 return libbpf_err_ptr(-ENOMEM);
11677
11678 pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name,
11679 offset, -1 /* pid */);
11680 }
11681 if (pfd < 0) {
11682 err = -errno;
11683 pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
11684 prog->name, retprobe ? "kretprobe" : "kprobe",
11685 func_name, offset,
11686 errstr(err));
11687 goto err_out;
11688 }
11689 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
11690 err = libbpf_get_error(link);
11691 if (err) {
11692 close(pfd);
11693 pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
11694 prog->name, retprobe ? "kretprobe" : "kprobe",
11695 func_name, offset,
11696 errstr(err));
11697 goto err_clean_legacy;
11698 }
11699 if (legacy) {
11700 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
11701
11702 perf_link->legacy_probe_name = legacy_probe;
11703 perf_link->legacy_is_kprobe = true;
11704 perf_link->legacy_is_retprobe = retprobe;
11705 }
11706
11707 return link;
11708
11709 err_clean_legacy:
11710 if (legacy)
11711 remove_kprobe_event_legacy(legacy_probe, retprobe);
11712 err_out:
11713 free(legacy_probe);
11714 return libbpf_err_ptr(err);
11715 }
11716
11717 struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
11718 bool retprobe,
11719 const char *func_name)
11720 {
11721 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
11722 .retprobe = retprobe,
11723 );
11724
11725 return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
11726 }
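/* Attach usage sketch (symbol name is illustrative); an offset, cookie or
 * attach mode can be passed via bpf_program__attach_kprobe_opts() instead:
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe(prog, false, "tcp_v4_connect");
 *	if (!link)
 *		err = -errno;
 */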
11727
11728 struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
11729 const char *syscall_name,
11730 const struct bpf_ksyscall_opts *opts)
11731 {
11732 LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
11733 char func_name[128];
11734
11735 if (!OPTS_VALID(opts, bpf_ksyscall_opts))
11736 return libbpf_err_ptr(-EINVAL);
11737
11738 if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
11739 /* arch_specific_syscall_pfx() should never return NULL here
11740 * because it is guarded by kernel_supports(). However, since the
11741 * compiler does not know that, we keep an explicit fallback
11742 * conditional as well.
11743 */
11744 snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
11745 arch_specific_syscall_pfx() ? : "", syscall_name);
11746 } else {
11747 snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
11748 }
11749
11750 kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
11751 kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11752
11753 return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
11754 }
11755
11756 /* Adapted from perf/util/string.c */
11757 bool glob_match(const char *str, const char *pat)
11758 {
11759 while (*str && *pat && *pat != '*') {
11760 if (*pat == '?') { /* Matches any single character */
11761 str++;
11762 pat++;
11763 continue;
11764 }
11765 if (*str != *pat)
11766 return false;
11767 str++;
11768 pat++;
11769 }
11770 /* Check wild card */
11771 if (*pat == '*') {
11772 while (*pat == '*')
11773 pat++;
11774 if (!*pat) /* Tail wild card matches all */
11775 return true;
11776 while (*str)
11777 if (glob_match(str++, pat))
11778 return true;
11779 }
11780 return !*str && !*pat;
11781 }
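/* glob_match() examples: "tcp_*" matches "tcp_v4_connect" and "tcp_close",
 * "tcp_v?_connect" matches "tcp_v4_connect" and "tcp_v6_connect" but not
 * "tcp_vX_connect_abc", and a bare "*" matches everything. Only '*' and '?'
 * are supported; there are no character classes.
 */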
11782
11783 struct kprobe_multi_resolve {
11784 const char *pattern;
11785 unsigned long *addrs;
11786 size_t cap;
11787 size_t cnt;
11788 };
11789
11790 struct avail_kallsyms_data {
11791 char **syms;
11792 size_t cnt;
11793 struct kprobe_multi_resolve *res;
11794 };
11795
11796 static int avail_func_cmp(const void *a, const void *b)
11797 {
11798 return strcmp(*(const char **)a, *(const char **)b);
11799 }
11800
11801 static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type,
11802 const char *sym_name, void *ctx)
11803 {
11804 struct avail_kallsyms_data *data = ctx;
11805 struct kprobe_multi_resolve *res = data->res;
11806 int err;
11807
11808 if (!glob_match(sym_name, res->pattern))
11809 return 0;
11810
11811 if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) {
11812 /* Some versions of kernel strip out .llvm.<hash> suffix from
11813 * function names reported in available_filter_functions, but
11814 * don't do so for kallsyms. While this is clearly a kernel
11815 * bug (fixed by [0]) we try to accommodate that in libbpf to
11816 * make multi-kprobe usability a bit better: if no match is
11817 * found, we will strip .llvm. suffix and try one more time.
11818 *
11819 * [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG")
11820 */
11821 char sym_trim[256], *psym_trim = sym_trim;
11822 const char *sym_sfx;
11823
11824 if (!(sym_sfx = strstr(sym_name, ".llvm.")))
11825 return 0;
11826
11827 /* psym_trim vs sym_trim dance is done to avoid pointer vs array
11828 * coercion differences and get proper `const char **` pointer
11829 * which avail_func_cmp() expects
11830 */
11831 snprintf(sym_trim, sizeof(sym_trim), "%.*s", (int)(sym_sfx - sym_name), sym_name);
11832 if (!bsearch(&psym_trim, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp))
11833 return 0;
11834 }
11835
11836 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1);
11837 if (err)
11838 return err;
11839
11840 res->addrs[res->cnt++] = (unsigned long)sym_addr;
11841 return 0;
11842 }
11843
11844 static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res)
11845 {
11846 const char *available_functions_file = tracefs_available_filter_functions();
11847 struct avail_kallsyms_data data;
11848 char sym_name[500];
11849 FILE *f;
11850 int err = 0, ret, i;
11851 char **syms = NULL;
11852 size_t cap = 0, cnt = 0;
11853
11854 f = fopen(available_functions_file, "re");
11855 if (!f) {
11856 err = -errno;
11857 pr_warn("failed to open %s: %s\n", available_functions_file, errstr(err));
11858 return err;
11859 }
11860
11861 while (true) {
11862 char *name;
11863
11864 ret = fscanf(f, "%499s%*[^\n]\n", sym_name);
11865 if (ret == EOF && feof(f))
11866 break;
11867
11868 if (ret != 1) {
11869 pr_warn("failed to parse available_filter_functions entry: %d\n", ret);
11870 err = -EINVAL;
11871 goto cleanup;
11872 }
11873
11874 if (!glob_match(sym_name, res->pattern))
11875 continue;
11876
11877 err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1);
11878 if (err)
11879 goto cleanup;
11880
11881 name = strdup(sym_name);
11882 if (!name) {
11883 err = -errno;
11884 goto cleanup;
11885 }
11886
11887 syms[cnt++] = name;
11888 }
11889
11890 /* no entries found, bail out */
11891 if (cnt == 0) {
11892 err = -ENOENT;
11893 goto cleanup;
11894 }
11895
11896 /* sort available functions */
11897 qsort(syms, cnt, sizeof(*syms), avail_func_cmp);
11898
11899 data.syms = syms;
11900 data.res = res;
11901 data.cnt = cnt;
11902 libbpf_kallsyms_parse(avail_kallsyms_cb, &data);
11903
11904 if (res->cnt == 0)
11905 err = -ENOENT;
11906
11907 cleanup:
11908 for (i = 0; i < cnt; i++)
11909 free((char *)syms[i]);
11910 free(syms);
11911
11912 fclose(f);
11913 return err;
11914 }
11915
11916 static bool has_available_filter_functions_addrs(void)
11917 {
11918 return access(tracefs_available_filter_functions_addrs(), R_OK) != -1;
11919 }
11920
11921 static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res)
11922 {
11923 const char *available_path = tracefs_available_filter_functions_addrs();
11924 char sym_name[500];
11925 FILE *f;
11926 int ret, err = 0;
11927 unsigned long long sym_addr;
11928
11929 f = fopen(available_path, "re");
11930 if (!f) {
11931 err = -errno;
11932 pr_warn("failed to open %s: %s\n", available_path, errstr(err));
11933 return err;
11934 }
11935
11936 while (true) {
11937 ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name);
11938 if (ret == EOF && feof(f))
11939 break;
11940
11941 if (ret != 2) {
11942 pr_warn("failed to parse available_filter_functions_addrs entry: %d\n",
11943 ret);
11944 err = -EINVAL;
11945 goto cleanup;
11946 }
11947
11948 if (!glob_match(sym_name, res->pattern))
11949 continue;
11950
11951 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap,
11952 sizeof(*res->addrs), res->cnt + 1);
11953 if (err)
11954 goto cleanup;
11955
11956 res->addrs[res->cnt++] = (unsigned long)sym_addr;
11957 }
11958
11959 if (res->cnt == 0)
11960 err = -ENOENT;
11961
11962 cleanup:
11963 fclose(f);
11964 return err;
11965 }
11966
11967 struct bpf_link *
11968 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
11969 const char *pattern,
11970 const struct bpf_kprobe_multi_opts *opts)
11971 {
11972 LIBBPF_OPTS(bpf_link_create_opts, lopts);
11973 struct kprobe_multi_resolve res = {
11974 .pattern = pattern,
11975 };
11976 enum bpf_attach_type attach_type;
11977 struct bpf_link *link = NULL;
11978 const unsigned long *addrs;
11979 int err, link_fd, prog_fd;
11980 bool retprobe, session, unique_match;
11981 const __u64 *cookies;
11982 const char **syms;
11983 size_t cnt;
11984
11985 if (!OPTS_VALID(opts, bpf_kprobe_multi_opts))
11986 return libbpf_err_ptr(-EINVAL);
11987
11988 prog_fd = bpf_program__fd(prog);
11989 if (prog_fd < 0) {
11990 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
11991 prog->name);
11992 return libbpf_err_ptr(-EINVAL);
11993 }
11994
11995 syms = OPTS_GET(opts, syms, false);
11996 addrs = OPTS_GET(opts, addrs, false);
11997 cnt = OPTS_GET(opts, cnt, false);
11998 cookies = OPTS_GET(opts, cookies, false);
11999 unique_match = OPTS_GET(opts, unique_match, false);
12000
12001 if (!pattern && !addrs && !syms)
12002 return libbpf_err_ptr(-EINVAL);
12003 if (pattern && (addrs || syms || cookies || cnt))
12004 return libbpf_err_ptr(-EINVAL);
12005 if (!pattern && !cnt)
12006 return libbpf_err_ptr(-EINVAL);
12007 if (!pattern && unique_match)
12008 return libbpf_err_ptr(-EINVAL);
12009 if (addrs && syms)
12010 return libbpf_err_ptr(-EINVAL);
12011
12012 if (pattern) {
12013 if (has_available_filter_functions_addrs())
12014 err = libbpf_available_kprobes_parse(&res);
12015 else
12016 err = libbpf_available_kallsyms_parse(&res);
12017 if (err)
12018 goto error;
12019
12020 if (unique_match && res.cnt != 1) {
12021 pr_warn("prog '%s': failed to find a unique match for '%s' (%zu matches)\n",
12022 prog->name, pattern, res.cnt);
12023 err = -EINVAL;
12024 goto error;
12025 }
12026
12027 addrs = res.addrs;
12028 cnt = res.cnt;
12029 }
12030
12031 retprobe = OPTS_GET(opts, retprobe, false);
12032 session = OPTS_GET(opts, session, false);
12033
12034 if (retprobe && session)
12035 return libbpf_err_ptr(-EINVAL);
12036
12037 attach_type = session ? BPF_TRACE_KPROBE_SESSION : BPF_TRACE_KPROBE_MULTI;
12038
12039 lopts.kprobe_multi.syms = syms;
12040 lopts.kprobe_multi.addrs = addrs;
12041 lopts.kprobe_multi.cookies = cookies;
12042 lopts.kprobe_multi.cnt = cnt;
12043 lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0;
12044
12045 link = calloc(1, sizeof(*link));
12046 if (!link) {
12047 err = -ENOMEM;
12048 goto error;
12049 }
12050 link->detach = &bpf_link__detach_fd;
12051
12052 link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts);
12053 if (link_fd < 0) {
12054 err = -errno;
12055 pr_warn("prog '%s': failed to attach: %s\n",
12056 prog->name, errstr(err));
12057 goto error;
12058 }
12059 link->fd = link_fd;
12060 free(res.addrs);
12061 return link;
12062
12063 error:
12064 free(link);
12065 free(res.addrs);
12066 return libbpf_err_ptr(err);
12067 }
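/* Multi-kprobe usage sketch (pattern is illustrative); a single link attaches
 * the program to every kernel function matching the glob, resolved via
 * available_filter_functions(_addrs) as shown above:
 *
 *	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, .retprobe = false);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe_multi_opts(prog, "tcp_*", &opts);
 *	if (!link)
 *		err = -errno;
 */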
12068
12069 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12070 {
12071 DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
12072 unsigned long offset = 0;
12073 const char *func_name;
12074 char *func;
12075 int n;
12076
12077 *link = NULL;
12078
12079 /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
12080 if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
12081 return 0;
12082
12083 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
12084 if (opts.retprobe)
12085 func_name = prog->sec_name + sizeof("kretprobe/") - 1;
12086 else
12087 func_name = prog->sec_name + sizeof("kprobe/") - 1;
12088
12089 n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
12090 if (n < 1) {
12091 pr_warn("kprobe name is invalid: %s\n", func_name);
12092 return -EINVAL;
12093 }
12094 if (opts.retprobe && offset != 0) {
12095 free(func);
12096 pr_warn("kretprobes do not support offset specification\n");
12097 return -EINVAL;
12098 }
12099
12100 opts.offset = offset;
12101 *link = bpf_program__attach_kprobe_opts(prog, func, &opts);
12102 free(func);
12103 return libbpf_get_error(*link);
12104 }
12105
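/* Illustrative sketch of the BPF-side section names that attach_kprobe()
 * above parses for auto-attach; the function name is hypothetical:
 *
 *	SEC("kprobe/tcp_v4_connect")       // kprobe at function entry
 *	SEC("kprobe/tcp_v4_connect+16")    // kprobe at entry plus offset
 *	SEC("kretprobe/tcp_v4_connect")    // kretprobe (no offset allowed)
 *	SEC("kprobe")                      // valid, but no auto-attach
 */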
12106 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12107 {
12108 LIBBPF_OPTS(bpf_ksyscall_opts, opts);
12109 const char *syscall_name;
12110
12111 *link = NULL;
12112
12113 /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
12114 if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
12115 return 0;
12116
12117 opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
12118 if (opts.retprobe)
12119 syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
12120 else
12121 syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;
12122
12123 *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
12124 return *link ? 0 : -errno;
12125 }
12126
12127 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12128 {
12129 LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
12130 const char *spec;
12131 char *pattern;
12132 int n;
12133
12134 *link = NULL;
12135
12136 /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
12137 if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
12138 strcmp(prog->sec_name, "kretprobe.multi") == 0)
12139 return 0;
12140
12141 opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
12142 if (opts.retprobe)
12143 spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
12144 else
12145 spec = prog->sec_name + sizeof("kprobe.multi/") - 1;
12146
12147 n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
12148 if (n < 1) {
12149 pr_warn("kprobe multi pattern is invalid: %s\n", spec);
12150 return -EINVAL;
12151 }
12152
12153 *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
12154 free(pattern);
12155 return libbpf_get_error(*link);
12156 }
12157
12158 static int attach_kprobe_session(const struct bpf_program *prog, long cookie,
12159 struct bpf_link **link)
12160 {
12161 LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, .session = true);
12162 const char *spec;
12163 char *pattern;
12164 int n;
12165
12166 *link = NULL;
12167
12168 /* no auto-attach for SEC("kprobe.session") */
12169 if (strcmp(prog->sec_name, "kprobe.session") == 0)
12170 return 0;
12171
12172 spec = prog->sec_name + sizeof("kprobe.session/") - 1;
12173 n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
12174 if (n < 1) {
12175 pr_warn("kprobe session pattern is invalid: %s\n", spec);
12176 return -EINVAL;
12177 }
12178
12179 *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
12180 free(pattern);
12181 return *link ? 0 : -errno;
12182 }
12183
12184 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12185 {
12186 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
12187 LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
12188 int n, ret = -EINVAL;
12189
12190 *link = NULL;
12191
12192 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
12193 &probe_type, &binary_path, &func_name);
12194 switch (n) {
12195 case 1:
12196 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
12197 ret = 0;
12198 break;
12199 case 3:
12200 opts.session = str_has_pfx(probe_type, "uprobe.session");
12201 opts.retprobe = str_has_pfx(probe_type, "uretprobe.multi");
12202
12203 *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts);
12204 ret = libbpf_get_error(*link);
12205 break;
12206 default:
12207 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
12208 prog->sec_name);
12209 break;
12210 }
12211 free(probe_type);
12212 free(binary_path);
12213 free(func_name);
12214 return ret;
12215 }
12216
12217 static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
12218 const char *binary_path, size_t offset)
12219 {
12220 return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx",
12221 retprobe ? 'r' : 'p',
12222 retprobe ? "uretprobes" : "uprobes",
12223 probe_name, binary_path, offset);
12224 }
12225
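/* For illustration: a line appended to tracefs uprobe_events by
 * add_uprobe_event_legacy() above looks roughly like the following (the
 * probe name and binary path are hypothetical):
 *
 *	p:uprobes/libbpf_1234_libc_0x9b1a0 /usr/lib64/libc.so.6:0x9b1a0
 *
 * remove_uprobe_event_legacy() below later appends "-:uprobes/<name>"
 * (or "-:uretprobes/<name>") to delete that event again.
 */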
12226 static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
12227 {
12228 return append_to_file(tracefs_uprobe_events(), "-:%s/%s",
12229 retprobe ? "uretprobes" : "uprobes", probe_name);
12230 }
12231
12232 static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
12233 {
12234 char file[512];
12235
12236 snprintf(file, sizeof(file), "%s/events/%s/%s/id",
12237 tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name);
12238
12239 return parse_uint_from_file(file, "%d\n");
12240 }
12241
12242 static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
12243 const char *binary_path, size_t offset, int pid)
12244 {
12245 const size_t attr_sz = sizeof(struct perf_event_attr);
12246 struct perf_event_attr attr;
12247 int type, pfd, err;
12248
12249 err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset);
12250 if (err < 0) {
12251 pr_warn("failed to add legacy uprobe event for %s:0x%zx: %s\n",
12252 binary_path, (size_t)offset, errstr(err));
12253 return err;
12254 }
12255 type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
12256 if (type < 0) {
12257 err = type;
12258 pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %s\n",
12259 binary_path, offset, errstr(err));
12260 goto err_clean_legacy;
12261 }
12262
12263 memset(&attr, 0, attr_sz);
12264 attr.size = attr_sz;
12265 attr.config = type;
12266 attr.type = PERF_TYPE_TRACEPOINT;
12267
12268 pfd = syscall(__NR_perf_event_open, &attr,
12269 pid < 0 ? -1 : pid, /* pid */
12270 pid == -1 ? 0 : -1, /* cpu */
12271 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
12272 if (pfd < 0) {
12273 err = -errno;
12274 pr_warn("legacy uprobe perf_event_open() failed: %s\n", errstr(err));
12275 goto err_clean_legacy;
12276 }
12277 return pfd;
12278
12279 err_clean_legacy:
12280 /* Clear the newly added legacy uprobe_event */
12281 remove_uprobe_event_legacy(probe_name, retprobe);
12282 return err;
12283 }
12284
12285 /* Find offset of function name in archive specified by path. Currently
12286 * supported are .zip files that do not compress their contents, as used on
12287 * Android in the form of APKs, for example. "file_name" is the name of the ELF
12288 * file inside the archive. "func_name" matches symbol name or name@@LIB for
12289 * library functions.
12290 *
12291 * An overview of the APK format specifically is provided here:
12292 * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents
12293 */
12294 static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name,
12295 const char *func_name)
12296 {
12297 struct zip_archive *archive;
12298 struct zip_entry entry;
12299 long ret;
12300 Elf *elf;
12301
12302 archive = zip_archive_open(archive_path);
12303 if (IS_ERR(archive)) {
12304 ret = PTR_ERR(archive);
12305 pr_warn("zip: failed to open %s: %ld\n", archive_path, ret);
12306 return ret;
12307 }
12308
12309 ret = zip_archive_find_entry(archive, file_name, &entry);
12310 if (ret) {
12311 pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name,
12312 archive_path, ret);
12313 goto out;
12314 }
12315 pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path,
12316 (unsigned long)entry.data_offset);
12317
12318 if (entry.compression) {
12319 pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name,
12320 archive_path);
12321 ret = -LIBBPF_ERRNO__FORMAT;
12322 goto out;
12323 }
12324
12325 elf = elf_memory((void *)entry.data, entry.data_length);
12326 if (!elf) {
12327 pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path,
12328 elf_errmsg(-1));
12329 ret = -LIBBPF_ERRNO__LIBELF;
12330 goto out;
12331 }
12332
12333 ret = elf_find_func_offset(elf, file_name, func_name);
12334 if (ret > 0) {
12335 pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n",
12336 func_name, file_name, archive_path, entry.data_offset, ret,
12337 ret + entry.data_offset);
12338 ret += entry.data_offset;
12339 }
12340 elf_end(elf);
12341
12342 out:
12343 zip_archive_close(archive);
12344 return ret;
12345 }
12346
12347 static const char *arch_specific_lib_paths(void)
12348 {
12349 /*
12350 * Based on https://packages.debian.org/sid/libc6.
12351 *
12352 * Assume that the traced program is built for the same architecture
12353 * as libbpf, which should cover the vast majority of cases.
12354 */
12355 #if defined(__x86_64__)
12356 return "/lib/x86_64-linux-gnu";
12357 #elif defined(__i386__)
12358 return "/lib/i386-linux-gnu";
12359 #elif defined(__s390x__)
12360 return "/lib/s390x-linux-gnu";
12361 #elif defined(__arm__) && defined(__SOFTFP__)
12362 return "/lib/arm-linux-gnueabi";
12363 #elif defined(__arm__) && !defined(__SOFTFP__)
12364 return "/lib/arm-linux-gnueabihf";
12365 #elif defined(__aarch64__)
12366 return "/lib/aarch64-linux-gnu";
12367 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
12368 return "/lib/mips64el-linux-gnuabi64";
12369 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
12370 return "/lib/mipsel-linux-gnu";
12371 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
12372 return "/lib/powerpc64le-linux-gnu";
12373 #elif defined(__sparc__) && defined(__arch64__)
12374 return "/lib/sparc64-linux-gnu";
12375 #elif defined(__riscv) && __riscv_xlen == 64
12376 return "/lib/riscv64-linux-gnu";
12377 #else
12378 return NULL;
12379 #endif
12380 }
12381
12382 /* Get full path to program/shared library. */
12383 static int resolve_full_path(const char *file, char *result, size_t result_sz)
12384 {
12385 const char *search_paths[3] = {};
12386 int i, perm;
12387
12388 if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
12389 search_paths[0] = getenv("LD_LIBRARY_PATH");
12390 search_paths[1] = "/usr/lib64:/usr/lib";
12391 search_paths[2] = arch_specific_lib_paths();
12392 perm = R_OK;
12393 } else {
12394 search_paths[0] = getenv("PATH");
12395 search_paths[1] = "/usr/bin:/usr/sbin";
12396 perm = R_OK | X_OK;
12397 }
12398
12399 for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
12400 const char *s;
12401
12402 if (!search_paths[i])
12403 continue;
12404 for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
12405 const char *next_path;
12406 int seg_len;
12407
12408 if (s[0] == ':')
12409 s++;
12410 next_path = strchr(s, ':');
12411 seg_len = next_path ? next_path - s : strlen(s);
12412 if (!seg_len)
12413 continue;
12414 snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
12415 /* ensure it has required permissions */
12416 if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0)
12417 continue;
12418 pr_debug("resolved '%s' to '%s'\n", file, result);
12419 return 0;
12420 }
12421 }
12422 return -ENOENT;
12423 }
12424
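/* Usage sketch for resolve_full_path() above (illustrative; "use" is a
 * hypothetical caller): a bare file name is tried against each search
 * path segment in order and the first readable candidate wins.
 *
 *	char path[PATH_MAX];
 *
 *	if (resolve_full_path("libc.so.6", path, sizeof(path)) == 0)
 *		// e.g. "/usr/lib64/libc.so.6" or
 *		// "/lib/x86_64-linux-gnu/libc.so.6", whichever exists
 *		use(path);
 */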
12425 struct bpf_link *
12426 bpf_program__attach_uprobe_multi(const struct bpf_program *prog,
12427 pid_t pid,
12428 const char *path,
12429 const char *func_pattern,
12430 const struct bpf_uprobe_multi_opts *opts)
12431 {
12432 const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL;
12433 LIBBPF_OPTS(bpf_link_create_opts, lopts);
12434 unsigned long *resolved_offsets = NULL;
12435 enum bpf_attach_type attach_type;
12436 int err = 0, link_fd, prog_fd;
12437 struct bpf_link *link = NULL;
12438 char full_path[PATH_MAX];
12439 bool retprobe, session;
12440 const __u64 *cookies;
12441 const char **syms;
12442 size_t cnt;
12443
12444 if (!OPTS_VALID(opts, bpf_uprobe_multi_opts))
12445 return libbpf_err_ptr(-EINVAL);
12446
12447 prog_fd = bpf_program__fd(prog);
12448 if (prog_fd < 0) {
12449 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
12450 prog->name);
12451 return libbpf_err_ptr(-EINVAL);
12452 }
12453
12454 syms = OPTS_GET(opts, syms, NULL);
12455 offsets = OPTS_GET(opts, offsets, NULL);
12456 ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL);
12457 cookies = OPTS_GET(opts, cookies, NULL);
12458 cnt = OPTS_GET(opts, cnt, 0);
12459 retprobe = OPTS_GET(opts, retprobe, false);
12460 session = OPTS_GET(opts, session, false);
12461
12462 /*
12463 	 * A user can specify 2 mutually exclusive sets of inputs:
12464 *
12465 * 1) use only path/func_pattern/pid arguments
12466 *
12467 * 2) use path/pid with allowed combinations of:
12468 * syms/offsets/ref_ctr_offsets/cookies/cnt
12469 *
12470 * - syms and offsets are mutually exclusive
12471 * - ref_ctr_offsets and cookies are optional
12472 *
12473 * Any other usage results in error.
12474 */
12475
12476 if (!path)
12477 return libbpf_err_ptr(-EINVAL);
12478 if (!func_pattern && cnt == 0)
12479 return libbpf_err_ptr(-EINVAL);
12480
12481 if (func_pattern) {
12482 if (syms || offsets || ref_ctr_offsets || cookies || cnt)
12483 return libbpf_err_ptr(-EINVAL);
12484 } else {
12485 if (!!syms == !!offsets)
12486 return libbpf_err_ptr(-EINVAL);
12487 }
12488
12489 if (retprobe && session)
12490 return libbpf_err_ptr(-EINVAL);
12491
12492 if (func_pattern) {
12493 if (!strchr(path, '/')) {
12494 err = resolve_full_path(path, full_path, sizeof(full_path));
12495 if (err) {
12496 pr_warn("prog '%s': failed to resolve full path for '%s': %s\n",
12497 prog->name, path, errstr(err));
12498 return libbpf_err_ptr(err);
12499 }
12500 path = full_path;
12501 }
12502
12503 err = elf_resolve_pattern_offsets(path, func_pattern,
12504 &resolved_offsets, &cnt);
12505 if (err < 0)
12506 return libbpf_err_ptr(err);
12507 offsets = resolved_offsets;
12508 } else if (syms) {
12509 err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC);
12510 if (err < 0)
12511 return libbpf_err_ptr(err);
12512 offsets = resolved_offsets;
12513 }
12514
12515 attach_type = session ? BPF_TRACE_UPROBE_SESSION : BPF_TRACE_UPROBE_MULTI;
12516
12517 lopts.uprobe_multi.path = path;
12518 lopts.uprobe_multi.offsets = offsets;
12519 lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets;
12520 lopts.uprobe_multi.cookies = cookies;
12521 lopts.uprobe_multi.cnt = cnt;
12522 lopts.uprobe_multi.flags = retprobe ? BPF_F_UPROBE_MULTI_RETURN : 0;
12523
12524 if (pid == 0)
12525 pid = getpid();
12526 if (pid > 0)
12527 lopts.uprobe_multi.pid = pid;
12528
12529 link = calloc(1, sizeof(*link));
12530 if (!link) {
12531 err = -ENOMEM;
12532 goto error;
12533 }
12534 link->detach = &bpf_link__detach_fd;
12535
12536 link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts);
12537 if (link_fd < 0) {
12538 err = -errno;
12539 pr_warn("prog '%s': failed to attach multi-uprobe: %s\n",
12540 prog->name, errstr(err));
12541 goto error;
12542 }
12543 link->fd = link_fd;
12544 free(resolved_offsets);
12545 return link;
12546
12547 error:
12548 free(resolved_offsets);
12549 free(link);
12550 return libbpf_err_ptr(err);
12551 }
12552
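/* Illustrative sketch of the two input modes accepted by
 * bpf_program__attach_uprobe_multi() above; "prog", the library path and
 * symbol names are hypothetical:
 *
 *	LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
 *	const char *syms[] = { "malloc", "free" };
 *	struct bpf_link *link;
 *
 *	// 1) glob pattern over functions in the binary
 *	link = bpf_program__attach_uprobe_multi(prog, -1, "/usr/lib64/libc.so.6",
 *						"pthread_*", &opts);
 *
 *	// 2) explicit symbol list (syms XOR offsets) plus cnt
 *	opts.syms = syms;
 *	opts.cnt = 2;
 *	link = bpf_program__attach_uprobe_multi(prog, -1, "/usr/lib64/libc.so.6",
 *						NULL, &opts);
 */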
12553 LIBBPF_API struct bpf_link *
12554 bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
12555 const char *binary_path, size_t func_offset,
12556 const struct bpf_uprobe_opts *opts)
12557 {
12558 const char *archive_path = NULL, *archive_sep = NULL;
12559 char *legacy_probe = NULL;
12560 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
12561 enum probe_attach_mode attach_mode;
12562 char full_path[PATH_MAX];
12563 struct bpf_link *link;
12564 size_t ref_ctr_off;
12565 int pfd, err;
12566 bool retprobe, legacy;
12567 const char *func_name;
12568
12569 if (!OPTS_VALID(opts, bpf_uprobe_opts))
12570 return libbpf_err_ptr(-EINVAL);
12571
12572 attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
12573 retprobe = OPTS_GET(opts, retprobe, false);
12574 ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
12575 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
12576
12577 if (!binary_path)
12578 return libbpf_err_ptr(-EINVAL);
12579
12580 /* Check if "binary_path" refers to an archive. */
12581 archive_sep = strstr(binary_path, "!/");
12582 if (archive_sep) {
12583 full_path[0] = '\0';
12584 libbpf_strlcpy(full_path, binary_path,
12585 min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1)));
12586 archive_path = full_path;
12587 binary_path = archive_sep + 2;
12588 } else if (!strchr(binary_path, '/')) {
12589 err = resolve_full_path(binary_path, full_path, sizeof(full_path));
12590 if (err) {
12591 pr_warn("prog '%s': failed to resolve full path for '%s': %s\n",
12592 prog->name, binary_path, errstr(err));
12593 return libbpf_err_ptr(err);
12594 }
12595 binary_path = full_path;
12596 }
12597 func_name = OPTS_GET(opts, func_name, NULL);
12598 if (func_name) {
12599 long sym_off;
12600
12601 if (archive_path) {
12602 sym_off = elf_find_func_offset_from_archive(archive_path, binary_path,
12603 func_name);
12604 binary_path = archive_path;
12605 } else {
12606 sym_off = elf_find_func_offset_from_file(binary_path, func_name);
12607 }
12608 if (sym_off < 0)
12609 return libbpf_err_ptr(sym_off);
12610 func_offset += sym_off;
12611 }
12612
12613 legacy = determine_uprobe_perf_type() < 0;
12614 switch (attach_mode) {
12615 case PROBE_ATTACH_MODE_LEGACY:
12616 legacy = true;
12617 pe_opts.force_ioctl_attach = true;
12618 break;
12619 case PROBE_ATTACH_MODE_PERF:
12620 if (legacy)
12621 return libbpf_err_ptr(-ENOTSUP);
12622 pe_opts.force_ioctl_attach = true;
12623 break;
12624 case PROBE_ATTACH_MODE_LINK:
12625 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
12626 return libbpf_err_ptr(-ENOTSUP);
12627 break;
12628 case PROBE_ATTACH_MODE_DEFAULT:
12629 break;
12630 default:
12631 return libbpf_err_ptr(-EINVAL);
12632 }
12633
12634 if (!legacy) {
12635 pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
12636 func_offset, pid, ref_ctr_off);
12637 } else {
12638 char probe_name[MAX_EVENT_NAME_LEN];
12639
12640 if (ref_ctr_off)
12641 return libbpf_err_ptr(-EINVAL);
12642
12643 gen_probe_legacy_event_name(probe_name, sizeof(probe_name),
12644 strrchr(binary_path, '/') ? : binary_path,
12645 func_offset);
12646
12647 legacy_probe = strdup(probe_name);
12648 if (!legacy_probe)
12649 return libbpf_err_ptr(-ENOMEM);
12650
12651 pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe,
12652 binary_path, func_offset, pid);
12653 }
12654 if (pfd < 0) {
12655 err = -errno;
12656 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
12657 prog->name, retprobe ? "uretprobe" : "uprobe",
12658 binary_path, func_offset,
12659 errstr(err));
12660 goto err_out;
12661 }
12662
12663 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
12664 err = libbpf_get_error(link);
12665 if (err) {
12666 close(pfd);
12667 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
12668 prog->name, retprobe ? "uretprobe" : "uprobe",
12669 binary_path, func_offset,
12670 errstr(err));
12671 goto err_clean_legacy;
12672 }
12673 if (legacy) {
12674 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
12675
12676 perf_link->legacy_probe_name = legacy_probe;
12677 perf_link->legacy_is_kprobe = false;
12678 perf_link->legacy_is_retprobe = retprobe;
12679 }
12680 return link;
12681
12682 err_clean_legacy:
12683 if (legacy)
12684 remove_uprobe_event_legacy(legacy_probe, retprobe);
12685 err_out:
12686 free(legacy_probe);
12687 return libbpf_err_ptr(err);
12688 }
12689
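/* Illustrative sketch of the archive ("!/") path form accepted by
 * bpf_program__attach_uprobe_opts() above; the APK path, library and
 * function names are hypothetical:
 *
 *	LIBBPF_OPTS(bpf_uprobe_opts, opts, .func_name = "Java_com_example_native_fn");
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_uprobe_opts(prog, -1,
 *			"/data/app/com.example/base.apk!/lib/arm64-v8a/libnative.so",
 *			0, &opts); // func_offset = 0, symbol resolved by name
 */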
12690 /* Format of u[ret]probe section definition supporting auto-attach:
12691 * u[ret]probe/binary:function[+offset]
12692 *
12693 * binary can be an absolute/relative path or a filename; the latter is resolved to a
12694 * full binary path via bpf_program__attach_uprobe_opts.
12695 *
12696 * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
12697 * specified (and auto-attach is not possible) or the above format is specified for
12698 * auto-attach.
12699 */
12700 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12701 {
12702 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
12703 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off;
12704 int n, c, ret = -EINVAL;
12705 long offset = 0;
12706
12707 *link = NULL;
12708
12709 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
12710 &probe_type, &binary_path, &func_name);
12711 switch (n) {
12712 case 1:
12713 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
12714 ret = 0;
12715 break;
12716 case 2:
12717 pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
12718 prog->name, prog->sec_name);
12719 break;
12720 case 3:
12721 		/* check if the user specified `+offset`; if so, it must be the
12722 		 * last part of the string, so make sure sscanf read to EOL
12723 */
12724 func_off = strrchr(func_name, '+');
12725 if (func_off) {
12726 n = sscanf(func_off, "+%li%n", &offset, &c);
12727 if (n == 1 && *(func_off + c) == '\0')
12728 func_off[0] = '\0';
12729 else
12730 offset = 0;
12731 }
12732 opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
12733 strcmp(probe_type, "uretprobe.s") == 0;
12734 if (opts.retprobe && offset != 0) {
12735 pr_warn("prog '%s': uretprobes do not support offset specification\n",
12736 prog->name);
12737 break;
12738 }
12739 opts.func_name = func_name;
12740 *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
12741 ret = libbpf_get_error(*link);
12742 break;
12743 default:
12744 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
12745 prog->sec_name);
12746 break;
12747 }
12748 free(probe_type);
12749 free(binary_path);
12750 free(func_name);
12751
12752 return ret;
12753 }
12754
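/* Illustrative sketch of auto-attach section names parsed by
 * attach_uprobe() above; binary and function names are hypothetical:
 *
 *	SEC("uprobe/libc.so.6:malloc")        // by function name
 *	SEC("uprobe//usr/bin/app:run+0x10")   // absolute path plus offset
 *	SEC("uretprobe/libc.so.6:malloc")     // uretprobe (no offset)
 *	SEC("uprobe")                         // valid, but no auto-attach
 */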
12755 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
12756 bool retprobe, pid_t pid,
12757 const char *binary_path,
12758 size_t func_offset)
12759 {
12760 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
12761
12762 return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
12763 }
12764
12765 struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
12766 pid_t pid, const char *binary_path,
12767 const char *usdt_provider, const char *usdt_name,
12768 const struct bpf_usdt_opts *opts)
12769 {
12770 char resolved_path[512];
12771 struct bpf_object *obj = prog->obj;
12772 struct bpf_link *link;
12773 __u64 usdt_cookie;
12774 int err;
12775
12776 if (!OPTS_VALID(opts, bpf_uprobe_opts))
12777 return libbpf_err_ptr(-EINVAL);
12778
12779 if (bpf_program__fd(prog) < 0) {
12780 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
12781 prog->name);
12782 return libbpf_err_ptr(-EINVAL);
12783 }
12784
12785 if (!binary_path)
12786 return libbpf_err_ptr(-EINVAL);
12787
12788 if (!strchr(binary_path, '/')) {
12789 err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
12790 if (err) {
12791 pr_warn("prog '%s': failed to resolve full path for '%s': %s\n",
12792 prog->name, binary_path, errstr(err));
12793 return libbpf_err_ptr(err);
12794 }
12795 binary_path = resolved_path;
12796 }
12797
12798 /* USDT manager is instantiated lazily on first USDT attach. It will
12799 	 * be destroyed together with the BPF object in bpf_object__close().
12800 */
12801 if (IS_ERR(obj->usdt_man))
12802 return libbpf_ptr(obj->usdt_man);
12803 if (!obj->usdt_man) {
12804 obj->usdt_man = usdt_manager_new(obj);
12805 if (IS_ERR(obj->usdt_man))
12806 return libbpf_ptr(obj->usdt_man);
12807 }
12808
12809 usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
12810 link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
12811 usdt_provider, usdt_name, usdt_cookie);
12812 err = libbpf_get_error(link);
12813 if (err)
12814 return libbpf_err_ptr(err);
12815 return link;
12816 }
12817
12818 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12819 {
12820 char *path = NULL, *provider = NULL, *name = NULL;
12821 const char *sec_name;
12822 int n, err;
12823
12824 sec_name = bpf_program__section_name(prog);
12825 if (strcmp(sec_name, "usdt") == 0) {
12826 /* no auto-attach for just SEC("usdt") */
12827 *link = NULL;
12828 return 0;
12829 }
12830
12831 n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
12832 if (n != 3) {
12833 pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
12834 sec_name);
12835 err = -EINVAL;
12836 } else {
12837 *link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
12838 provider, name, NULL);
12839 err = libbpf_get_error(*link);
12840 }
12841 free(path);
12842 free(provider);
12843 free(name);
12844 return err;
12845 }
12846
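/* Illustrative sketch of USDT attachment, both via the auto-attach
 * section form parsed by attach_usdt() above and via the explicit call;
 * the binary path, provider and probe names are hypothetical:
 *
 *	SEC("usdt/libc.so.6:libc:setjmp")
 *
 *	// or, explicitly, with "prog" being a loaded struct bpf_program:
 *	link = bpf_program__attach_usdt(prog, -1, "libc.so.6",
 *					"libc", "setjmp", NULL); // pid = -1: any process
 */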
12847 static int determine_tracepoint_id(const char *tp_category,
12848 const char *tp_name)
12849 {
12850 char file[PATH_MAX];
12851 int ret;
12852
12853 ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
12854 tracefs_path(), tp_category, tp_name);
12855 if (ret < 0)
12856 return -errno;
12857 if (ret >= sizeof(file)) {
12858 pr_debug("tracepoint %s/%s path is too long\n",
12859 tp_category, tp_name);
12860 return -E2BIG;
12861 }
12862 return parse_uint_from_file(file, "%d\n");
12863 }
12864
12865 static int perf_event_open_tracepoint(const char *tp_category,
12866 const char *tp_name)
12867 {
12868 const size_t attr_sz = sizeof(struct perf_event_attr);
12869 struct perf_event_attr attr;
12870 int tp_id, pfd, err;
12871
12872 tp_id = determine_tracepoint_id(tp_category, tp_name);
12873 if (tp_id < 0) {
12874 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
12875 tp_category, tp_name,
12876 errstr(tp_id));
12877 return tp_id;
12878 }
12879
12880 memset(&attr, 0, attr_sz);
12881 attr.type = PERF_TYPE_TRACEPOINT;
12882 attr.size = attr_sz;
12883 attr.config = tp_id;
12884
12885 pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
12886 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
12887 if (pfd < 0) {
12888 err = -errno;
12889 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
12890 tp_category, tp_name,
12891 errstr(err));
12892 return err;
12893 }
12894 return pfd;
12895 }
12896
12897 struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
12898 const char *tp_category,
12899 const char *tp_name,
12900 const struct bpf_tracepoint_opts *opts)
12901 {
12902 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
12903 struct bpf_link *link;
12904 int pfd, err;
12905
12906 if (!OPTS_VALID(opts, bpf_tracepoint_opts))
12907 return libbpf_err_ptr(-EINVAL);
12908
12909 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
12910
12911 pfd = perf_event_open_tracepoint(tp_category, tp_name);
12912 if (pfd < 0) {
12913 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
12914 prog->name, tp_category, tp_name,
12915 errstr(pfd));
12916 return libbpf_err_ptr(pfd);
12917 }
12918 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
12919 err = libbpf_get_error(link);
12920 if (err) {
12921 close(pfd);
12922 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
12923 prog->name, tp_category, tp_name,
12924 errstr(err));
12925 return libbpf_err_ptr(err);
12926 }
12927 return link;
12928 }
12929
12930 struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
12931 const char *tp_category,
12932 const char *tp_name)
12933 {
12934 return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
12935 }
12936
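/* Illustrative sketch of tracepoint attachment; attach_tp() below parses
 * the "tp/<category>/<name>" (or "tracepoint/...") section form:
 *
 *	SEC("tp/sched/sched_switch")
 *
 *	// equivalent explicit call, "prog" being a loaded program:
 *	link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
 */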
12937 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12938 {
12939 char *sec_name, *tp_cat, *tp_name;
12940
12941 *link = NULL;
12942
12943 /* no auto-attach for SEC("tp") or SEC("tracepoint") */
12944 if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
12945 return 0;
12946
12947 sec_name = strdup(prog->sec_name);
12948 if (!sec_name)
12949 return -ENOMEM;
12950
12951 /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
12952 if (str_has_pfx(prog->sec_name, "tp/"))
12953 tp_cat = sec_name + sizeof("tp/") - 1;
12954 else
12955 tp_cat = sec_name + sizeof("tracepoint/") - 1;
12956 tp_name = strchr(tp_cat, '/');
12957 if (!tp_name) {
12958 free(sec_name);
12959 return -EINVAL;
12960 }
12961 *tp_name = '\0';
12962 tp_name++;
12963
12964 *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
12965 free(sec_name);
12966 return libbpf_get_error(*link);
12967 }
12968
12969 struct bpf_link *
12970 bpf_program__attach_raw_tracepoint_opts(const struct bpf_program *prog,
12971 const char *tp_name,
12972 struct bpf_raw_tracepoint_opts *opts)
12973 {
12974 LIBBPF_OPTS(bpf_raw_tp_opts, raw_opts);
12975 struct bpf_link *link;
12976 int prog_fd, pfd;
12977
12978 if (!OPTS_VALID(opts, bpf_raw_tracepoint_opts))
12979 return libbpf_err_ptr(-EINVAL);
12980
12981 prog_fd = bpf_program__fd(prog);
12982 if (prog_fd < 0) {
12983 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12984 return libbpf_err_ptr(-EINVAL);
12985 }
12986
12987 link = calloc(1, sizeof(*link));
12988 if (!link)
12989 return libbpf_err_ptr(-ENOMEM);
12990 link->detach = &bpf_link__detach_fd;
12991
12992 raw_opts.tp_name = tp_name;
12993 raw_opts.cookie = OPTS_GET(opts, cookie, 0);
12994 pfd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_opts);
12995 if (pfd < 0) {
12996 pfd = -errno;
12997 free(link);
12998 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
12999 prog->name, tp_name, errstr(pfd));
13000 return libbpf_err_ptr(pfd);
13001 }
13002 link->fd = pfd;
13003 return link;
13004 }
13005
13006 struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
13007 const char *tp_name)
13008 {
13009 return bpf_program__attach_raw_tracepoint_opts(prog, tp_name, NULL);
13010 }
13011
13012 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
13013 {
13014 static const char *const prefixes[] = {
13015 "raw_tp",
13016 "raw_tracepoint",
13017 "raw_tp.w",
13018 "raw_tracepoint.w",
13019 };
13020 size_t i;
13021 const char *tp_name = NULL;
13022
13023 *link = NULL;
13024
13025 for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
13026 size_t pfx_len;
13027
13028 if (!str_has_pfx(prog->sec_name, prefixes[i]))
13029 continue;
13030
13031 pfx_len = strlen(prefixes[i]);
13032 		/* no auto-attach for a bare section name, e.g., SEC("raw_tp") */
13033 if (prog->sec_name[pfx_len] == '\0')
13034 return 0;
13035
13036 if (prog->sec_name[pfx_len] != '/')
13037 continue;
13038
13039 tp_name = prog->sec_name + pfx_len + 1;
13040 break;
13041 }
13042
13043 if (!tp_name) {
13044 pr_warn("prog '%s': invalid section name '%s'\n",
13045 prog->name, prog->sec_name);
13046 return -EINVAL;
13047 }
13048
13049 *link = bpf_program__attach_raw_tracepoint(prog, tp_name);
13050 return libbpf_get_error(*link);
13051 }
13052
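/* Illustrative sketch of the raw tracepoint section forms handled by
 * attach_raw_tp() above, plus the explicit call; tracepoint names are
 * examples only:
 *
 *	SEC("raw_tp/sched_switch")
 *	SEC("raw_tracepoint.w/bpf_test_finish")  // writable raw tracepoint
 *	SEC("raw_tp")                            // valid, but no auto-attach
 *
 *	link = bpf_program__attach_raw_tracepoint(prog, "sched_switch");
 */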
13053 /* Common logic for all BPF program types that attach to a btf_id */
13054 static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
13055 const struct bpf_trace_opts *opts)
13056 {
13057 LIBBPF_OPTS(bpf_link_create_opts, link_opts);
13058 struct bpf_link *link;
13059 int prog_fd, pfd;
13060
13061 if (!OPTS_VALID(opts, bpf_trace_opts))
13062 return libbpf_err_ptr(-EINVAL);
13063
13064 prog_fd = bpf_program__fd(prog);
13065 if (prog_fd < 0) {
13066 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
13067 return libbpf_err_ptr(-EINVAL);
13068 }
13069
13070 link = calloc(1, sizeof(*link));
13071 if (!link)
13072 return libbpf_err_ptr(-ENOMEM);
13073 link->detach = &bpf_link__detach_fd;
13074
13075 /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
13076 link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
13077 pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
13078 if (pfd < 0) {
13079 pfd = -errno;
13080 free(link);
13081 pr_warn("prog '%s': failed to attach: %s\n",
13082 prog->name, errstr(pfd));
13083 return libbpf_err_ptr(pfd);
13084 }
13085 link->fd = pfd;
13086 return link;
13087 }
13088
13089 struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
13090 {
13091 return bpf_program__attach_btf_id(prog, NULL);
13092 }
13093
13094 struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
13095 const struct bpf_trace_opts *opts)
13096 {
13097 return bpf_program__attach_btf_id(prog, opts);
13098 }
13099
13100 struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
13101 {
13102 return bpf_program__attach_btf_id(prog, NULL);
13103 }
13104
13105 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
13106 {
13107 *link = bpf_program__attach_trace(prog);
13108 return libbpf_get_error(*link);
13109 }
13110
13111 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
13112 {
13113 *link = bpf_program__attach_lsm(prog);
13114 return libbpf_get_error(*link);
13115 }
13116
13117 static struct bpf_link *
13118 bpf_program_attach_fd(const struct bpf_program *prog,
13119 int target_fd, const char *target_name,
13120 const struct bpf_link_create_opts *opts)
13121 {
13122 enum bpf_attach_type attach_type;
13123 struct bpf_link *link;
13124 int prog_fd, link_fd;
13125
13126 prog_fd = bpf_program__fd(prog);
13127 if (prog_fd < 0) {
13128 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
13129 return libbpf_err_ptr(-EINVAL);
13130 }
13131
13132 link = calloc(1, sizeof(*link));
13133 if (!link)
13134 return libbpf_err_ptr(-ENOMEM);
13135 link->detach = &bpf_link__detach_fd;
13136
13137 attach_type = bpf_program__expected_attach_type(prog);
13138 link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts);
13139 if (link_fd < 0) {
13140 link_fd = -errno;
13141 free(link);
13142 pr_warn("prog '%s': failed to attach to %s: %s\n",
13143 prog->name, target_name,
13144 errstr(link_fd));
13145 return libbpf_err_ptr(link_fd);
13146 }
13147 link->fd = link_fd;
13148 return link;
13149 }
13150
13151 struct bpf_link *
13152 bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
13153 {
13154 return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL);
13155 }
13156
13157 struct bpf_link *
13158 bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
13159 {
13160 return bpf_program_attach_fd(prog, netns_fd, "netns", NULL);
13161 }
13162
13163 struct bpf_link *
13164 bpf_program__attach_sockmap(const struct bpf_program *prog, int map_fd)
13165 {
13166 return bpf_program_attach_fd(prog, map_fd, "sockmap", NULL);
13167 }
13168
13169 struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
13170 {
13171 /* target_fd/target_ifindex use the same field in LINK_CREATE */
13172 return bpf_program_attach_fd(prog, ifindex, "xdp", NULL);
13173 }
13174
13175 struct bpf_link *
13176 bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd,
13177 const struct bpf_cgroup_opts *opts)
13178 {
13179 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
13180 __u32 relative_id;
13181 int relative_fd;
13182
13183 if (!OPTS_VALID(opts, bpf_cgroup_opts))
13184 return libbpf_err_ptr(-EINVAL);
13185
13186 relative_id = OPTS_GET(opts, relative_id, 0);
13187 relative_fd = OPTS_GET(opts, relative_fd, 0);
13188
13189 if (relative_fd && relative_id) {
13190 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
13191 prog->name);
13192 return libbpf_err_ptr(-EINVAL);
13193 }
13194
13195 link_create_opts.cgroup.expected_revision = OPTS_GET(opts, expected_revision, 0);
13196 link_create_opts.cgroup.relative_fd = relative_fd;
13197 link_create_opts.cgroup.relative_id = relative_id;
13198 link_create_opts.flags = OPTS_GET(opts, flags, 0);
13199
13200 return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", &link_create_opts);
13201 }
13202
13203 struct bpf_link *
13204 bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
13205 const struct bpf_tcx_opts *opts)
13206 {
13207 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
13208 __u32 relative_id;
13209 int relative_fd;
13210
13211 if (!OPTS_VALID(opts, bpf_tcx_opts))
13212 return libbpf_err_ptr(-EINVAL);
13213
13214 relative_id = OPTS_GET(opts, relative_id, 0);
13215 relative_fd = OPTS_GET(opts, relative_fd, 0);
13216
13217 /* validate we don't have unexpected combinations of non-zero fields */
13218 if (!ifindex) {
13219 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
13220 prog->name);
13221 return libbpf_err_ptr(-EINVAL);
13222 }
13223 if (relative_fd && relative_id) {
13224 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
13225 prog->name);
13226 return libbpf_err_ptr(-EINVAL);
13227 }
13228
13229 link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0);
13230 link_create_opts.tcx.relative_fd = relative_fd;
13231 link_create_opts.tcx.relative_id = relative_id;
13232 link_create_opts.flags = OPTS_GET(opts, flags, 0);
13233
13234 /* target_fd/target_ifindex use the same field in LINK_CREATE */
13235 return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts);
13236 }
13237
13238 struct bpf_link *
13239 bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex,
13240 const struct bpf_netkit_opts *opts)
13241 {
13242 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
13243 __u32 relative_id;
13244 int relative_fd;
13245
13246 if (!OPTS_VALID(opts, bpf_netkit_opts))
13247 return libbpf_err_ptr(-EINVAL);
13248
13249 relative_id = OPTS_GET(opts, relative_id, 0);
13250 relative_fd = OPTS_GET(opts, relative_fd, 0);
13251
13252 /* validate we don't have unexpected combinations of non-zero fields */
13253 if (!ifindex) {
13254 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
13255 prog->name);
13256 return libbpf_err_ptr(-EINVAL);
13257 }
13258 if (relative_fd && relative_id) {
13259 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
13260 prog->name);
13261 return libbpf_err_ptr(-EINVAL);
13262 }
13263
13264 link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0);
13265 link_create_opts.netkit.relative_fd = relative_fd;
13266 link_create_opts.netkit.relative_id = relative_id;
13267 link_create_opts.flags = OPTS_GET(opts, flags, 0);
13268
13269 return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts);
13270 }
13271
13272 struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
13273 int target_fd,
13274 const char *attach_func_name)
13275 {
13276 int btf_id;
13277
13278 if (!!target_fd != !!attach_func_name) {
13279 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
13280 prog->name);
13281 return libbpf_err_ptr(-EINVAL);
13282 }
13283
13284 if (prog->type != BPF_PROG_TYPE_EXT) {
13285 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n",
13286 prog->name);
13287 return libbpf_err_ptr(-EINVAL);
13288 }
13289
13290 if (target_fd) {
13291 LIBBPF_OPTS(bpf_link_create_opts, target_opts);
13292
13293 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd, prog->obj->token_fd);
13294 if (btf_id < 0)
13295 return libbpf_err_ptr(btf_id);
13296
13297 target_opts.target_btf_id = btf_id;
13298
13299 return bpf_program_attach_fd(prog, target_fd, "freplace",
13300 &target_opts);
13301 } else {
13302 /* no target, so use raw_tracepoint_open for compatibility
13303 * with old kernels
13304 */
13305 return bpf_program__attach_trace(prog);
13306 }
13307 }
13308
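/* Illustrative sketch of freplace attachment using
 * bpf_program__attach_freplace() above; "ext_prog" is a hypothetical
 * loaded BPF_PROG_TYPE_EXT program, "target_fd" an FD of the program
 * whose global function is being replaced, and "xdp_helper" a
 * placeholder function name:
 *
 *	// replace function "xdp_helper" in the target program
 *	link = bpf_program__attach_freplace(ext_prog, target_fd, "xdp_helper");
 *
 *	// or rely on the attach target recorded at load time:
 *	link = bpf_program__attach_freplace(ext_prog, 0, NULL);
 */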
13309 struct bpf_link *
13310 bpf_program__attach_iter(const struct bpf_program *prog,
13311 const struct bpf_iter_attach_opts *opts)
13312 {
13313 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
13314 struct bpf_link *link;
13315 int prog_fd, link_fd;
13316 __u32 target_fd = 0;
13317
13318 if (!OPTS_VALID(opts, bpf_iter_attach_opts))
13319 return libbpf_err_ptr(-EINVAL);
13320
13321 link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
13322 link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
13323
13324 prog_fd = bpf_program__fd(prog);
13325 if (prog_fd < 0) {
13326 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
13327 return libbpf_err_ptr(-EINVAL);
13328 }
13329
13330 link = calloc(1, sizeof(*link));
13331 if (!link)
13332 return libbpf_err_ptr(-ENOMEM);
13333 link->detach = &bpf_link__detach_fd;
13334
13335 link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
13336 &link_create_opts);
13337 if (link_fd < 0) {
13338 link_fd = -errno;
13339 free(link);
13340 pr_warn("prog '%s': failed to attach to iterator: %s\n",
13341 prog->name, errstr(link_fd));
13342 return libbpf_err_ptr(link_fd);
13343 }
13344 link->fd = link_fd;
13345 return link;
13346 }
13347
13348 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
13349 {
13350 *link = bpf_program__attach_iter(prog, NULL);
13351 return libbpf_get_error(*link);
13352 }
13353
13354 struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog,
13355 const struct bpf_netfilter_opts *opts)
13356 {
13357 LIBBPF_OPTS(bpf_link_create_opts, lopts);
13358 struct bpf_link *link;
13359 int prog_fd, link_fd;
13360
13361 if (!OPTS_VALID(opts, bpf_netfilter_opts))
13362 return libbpf_err_ptr(-EINVAL);
13363
13364 prog_fd = bpf_program__fd(prog);
13365 if (prog_fd < 0) {
13366 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
13367 return libbpf_err_ptr(-EINVAL);
13368 }
13369
13370 link = calloc(1, sizeof(*link));
13371 if (!link)
13372 return libbpf_err_ptr(-ENOMEM);
13373
13374 link->detach = &bpf_link__detach_fd;
13375
13376 lopts.netfilter.pf = OPTS_GET(opts, pf, 0);
13377 lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0);
13378 lopts.netfilter.priority = OPTS_GET(opts, priority, 0);
13379 lopts.netfilter.flags = OPTS_GET(opts, flags, 0);
13380
13381 link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts);
13382 if (link_fd < 0) {
13383 link_fd = -errno;
13384 free(link);
13385 pr_warn("prog '%s': failed to attach to netfilter: %s\n",
13386 prog->name, errstr(link_fd));
13387 return libbpf_err_ptr(link_fd);
13388 }
13389 link->fd = link_fd;
13390
13391 return link;
13392 }
13393
13394 struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
13395 {
13396 struct bpf_link *link = NULL;
13397 int err;
13398
13399 if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
13400 return libbpf_err_ptr(-EOPNOTSUPP);
13401
13402 if (bpf_program__fd(prog) < 0) {
13403 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
13404 prog->name);
13405 return libbpf_err_ptr(-EINVAL);
13406 }
13407
13408 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link);
13409 if (err)
13410 return libbpf_err_ptr(err);
13411
13412 /* When calling bpf_program__attach() explicitly, auto-attach support
13413 	 * is expected to work, so a NULL returned link is considered an error.
13414 * This is different for skeleton's attach, see comment in
13415 * bpf_object__attach_skeleton().
13416 */
13417 if (!link)
13418 return libbpf_err_ptr(-EOPNOTSUPP);
13419
13420 return link;
13421 }
13422
13423 struct bpf_link_struct_ops {
13424 struct bpf_link link;
13425 int map_fd;
13426 };
13427
13428 static int bpf_link__detach_struct_ops(struct bpf_link *link)
13429 {
13430 struct bpf_link_struct_ops *st_link;
13431 __u32 zero = 0;
13432
13433 st_link = container_of(link, struct bpf_link_struct_ops, link);
13434
13435 if (st_link->map_fd < 0)
13436 /* w/o a real link */
13437 return bpf_map_delete_elem(link->fd, &zero);
13438
13439 return close(link->fd);
13440 }
13441
13442 struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
13443 {
13444 struct bpf_link_struct_ops *link;
13445 __u32 zero = 0;
13446 int err, fd;
13447
13448 if (!bpf_map__is_struct_ops(map)) {
13449 pr_warn("map '%s': can't attach non-struct_ops map\n", map->name);
13450 return libbpf_err_ptr(-EINVAL);
13451 }
13452
13453 if (map->fd < 0) {
13454 pr_warn("map '%s': can't attach BPF map without FD (was it created?)\n", map->name);
13455 return libbpf_err_ptr(-EINVAL);
13456 }
13457
13458 link = calloc(1, sizeof(*link));
13459 if (!link)
13460 return libbpf_err_ptr(-EINVAL);
13461
13462 /* kern_vdata should be prepared during the loading phase. */
13463 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
13464 /* It can be EBUSY if the map has been used to create or
13465 * update a link before. We don't allow updating the value of
13466 * a struct_ops once it is set. That ensures that the value
13467 	 * never changes. So, it is safe to skip EBUSY.
13468 */
13469 if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
13470 free(link);
13471 return libbpf_err_ptr(err);
13472 }
13473
13474 link->link.detach = bpf_link__detach_struct_ops;
13475
13476 if (!(map->def.map_flags & BPF_F_LINK)) {
13477 /* w/o a real link */
13478 link->link.fd = map->fd;
13479 link->map_fd = -1;
13480 return &link->link;
13481 }
13482
13483 fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
13484 if (fd < 0) {
13485 free(link);
13486 return libbpf_err_ptr(fd);
13487 }
13488
13489 link->link.fd = fd;
13490 link->map_fd = map->fd;
13491
13492 return &link->link;
13493 }
13494
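/* Illustrative sketch: attaching a struct_ops map from a skeleton with
 * bpf_map__attach_struct_ops() above; "skel" and the map name "my_ops"
 * are hypothetical:
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_map__attach_struct_ops(skel->maps.my_ops);
 *	if (!link)
 *		// errno is set by libbpf on failure
 *		return -errno;
 */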
13495 /*
13496 * Swap the backing struct_ops map of a link with a new struct_ops map.
13497 */
13498 int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
13499 {
13500 struct bpf_link_struct_ops *st_ops_link;
13501 __u32 zero = 0;
13502 int err;
13503
13504 if (!bpf_map__is_struct_ops(map))
13505 return libbpf_err(-EINVAL);
13506
13507 if (map->fd < 0) {
13508 pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name);
13509 return libbpf_err(-EINVAL);
13510 }
13511
13512 st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
13513 /* Ensure the type of a link is correct */
13514 if (st_ops_link->map_fd < 0)
13515 return libbpf_err(-EINVAL);
13516
13517 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
13518 /* It can be EBUSY if the map has been used to create or
13519 * update a link before. We don't allow updating the value of
13520 * a struct_ops once it is set. That ensures that the value
13521 	 * never changes. So, it is safe to skip EBUSY.
13522 */
13523 if (err && err != -EBUSY)
13524 return err;
13525
13526 err = bpf_link_update(link->fd, map->fd, NULL);
13527 if (err < 0)
13528 return err;
13529
13530 st_ops_link->map_fd = map->fd;
13531
13532 return 0;
13533 }
13534
13535 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
13536 void *private_data);
13537
13538 static enum bpf_perf_event_ret
13539 perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
13540 void **copy_mem, size_t *copy_size,
13541 bpf_perf_event_print_t fn, void *private_data)
13542 {
13543 struct perf_event_mmap_page *header = mmap_mem;
13544 __u64 data_head = ring_buffer_read_head(header);
13545 __u64 data_tail = header->data_tail;
13546 void *base = ((__u8 *)header) + page_size;
13547 int ret = LIBBPF_PERF_EVENT_CONT;
13548 struct perf_event_header *ehdr;
13549 size_t ehdr_size;
13550
13551 while (data_head != data_tail) {
13552 ehdr = base + (data_tail & (mmap_size - 1));
13553 ehdr_size = ehdr->size;
13554
13555 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
13556 void *copy_start = ehdr;
13557 size_t len_first = base + mmap_size - copy_start;
13558 size_t len_secnd = ehdr_size - len_first;
13559
13560 if (*copy_size < ehdr_size) {
13561 free(*copy_mem);
13562 *copy_mem = malloc(ehdr_size);
13563 if (!*copy_mem) {
13564 *copy_size = 0;
13565 ret = LIBBPF_PERF_EVENT_ERROR;
13566 break;
13567 }
13568 *copy_size = ehdr_size;
13569 }
13570
13571 memcpy(*copy_mem, copy_start, len_first);
13572 memcpy(*copy_mem + len_first, base, len_secnd);
13573 ehdr = *copy_mem;
13574 }
13575
13576 ret = fn(ehdr, private_data);
13577 data_tail += ehdr_size;
13578 if (ret != LIBBPF_PERF_EVENT_CONT)
13579 break;
13580 }
13581
13582 ring_buffer_write_tail(header, data_tail);
13583 return libbpf_err(ret);
13584 }
13585
13586 struct perf_buffer;
13587
13588 struct perf_buffer_params {
13589 struct perf_event_attr *attr;
13590 	/* if event_cb is specified, it takes precedence */
13591 perf_buffer_event_fn event_cb;
13592 /* sample_cb and lost_cb are higher-level common-case callbacks */
13593 perf_buffer_sample_fn sample_cb;
13594 perf_buffer_lost_fn lost_cb;
13595 void *ctx;
13596 int cpu_cnt;
13597 int *cpus;
13598 int *map_keys;
13599 };
13600
13601 struct perf_cpu_buf {
13602 struct perf_buffer *pb;
13603 void *base; /* mmap()'ed memory */
13604 void *buf; /* for reconstructing segmented data */
13605 size_t buf_size;
13606 int fd;
13607 int cpu;
13608 int map_key;
13609 };
13610
13611 struct perf_buffer {
13612 perf_buffer_event_fn event_cb;
13613 perf_buffer_sample_fn sample_cb;
13614 perf_buffer_lost_fn lost_cb;
13615 void *ctx; /* passed into callbacks */
13616
13617 size_t page_size;
13618 size_t mmap_size;
13619 struct perf_cpu_buf **cpu_bufs;
13620 struct epoll_event *events;
13621 int cpu_cnt; /* number of allocated CPU buffers */
13622 int epoll_fd; /* perf event FD */
13623 int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
13624 };
13625
13626 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
13627 struct perf_cpu_buf *cpu_buf)
13628 {
13629 if (!cpu_buf)
13630 return;
13631 if (cpu_buf->base &&
13632 munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
13633 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
13634 if (cpu_buf->fd >= 0) {
13635 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
13636 close(cpu_buf->fd);
13637 }
13638 free(cpu_buf->buf);
13639 free(cpu_buf);
13640 }
13641
13642 void perf_buffer__free(struct perf_buffer *pb)
13643 {
13644 int i;
13645
13646 if (IS_ERR_OR_NULL(pb))
13647 return;
13648 if (pb->cpu_bufs) {
13649 for (i = 0; i < pb->cpu_cnt; i++) {
13650 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
13651
13652 if (!cpu_buf)
13653 continue;
13654
13655 bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
13656 perf_buffer__free_cpu_buf(pb, cpu_buf);
13657 }
13658 free(pb->cpu_bufs);
13659 }
13660 if (pb->epoll_fd >= 0)
13661 close(pb->epoll_fd);
13662 free(pb->events);
13663 free(pb);
13664 }
13665
13666 static struct perf_cpu_buf *
13667 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
13668 int cpu, int map_key)
13669 {
13670 struct perf_cpu_buf *cpu_buf;
13671 int err;
13672
13673 cpu_buf = calloc(1, sizeof(*cpu_buf));
13674 if (!cpu_buf)
13675 return ERR_PTR(-ENOMEM);
13676
13677 cpu_buf->pb = pb;
13678 cpu_buf->cpu = cpu;
13679 cpu_buf->map_key = map_key;
13680
13681 cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
13682 -1, PERF_FLAG_FD_CLOEXEC);
13683 if (cpu_buf->fd < 0) {
13684 err = -errno;
13685 pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
13686 cpu, errstr(err));
13687 goto error;
13688 }
13689
13690 cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
13691 PROT_READ | PROT_WRITE, MAP_SHARED,
13692 cpu_buf->fd, 0);
13693 if (cpu_buf->base == MAP_FAILED) {
13694 cpu_buf->base = NULL;
13695 err = -errno;
13696 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
13697 cpu, errstr(err));
13698 goto error;
13699 }
13700
13701 if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
13702 err = -errno;
13703 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
13704 cpu, errstr(err));
13705 goto error;
13706 }
13707
13708 return cpu_buf;
13709
13710 error:
13711 perf_buffer__free_cpu_buf(pb, cpu_buf);
13712 return (struct perf_cpu_buf *)ERR_PTR(err);
13713 }
13714
13715 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
13716 struct perf_buffer_params *p);
13717
13718 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
13719 perf_buffer_sample_fn sample_cb,
13720 perf_buffer_lost_fn lost_cb,
13721 void *ctx,
13722 const struct perf_buffer_opts *opts)
13723 {
13724 const size_t attr_sz = sizeof(struct perf_event_attr);
13725 struct perf_buffer_params p = {};
13726 struct perf_event_attr attr;
13727 __u32 sample_period;
13728
13729 if (!OPTS_VALID(opts, perf_buffer_opts))
13730 return libbpf_err_ptr(-EINVAL);
13731
13732 sample_period = OPTS_GET(opts, sample_period, 1);
13733 if (!sample_period)
13734 sample_period = 1;
13735
13736 memset(&attr, 0, attr_sz);
13737 attr.size = attr_sz;
13738 attr.config = PERF_COUNT_SW_BPF_OUTPUT;
13739 attr.type = PERF_TYPE_SOFTWARE;
13740 attr.sample_type = PERF_SAMPLE_RAW;
13741 attr.wakeup_events = sample_period;
13742
13743 p.attr = &attr;
13744 p.sample_cb = sample_cb;
13745 p.lost_cb = lost_cb;
13746 p.ctx = ctx;
13747
13748 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
13749 }
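
/*
 * Editorial sketch (not part of libbpf): typical perf_buffer__new() usage over
 * an already-loaded BPF_MAP_TYPE_PERF_EVENT_ARRAY map. The map name "events",
 * the callbacks, and 'obj' are hypothetical.
 *
 *	static void handle_sample(void *ctx, int cpu, void *data, __u32 size)
 *	{
 *		// 'data'/'size' is the raw payload emitted by the BPF program
 *		// via bpf_perf_event_output()
 *	}
 *
 *	static void handle_lost(void *ctx, int cpu, __u64 cnt)
 *	{
 *		fprintf(stderr, "lost %llu samples on CPU #%d\n",
 *			(unsigned long long)cnt, cpu);
 *	}
 *
 *	int map_fd = bpf_object__find_map_fd_by_name(obj, "events");
 *	struct perf_buffer *pb;
 *
 *	pb = perf_buffer__new(map_fd, 8, handle_sample, handle_lost, NULL, NULL);
 *	if (!pb)
 *		return -errno;  // page_cnt must be a non-zero power of two
 */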
13750
13751 struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
13752 struct perf_event_attr *attr,
13753 perf_buffer_event_fn event_cb, void *ctx,
13754 const struct perf_buffer_raw_opts *opts)
13755 {
13756 struct perf_buffer_params p = {};
13757
13758 if (!attr)
13759 return libbpf_err_ptr(-EINVAL);
13760
13761 if (!OPTS_VALID(opts, perf_buffer_raw_opts))
13762 return libbpf_err_ptr(-EINVAL);
13763
13764 p.attr = attr;
13765 p.event_cb = event_cb;
13766 p.ctx = ctx;
13767 p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
13768 p.cpus = OPTS_GET(opts, cpus, NULL);
13769 p.map_keys = OPTS_GET(opts, map_keys, NULL);
13770
13771 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
13772 }
13773
13774 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
13775 struct perf_buffer_params *p)
13776 {
13777 const char *online_cpus_file = "/sys/devices/system/cpu/online";
13778 struct bpf_map_info map;
13779 struct perf_buffer *pb;
13780 bool *online = NULL;
13781 __u32 map_info_len;
13782 int err, i, j, n;
13783
13784 if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
13785 pr_warn("page count should be power of two, but is %zu\n",
13786 page_cnt);
13787 return ERR_PTR(-EINVAL);
13788 }
13789
13790 /* best-effort sanity checks */
13791 memset(&map, 0, sizeof(map));
13792 map_info_len = sizeof(map);
13793 err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len);
13794 if (err) {
13795 err = -errno;
13796 /* if BPF_OBJ_GET_INFO_BY_FD is supported, it will return
13797 * -EBADFD, -EFAULT, or -E2BIG on a real error
13798 */
13799 if (err != -EINVAL) {
13800 pr_warn("failed to get map info for map FD %d: %s\n",
13801 map_fd, errstr(err));
13802 return ERR_PTR(err);
13803 }
13804 pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
13805 map_fd);
13806 } else {
13807 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
13808 pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
13809 map.name);
13810 return ERR_PTR(-EINVAL);
13811 }
13812 }
13813
13814 pb = calloc(1, sizeof(*pb));
13815 if (!pb)
13816 return ERR_PTR(-ENOMEM);
13817
13818 pb->event_cb = p->event_cb;
13819 pb->sample_cb = p->sample_cb;
13820 pb->lost_cb = p->lost_cb;
13821 pb->ctx = p->ctx;
13822
13823 pb->page_size = getpagesize();
13824 pb->mmap_size = pb->page_size * page_cnt;
13825 pb->map_fd = map_fd;
13826
13827 pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
13828 if (pb->epoll_fd < 0) {
13829 err = -errno;
13830 pr_warn("failed to create epoll instance: %s\n",
13831 errstr(err));
13832 goto error;
13833 }
13834
13835 if (p->cpu_cnt > 0) {
13836 pb->cpu_cnt = p->cpu_cnt;
13837 } else {
13838 pb->cpu_cnt = libbpf_num_possible_cpus();
13839 if (pb->cpu_cnt < 0) {
13840 err = pb->cpu_cnt;
13841 goto error;
13842 }
13843 if (map.max_entries && map.max_entries < pb->cpu_cnt)
13844 pb->cpu_cnt = map.max_entries;
13845 }
13846
13847 pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
13848 if (!pb->events) {
13849 err = -ENOMEM;
13850 pr_warn("failed to allocate events: out of memory\n");
13851 goto error;
13852 }
13853 pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
13854 if (!pb->cpu_bufs) {
13855 err = -ENOMEM;
13856 pr_warn("failed to allocate buffers: out of memory\n");
13857 goto error;
13858 }
13859
13860 err = parse_cpu_mask_file(online_cpus_file, &online, &n);
13861 if (err) {
13862 pr_warn("failed to get online CPU mask: %s\n", errstr(err));
13863 goto error;
13864 }
13865
13866 for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
13867 struct perf_cpu_buf *cpu_buf;
13868 int cpu, map_key;
13869
13870 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
13871 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
13872
13873 /* in case the user didn't explicitly request particular CPUs to
13874 * be attached to, skip offline/not-present CPUs
13875 */
13876 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
13877 continue;
13878
13879 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
13880 if (IS_ERR(cpu_buf)) {
13881 err = PTR_ERR(cpu_buf);
13882 goto error;
13883 }
13884
13885 pb->cpu_bufs[j] = cpu_buf;
13886
13887 err = bpf_map_update_elem(pb->map_fd, &map_key,
13888 &cpu_buf->fd, 0);
13889 if (err) {
13890 err = -errno;
13891 pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
13892 cpu, map_key, cpu_buf->fd,
13893 errstr(err));
13894 goto error;
13895 }
13896
13897 pb->events[j].events = EPOLLIN;
13898 pb->events[j].data.ptr = cpu_buf;
13899 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
13900 &pb->events[j]) < 0) {
13901 err = -errno;
13902 pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
13903 cpu, cpu_buf->fd,
13904 errstr(err));
13905 goto error;
13906 }
13907 j++;
13908 }
13909 pb->cpu_cnt = j;
13910 free(online);
13911
13912 return pb;
13913
13914 error:
13915 free(online);
13916 if (pb)
13917 perf_buffer__free(pb);
13918 return ERR_PTR(err);
13919 }
13920
13921 struct perf_sample_raw {
13922 struct perf_event_header header;
13923 uint32_t size;
13924 char data[];
13925 };
13926
13927 struct perf_sample_lost {
13928 struct perf_event_header header;
13929 uint64_t id;
13930 uint64_t lost;
13931 uint64_t sample_id;
13932 };
13933
13934 static enum bpf_perf_event_ret
13935 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
13936 {
13937 struct perf_cpu_buf *cpu_buf = ctx;
13938 struct perf_buffer *pb = cpu_buf->pb;
13939 void *data = e;
13940
13941 /* user wants full control over parsing perf event */
13942 if (pb->event_cb)
13943 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
13944
13945 switch (e->type) {
13946 case PERF_RECORD_SAMPLE: {
13947 struct perf_sample_raw *s = data;
13948
13949 if (pb->sample_cb)
13950 pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
13951 break;
13952 }
13953 case PERF_RECORD_LOST: {
13954 struct perf_sample_lost *s = data;
13955
13956 if (pb->lost_cb)
13957 pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
13958 break;
13959 }
13960 default:
13961 pr_warn("unknown perf sample type %d\n", e->type);
13962 return LIBBPF_PERF_EVENT_ERROR;
13963 }
13964 return LIBBPF_PERF_EVENT_CONT;
13965 }
13966
13967 static int perf_buffer__process_records(struct perf_buffer *pb,
13968 struct perf_cpu_buf *cpu_buf)
13969 {
13970 enum bpf_perf_event_ret ret;
13971
13972 ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
13973 pb->page_size, &cpu_buf->buf,
13974 &cpu_buf->buf_size,
13975 perf_buffer__process_record, cpu_buf);
13976 if (ret != LIBBPF_PERF_EVENT_CONT)
13977 return ret;
13978 return 0;
13979 }
13980
13981 int perf_buffer__epoll_fd(const struct perf_buffer *pb)
13982 {
13983 return pb->epoll_fd;
13984 }
13985
13986 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
13987 {
13988 int i, cnt, err;
13989
13990 cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
13991 if (cnt < 0)
13992 return -errno;
13993
13994 for (i = 0; i < cnt; i++) {
13995 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
13996
13997 err = perf_buffer__process_records(pb, cpu_buf);
13998 if (err) {
13999 pr_warn("error while processing records: %s\n", errstr(err));
14000 return libbpf_err(err);
14001 }
14002 }
14003 return cnt;
14004 }
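
/*
 * Editorial sketch: the usual consumption loop. perf_buffer__poll() returns
 * the number of ready buffers (>= 0) or a negative error; treating -EINTR as
 * non-fatal is a common convention. 'exiting' is a hypothetical flag flipped
 * from a signal handler.
 *
 *	while (!exiting) {
 *		err = perf_buffer__poll(pb, 100);  // timeout in ms
 *		if (err == -EINTR)
 *			continue;
 *		if (err < 0)
 *			break;
 *	}
 *	perf_buffer__free(pb);
 */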
14005
14006 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
14007 * manager.
14008 */
14009 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
14010 {
14011 return pb->cpu_cnt;
14012 }
14013
14014 /*
14015 * Return perf_event FD of a ring buffer in *buf_idx* slot of
14016 * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
14017 * select()/poll()/epoll() Linux syscalls.
14018 */
14019 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
14020 {
14021 struct perf_cpu_buf *cpu_buf;
14022
14023 if (buf_idx >= pb->cpu_cnt)
14024 return libbpf_err(-EINVAL);
14025
14026 cpu_buf = pb->cpu_bufs[buf_idx];
14027 if (!cpu_buf)
14028 return libbpf_err(-ENOENT);
14029
14030 return cpu_buf->fd;
14031 }
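
/*
 * Editorial sketch: driving a single ring from an external event loop using
 * the raw FD, then consuming just that slot. poll() here is the plain libc
 * poll(2) (needs <poll.h>); slot index 0 is arbitrary.
 *
 *	struct pollfd pfd = {
 *		.fd = perf_buffer__buffer_fd(pb, 0),
 *		.events = POLLIN,
 *	};
 *
 *	if (poll(&pfd, 1, 100) > 0 && (pfd.revents & POLLIN))
 *		err = perf_buffer__consume_buffer(pb, 0);
 */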
14032
14033 int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size)
14034 {
14035 struct perf_cpu_buf *cpu_buf;
14036
14037 if (buf_idx >= pb->cpu_cnt)
14038 return libbpf_err(-EINVAL);
14039
14040 cpu_buf = pb->cpu_bufs[buf_idx];
14041 if (!cpu_buf)
14042 return libbpf_err(-ENOENT);
14043
14044 *buf = cpu_buf->base;
14045 *buf_size = pb->mmap_size;
14046 return 0;
14047 }
14048
14049 /*
14050 * Consume data from perf ring buffer corresponding to slot *buf_idx* in
14051 * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
14052 * consume, do nothing and return success.
14053 * Returns:
14054 * - 0 on success;
14055 * - <0 on failure.
14056 */
14057 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
14058 {
14059 struct perf_cpu_buf *cpu_buf;
14060
14061 if (buf_idx >= pb->cpu_cnt)
14062 return libbpf_err(-EINVAL);
14063
14064 cpu_buf = pb->cpu_bufs[buf_idx];
14065 if (!cpu_buf)
14066 return libbpf_err(-ENOENT);
14067
14068 return perf_buffer__process_records(pb, cpu_buf);
14069 }
14070
14071 int perf_buffer__consume(struct perf_buffer *pb)
14072 {
14073 int i, err;
14074
14075 for (i = 0; i < pb->cpu_cnt; i++) {
14076 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
14077
14078 if (!cpu_buf)
14079 continue;
14080
14081 err = perf_buffer__process_records(pb, cpu_buf);
14082 if (err) {
14083 pr_warn("perf_buffer: failed to process records in buffer #%d: %s\n",
14084 i, errstr(err));
14085 return libbpf_err(err);
14086 }
14087 }
14088 return 0;
14089 }
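
/*
 * Editorial sketch: draining buffers without blocking, e.g. one final sweep
 * before teardown. perf_buffer__consume() walks every allocated per-CPU
 * buffer; perf_buffer__consume_buffer() targets a single slot.
 *
 *	err = perf_buffer__consume(pb);                    // all slots
 *	if (!err)
 *		err = perf_buffer__consume_buffer(pb, 0);  // just slot 0
 */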
14090
14091 int bpf_program__set_attach_target(struct bpf_program *prog,
14092 int attach_prog_fd,
14093 const char *attach_func_name)
14094 {
14095 int btf_obj_fd = 0, btf_id = 0, err;
14096
14097 if (!prog || attach_prog_fd < 0)
14098 return libbpf_err(-EINVAL);
14099
14100 if (prog->obj->state >= OBJ_LOADED)
14101 return libbpf_err(-EINVAL);
14102
14103 if (attach_prog_fd && !attach_func_name) {
14104 /* Store attach_prog_fd. The BTF ID will be resolved later during
14105 * the normal object/program load phase.
14106 */
14107 prog->attach_prog_fd = attach_prog_fd;
14108 return 0;
14109 }
14110
14111 if (attach_prog_fd) {
14112 btf_id = libbpf_find_prog_btf_id(attach_func_name,
14113 attach_prog_fd, prog->obj->token_fd);
14114 if (btf_id < 0)
14115 return libbpf_err(btf_id);
14116 } else {
14117 if (!attach_func_name)
14118 return libbpf_err(-EINVAL);
14119
14120 /* load btf_vmlinux, if not yet */
14121 err = bpf_object__load_vmlinux_btf(prog->obj, true);
14122 if (err)
14123 return libbpf_err(err);
14124 err = find_kernel_btf_id(prog->obj, attach_func_name,
14125 prog->expected_attach_type,
14126 &btf_obj_fd, &btf_id);
14127 if (err)
14128 return libbpf_err(err);
14129 }
14130
14131 prog->attach_btf_id = btf_id;
14132 prog->attach_btf_obj_fd = btf_obj_fd;
14133 prog->attach_prog_fd = attach_prog_fd;
14134 return 0;
14135 }
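
/*
 * Editorial sketch: re-targeting a program before load. Passing
 * attach_prog_fd == 0 together with a function name selects a kernel BTF
 * target; a non-zero FD of another loaded BPF program selects an freplace
 * target. 'obj' and the names below are hypothetical, and this must run
 * before bpf_object__load().
 *
 *	struct bpf_program *prog;
 *
 *	prog = bpf_object__find_program_by_name(obj, "handle_connect");
 *	err = bpf_program__set_attach_target(prog, 0, "tcp_v4_connect");
 *	if (err)
 *		return err;
 *	err = bpf_object__load(obj);
 */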
14136
14137 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
14138 {
14139 int err = 0, n, len, start, end = -1;
14140 bool *tmp;
14141
14142 *mask = NULL;
14143 *mask_sz = 0;
14144
14145 /* Each substring separated by ',' has the format \d+-\d+ or \d+ */
14146 while (*s) {
14147 if (*s == ',' || *s == '\n') {
14148 s++;
14149 continue;
14150 }
14151 n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
14152 if (n <= 0 || n > 2) {
14153 pr_warn("Failed to get CPU range %s: %d\n", s, n);
14154 err = -EINVAL;
14155 goto cleanup;
14156 } else if (n == 1) {
14157 end = start;
14158 }
14159 if (start < 0 || start > end) {
14160 pr_warn("Invalid CPU range [%d,%d] in %s\n",
14161 start, end, s);
14162 err = -EINVAL;
14163 goto cleanup;
14164 }
14165 tmp = realloc(*mask, end + 1);
14166 if (!tmp) {
14167 err = -ENOMEM;
14168 goto cleanup;
14169 }
14170 *mask = tmp;
14171 memset(tmp + *mask_sz, 0, start - *mask_sz);
14172 memset(tmp + start, 1, end - start + 1);
14173 *mask_sz = end + 1;
14174 s += len;
14175 }
14176 if (!*mask_sz) {
14177 pr_warn("Empty CPU range\n");
14178 return -EINVAL;
14179 }
14180 return 0;
14181 cleanup:
14182 free(*mask);
14183 *mask = NULL;
14184 return err;
14185 }
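
/*
 * Editorial sketch: parse_cpu_mask_str() is an internal helper, but its
 * contract is easiest to see from a concrete input. Given "0-3,5\n" it
 * allocates a 6-entry mask (indices 0..5) with every listed CPU set.
 *
 *	bool *mask = NULL;
 *	int n = 0, err;
 *
 *	err = parse_cpu_mask_str("0-3,5\n", &mask, &n);
 *	// on success: err == 0, n == 6, mask[0..3] and mask[5] are true, mask[4] is false
 *	free(mask);
 */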
14186
14187 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
14188 {
14189 int fd, err = 0, len;
14190 char buf[128];
14191
14192 fd = open(fcpu, O_RDONLY | O_CLOEXEC);
14193 if (fd < 0) {
14194 err = -errno;
14195 pr_warn("Failed to open cpu mask file %s: %s\n", fcpu, errstr(err));
14196 return err;
14197 }
14198 len = read(fd, buf, sizeof(buf));
14199 close(fd);
14200 if (len <= 0) {
14201 err = len ? -errno : -EINVAL;
14202 pr_warn("Failed to read cpu mask from %s: %s\n", fcpu, errstr(err));
14203 return err;
14204 }
14205 if (len >= sizeof(buf)) {
14206 pr_warn("CPU mask is too big in file %s\n", fcpu);
14207 return -E2BIG;
14208 }
14209 buf[len] = '\0';
14210
14211 return parse_cpu_mask_str(buf, mask, mask_sz);
14212 }
14213
14214 int libbpf_num_possible_cpus(void)
14215 {
14216 static const char *fcpu = "/sys/devices/system/cpu/possible";
14217 static int cpus;
14218 int err, n, i, tmp_cpus;
14219 bool *mask;
14220
14221 tmp_cpus = READ_ONCE(cpus);
14222 if (tmp_cpus > 0)
14223 return tmp_cpus;
14224
14225 err = parse_cpu_mask_file(fcpu, &mask, &n);
14226 if (err)
14227 return libbpf_err(err);
14228
14229 tmp_cpus = 0;
14230 for (i = 0; i < n; i++) {
14231 if (mask[i])
14232 tmp_cpus++;
14233 }
14234 free(mask);
14235
14236 WRITE_ONCE(cpus, tmp_cpus);
14237 return tmp_cpus;
14238 }
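
/*
 * Editorial sketch: a typical use of libbpf_num_possible_cpus() is sizing the
 * value buffer for per-CPU map lookups, where the kernel expects one
 * 8-byte-aligned value per possible CPU. 'map_fd' and 'key' are hypothetical.
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *	if (ncpus < 0)
 *		return ncpus;
 *	__u64 *values = calloc(ncpus, sizeof(__u64));
 *	if (!values)
 *		return -ENOMEM;
 *	err = bpf_map_lookup_elem(map_fd, &key, values);
 *	free(values);
 */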
14239
14240 static int populate_skeleton_maps(const struct bpf_object *obj,
14241 struct bpf_map_skeleton *maps,
14242 size_t map_cnt, size_t map_skel_sz)
14243 {
14244 int i;
14245
14246 for (i = 0; i < map_cnt; i++) {
14247 struct bpf_map_skeleton *map_skel = (void *)maps + i * map_skel_sz;
14248 struct bpf_map **map = map_skel->map;
14249 const char *name = map_skel->name;
14250 void **mmaped = map_skel->mmaped;
14251
14252 *map = bpf_object__find_map_by_name(obj, name);
14253 if (!*map) {
14254 pr_warn("failed to find skeleton map '%s'\n", name);
14255 return -ESRCH;
14256 }
14257
14258 /* externs shouldn't be pre-setup from user code */
14259 if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
14260 *mmaped = (*map)->mmaped;
14261 }
14262 return 0;
14263 }
14264
14265 static int populate_skeleton_progs(const struct bpf_object *obj,
14266 struct bpf_prog_skeleton *progs,
14267 size_t prog_cnt, size_t prog_skel_sz)
14268 {
14269 int i;
14270
14271 for (i = 0; i < prog_cnt; i++) {
14272 struct bpf_prog_skeleton *prog_skel = (void *)progs + i * prog_skel_sz;
14273 struct bpf_program **prog = prog_skel->prog;
14274 const char *name = prog_skel->name;
14275
14276 *prog = bpf_object__find_program_by_name(obj, name);
14277 if (!*prog) {
14278 pr_warn("failed to find skeleton program '%s'\n", name);
14279 return -ESRCH;
14280 }
14281 }
14282 return 0;
14283 }
14284
14285 int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
14286 const struct bpf_object_open_opts *opts)
14287 {
14288 struct bpf_object *obj;
14289 int err;
14290
14291 obj = bpf_object_open(NULL, s->data, s->data_sz, s->name, opts);
14292 if (IS_ERR(obj)) {
14293 err = PTR_ERR(obj);
14294 pr_warn("failed to initialize skeleton BPF object '%s': %s\n",
14295 s->name, errstr(err));
14296 return libbpf_err(err);
14297 }
14298
14299 *s->obj = obj;
14300 err = populate_skeleton_maps(obj, s->maps, s->map_cnt, s->map_skel_sz);
14301 if (err) {
14302 pr_warn("failed to populate skeleton maps for '%s': %s\n", s->name, errstr(err));
14303 return libbpf_err(err);
14304 }
14305
14306 err = populate_skeleton_progs(obj, s->progs, s->prog_cnt, s->prog_skel_sz);
14307 if (err) {
14308 pr_warn("failed to populate skeleton progs for '%s': %s\n", s->name, errstr(err));
14309 return libbpf_err(err);
14310 }
14311
14312 return 0;
14313 }
14314
14315 int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
14316 {
14317 int err, len, var_idx, i;
14318 const char *var_name;
14319 const struct bpf_map *map;
14320 struct btf *btf;
14321 __u32 map_type_id;
14322 const struct btf_type *map_type, *var_type;
14323 const struct bpf_var_skeleton *var_skel;
14324 struct btf_var_secinfo *var;
14325
14326 if (!s->obj)
14327 return libbpf_err(-EINVAL);
14328
14329 btf = bpf_object__btf(s->obj);
14330 if (!btf) {
14331 pr_warn("subskeletons require BTF at runtime (object %s)\n",
14332 bpf_object__name(s->obj));
14333 return libbpf_err(-errno);
14334 }
14335
14336 err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt, s->map_skel_sz);
14337 if (err) {
14338 pr_warn("failed to populate subskeleton maps: %s\n", errstr(err));
14339 return libbpf_err(err);
14340 }
14341
14342 err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt, s->prog_skel_sz);
14343 if (err) {
14344 pr_warn("failed to populate subskeleton maps: %s\n", errstr(err));
14345 return libbpf_err(err);
14346 }
14347
14348 for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
14349 var_skel = (void *)s->vars + var_idx * s->var_skel_sz;
14350 map = *var_skel->map;
14351 map_type_id = bpf_map__btf_value_type_id(map);
14352 map_type = btf__type_by_id(btf, map_type_id);
14353
14354 if (!btf_is_datasec(map_type)) {
14355 pr_warn("type for map '%1$s' is not a datasec: %2$s\n",
14356 bpf_map__name(map),
14357 __btf_kind_str(btf_kind(map_type)));
14358 return libbpf_err(-EINVAL);
14359 }
14360
14361 len = btf_vlen(map_type);
14362 var = btf_var_secinfos(map_type);
14363 for (i = 0; i < len; i++, var++) {
14364 var_type = btf__type_by_id(btf, var->type);
14365 var_name = btf__name_by_offset(btf, var_type->name_off);
14366 if (strcmp(var_name, var_skel->name) == 0) {
14367 *var_skel->addr = map->mmaped + var->offset;
14368 break;
14369 }
14370 }
14371 }
14372 return 0;
14373 }
14374
14375 void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
14376 {
14377 if (!s)
14378 return;
14379 free(s->maps);
14380 free(s->progs);
14381 free(s->vars);
14382 free(s);
14383 }
14384
14385 int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
14386 {
14387 int i, err;
14388
14389 err = bpf_object__load(*s->obj);
14390 if (err) {
14391 pr_warn("failed to load BPF skeleton '%s': %s\n", s->name, errstr(err));
14392 return libbpf_err(err);
14393 }
14394
14395 for (i = 0; i < s->map_cnt; i++) {
14396 struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz;
14397 struct bpf_map *map = *map_skel->map;
14398
14399 if (!map_skel->mmaped)
14400 continue;
14401
14402 *map_skel->mmaped = map->mmaped;
14403 }
14404
14405 return 0;
14406 }
14407
14408 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
14409 {
14410 int i, err;
14411
14412 for (i = 0; i < s->prog_cnt; i++) {
14413 struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz;
14414 struct bpf_program *prog = *prog_skel->prog;
14415 struct bpf_link **link = prog_skel->link;
14416
14417 if (!prog->autoload || !prog->autoattach)
14418 continue;
14419
14420 /* auto-attaching not supported for this program */
14421 if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
14422 continue;
14423
14424 /* if user already set the link manually, don't attempt auto-attach */
14425 if (*link)
14426 continue;
14427
14428 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
14429 if (err) {
14430 pr_warn("prog '%s': failed to auto-attach: %s\n",
14431 bpf_program__name(prog), errstr(err));
14432 return libbpf_err(err);
14433 }
14434
14435 /* It's possible that for some SEC() definitions auto-attach
14436 * is supported in some cases (e.g., if the definition completely
14437 * specifies target information), but not in others.
14438 * SEC("uprobe") is one such case: if the user specified the target
14439 * binary and function name, such a BPF program can be
14440 * auto-attached. But if not, it shouldn't cause the skeleton's
14441 * attach to fail; it should just be skipped.
14442 * attach_fn signals such a case by returning 0 (no error) and
14443 * setting the link to NULL.
14444 */
14445 }
14446
14447
14448 for (i = 0; i < s->map_cnt; i++) {
14449 struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz;
14450 struct bpf_map *map = *map_skel->map;
14451 struct bpf_link **link;
14452
14453 if (!map->autocreate || !map->autoattach)
14454 continue;
14455
14456 /* only struct_ops maps can be attached */
14457 if (!bpf_map__is_struct_ops(map))
14458 continue;
14459
14460 /* skeleton was created with an earlier version of bpftool, notify user */
14461 if (s->map_skel_sz < offsetofend(struct bpf_map_skeleton, link)) {
14462 pr_warn("map '%s': BPF skeleton version is old, skipping map auto-attachment...\n",
14463 bpf_map__name(map));
14464 continue;
14465 }
14466
14467 link = map_skel->link;
14468 if (!link) {
14469 pr_warn("map '%s': BPF map skeleton link is uninitialized\n",
14470 bpf_map__name(map));
14471 continue;
14472 }
14473
14474 if (*link)
14475 continue;
14476
14477 *link = bpf_map__attach_struct_ops(map);
14478 if (!*link) {
14479 err = -errno;
14480 pr_warn("map '%s': failed to auto-attach: %s\n",
14481 bpf_map__name(map), errstr(err));
14482 return libbpf_err(err);
14483 }
14484 }
14485
14486 return 0;
14487 }
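
/*
 * Editorial sketch: these skeleton entry points are normally not called
 * directly; "bpftool gen skeleton" emits per-object wrappers that forward to
 * them. Assuming a hypothetical object my_prog.bpf.o:
 *
 *	struct my_prog *skel;
 *
 *	skel = my_prog__open();                 // -> bpf_object__open_skeleton()
 *	if (!skel)
 *		return -errno;
 *	err = my_prog__load(skel);              // -> bpf_object__load_skeleton()
 *	if (!err)
 *		err = my_prog__attach(skel);    // -> bpf_object__attach_skeleton()
 *	my_prog__destroy(skel);                 // -> bpf_object__destroy_skeleton()
 */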
14488
14489 void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
14490 {
14491 int i;
14492
14493 for (i = 0; i < s->prog_cnt; i++) {
14494 struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz;
14495 struct bpf_link **link = prog_skel->link;
14496
14497 bpf_link__destroy(*link);
14498 *link = NULL;
14499 }
14500
14501 if (s->map_skel_sz < sizeof(struct bpf_map_skeleton))
14502 return;
14503
14504 for (i = 0; i < s->map_cnt; i++) {
14505 struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz;
14506 struct bpf_link **link = map_skel->link;
14507
14508 if (link) {
14509 bpf_link__destroy(*link);
14510 *link = NULL;
14511 }
14512 }
14513 }
14514
14515 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
14516 {
14517 if (!s)
14518 return;
14519
14520 bpf_object__detach_skeleton(s);
14521 if (s->obj)
14522 bpf_object__close(*s->obj);
14523 free(s->maps);
14524 free(s->progs);
14525 free(s);
14526 }
14527