// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * common eBPF ELF operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
 * Copyright (C) 2015 Huawei Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2.1 of the License (not later!)
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses>
 */

#include <stdlib.h>
#include <string.h>
#include <memory.h>
#include <unistd.h>
#include <asm/unistd.h>
#include <errno.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <limits.h>
#include <sys/resource.h>
#include "bpf.h"
#include "libbpf.h"
#include "libbpf_internal.h"

/*
 * When building perf, unistd.h is overridden. __NR_bpf is
 * required to be defined explicitly.
 */
#ifndef __NR_bpf
# if defined(__i386__)
#  define __NR_bpf 357
# elif defined(__x86_64__)
#  define __NR_bpf 321
# elif defined(__aarch64__)
#  define __NR_bpf 280
# elif defined(__sparc__)
#  define __NR_bpf 349
# elif defined(__s390__)
#  define __NR_bpf 351
# elif defined(__arc__)
#  define __NR_bpf 280
# elif defined(__mips__) && defined(_ABIO32)
#  define __NR_bpf 4355
# elif defined(__mips__) && defined(_ABIN32)
#  define __NR_bpf 6319
# elif defined(__mips__) && defined(_ABI64)
#  define __NR_bpf 5315
# else
#  error __NR_bpf not defined. libbpf does not support your arch.
# endif
#endif

static inline __u64 ptr_to_u64(const void *ptr)
{
	return (__u64) (unsigned long) ptr;
}

static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
			  unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr,
			     unsigned int size)
{
	int fd;

	fd = sys_bpf(cmd, attr, size);
	return ensure_good_fd(fd);
}

#define PROG_LOAD_ATTEMPTS 5

static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts)
{
	int fd;

	do {
		fd = sys_bpf_fd(BPF_PROG_LOAD, attr, size);
	} while (fd < 0 && errno == EAGAIN && --attempts > 0);

	return fd;
}

/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to
 * memcg-based memory accounting for BPF maps and progs. This was done in [0].
 * We use the support for bpf_ktime_get_coarse_ns() helper, which was added in
 * the same 5.11 Linux release ([1]), to detect memcg-based accounting for BPF.
 *
 *   [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/
 *   [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper")
 */
int probe_memcg_account(void)
{
	const size_t prog_load_attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd);
	struct bpf_insn insns[] = {
		BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns),
		BPF_EXIT_INSN(),
	};
	size_t insn_cnt = sizeof(insns) / sizeof(insns[0]);
	union bpf_attr attr;
	int prog_fd;

	/* attempt loading a trivial program that uses the
	 * bpf_ktime_get_coarse_ns() helper
	 */
	memset(&attr, 0, prog_load_attr_sz);
	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
	attr.insns = ptr_to_u64(insns);
	attr.insn_cnt = insn_cnt;
	attr.license = ptr_to_u64("GPL");

	prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, prog_load_attr_sz);
	if (prog_fd >= 0) {
		close(prog_fd);
		return 1;
	}
	return 0;
}

static bool memlock_bumped;
static rlim_t memlock_rlim = RLIM_INFINITY;

int libbpf_set_memlock_rlim(size_t memlock_bytes)
{
	if (memlock_bumped)
		return libbpf_err(-EBUSY);

	memlock_rlim = memlock_bytes;
	return 0;
}

int bump_rlimit_memlock(void)
{
	struct rlimit rlim;

	/* this is the default in libbpf 1.0, but for now user has to opt-in explicitly */
	if (!(libbpf_mode & LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK))
		return 0;

	/* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */
	if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT))
		return 0;

	memlock_bumped = true;

	/* zero memlock_rlim disables auto-bumping RLIMIT_MEMLOCK */
	if (memlock_rlim == 0)
		return 0;

	rlim.rlim_cur = rlim.rlim_max = memlock_rlim;
	if (setrlimit(RLIMIT_MEMLOCK, &rlim))
		return -errno;

	return 0;
}

int bpf_map_create(enum bpf_map_type map_type,
		   const char *map_name,
		   __u32 key_size,
		   __u32 value_size,
		   __u32 max_entries,
		   const struct bpf_map_create_opts *opts)
{
	const size_t attr_sz = offsetofend(union bpf_attr, map_extra);
	union bpf_attr attr;
	int fd;

	bump_rlimit_memlock();

	memset(&attr, 0, attr_sz);

	if (!OPTS_VALID(opts, bpf_map_create_opts))
		return libbpf_err(-EINVAL);

	attr.map_type = map_type;
	if (map_name)
		libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name));
	attr.key_size = key_size;
	attr.value_size = value_size;
	attr.max_entries = max_entries;

	attr.btf_fd = OPTS_GET(opts, btf_fd, 0);
	attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0);
	attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0);
	attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0);

	attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 0);
	attr.map_flags = OPTS_GET(opts, map_flags, 0);
	attr.map_extra = OPTS_GET(opts, map_extra, 0);
	attr.numa_node = OPTS_GET(opts, numa_node, 0);
	attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0);

	fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
	return libbpf_err_errno(fd);
}
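/* Illustrative sketch (not part of libbpf itself): creating a small BPF array
 * map with bpf_map_create(). The map name, element sizes and BPF_F_MMAPABLE
 * flag below are hypothetical choices; the flag needs a kernel that supports
 * memory-mapped arrays, and opts may simply be NULL.
 *
 *	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
 *	int map_fd;
 *
 *	map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "example_arr",
 *				sizeof(__u32), sizeof(__u64), 256, &opts);
 *	if (map_fd < 0)
 *		... creation failed; errno is set by the syscall wrapper ...
 */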
int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
{
	LIBBPF_OPTS(bpf_map_create_opts, p);

	p.map_flags = create_attr->map_flags;
	p.numa_node = create_attr->numa_node;
	p.btf_fd = create_attr->btf_fd;
	p.btf_key_type_id = create_attr->btf_key_type_id;
	p.btf_value_type_id = create_attr->btf_value_type_id;
	p.map_ifindex = create_attr->map_ifindex;
	if (create_attr->map_type == BPF_MAP_TYPE_STRUCT_OPS)
		p.btf_vmlinux_value_type_id = create_attr->btf_vmlinux_value_type_id;
	else
		p.inner_map_fd = create_attr->inner_map_fd;

	return bpf_map_create(create_attr->map_type, create_attr->name,
			      create_attr->key_size, create_attr->value_size,
			      create_attr->max_entries, &p);
}

int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
			int key_size, int value_size, int max_entries,
			__u32 map_flags, int node)
{
	LIBBPF_OPTS(bpf_map_create_opts, opts);

	opts.map_flags = map_flags;
	if (node >= 0) {
		opts.numa_node = node;
		opts.map_flags |= BPF_F_NUMA_NODE;
	}

	return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts);
}

int bpf_create_map(enum bpf_map_type map_type, int key_size,
		   int value_size, int max_entries, __u32 map_flags)
{
	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags);

	return bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts);
}

int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
			int key_size, int value_size, int max_entries,
			__u32 map_flags)
{
	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags);

	return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts);
}

int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
			       int key_size, int inner_map_fd, int max_entries,
			       __u32 map_flags, int node)
{
	LIBBPF_OPTS(bpf_map_create_opts, opts);

	opts.inner_map_fd = inner_map_fd;
	opts.map_flags = map_flags;
	if (node >= 0) {
		opts.map_flags |= BPF_F_NUMA_NODE;
		opts.numa_node = node;
	}

	return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts);
}

int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
			  int key_size, int inner_map_fd, int max_entries,
			  __u32 map_flags)
{
	LIBBPF_OPTS(bpf_map_create_opts, opts,
		.inner_map_fd = inner_map_fd,
		.map_flags = map_flags,
	);

	return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts);
}
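/* Illustrative sketch (not part of libbpf): the map-in-map wrappers above
 * hardcode value_size == 4 because an outer map's value is a 4-byte reference
 * to the inner map. A rough equivalent using the preferred bpf_map_create()
 * API directly; map names and sizes below are hypothetical.
 *
 *	LIBBPF_OPTS(bpf_map_create_opts, opts);
 *	int inner_fd, outer_fd;
 *
 *	inner_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "inner_map",
 *				  sizeof(__u32), sizeof(__u64), 128, NULL);
 *	opts.inner_map_fd = inner_fd;
 *	outer_fd = bpf_map_create(BPF_MAP_TYPE_HASH_OF_MAPS, "outer_map",
 *				  sizeof(__u32), 4, 16, &opts);
 */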
static void *
alloc_zero_tailing_info(const void *orecord, __u32 cnt,
			__u32 actual_rec_size, __u32 expected_rec_size)
{
	__u64 info_len = (__u64)actual_rec_size * cnt;
	void *info, *nrecord;
	int i;

	info = malloc(info_len);
	if (!info)
		return NULL;

	/* zero out bytes kernel does not understand */
	nrecord = info;
	for (i = 0; i < cnt; i++) {
		memcpy(nrecord, orecord, expected_rec_size);
		memset(nrecord + expected_rec_size, 0,
		       actual_rec_size - expected_rec_size);
		orecord += actual_rec_size;
		nrecord += actual_rec_size;
	}

	return info;
}

DEFAULT_VERSION(bpf_prog_load_v0_6_0, bpf_prog_load, LIBBPF_0.6.0)
int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type,
			 const char *prog_name, const char *license,
			 const struct bpf_insn *insns, size_t insn_cnt,
			 const struct bpf_prog_load_opts *opts)
{
	void *finfo = NULL, *linfo = NULL;
	const char *func_info, *line_info;
	__u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
	__u32 func_info_rec_size, line_info_rec_size;
	int fd, attempts;
	union bpf_attr attr;
	char *log_buf;

	bump_rlimit_memlock();

	if (!OPTS_VALID(opts, bpf_prog_load_opts))
		return libbpf_err(-EINVAL);

	attempts = OPTS_GET(opts, attempts, 0);
	if (attempts < 0)
		return libbpf_err(-EINVAL);
	if (attempts == 0)
		attempts = PROG_LOAD_ATTEMPTS;

	memset(&attr, 0, sizeof(attr));

	attr.prog_type = prog_type;
	attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0);

	attr.prog_btf_fd = OPTS_GET(opts, prog_btf_fd, 0);
	attr.prog_flags = OPTS_GET(opts, prog_flags, 0);
	attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0);
	attr.kern_version = OPTS_GET(opts, kern_version, 0);

	if (prog_name)
		libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name));
	attr.license = ptr_to_u64(license);

	if (insn_cnt > UINT_MAX)
		return libbpf_err(-E2BIG);

	attr.insns = ptr_to_u64(insns);
	attr.insn_cnt = (__u32)insn_cnt;

	attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
	attach_btf_obj_fd = OPTS_GET(opts, attach_btf_obj_fd, 0);

	if (attach_prog_fd && attach_btf_obj_fd)
		return libbpf_err(-EINVAL);

	attr.attach_btf_id = OPTS_GET(opts, attach_btf_id, 0);
	if (attach_prog_fd)
		attr.attach_prog_fd = attach_prog_fd;
	else
		attr.attach_btf_obj_fd = attach_btf_obj_fd;

	log_buf = OPTS_GET(opts, log_buf, NULL);
	log_size = OPTS_GET(opts, log_size, 0);
	log_level = OPTS_GET(opts, log_level, 0);

	if (!!log_buf != !!log_size)
		return libbpf_err(-EINVAL);
	if (log_level > (4 | 2 | 1))
		return libbpf_err(-EINVAL);
	if (log_level && !log_buf)
		return libbpf_err(-EINVAL);

	func_info_rec_size = OPTS_GET(opts, func_info_rec_size, 0);
	func_info = OPTS_GET(opts, func_info, NULL);
	attr.func_info_rec_size = func_info_rec_size;
	attr.func_info = ptr_to_u64(func_info);
	attr.func_info_cnt = OPTS_GET(opts, func_info_cnt, 0);

	line_info_rec_size = OPTS_GET(opts, line_info_rec_size, 0);
	line_info = OPTS_GET(opts, line_info, NULL);
	attr.line_info_rec_size = line_info_rec_size;
	attr.line_info = ptr_to_u64(line_info);
	attr.line_info_cnt = OPTS_GET(opts, line_info_cnt, 0);

	attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL));

	if (log_level) {
		attr.log_buf = ptr_to_u64(log_buf);
		attr.log_size = log_size;
		attr.log_level = log_level;
	}

	fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);
	if (fd >= 0)
		return fd;

	/* After bpf_prog_load, the kernel may modify certain attributes
	 * to give user space a hint how to deal with loading failure.
	 * Check to see whether we can make some changes and load again.
	 */
	while (errno == E2BIG && (!finfo || !linfo)) {
		if (!finfo && attr.func_info_cnt &&
		    attr.func_info_rec_size < func_info_rec_size) {
			/* try with corrected func info records */
			finfo = alloc_zero_tailing_info(func_info,
							attr.func_info_cnt,
							func_info_rec_size,
							attr.func_info_rec_size);
			if (!finfo) {
				errno = E2BIG;
				goto done;
			}

			attr.func_info = ptr_to_u64(finfo);
			attr.func_info_rec_size = func_info_rec_size;
		} else if (!linfo && attr.line_info_cnt &&
			   attr.line_info_rec_size < line_info_rec_size) {
			linfo = alloc_zero_tailing_info(line_info,
							attr.line_info_cnt,
							line_info_rec_size,
							attr.line_info_rec_size);
			if (!linfo) {
				errno = E2BIG;
				goto done;
			}

			attr.line_info = ptr_to_u64(linfo);
			attr.line_info_rec_size = line_info_rec_size;
		} else {
			break;
		}

		fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);
		if (fd >= 0)
			goto done;
	}

	if (log_level == 0 && log_buf) {
		/* log_level == 0 with non-NULL log_buf requires retrying on error
		 * with log_level == 1 and log_buf/log_size set, to get details of
		 * failure
		 */
		attr.log_buf = ptr_to_u64(log_buf);
		attr.log_size = log_size;
		attr.log_level = 1;

		fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);
	}
done:
	/* free() doesn't affect errno, so we don't need to restore it */
	free(finfo);
	free(linfo);
	return libbpf_err_errno(fd);
}
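/* Illustrative sketch (not part of libbpf): loading a minimal "return 0"
 * socket filter with bpf_prog_load() and a verifier log buffer. The program
 * name and buffer size are hypothetical. With log_level left at 0, the log
 * buffer is only filled in on failure via the automatic retry implemented
 * above.
 *
 *	struct bpf_insn prog[] = {
 *		BPF_MOV64_IMM(BPF_REG_0, 0),
 *		BPF_EXIT_INSN(),
 *	};
 *	char log[4096];
 *	LIBBPF_OPTS(bpf_prog_load_opts, opts,
 *		.log_buf = log,
 *		.log_size = sizeof(log),
 *	);
 *	int prog_fd;
 *
 *	prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "example_prog",
 *				"GPL", prog, sizeof(prog) / sizeof(prog[0]), &opts);
 *	if (prog_fd < 0)
 *		... log[] now contains the verifier's explanation ...
 */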
__attribute__((alias("bpf_load_program_xattr2")))
int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
			   char *log_buf, size_t log_buf_sz);

static int bpf_load_program_xattr2(const struct bpf_load_program_attr *load_attr,
				   char *log_buf, size_t log_buf_sz)
{
	LIBBPF_OPTS(bpf_prog_load_opts, p);

	if (!load_attr || !log_buf != !log_buf_sz)
		return libbpf_err(-EINVAL);

	p.expected_attach_type = load_attr->expected_attach_type;
	switch (load_attr->prog_type) {
	case BPF_PROG_TYPE_STRUCT_OPS:
	case BPF_PROG_TYPE_LSM:
		p.attach_btf_id = load_attr->attach_btf_id;
		break;
	case BPF_PROG_TYPE_TRACING:
	case BPF_PROG_TYPE_EXT:
		p.attach_btf_id = load_attr->attach_btf_id;
		p.attach_prog_fd = load_attr->attach_prog_fd;
		break;
	default:
		p.prog_ifindex = load_attr->prog_ifindex;
		p.kern_version = load_attr->kern_version;
	}
	p.log_level = load_attr->log_level;
	p.log_buf = log_buf;
	p.log_size = log_buf_sz;
	p.prog_btf_fd = load_attr->prog_btf_fd;
	p.func_info_rec_size = load_attr->func_info_rec_size;
	p.func_info_cnt = load_attr->func_info_cnt;
	p.func_info = load_attr->func_info;
	p.line_info_rec_size = load_attr->line_info_rec_size;
	p.line_info_cnt = load_attr->line_info_cnt;
	p.line_info = load_attr->line_info;
	p.prog_flags = load_attr->prog_flags;

	return bpf_prog_load(load_attr->prog_type, load_attr->name, load_attr->license,
			     load_attr->insns, load_attr->insns_cnt, &p);
}

int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
		     size_t insns_cnt, const char *license,
		     __u32 kern_version, char *log_buf,
		     size_t log_buf_sz)
{
	struct bpf_load_program_attr load_attr;

	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
	load_attr.prog_type = type;
	load_attr.expected_attach_type = 0;
	load_attr.name = NULL;
	load_attr.insns = insns;
	load_attr.insns_cnt = insns_cnt;
	load_attr.license = license;
	load_attr.kern_version = kern_version;

	return bpf_load_program_xattr2(&load_attr, log_buf, log_buf_sz);
}
int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
		       size_t insns_cnt, __u32 prog_flags, const char *license,
		       __u32 kern_version, char *log_buf, size_t log_buf_sz,
		       int log_level)
{
	union bpf_attr attr;
	int fd;

	bump_rlimit_memlock();

	memset(&attr, 0, sizeof(attr));
	attr.prog_type = type;
	attr.insn_cnt = (__u32)insns_cnt;
	attr.insns = ptr_to_u64(insns);
	attr.license = ptr_to_u64(license);
	attr.log_buf = ptr_to_u64(log_buf);
	attr.log_size = log_buf_sz;
	attr.log_level = log_level;
	log_buf[0] = 0;
	attr.kern_version = kern_version;
	attr.prog_flags = prog_flags;

	fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS);
	return libbpf_err_errno(fd);
}

int bpf_map_update_elem(int fd, const void *key, const void *value,
			__u64 flags)
{
	union bpf_attr attr;
	int ret;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = ptr_to_u64(key);
	attr.value = ptr_to_u64(value);
	attr.flags = flags;

	ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
	return libbpf_err_errno(ret);
}

int bpf_map_lookup_elem(int fd, const void *key, void *value)
{
	union bpf_attr attr;
	int ret;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = ptr_to_u64(key);
	attr.value = ptr_to_u64(value);

	ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
	return libbpf_err_errno(ret);
}

int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
{
	union bpf_attr attr;
	int ret;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = ptr_to_u64(key);
	attr.value = ptr_to_u64(value);
	attr.flags = flags;

	ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
	return libbpf_err_errno(ret);
}

int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
{
	union bpf_attr attr;
	int ret;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = ptr_to_u64(key);
	attr.value = ptr_to_u64(value);

	ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
	return libbpf_err_errno(ret);
}

int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags)
{
	union bpf_attr attr;
	int ret;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = ptr_to_u64(key);
	attr.value = ptr_to_u64(value);
	attr.flags = flags;

	ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
	return libbpf_err_errno(ret);
}

int bpf_map_delete_elem(int fd, const void *key)
{
	union bpf_attr attr;
	int ret;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = ptr_to_u64(key);

	ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
	return libbpf_err_errno(ret);
}

int bpf_map_get_next_key(int fd, const void *key, void *next_key)
{
	union bpf_attr attr;
	int ret;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = ptr_to_u64(key);
	attr.next_key = ptr_to_u64(next_key);

	ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
	return libbpf_err_errno(ret);
}
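/* Illustrative sketch (not part of libbpf): basic per-element operations on a
 * map FD obtained from bpf_map_create() or bpf_obj_get(). Variable names are
 * hypothetical; key and value buffers must match the map's key_size and
 * value_size. Passing NULL as the current key to bpf_map_get_next_key()
 * retrieves the first key, which is the usual way to start an iteration.
 *
 *	__u32 key = 1, cur, next;
 *	__u64 val = 42;
 *	int err;
 *
 *	bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
 *	bpf_map_lookup_elem(map_fd, &key, &val);
 *	bpf_map_delete_elem(map_fd, &key);
 *
 *	err = bpf_map_get_next_key(map_fd, NULL, &cur);
 *	while (!err) {
 *		... process cur ...
 *		err = bpf_map_get_next_key(map_fd, &cur, &next);
 *		cur = next;
 *	}
 */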
int bpf_map_freeze(int fd)
{
	union bpf_attr attr;
	int ret;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;

	ret = sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
	return libbpf_err_errno(ret);
}

static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
				void *out_batch, void *keys, void *values,
				__u32 *count,
				const struct bpf_map_batch_opts *opts)
{
	union bpf_attr attr;
	int ret;

	if (!OPTS_VALID(opts, bpf_map_batch_opts))
		return libbpf_err(-EINVAL);

	memset(&attr, 0, sizeof(attr));
	attr.batch.map_fd = fd;
	attr.batch.in_batch = ptr_to_u64(in_batch);
	attr.batch.out_batch = ptr_to_u64(out_batch);
	attr.batch.keys = ptr_to_u64(keys);
	attr.batch.values = ptr_to_u64(values);
	attr.batch.count = *count;
	attr.batch.elem_flags = OPTS_GET(opts, elem_flags, 0);
	attr.batch.flags = OPTS_GET(opts, flags, 0);

	ret = sys_bpf(cmd, &attr, sizeof(attr));
	*count = attr.batch.count;

	return libbpf_err_errno(ret);
}

int bpf_map_delete_batch(int fd, const void *keys, __u32 *count,
			 const struct bpf_map_batch_opts *opts)
{
	return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL,
				    NULL, (void *)keys, NULL, count, opts);
}

int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys,
			 void *values, __u32 *count,
			 const struct bpf_map_batch_opts *opts)
{
	return bpf_map_batch_common(BPF_MAP_LOOKUP_BATCH, fd, in_batch,
				    out_batch, keys, values, count, opts);
}

int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch,
				    void *keys, void *values, __u32 *count,
				    const struct bpf_map_batch_opts *opts)
{
	return bpf_map_batch_common(BPF_MAP_LOOKUP_AND_DELETE_BATCH,
				    fd, in_batch, out_batch, keys, values,
				    count, opts);
}

int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count,
			 const struct bpf_map_batch_opts *opts)
{
	return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL,
				    (void *)keys, (void *)values, count, opts);
}
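/* Illustrative sketch (not part of libbpf): a batched lookup over a
 * hypothetical BPF_MAP_TYPE_HASH map with __u32 keys and __u64 values. On
 * entry *count is the capacity of the keys/values arrays, on return it is the
 * number of elements actually filled in. in_batch == NULL starts from the
 * beginning; the opaque out_batch token is passed back as in_batch to
 * continue, and ENOENT signals the end of the map. The batch token type is
 * map-type specific; a 4-byte token is assumed here.
 *
 *	__u32 keys[64], in_batch, out_batch, count = 64;
 *	__u64 vals[64];
 *	int err;
 *
 *	err = bpf_map_lookup_batch(map_fd, NULL, &out_batch,
 *				   keys, vals, &count, NULL);
 *	... on the next call pass &out_batch (copied into in_batch) to resume ...
 */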
int bpf_obj_pin(int fd, const char *pathname)
{
	union bpf_attr attr;
	int ret;

	memset(&attr, 0, sizeof(attr));
	attr.pathname = ptr_to_u64((void *)pathname);
	attr.bpf_fd = fd;

	ret = sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
	return libbpf_err_errno(ret);
}

int bpf_obj_get(const char *pathname)
{
	union bpf_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.pathname = ptr_to_u64((void *)pathname);

	fd = sys_bpf_fd(BPF_OBJ_GET, &attr, sizeof(attr));
	return libbpf_err_errno(fd);
}

int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
		    unsigned int flags)
{
	DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, opts,
		.flags = flags,
	);

	return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts);
}

int bpf_prog_attach_xattr(int prog_fd, int target_fd,
			  enum bpf_attach_type type,
			  const struct bpf_prog_attach_opts *opts)
{
	union bpf_attr attr;
	int ret;

	if (!OPTS_VALID(opts, bpf_prog_attach_opts))
		return libbpf_err(-EINVAL);

	memset(&attr, 0, sizeof(attr));
	attr.target_fd = target_fd;
	attr.attach_bpf_fd = prog_fd;
	attr.attach_type = type;
	attr.attach_flags = OPTS_GET(opts, flags, 0);
	attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0);

	ret = sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
	return libbpf_err_errno(ret);
}

int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
{
	union bpf_attr attr;
	int ret;

	memset(&attr, 0, sizeof(attr));
	attr.target_fd = target_fd;
	attr.attach_type = type;

	ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
	return libbpf_err_errno(ret);
}

int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
{
	union bpf_attr attr;
	int ret;

	memset(&attr, 0, sizeof(attr));
	attr.target_fd = target_fd;
	attr.attach_bpf_fd = prog_fd;
	attr.attach_type = type;

	ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
	return libbpf_err_errno(ret);
}

int bpf_link_create(int prog_fd, int target_fd,
		    enum bpf_attach_type attach_type,
		    const struct bpf_link_create_opts *opts)
{
	__u32 target_btf_id, iter_info_len;
	union bpf_attr attr;
	int fd;

	if (!OPTS_VALID(opts, bpf_link_create_opts))
		return libbpf_err(-EINVAL);

	iter_info_len = OPTS_GET(opts, iter_info_len, 0);
	target_btf_id = OPTS_GET(opts, target_btf_id, 0);

	/* validate we don't have unexpected combinations of non-zero fields */
	if (iter_info_len || target_btf_id) {
		if (iter_info_len && target_btf_id)
			return libbpf_err(-EINVAL);
		if (!OPTS_ZEROED(opts, target_btf_id))
			return libbpf_err(-EINVAL);
	}

	memset(&attr, 0, sizeof(attr));
	attr.link_create.prog_fd = prog_fd;
	attr.link_create.target_fd = target_fd;
	attr.link_create.attach_type = attach_type;
	attr.link_create.flags = OPTS_GET(opts, flags, 0);

	if (target_btf_id) {
		attr.link_create.target_btf_id = target_btf_id;
		goto proceed;
	}

	switch (attach_type) {
	case BPF_TRACE_ITER:
		attr.link_create.iter_info = ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
		attr.link_create.iter_info_len = iter_info_len;
		break;
	case BPF_PERF_EVENT:
		attr.link_create.perf_event.bpf_cookie = OPTS_GET(opts, perf_event.bpf_cookie, 0);
		if (!OPTS_ZEROED(opts, perf_event))
			return libbpf_err(-EINVAL);
		break;
	default:
		if (!OPTS_ZEROED(opts, flags))
			return libbpf_err(-EINVAL);
		break;
	}
proceed:
	fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr));
	return libbpf_err_errno(fd);
}

int bpf_link_detach(int link_fd)
{
	union bpf_attr attr;
	int ret;

	memset(&attr, 0, sizeof(attr));
	attr.link_detach.link_fd = link_fd;

	ret = sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
	return libbpf_err_errno(ret);
}

int bpf_link_update(int link_fd, int new_prog_fd,
		    const struct bpf_link_update_opts *opts)
{
	union bpf_attr attr;
	int ret;

	if (!OPTS_VALID(opts, bpf_link_update_opts))
		return libbpf_err(-EINVAL);

	memset(&attr, 0, sizeof(attr));
	attr.link_update.link_fd = link_fd;
	attr.link_update.new_prog_fd = new_prog_fd;
	attr.link_update.flags = OPTS_GET(opts, flags, 0);
	attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);

	ret = sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
	return libbpf_err_errno(ret);
}

int bpf_iter_create(int link_fd)
{
	union bpf_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.iter_create.link_fd = link_fd;

	fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, sizeof(attr));
	return libbpf_err_errno(fd);
}
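/* Illustrative sketch (not part of libbpf): attaching a loaded cgroup program
 * either with the legacy BPF_PROG_ATTACH command or with a BPF link. The FDs
 * are hypothetical; cgroup_fd would come from open()-ing a cgroup directory.
 *
 *	... legacy attach: no link object is created ...
 *	bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_INGRESS, 0);
 *
 *	... link-based attach: the attachment lives as long as the link ...
 *	int link_fd = bpf_link_create(prog_fd, cgroup_fd,
 *				      BPF_CGROUP_INET_INGRESS, NULL);
 *	if (link_fd >= 0)
 *		bpf_link_detach(link_fd);   ... and/or close(link_fd) ...
 */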
int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
		   __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
{
	union bpf_attr attr;
	int ret;

	memset(&attr, 0, sizeof(attr));
	attr.query.target_fd = target_fd;
	attr.query.attach_type = type;
	attr.query.query_flags = query_flags;
	attr.query.prog_cnt = *prog_cnt;
	attr.query.prog_ids = ptr_to_u64(prog_ids);

	ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));

	if (attach_flags)
		*attach_flags = attr.query.attach_flags;
	*prog_cnt = attr.query.prog_cnt;

	return libbpf_err_errno(ret);
}

int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
		      void *data_out, __u32 *size_out, __u32 *retval,
		      __u32 *duration)
{
	union bpf_attr attr;
	int ret;

	memset(&attr, 0, sizeof(attr));
	attr.test.prog_fd = prog_fd;
	attr.test.data_in = ptr_to_u64(data);
	attr.test.data_out = ptr_to_u64(data_out);
	attr.test.data_size_in = size;
	attr.test.repeat = repeat;

	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));

	if (size_out)
		*size_out = attr.test.data_size_out;
	if (retval)
		*retval = attr.test.retval;
	if (duration)
		*duration = attr.test.duration;

	return libbpf_err_errno(ret);
}

int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
{
	union bpf_attr attr;
	int ret;

	if (!test_attr->data_out && test_attr->data_size_out > 0)
		return libbpf_err(-EINVAL);

	memset(&attr, 0, sizeof(attr));
	attr.test.prog_fd = test_attr->prog_fd;
	attr.test.data_in = ptr_to_u64(test_attr->data_in);
	attr.test.data_out = ptr_to_u64(test_attr->data_out);
	attr.test.data_size_in = test_attr->data_size_in;
	attr.test.data_size_out = test_attr->data_size_out;
	attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in);
	attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out);
	attr.test.ctx_size_in = test_attr->ctx_size_in;
	attr.test.ctx_size_out = test_attr->ctx_size_out;
	attr.test.repeat = test_attr->repeat;

	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));

	test_attr->data_size_out = attr.test.data_size_out;
	test_attr->ctx_size_out = attr.test.ctx_size_out;
	test_attr->retval = attr.test.retval;
	test_attr->duration = attr.test.duration;

	return libbpf_err_errno(ret);
}

int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
{
	union bpf_attr attr;
	int ret;

	if (!OPTS_VALID(opts, bpf_test_run_opts))
		return libbpf_err(-EINVAL);

	memset(&attr, 0, sizeof(attr));
	attr.test.prog_fd = prog_fd;
	attr.test.cpu = OPTS_GET(opts, cpu, 0);
	attr.test.flags = OPTS_GET(opts, flags, 0);
	attr.test.repeat = OPTS_GET(opts, repeat, 0);
	attr.test.duration = OPTS_GET(opts, duration, 0);
	attr.test.ctx_size_in = OPTS_GET(opts, ctx_size_in, 0);
	attr.test.ctx_size_out = OPTS_GET(opts, ctx_size_out, 0);
	attr.test.data_size_in = OPTS_GET(opts, data_size_in, 0);
	attr.test.data_size_out = OPTS_GET(opts, data_size_out, 0);
	attr.test.ctx_in = ptr_to_u64(OPTS_GET(opts, ctx_in, NULL));
	attr.test.ctx_out = ptr_to_u64(OPTS_GET(opts, ctx_out, NULL));
	attr.test.data_in = ptr_to_u64(OPTS_GET(opts, data_in, NULL));
	attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL));

	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));

	OPTS_SET(opts, data_size_out, attr.test.data_size_out);
	OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out);
	OPTS_SET(opts, duration, attr.test.duration);
	OPTS_SET(opts, retval, attr.test.retval);

	return libbpf_err_errno(ret);
}
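/* Illustrative sketch (not part of libbpf): running a loaded packet-processing
 * program (e.g. a socket filter) against a fabricated packet with
 * bpf_prog_test_run_opts(). Buffer sizes and the repeat count are arbitrary.
 *
 *	char pkt_in[64] = { ... Ethernet/IP header bytes ... };
 *	char pkt_out[256];
 *	LIBBPF_OPTS(bpf_test_run_opts, topts,
 *		.data_in = pkt_in,
 *		.data_size_in = sizeof(pkt_in),
 *		.data_out = pkt_out,
 *		.data_size_out = sizeof(pkt_out),
 *		.repeat = 1,
 *	);
 *
 *	if (!bpf_prog_test_run_opts(prog_fd, &topts))
 *		... topts.retval holds the program's return code and
 *		    topts.data_size_out the (possibly modified) packet size ...
 */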
static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
{
	union bpf_attr attr;
	int err;

	memset(&attr, 0, sizeof(attr));
	attr.start_id = start_id;

	err = sys_bpf(cmd, &attr, sizeof(attr));
	if (!err)
		*next_id = attr.next_id;

	return libbpf_err_errno(err);
}

int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
{
	return bpf_obj_get_next_id(start_id, next_id, BPF_PROG_GET_NEXT_ID);
}

int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
{
	return bpf_obj_get_next_id(start_id, next_id, BPF_MAP_GET_NEXT_ID);
}

int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id)
{
	return bpf_obj_get_next_id(start_id, next_id, BPF_BTF_GET_NEXT_ID);
}

int bpf_link_get_next_id(__u32 start_id, __u32 *next_id)
{
	return bpf_obj_get_next_id(start_id, next_id, BPF_LINK_GET_NEXT_ID);
}

int bpf_prog_get_fd_by_id(__u32 id)
{
	union bpf_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.prog_id = id;

	fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
	return libbpf_err_errno(fd);
}

int bpf_map_get_fd_by_id(__u32 id)
{
	union bpf_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.map_id = id;

	fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
	return libbpf_err_errno(fd);
}

int bpf_btf_get_fd_by_id(__u32 id)
{
	union bpf_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.btf_id = id;

	fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
	return libbpf_err_errno(fd);
}

int bpf_link_get_fd_by_id(__u32 id)
{
	union bpf_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.link_id = id;

	fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
	return libbpf_err_errno(fd);
}

int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
{
	union bpf_attr attr;
	int err;

	memset(&attr, 0, sizeof(attr));
	attr.info.bpf_fd = bpf_fd;
	attr.info.info_len = *info_len;
	attr.info.info = ptr_to_u64(info);

	err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));

	if (!err)
		*info_len = attr.info.info_len;

	return libbpf_err_errno(err);
}
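/* Illustrative sketch (not part of libbpf): walking all BPF programs loaded on
 * the system (typically requires CAP_SYS_ADMIN) and fetching their kernel-side
 * info. Variable names are hypothetical.
 *
 *	struct bpf_prog_info info;
 *	__u32 id = 0, len;
 *	int fd;
 *
 *	while (!bpf_prog_get_next_id(id, &id)) {
 *		fd = bpf_prog_get_fd_by_id(id);
 *		if (fd < 0)
 *			continue;
 *		memset(&info, 0, sizeof(info));
 *		len = sizeof(info);
 *		if (!bpf_obj_get_info_by_fd(fd, &info, &len))
 *			... info.type, info.id, info.name are now valid ...
 *		close(fd);
 *	}
 */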
int bpf_raw_tracepoint_open(const char *name, int prog_fd)
{
	union bpf_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.raw_tracepoint.name = ptr_to_u64(name);
	attr.raw_tracepoint.prog_fd = prog_fd;

	fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
	return libbpf_err_errno(fd);
}

int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_load_opts *opts)
{
	const size_t attr_sz = offsetofend(union bpf_attr, btf_log_level);
	union bpf_attr attr;
	char *log_buf;
	size_t log_size;
	__u32 log_level;
	int fd;

	bump_rlimit_memlock();

	memset(&attr, 0, attr_sz);

	if (!OPTS_VALID(opts, bpf_btf_load_opts))
		return libbpf_err(-EINVAL);

	log_buf = OPTS_GET(opts, log_buf, NULL);
	log_size = OPTS_GET(opts, log_size, 0);
	log_level = OPTS_GET(opts, log_level, 0);

	if (log_size > UINT_MAX)
		return libbpf_err(-EINVAL);
	if (log_size && !log_buf)
		return libbpf_err(-EINVAL);

	attr.btf = ptr_to_u64(btf_data);
	attr.btf_size = btf_size;
	/* log_level == 0 and log_buf != NULL means "try loading without
	 * log_buf, but retry with log_buf and log_level=1 on error", which is
	 * consistent across low-level and high-level BTF and program loading
	 * APIs within libbpf and provides a sensible behavior in practice
	 */
	if (log_level) {
		attr.btf_log_buf = ptr_to_u64(log_buf);
		attr.btf_log_size = (__u32)log_size;
		attr.btf_log_level = log_level;
	}

	fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz);
	if (fd < 0 && log_buf && log_level == 0) {
		attr.btf_log_buf = ptr_to_u64(log_buf);
		attr.btf_log_size = (__u32)log_size;
		attr.btf_log_level = 1;
		fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz);
	}
	return libbpf_err_errno(fd);
}

int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log)
{
	LIBBPF_OPTS(bpf_btf_load_opts, opts);
	int fd;

retry:
	if (do_log && log_buf && log_buf_size) {
		opts.log_buf = log_buf;
		opts.log_size = log_buf_size;
		opts.log_level = 1;
	}

	fd = bpf_btf_load(btf, btf_size, &opts);
	if (fd < 0 && !do_log && log_buf && log_buf_size) {
		do_log = true;
		goto retry;
	}

	return libbpf_err_errno(fd);
}

int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
		      __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
		      __u64 *probe_addr)
{
	union bpf_attr attr = {};
	int err;

	attr.task_fd_query.pid = pid;
	attr.task_fd_query.fd = fd;
	attr.task_fd_query.flags = flags;
	attr.task_fd_query.buf = ptr_to_u64(buf);
	attr.task_fd_query.buf_len = *buf_len;

	err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));

	*buf_len = attr.task_fd_query.buf_len;
	*prog_id = attr.task_fd_query.prog_id;
	*fd_type = attr.task_fd_query.fd_type;
	*probe_offset = attr.task_fd_query.probe_offset;
	*probe_addr = attr.task_fd_query.probe_addr;

	return libbpf_err_errno(err);
}

int bpf_enable_stats(enum bpf_stats_type type)
{
	union bpf_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.enable_stats.type = type;

	fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, sizeof(attr));
	return libbpf_err_errno(fd);
}

int bpf_prog_bind_map(int prog_fd, int map_fd,
		      const struct bpf_prog_bind_opts *opts)
{
	union bpf_attr attr;
	int ret;

	if (!OPTS_VALID(opts, bpf_prog_bind_opts))
		return libbpf_err(-EINVAL);

	memset(&attr, 0, sizeof(attr));
	attr.prog_bind_map.prog_fd = prog_fd;
	attr.prog_bind_map.map_fd = map_fd;
	attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0);

	ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
	return libbpf_err_errno(ret);
}
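/* Illustrative sketch (not part of libbpf): loading raw BTF bytes (e.g. read
 * from an ELF .BTF section) with bpf_btf_load() and a log buffer. As with
 * program loading, log_level == 0 plus a non-NULL log_buf means the load is
 * retried with log_level == 1 only if the first attempt fails. The
 * btf_bytes/btf_sz variables and buffer size are hypothetical.
 *
 *	char log[16 * 1024];
 *	LIBBPF_OPTS(bpf_btf_load_opts, opts,
 *		.log_buf = log,
 *		.log_size = sizeof(log),
 *	);
 *
 *	int btf_fd = bpf_btf_load(btf_bytes, btf_sz, &opts);
 *	if (btf_fd < 0)
 *		... log[] describes what the kernel rejected ...
 */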