// SPDX-License-Identifier: GPL-2.0
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <pthread.h>
#include <unistd.h>
#include <linux/perf_event.h>
#include <linux/fs.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include "trace_helpers.h"
#include <linux/limits.h>
#include <libelf.h>
#include <gelf.h>
#include "bpf/hashmap.h"
#include "bpf/libbpf_internal.h"
#include "bpf_util.h"

#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"

struct ksyms {
	struct ksym *syms;
	size_t sym_cap;
	size_t sym_cnt;
};

static struct ksyms *ksyms;
static pthread_mutex_t ksyms_mutex = PTHREAD_MUTEX_INITIALIZER;

static int ksyms__add_symbol(struct ksyms *ksyms, const char *name,
			     unsigned long addr)
{
	void *tmp;

	tmp = strdup(name);
	if (!tmp)
		return -ENOMEM;
	ksyms->syms[ksyms->sym_cnt].addr = addr;
	ksyms->syms[ksyms->sym_cnt].name = tmp;
	ksyms->sym_cnt++;
	return 0;
}

void free_kallsyms_local(struct ksyms *ksyms)
{
	unsigned int i;

	if (!ksyms)
		return;

	if (!ksyms->syms) {
		free(ksyms);
		return;
	}

	for (i = 0; i < ksyms->sym_cnt; i++)
		free(ksyms->syms[i].name);
	free(ksyms->syms);
	free(ksyms);
}

static struct ksyms *load_kallsyms_local_common(ksym_cmp_t cmp_cb)
{
	FILE *f;
	char func[256], buf[256];
	char symbol;
	void *addr;
	int ret;
	struct ksyms *ksyms;

	f = fopen("/proc/kallsyms", "r");
	if (!f)
		return NULL;

	ksyms = calloc(1, sizeof(struct ksyms));
	if (!ksyms) {
		fclose(f);
		return NULL;
	}

	while (fgets(buf, sizeof(buf), f)) {
		if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
			break;
		if (!addr)
			continue;

		ret = libbpf_ensure_mem((void **) &ksyms->syms, &ksyms->sym_cap,
					sizeof(struct ksym), ksyms->sym_cnt + 1);
		if (ret)
			goto error;
		ret = ksyms__add_symbol(ksyms, func, (unsigned long)addr);
		if (ret)
			goto error;
	}
	fclose(f);
	qsort(ksyms->syms, ksyms->sym_cnt, sizeof(struct ksym), cmp_cb);
	return ksyms;

error:
	fclose(f);
	free_kallsyms_local(ksyms);
	return NULL;
}

static int ksym_cmp(const void *p1, const void *p2)
{
	return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
}

struct ksyms *load_kallsyms_local(void)
{
	return load_kallsyms_local_common(ksym_cmp);
}

struct ksyms *load_kallsyms_custom_local(ksym_cmp_t cmp_cb)
{
	return load_kallsyms_local_common(cmp_cb);
}

int load_kallsyms(void)
{
	pthread_mutex_lock(&ksyms_mutex);
	if (!ksyms)
		ksyms = load_kallsyms_local();
	pthread_mutex_unlock(&ksyms_mutex);
	return ksyms ? 0 : 1;
}

struct ksym *ksym_search_local(struct ksyms *ksyms, long key)
{
	int start = 0, end = ksyms->sym_cnt;
	int result;

	/* kallsyms not loaded. return NULL */
	if (ksyms->sym_cnt <= 0)
		return NULL;

	while (start < end) {
		size_t mid = start + (end - start) / 2;

		result = key - ksyms->syms[mid].addr;
		if (result < 0)
			end = mid;
		else if (result > 0)
			start = mid + 1;
		else
			return &ksyms->syms[mid];
	}

	if (start >= 1 && ksyms->syms[start - 1].addr < key &&
	    key < ksyms->syms[start].addr)
		/* valid ksym */
		return &ksyms->syms[start - 1];

	/* out of range. return _stext */
	return &ksyms->syms[0];
}

struct ksym *search_kallsyms_custom_local(struct ksyms *ksyms, const void *p,
					  ksym_search_cmp_t cmp_cb)
{
	int start = 0, mid, end = ksyms->sym_cnt;
	struct ksym *ks;
	int result;

	while (start < end) {
		mid = start + (end - start) / 2;
		ks = &ksyms->syms[mid];
		result = cmp_cb(p, ks);
		if (result < 0)
			end = mid;
		else if (result > 0)
			start = mid + 1;
		else
			return ks;
	}

	return NULL;
}

struct ksym *ksym_search(long key)
{
	if (!ksyms)
		return NULL;
	return ksym_search_local(ksyms, key);
}

long ksym_get_addr_local(struct ksyms *ksyms, const char *name)
{
	int i;

	for (i = 0; i < ksyms->sym_cnt; i++) {
		if (strcmp(ksyms->syms[i].name, name) == 0)
			return ksyms->syms[i].addr;
	}

	return 0;
}

long ksym_get_addr(const char *name)
{
	if (!ksyms)
		return 0;
	return ksym_get_addr_local(ksyms, name);
}
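/* Example (editor's sketch, not part of the helpers above): a test that wants
 * to symbolize a kernel address typically loads the shared cache once and then
 * resolves addresses/names through the wrappers. "ip" below is a hypothetical
 * address, e.g. taken from a captured stack trace.
 *
 *	struct ksym *ks;
 *
 *	if (load_kallsyms())
 *		return;
 *	ks = ksym_search(ip);
 *	if (ks)
 *		printf("%lx -> %s\n", ip, ks->name);
 *	printf("bpf_fentry_test1 is at %lx\n", ksym_get_addr("bpf_fentry_test1"));
 */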
/* Open /proc/kallsyms and look the symbol's address up on the fly. Without
 * caching all symbols, this is faster than load + find.
 */
int kallsyms_find(const char *sym, unsigned long long *addr)
{
	char type, name[500], *match;
	unsigned long long value;
	int err = 0;
	FILE *f;

	f = fopen("/proc/kallsyms", "r");
	if (!f)
		return -EINVAL;

	while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) {
		/* If CONFIG_LTO_CLANG_THIN is enabled, static variable/function
		 * symbols could be promoted to global due to cross-file inlining.
		 * For such cases, the clang compiler will add a .llvm.<hash> suffix
		 * to those symbols to avoid potential naming conflicts.
		 * Let us ignore the .llvm.<hash> suffix during symbol comparison.
		 */
		if (type == 'd') {
			match = strstr(name, ".llvm.");
			if (match)
				*match = '\0';
		}
		if (strcmp(name, sym) == 0) {
			*addr = value;
			goto out;
		}
	}
	err = -ENOENT;

out:
	fclose(f);
	return err;
}

#ifdef PROCMAP_QUERY
int env_verbosity __weak = 0;

static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *start, size_t *offset, int *flags)
{
	char path_buf[PATH_MAX], build_id_buf[20];
	struct procmap_query q;
	int err;

	memset(&q, 0, sizeof(q));
	q.size = sizeof(q);
	q.query_flags = query_flags;
	q.query_addr = (__u64)addr;
	q.vma_name_addr = (__u64)path_buf;
	q.vma_name_size = sizeof(path_buf);
	q.build_id_addr = (__u64)build_id_buf;
	q.build_id_size = sizeof(build_id_buf);

	err = ioctl(fd, PROCMAP_QUERY, &q);
	if (err < 0) {
		err = -errno;
		if (err == -ENOTTY)
			return -EOPNOTSUPP; /* ioctl() not implemented yet */
		if (err == -ENOENT)
			return -ESRCH; /* vma not found */
		return err;
	}

	if (env_verbosity >= 1) {
		printf("VMA FOUND (addr %08lx): %08lx-%08lx %c%c%c%c %08lx %02x:%02x %ld %s (build ID: %s, %d bytes)\n",
		       (long)addr, (long)q.vma_start, (long)q.vma_end,
		       (q.vma_flags & PROCMAP_QUERY_VMA_READABLE) ? 'r' : '-',
		       (q.vma_flags & PROCMAP_QUERY_VMA_WRITABLE) ? 'w' : '-',
		       (q.vma_flags & PROCMAP_QUERY_VMA_EXECUTABLE) ? 'x' : '-',
		       (q.vma_flags & PROCMAP_QUERY_VMA_SHARED) ? 's' : 'p',
		       (long)q.vma_offset, q.dev_major, q.dev_minor, (long)q.inode,
		       q.vma_name_size ? path_buf : "",
		       q.build_id_size ? "YES" : "NO",
		       q.build_id_size);
	}

	*start = q.vma_start;
	*offset = q.vma_offset;
	*flags = q.vma_flags;
	return 0;
}
#else
# ifndef PROCMAP_QUERY_VMA_EXECUTABLE
# define PROCMAP_QUERY_VMA_EXECUTABLE 0x04
# endif

static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *start, size_t *offset, int *flags)
{
	return -EOPNOTSUPP;
}
#endif

ssize_t get_uprobe_offset(const void *addr)
{
	size_t start, base, end;
	FILE *f;
	char buf[256];
	int err, flags;

	f = fopen("/proc/self/maps", "r");
	if (!f)
		return -errno;

	/* requested executable VMA only */
	err = procmap_query(fileno(f), addr, PROCMAP_QUERY_VMA_EXECUTABLE, &start, &base, &flags);
	if (err == -EOPNOTSUPP) {
		bool found = false;

		while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) {
			if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) {
				found = true;
				break;
			}
		}
		if (!found) {
			fclose(f);
			return -ESRCH;
		}
	} else if (err) {
		fclose(f);
		return err;
	}
	fclose(f);

#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2

#define OP_RT_RA_MASK 0xffff0000UL
#define LIS_R2 0x3c400000UL
#define ADDIS_R2_R12 0x3c4c0000UL
#define ADDI_R2_R2 0x38420000UL

	/*
	 * A PPC64 ABIv2 function may have a local and a global entry
	 * point. We need to use the local entry point when patching
	 * functions, so identify and step over the global entry point
	 * sequence.
	 *
	 * The global entry point sequence is always of the form:
	 *
	 * addis r2,r12,XXXX
	 * addi  r2,r2,XXXX
	 *
	 * A linker optimisation may convert the addis to lis:
	 *
	 * lis  r2,XXXX
	 * addi r2,r2,XXXX
	 */
	{
		const __u32 *insn = (const __u32 *)(uintptr_t)addr;

		if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
		     ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
		    ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
			return (uintptr_t)(insn + 2) - start + base;
	}
#endif
	return (uintptr_t)addr - start + base;
}
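/* Example (editor's sketch): get_uprobe_offset() turns a function pointer in the
 * running process into the file offset that uprobe attachment expects. A test
 * could attach to one of its own functions roughly like this; "trigger_func"
 * and "skel" are assumptions, while bpf_program__attach_uprobe() is the real
 * libbpf API (pid 0 means the current process).
 *
 *	ssize_t off = get_uprobe_offset(&trigger_func);
 *
 *	if (off >= 0)
 *		bpf_program__attach_uprobe(skel->progs.handler, false, 0,
 *					   "/proc/self/exe", off);
 */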
ssize_t get_rel_offset(uintptr_t addr)
{
	size_t start, end, offset;
	char buf[256];
	FILE *f;
	int err, flags;

	f = fopen("/proc/self/maps", "r");
	if (!f)
		return -errno;

	err = procmap_query(fileno(f), (const void *)addr, 0, &start, &offset, &flags);
	if (err == 0) {
		fclose(f);
		return (size_t)addr - start + offset;
	} else if (err != -EOPNOTSUPP) {
		fclose(f);
		return err;
	}

	/* Fall back to parsing /proc/self/maps directly. */
	while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &offset) == 4) {
		if (addr >= start && addr < end) {
			fclose(f);
			return (size_t)addr - start + offset;
		}
	}

	fclose(f);
	return -EINVAL;
}

static int
parse_build_id_buf(const void *note_start, Elf32_Word note_size, char *build_id)
{
	Elf32_Word note_offs = 0;

	while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
		Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);

		if (nhdr->n_type == 3 && nhdr->n_namesz == sizeof("GNU") &&
		    !strcmp((char *)(nhdr + 1), "GNU") && nhdr->n_descsz > 0 &&
		    nhdr->n_descsz <= BPF_BUILD_ID_SIZE) {
			memcpy(build_id, note_start + note_offs +
			       ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), nhdr->n_descsz);
			memset(build_id + nhdr->n_descsz, 0, BPF_BUILD_ID_SIZE - nhdr->n_descsz);
			return (int) nhdr->n_descsz;
		}

		note_offs = note_offs + sizeof(Elf32_Nhdr) +
			ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
	}

	return -ENOENT;
}

/* Reads the build ID from the binary at *path* and returns it in the
 * *build_id* buffer, whose *size* is expected to be at least
 * BPF_BUILD_ID_SIZE bytes. Returns the size of the build ID on success,
 * a negative error value otherwise.
 */
int read_build_id(const char *path, char *build_id, size_t size)
{
	int fd, err = -EINVAL;
	Elf *elf = NULL;
	GElf_Ehdr ehdr;
	size_t max, i;

	if (size < BPF_BUILD_ID_SIZE)
		return -EINVAL;

	fd = open(path, O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		return -errno;

	(void)elf_version(EV_CURRENT);

	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
	if (!elf)
		goto out;
	if (elf_kind(elf) != ELF_K_ELF)
		goto out;
	if (!gelf_getehdr(elf, &ehdr))
		goto out;

	for (i = 0; i < ehdr.e_phnum; i++) {
		GElf_Phdr mem, *phdr;
		char *data;

		phdr = gelf_getphdr(elf, i, &mem);
		if (!phdr)
			goto out;
		if (phdr->p_type != PT_NOTE)
			continue;
		data = elf_rawfile(elf, &max);
		if (!data)
			goto out;
		if (phdr->p_offset + phdr->p_memsz > max)
			goto out;
		err = parse_build_id_buf(data + phdr->p_offset, phdr->p_memsz, build_id);
		if (err > 0)
			break;
	}

out:
	if (elf)
		elf_end(elf);
	close(fd);
	return err;
}
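/* Example (editor's sketch): read_build_id() is typically used to compare the
 * build ID reported by a BPF program (e.g. stack traces collected with
 * BPF_F_USER_BUILD_ID) against the one stored in the binary on disk.
 * "expected" and "expected_len" are assumptions.
 *
 *	char build_id[BPF_BUILD_ID_SIZE];
 *	int len = read_build_id("/proc/self/exe", build_id, sizeof(build_id));
 *
 *	if (len > 0 && len == expected_len && !memcmp(build_id, expected, len))
 *		printf("build IDs match\n");
 */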
int read_trace_pipe_iter(void (*cb)(const char *str, void *data), void *data, int iter)
{
	size_t buflen = 0;
	char *buf = NULL;
	FILE *fp = NULL;

	if (access(TRACEFS_PIPE, F_OK) == 0)
		fp = fopen(TRACEFS_PIPE, "r");
	else
		fp = fopen(DEBUGFS_PIPE, "r");
	if (!fp)
		return -1;

	/* We do not want to wait forever when iter is specified. */
	if (iter)
		fcntl(fileno(fp), F_SETFL, O_NONBLOCK);

	for (;;) {
		ssize_t len = getline(&buf, &buflen, fp);

		/* Stop on EOF or a real error; keep going on EAGAIN so that a
		 * bounded number of iterations still runs in non-blocking mode.
		 */
		if (len < 0 && errno != EAGAIN)
			break;
		if (len > 0)
			cb(buf, data);
		if (iter && !(--iter))
			break;
	}

	free(buf);
	fclose(fp);
	return 0;
}

static void trace_pipe_cb(const char *str, void *data)
{
	printf("%s", str);
}

void read_trace_pipe(void)
{
	read_trace_pipe_iter(trace_pipe_cb, NULL, 0);
}
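/* Example (editor's sketch): a test that expects its BPF program to print a
 * marker via bpf_printk() can scan a bounded number of trace_pipe lines instead
 * of blocking forever; the callback and "found" flag below are assumptions.
 *
 *	static void find_marker(const char *str, void *data)
 *	{
 *		if (strstr(str, "my_test_marker"))
 *			*(bool *)data = true;
 *	}
 *
 *	bool found = false;
 *
 *	read_trace_pipe_iter(find_marker, &found, 1000);
 */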
static size_t symbol_hash(long key, void *ctx __maybe_unused)
{
	return str_hash((const char *) key);
}

static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
{
	return strcmp((const char *) key1, (const char *) key2) == 0;
}

static bool is_invalid_entry(char *buf, bool kernel)
{
	if (kernel && strchr(buf, '['))
		return true;
	if (!kernel && !strchr(buf, '['))
		return true;
	return false;
}

static const char * const trace_blacklist[] = {
	"migrate_disable",
	"migrate_enable",
	"rcu_read_unlock_strict",
	"preempt_count_add",
	"preempt_count_sub",
	"__rcu_read_lock",
	"__rcu_read_unlock",
	"bpf_get_numa_node_id",
};

static bool skip_entry(char *name)
{
	int i;

	/*
	 * We attach to almost all kernel functions and some of them
	 * will cause 'suspicious RCU usage' when fprobe is attached
	 * to them. Filter out the current culprits: arch_cpu_idle,
	 * default_idle and rcu_* functions.
	 */
	if (!strcmp(name, "arch_cpu_idle"))
		return true;
	if (!strcmp(name, "default_idle"))
		return true;
	if (!strncmp(name, "rcu_", 4))
		return true;
	if (!strcmp(name, "bpf_dispatcher_xdp_func"))
		return true;
	if (!strncmp(name, "__ftrace_invalid_address__",
		     sizeof("__ftrace_invalid_address__") - 1))
		return true;

	for (i = 0; i < ARRAY_SIZE(trace_blacklist); i++) {
		if (!strcmp(name, trace_blacklist[i]))
			return true;
	}

	return false;
}

/* Do comparison by ignoring '.llvm.<hash>' suffixes. */
static int compare_name(const char *name1, const char *name2)
{
	const char *res1, *res2;
	int len1, len2;

	res1 = strstr(name1, ".llvm.");
	res2 = strstr(name2, ".llvm.");
	len1 = res1 ? res1 - name1 : strlen(name1);
	len2 = res2 ? res2 - name2 : strlen(name2);

	if (len1 == len2)
		return strncmp(name1, name2, len1);
	if (len1 < len2)
		return strncmp(name1, name2, len1) <= 0 ? -1 : 1;
	return strncmp(name1, name2, len2) >= 0 ? 1 : -1;
}

static int load_kallsyms_compare(const void *p1, const void *p2)
{
	return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name);
}

static int search_kallsyms_compare(const void *p1, const struct ksym *p2)
{
	return compare_name(p1, p2->name);
}
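/* With the comparison above, "cgroup_mkdir.llvm.123456789" and "cgroup_mkdir"
 * are considered equal, so a plain name read from available_filter_functions
 * still finds its (possibly suffixed) kallsyms entry. bpf_get_ksyms() below
 * relies on this when mapping filter-function names back to kallsyms names.
 */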
int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel)
{
	size_t cap = 0, cnt = 0;
	char *name = NULL, *ksym_name, **syms = NULL;
	struct hashmap *map;
	struct ksyms *ksyms;
	struct ksym *ks;
	char buf[256];
	FILE *f;
	int err = 0;

	ksyms = load_kallsyms_custom_local(load_kallsyms_compare);
	if (!ksyms)
		return -EINVAL;

	/*
	 * The available_filter_functions file contains many duplicates,
	 * but other than that all symbols are usable to trace.
	 * Filter out the duplicates by using hashmap__add, which won't
	 * add an existing entry.
	 */

	if (access("/sys/kernel/tracing/trace", F_OK) == 0)
		f = fopen("/sys/kernel/tracing/available_filter_functions", "r");
	else
		f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");

	if (!f) {
		free_kallsyms_local(ksyms);
		return -EINVAL;
	}

	map = hashmap__new(symbol_hash, symbol_equal, NULL);
	if (IS_ERR(map)) {
		err = libbpf_get_error(map);
		goto error;
	}

	while (fgets(buf, sizeof(buf), f)) {
		if (is_invalid_entry(buf, kernel))
			continue;

		free(name);
		if (sscanf(buf, "%ms%*[^\n]\n", &name) != 1)
			continue;
		if (skip_entry(name))
			continue;

		ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare);
		if (!ks) {
			err = -EINVAL;
			goto error;
		}

		ksym_name = ks->name;
		err = hashmap__add(map, ksym_name, 0);
		if (err == -EEXIST) {
			err = 0;
			continue;
		}
		if (err)
			goto error;

		err = libbpf_ensure_mem((void **) &syms, &cap,
					sizeof(*syms), cnt + 1);
		if (err)
			goto error;

		/* The returned strings are owned by *ksyms*, which is kept
		 * alive for as long as the caller uses *symsp*.
		 */
		syms[cnt++] = ksym_name;
	}

	*symsp = syms;
	*cntp = cnt;

error:
	free(name);
	fclose(f);
	hashmap__free(map);
	if (err)
		free(syms);
	return err;
}
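/* Example (editor's sketch): the symbol list is typically handed to a
 * kprobe.multi link; "skel" is an assumption, the libbpf opts API is real.
 *
 *	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
 *	char **syms = NULL;
 *	size_t cnt = 0;
 *
 *	if (!bpf_get_ksyms(&syms, &cnt, true)) {
 *		opts.syms = (const char **)syms;
 *		opts.cnt = cnt;
 *		bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
 *						      NULL, &opts);
 *	}
 */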
int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel)
{
	unsigned long *addrs, *tmp_addrs;
	int err = 0, max_cnt, inc_cnt;
	char *name = NULL;
	size_t cnt = 0;
	char buf[256];
	void *addr;
	FILE *f;

	if (access("/sys/kernel/tracing/trace", F_OK) == 0)
		f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r");
	else
		f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r");

	if (!f)
		return -ENOENT;

	/* A typical setup has 50k+ entries, so initially allocate space for
	 * 64k entries. If 64k is not enough, grow the array by 1k entries
	 * at a time.
	 */
	max_cnt = 65536;
	inc_cnt = 1024;
	addrs = malloc(max_cnt * sizeof(*addrs));
	if (addrs == NULL) {
		err = -ENOMEM;
		goto error;
	}

	while (fgets(buf, sizeof(buf), f)) {
		if (is_invalid_entry(buf, kernel))
			continue;

		free(name);
		if (sscanf(buf, "%p %ms%*[^\n]\n", &addr, &name) != 2)
			continue;
		if (skip_entry(name))
			continue;

		if (cnt == max_cnt) {
			max_cnt += inc_cnt;
			tmp_addrs = realloc(addrs, max_cnt * sizeof(*addrs));
			if (!tmp_addrs) {
				err = -ENOMEM;
				goto error;
			}
			addrs = tmp_addrs;
		}

		addrs[cnt++] = (unsigned long)addr;
	}

	*addrsp = addrs;
	*cntp = cnt;

error:
	free(name);
	fclose(f);
	if (err)
		free(addrs);
	return err;
}
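/* Example (editor's sketch): attaching by address mirrors the symbol-based flow
 * above, filling opts.addrs instead of opts.syms; "skel" is again an assumption.
 *
 *	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
 *	unsigned long *addrs = NULL;
 *	size_t cnt = 0;
 *
 *	if (!bpf_get_addrs(&addrs, &cnt, true)) {
 *		opts.addrs = addrs;
 *		opts.cnt = cnt;
 *		bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
 *						      NULL, &opts);
 *		free(addrs);
 *	}
 */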