1 // SPDX-License-Identifier: GPL-2.0 2 #include <ctype.h> 3 #include <stdio.h> 4 #include <stdlib.h> 5 #include <string.h> 6 #include <assert.h> 7 #include <errno.h> 8 #include <fcntl.h> 9 #include <poll.h> 10 #include <pthread.h> 11 #include <unistd.h> 12 #include <linux/perf_event.h> 13 #include <linux/fs.h> 14 #include <sys/ioctl.h> 15 #include <sys/mman.h> 16 #include "trace_helpers.h" 17 #include <linux/limits.h> 18 #include <libelf.h> 19 #include <gelf.h> 20 #include "bpf/hashmap.h" 21 #include "bpf/libbpf_internal.h" 22 #include "bpf_util.h" 23 24 #define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" 25 #define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" 26 27 static struct ksyms *ksyms; 28 static pthread_mutex_t ksyms_mutex = PTHREAD_MUTEX_INITIALIZER; 29 30 static int ksyms__add_symbol(struct ksyms *ksyms, const char *name, 31 unsigned long addr) 32 { 33 void *tmp; 34 35 tmp = strdup(name); 36 if (!tmp) 37 return -ENOMEM; 38 ksyms->syms[ksyms->sym_cnt].addr = addr; 39 ksyms->syms[ksyms->sym_cnt].name = tmp; 40 ksyms->sym_cnt++; 41 return 0; 42 } 43 44 void free_kallsyms_local(struct ksyms *ksyms) 45 { 46 unsigned int i; 47 48 if (!ksyms) 49 return; 50 51 free(ksyms->filtered_syms); 52 53 if (!ksyms->syms) { 54 free(ksyms); 55 return; 56 } 57 58 for (i = 0; i < ksyms->sym_cnt; i++) 59 free(ksyms->syms[i].name); 60 free(ksyms->syms); 61 free(ksyms); 62 } 63 64 static struct ksyms *load_kallsyms_local_common(ksym_cmp_t cmp_cb) 65 { 66 FILE *f; 67 char func[256], buf[256]; 68 char symbol; 69 void *addr; 70 int ret; 71 struct ksyms *ksyms; 72 73 f = fopen("/proc/kallsyms", "r"); 74 if (!f) 75 return NULL; 76 77 ksyms = calloc(1, sizeof(struct ksyms)); 78 if (!ksyms) { 79 fclose(f); 80 return NULL; 81 } 82 83 while (fgets(buf, sizeof(buf), f)) { 84 if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) 85 break; 86 if (!addr) 87 continue; 88 89 ret = libbpf_ensure_mem((void **) &ksyms->syms, &ksyms->sym_cap, 90 sizeof(struct ksym), ksyms->sym_cnt + 1); 91 if (ret) 92 goto error; 93 ret = ksyms__add_symbol(ksyms, func, (unsigned long)addr); 94 if (ret) 95 goto error; 96 } 97 fclose(f); 98 qsort(ksyms->syms, ksyms->sym_cnt, sizeof(struct ksym), cmp_cb); 99 return ksyms; 100 101 error: 102 fclose(f); 103 free_kallsyms_local(ksyms); 104 return NULL; 105 } 106 107 static int ksym_cmp(const void *p1, const void *p2) 108 { 109 return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; 110 } 111 112 struct ksyms *load_kallsyms_local(void) 113 { 114 return load_kallsyms_local_common(ksym_cmp); 115 } 116 117 struct ksyms *load_kallsyms_custom_local(ksym_cmp_t cmp_cb) 118 { 119 return load_kallsyms_local_common(cmp_cb); 120 } 121 122 int load_kallsyms(void) 123 { 124 pthread_mutex_lock(&ksyms_mutex); 125 if (!ksyms) 126 ksyms = load_kallsyms_local(); 127 pthread_mutex_unlock(&ksyms_mutex); 128 return ksyms ? 0 : 1; 129 } 130 131 struct ksym *ksym_search_local(struct ksyms *ksyms, long key) 132 { 133 int start = 0, end = ksyms->sym_cnt; 134 int result; 135 136 /* kallsyms not loaded. return NULL */ 137 if (ksyms->sym_cnt <= 0) 138 return NULL; 139 140 while (start < end) { 141 size_t mid = start + (end - start) / 2; 142 143 result = key - ksyms->syms[mid].addr; 144 if (result < 0) 145 end = mid; 146 else if (result > 0) 147 start = mid + 1; 148 else 149 return &ksyms->syms[mid]; 150 } 151 152 if (start >= 1 && ksyms->syms[start - 1].addr < key && 153 key < ksyms->syms[start].addr) 154 /* valid ksym */ 155 return &ksyms->syms[start - 1]; 156 157 /* out of range. return _stext */ 158 return &ksyms->syms[0]; 159 } 160 161 struct ksym *search_kallsyms_custom_local(struct ksyms *ksyms, const void *p, 162 ksym_search_cmp_t cmp_cb) 163 { 164 int start = 0, mid, end = ksyms->sym_cnt; 165 struct ksym *ks; 166 int result; 167 168 while (start < end) { 169 mid = start + (end - start) / 2; 170 ks = &ksyms->syms[mid]; 171 result = cmp_cb(p, ks); 172 if (result < 0) 173 end = mid; 174 else if (result > 0) 175 start = mid + 1; 176 else 177 return ks; 178 } 179 180 return NULL; 181 } 182 183 struct ksym *ksym_search(long key) 184 { 185 if (!ksyms) 186 return NULL; 187 return ksym_search_local(ksyms, key); 188 } 189 190 long ksym_get_addr_local(struct ksyms *ksyms, const char *name) 191 { 192 int i; 193 194 for (i = 0; i < ksyms->sym_cnt; i++) { 195 if (strcmp(ksyms->syms[i].name, name) == 0) 196 return ksyms->syms[i].addr; 197 } 198 199 return 0; 200 } 201 202 long ksym_get_addr(const char *name) 203 { 204 if (!ksyms) 205 return 0; 206 return ksym_get_addr_local(ksyms, name); 207 } 208 209 /* open kallsyms and read symbol addresses on the fly. Without caching all symbols, 210 * this is faster than load + find. 211 */ 212 int kallsyms_find(const char *sym, unsigned long long *addr) 213 { 214 char type, name[500], *match; 215 unsigned long long value; 216 int err = 0; 217 FILE *f; 218 219 f = fopen("/proc/kallsyms", "r"); 220 if (!f) 221 return -EINVAL; 222 223 while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) { 224 /* If CONFIG_LTO_CLANG_THIN is enabled, static variable/function 225 * symbols could be promoted to global due to cross-file inlining. 226 * For such cases, clang compiler will add .llvm.<hash> suffix 227 * to those symbols to avoid potential naming conflict. 228 * Let us ignore .llvm.<hash> suffix during symbol comparison. 229 */ 230 if (type == 'd') { 231 match = strstr(name, ".llvm."); 232 if (match) 233 *match = '\0'; 234 } 235 if (strcmp(name, sym) == 0) { 236 *addr = value; 237 goto out; 238 } 239 } 240 err = -ENOENT; 241 242 out: 243 fclose(f); 244 return err; 245 } 246 247 #ifdef PROCMAP_QUERY 248 int env_verbosity __weak = 0; 249 250 static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *start, size_t *offset, int *flags) 251 { 252 char path_buf[PATH_MAX], build_id_buf[20]; 253 struct procmap_query q; 254 int err; 255 256 memset(&q, 0, sizeof(q)); 257 q.size = sizeof(q); 258 q.query_flags = query_flags; 259 q.query_addr = (__u64)addr; 260 q.vma_name_addr = (__u64)path_buf; 261 q.vma_name_size = sizeof(path_buf); 262 q.build_id_addr = (__u64)build_id_buf; 263 q.build_id_size = sizeof(build_id_buf); 264 265 err = ioctl(fd, PROCMAP_QUERY, &q); 266 if (err < 0) { 267 err = -errno; 268 if (err == -ENOTTY) 269 return -EOPNOTSUPP; /* ioctl() not implemented yet */ 270 if (err == -ENOENT) 271 return -ESRCH; /* vma not found */ 272 return err; 273 } 274 275 if (env_verbosity >= 1) { 276 printf("VMA FOUND (addr %08lx): %08lx-%08lx %c%c%c%c %08lx %02x:%02x %ld %s (build ID: %s, %d bytes)\n", 277 (long)addr, (long)q.vma_start, (long)q.vma_end, 278 (q.vma_flags & PROCMAP_QUERY_VMA_READABLE) ? 'r' : '-', 279 (q.vma_flags & PROCMAP_QUERY_VMA_WRITABLE) ? 'w' : '-', 280 (q.vma_flags & PROCMAP_QUERY_VMA_EXECUTABLE) ? 'x' : '-', 281 (q.vma_flags & PROCMAP_QUERY_VMA_SHARED) ? 's' : 'p', 282 (long)q.vma_offset, q.dev_major, q.dev_minor, (long)q.inode, 283 q.vma_name_size ? path_buf : "", 284 q.build_id_size ? "YES" : "NO", 285 q.build_id_size); 286 } 287 288 *start = q.vma_start; 289 *offset = q.vma_offset; 290 *flags = q.vma_flags; 291 return 0; 292 } 293 #else 294 # ifndef PROCMAP_QUERY_VMA_EXECUTABLE 295 # define PROCMAP_QUERY_VMA_EXECUTABLE 0x04 296 # endif 297 298 static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *start, size_t *offset, int *flags) 299 { 300 return -EOPNOTSUPP; 301 } 302 #endif 303 304 ssize_t get_uprobe_offset(const void *addr) 305 { 306 size_t start, base, end; 307 FILE *f; 308 char buf[256]; 309 int err, flags; 310 311 f = fopen("/proc/self/maps", "r"); 312 if (!f) 313 return -errno; 314 315 /* requested executable VMA only */ 316 err = procmap_query(fileno(f), addr, PROCMAP_QUERY_VMA_EXECUTABLE, &start, &base, &flags); 317 if (err == -EOPNOTSUPP) { 318 bool found = false; 319 320 while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) { 321 if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) { 322 found = true; 323 break; 324 } 325 } 326 if (!found) { 327 fclose(f); 328 return -ESRCH; 329 } 330 } else if (err) { 331 fclose(f); 332 return err; 333 } 334 fclose(f); 335 336 #if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2 337 338 #define OP_RT_RA_MASK 0xffff0000UL 339 #define LIS_R2 0x3c400000UL 340 #define ADDIS_R2_R12 0x3c4c0000UL 341 #define ADDI_R2_R2 0x38420000UL 342 343 /* 344 * A PPC64 ABIv2 function may have a local and a global entry 345 * point. We need to use the local entry point when patching 346 * functions, so identify and step over the global entry point 347 * sequence. 348 * 349 * The global entry point sequence is always of the form: 350 * 351 * addis r2,r12,XXXX 352 * addi r2,r2,XXXX 353 * 354 * A linker optimisation may convert the addis to lis: 355 * 356 * lis r2,XXXX 357 * addi r2,r2,XXXX 358 */ 359 { 360 const __u32 *insn = (const __u32 *)(uintptr_t)addr; 361 362 if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) || 363 ((*insn & OP_RT_RA_MASK) == LIS_R2)) && 364 ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2)) 365 return (uintptr_t)(insn + 2) - start + base; 366 } 367 #endif 368 return (uintptr_t)addr - start + base; 369 } 370 371 ssize_t get_rel_offset(uintptr_t addr) 372 { 373 size_t start, end, offset; 374 char buf[256]; 375 FILE *f; 376 int err, flags; 377 378 f = fopen("/proc/self/maps", "r"); 379 if (!f) 380 return -errno; 381 382 err = procmap_query(fileno(f), (const void *)addr, 0, &start, &offset, &flags); 383 if (err == 0) { 384 fclose(f); 385 return (size_t)addr - start + offset; 386 } else if (err != -EOPNOTSUPP) { 387 fclose(f); 388 return err; 389 } else if (err) { 390 while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &offset) == 4) { 391 if (addr >= start && addr < end) { 392 fclose(f); 393 return (size_t)addr - start + offset; 394 } 395 } 396 } 397 398 fclose(f); 399 return -EINVAL; 400 } 401 402 static int 403 parse_build_id_buf(const void *note_start, Elf32_Word note_size, char *build_id) 404 { 405 Elf32_Word note_offs = 0; 406 407 while (note_offs + sizeof(Elf32_Nhdr) < note_size) { 408 Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs); 409 410 if (nhdr->n_type == 3 && nhdr->n_namesz == sizeof("GNU") && 411 !strcmp((char *)(nhdr + 1), "GNU") && nhdr->n_descsz > 0 && 412 nhdr->n_descsz <= BPF_BUILD_ID_SIZE) { 413 memcpy(build_id, note_start + note_offs + 414 ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), nhdr->n_descsz); 415 memset(build_id + nhdr->n_descsz, 0, BPF_BUILD_ID_SIZE - nhdr->n_descsz); 416 return (int) nhdr->n_descsz; 417 } 418 419 note_offs = note_offs + sizeof(Elf32_Nhdr) + 420 ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4); 421 } 422 423 return -ENOENT; 424 } 425 426 /* Reads binary from *path* file and returns it in the *build_id* buffer 427 * with *size* which is expected to be at least BPF_BUILD_ID_SIZE bytes. 428 * Returns size of build id on success. On error the error value is 429 * returned. 430 */ 431 int read_build_id(const char *path, char *build_id, size_t size) 432 { 433 int fd, err = -EINVAL; 434 Elf *elf = NULL; 435 GElf_Ehdr ehdr; 436 size_t max, i; 437 438 if (size < BPF_BUILD_ID_SIZE) 439 return -EINVAL; 440 441 fd = open(path, O_RDONLY | O_CLOEXEC); 442 if (fd < 0) 443 return -errno; 444 445 (void)elf_version(EV_CURRENT); 446 447 elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); 448 if (!elf) 449 goto out; 450 if (elf_kind(elf) != ELF_K_ELF) 451 goto out; 452 if (!gelf_getehdr(elf, &ehdr)) 453 goto out; 454 455 for (i = 0; i < ehdr.e_phnum; i++) { 456 GElf_Phdr mem, *phdr; 457 char *data; 458 459 phdr = gelf_getphdr(elf, i, &mem); 460 if (!phdr) 461 goto out; 462 if (phdr->p_type != PT_NOTE) 463 continue; 464 data = elf_rawfile(elf, &max); 465 if (!data) 466 goto out; 467 if (phdr->p_offset + phdr->p_memsz > max) 468 goto out; 469 err = parse_build_id_buf(data + phdr->p_offset, phdr->p_memsz, build_id); 470 if (err > 0) 471 break; 472 } 473 474 out: 475 if (elf) 476 elf_end(elf); 477 close(fd); 478 return err; 479 } 480 481 int read_trace_pipe_iter(void (*cb)(const char *str, void *data), void *data, int iter) 482 { 483 size_t buflen, n; 484 char *buf = NULL; 485 FILE *fp = NULL; 486 487 if (access(TRACEFS_PIPE, F_OK) == 0) 488 fp = fopen(TRACEFS_PIPE, "r"); 489 else 490 fp = fopen(DEBUGFS_PIPE, "r"); 491 if (!fp) 492 return -1; 493 494 /* We do not want to wait forever when iter is specified. */ 495 if (iter) 496 fcntl(fileno(fp), F_SETFL, O_NONBLOCK); 497 498 while ((n = getline(&buf, &buflen, fp) >= 0) || errno == EAGAIN) { 499 if (n > 0) 500 cb(buf, data); 501 if (iter && !(--iter)) 502 break; 503 } 504 505 free(buf); 506 if (fp) 507 fclose(fp); 508 return 0; 509 } 510 511 static void trace_pipe_cb(const char *str, void *data) 512 { 513 printf("%s", str); 514 } 515 516 void read_trace_pipe(void) 517 { 518 read_trace_pipe_iter(trace_pipe_cb, NULL, 0); 519 } 520 521 static size_t symbol_hash(long key, void *ctx __maybe_unused) 522 { 523 return str_hash((const char *) key); 524 } 525 526 static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused) 527 { 528 return strcmp((const char *) key1, (const char *) key2) == 0; 529 } 530 531 static bool is_invalid_entry(char *buf, bool kernel) 532 { 533 if (kernel && strchr(buf, '[')) 534 return true; 535 if (!kernel && !strchr(buf, '[')) 536 return true; 537 return false; 538 } 539 540 static const char * const trace_blacklist[] = { 541 "migrate_disable", 542 "migrate_enable", 543 "rcu_read_unlock_strict", 544 "preempt_count_add", 545 "preempt_count_sub", 546 "__rcu_read_lock", 547 "__rcu_read_unlock", 548 "bpf_get_numa_node_id", 549 }; 550 551 static bool skip_entry(char *name) 552 { 553 int i; 554 555 /* 556 * We attach to almost all kernel functions and some of them 557 * will cause 'suspicious RCU usage' when fprobe is attached 558 * to them. Filter out the current culprits - arch_cpu_idle 559 * default_idle and rcu_* functions. 560 */ 561 if (!strcmp(name, "arch_cpu_idle")) 562 return true; 563 if (!strcmp(name, "default_idle")) 564 return true; 565 if (!strncmp(name, "rcu_", 4)) 566 return true; 567 if (!strcmp(name, "bpf_dispatcher_xdp_func")) 568 return true; 569 if (!strncmp(name, "__ftrace_invalid_address__", 570 sizeof("__ftrace_invalid_address__") - 1)) 571 return true; 572 573 for (i = 0; i < ARRAY_SIZE(trace_blacklist); i++) { 574 if (!strcmp(name, trace_blacklist[i])) 575 return true; 576 } 577 578 return false; 579 } 580 581 /* Do comparison by ignoring '.llvm.<hash>' suffixes. */ 582 static int compare_name(const char *name1, const char *name2) 583 { 584 const char *res1, *res2; 585 int len1, len2; 586 587 res1 = strstr(name1, ".llvm."); 588 res2 = strstr(name2, ".llvm."); 589 len1 = res1 ? res1 - name1 : strlen(name1); 590 len2 = res2 ? res2 - name2 : strlen(name2); 591 592 if (len1 == len2) 593 return strncmp(name1, name2, len1); 594 if (len1 < len2) 595 return strncmp(name1, name2, len1) <= 0 ? -1 : 1; 596 return strncmp(name1, name2, len2) >= 0 ? 1 : -1; 597 } 598 599 static int load_kallsyms_compare(const void *p1, const void *p2) 600 { 601 return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name); 602 } 603 604 static int search_kallsyms_compare(const void *p1, const struct ksym *p2) 605 { 606 return compare_name(p1, p2->name); 607 } 608 609 int bpf_get_ksyms(struct ksyms **ksymsp, bool kernel) 610 { 611 size_t cap = 0, cnt = 0; 612 char *name = NULL, *ksym_name, **syms = NULL; 613 struct hashmap *map; 614 struct ksyms *ksyms; 615 struct ksym *ks; 616 char buf[256]; 617 FILE *f; 618 int err = 0; 619 620 ksyms = load_kallsyms_custom_local(load_kallsyms_compare); 621 if (!ksyms) 622 return -EINVAL; 623 624 /* 625 * The available_filter_functions contains many duplicates, 626 * but other than that all symbols are usable to trace. 627 * Filtering out duplicates by using hashmap__add, which won't 628 * add existing entry. 629 */ 630 631 if (access("/sys/kernel/tracing/trace", F_OK) == 0) 632 f = fopen("/sys/kernel/tracing/available_filter_functions", "r"); 633 else 634 f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r"); 635 636 if (!f) { 637 free_kallsyms_local(ksyms); 638 return -EINVAL; 639 } 640 641 map = hashmap__new(symbol_hash, symbol_equal, NULL); 642 if (IS_ERR(map)) { 643 err = libbpf_get_error(map); 644 goto error; 645 } 646 647 while (fgets(buf, sizeof(buf), f)) { 648 if (is_invalid_entry(buf, kernel)) 649 continue; 650 651 free(name); 652 if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1) 653 continue; 654 if (skip_entry(name)) 655 continue; 656 657 ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare); 658 if (!ks) { 659 err = -EINVAL; 660 goto error; 661 } 662 663 ksym_name = ks->name; 664 err = hashmap__add(map, ksym_name, 0); 665 if (err == -EEXIST) { 666 err = 0; 667 continue; 668 } 669 if (err) 670 goto error; 671 672 err = libbpf_ensure_mem((void **) &syms, &cap, 673 sizeof(*syms), cnt + 1); 674 if (err) 675 goto error; 676 677 syms[cnt++] = ksym_name; 678 } 679 680 ksyms->filtered_syms = syms; 681 ksyms->filtered_cnt = cnt; 682 *ksymsp = ksyms; 683 684 error: 685 free(name); 686 fclose(f); 687 hashmap__free(map); 688 if (err) { 689 free(syms); 690 free_kallsyms_local(ksyms); 691 } 692 return err; 693 } 694 695 int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel) 696 { 697 unsigned long *addr, *addrs, *tmp_addrs; 698 int err = 0, max_cnt, inc_cnt; 699 char *name = NULL; 700 size_t cnt = 0; 701 char buf[256]; 702 FILE *f; 703 704 if (access("/sys/kernel/tracing/trace", F_OK) == 0) 705 f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r"); 706 else 707 f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r"); 708 709 if (!f) 710 return -ENOENT; 711 712 /* In my local setup, the number of entries is 50k+ so Let us initially 713 * allocate space to hold 64k entries. If 64k is not enough, incrementally 714 * increase 1k each time. 715 */ 716 max_cnt = 65536; 717 inc_cnt = 1024; 718 addrs = malloc(max_cnt * sizeof(long)); 719 if (addrs == NULL) { 720 err = -ENOMEM; 721 goto error; 722 } 723 724 while (fgets(buf, sizeof(buf), f)) { 725 if (is_invalid_entry(buf, kernel)) 726 continue; 727 728 free(name); 729 if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2) 730 continue; 731 if (skip_entry(name)) 732 continue; 733 734 if (cnt == max_cnt) { 735 max_cnt += inc_cnt; 736 tmp_addrs = realloc(addrs, max_cnt * sizeof(long)); 737 if (!tmp_addrs) { 738 err = -ENOMEM; 739 goto error; 740 } 741 addrs = tmp_addrs; 742 } 743 744 addrs[cnt++] = (unsigned long)addr; 745 } 746 747 *addrsp = addrs; 748 *cntp = cnt; 749 750 error: 751 free(name); 752 fclose(f); 753 if (err) 754 free(addrs); 755 return err; 756 } 757