1 // SPDX-License-Identifier: GPL-2.0 2 #include <ctype.h> 3 #include <stdio.h> 4 #include <stdlib.h> 5 #include <string.h> 6 #include <assert.h> 7 #include <errno.h> 8 #include <fcntl.h> 9 #include <poll.h> 10 #include <pthread.h> 11 #include <unistd.h> 12 #include <linux/perf_event.h> 13 #include <linux/fs.h> 14 #include <sys/ioctl.h> 15 #include <sys/mman.h> 16 #include "trace_helpers.h" 17 #include <linux/limits.h> 18 #include <libelf.h> 19 #include <gelf.h> 20 #include "bpf/libbpf_internal.h" 21 22 #define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" 23 #define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" 24 25 struct ksyms { 26 struct ksym *syms; 27 size_t sym_cap; 28 size_t sym_cnt; 29 }; 30 31 static struct ksyms *ksyms; 32 static pthread_mutex_t ksyms_mutex = PTHREAD_MUTEX_INITIALIZER; 33 34 static int ksyms__add_symbol(struct ksyms *ksyms, const char *name, 35 unsigned long addr) 36 { 37 void *tmp; 38 39 tmp = strdup(name); 40 if (!tmp) 41 return -ENOMEM; 42 ksyms->syms[ksyms->sym_cnt].addr = addr; 43 ksyms->syms[ksyms->sym_cnt].name = tmp; 44 ksyms->sym_cnt++; 45 return 0; 46 } 47 48 void free_kallsyms_local(struct ksyms *ksyms) 49 { 50 unsigned int i; 51 52 if (!ksyms) 53 return; 54 55 if (!ksyms->syms) { 56 free(ksyms); 57 return; 58 } 59 60 for (i = 0; i < ksyms->sym_cnt; i++) 61 free(ksyms->syms[i].name); 62 free(ksyms->syms); 63 free(ksyms); 64 } 65 66 static struct ksyms *load_kallsyms_local_common(ksym_cmp_t cmp_cb) 67 { 68 FILE *f; 69 char func[256], buf[256]; 70 char symbol; 71 void *addr; 72 int ret; 73 struct ksyms *ksyms; 74 75 f = fopen("/proc/kallsyms", "r"); 76 if (!f) 77 return NULL; 78 79 ksyms = calloc(1, sizeof(struct ksyms)); 80 if (!ksyms) { 81 fclose(f); 82 return NULL; 83 } 84 85 while (fgets(buf, sizeof(buf), f)) { 86 if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) 87 break; 88 if (!addr) 89 continue; 90 91 ret = libbpf_ensure_mem((void **) &ksyms->syms, &ksyms->sym_cap, 92 sizeof(struct ksym), ksyms->sym_cnt + 1); 93 if (ret) 94 goto error; 95 ret = ksyms__add_symbol(ksyms, func, (unsigned long)addr); 96 if (ret) 97 goto error; 98 } 99 fclose(f); 100 qsort(ksyms->syms, ksyms->sym_cnt, sizeof(struct ksym), cmp_cb); 101 return ksyms; 102 103 error: 104 fclose(f); 105 free_kallsyms_local(ksyms); 106 return NULL; 107 } 108 109 static int ksym_cmp(const void *p1, const void *p2) 110 { 111 return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; 112 } 113 114 struct ksyms *load_kallsyms_local(void) 115 { 116 return load_kallsyms_local_common(ksym_cmp); 117 } 118 119 struct ksyms *load_kallsyms_custom_local(ksym_cmp_t cmp_cb) 120 { 121 return load_kallsyms_local_common(cmp_cb); 122 } 123 124 int load_kallsyms(void) 125 { 126 pthread_mutex_lock(&ksyms_mutex); 127 if (!ksyms) 128 ksyms = load_kallsyms_local(); 129 pthread_mutex_unlock(&ksyms_mutex); 130 return ksyms ? 0 : 1; 131 } 132 133 struct ksym *ksym_search_local(struct ksyms *ksyms, long key) 134 { 135 int start = 0, end = ksyms->sym_cnt; 136 int result; 137 138 /* kallsyms not loaded. return NULL */ 139 if (ksyms->sym_cnt <= 0) 140 return NULL; 141 142 while (start < end) { 143 size_t mid = start + (end - start) / 2; 144 145 result = key - ksyms->syms[mid].addr; 146 if (result < 0) 147 end = mid; 148 else if (result > 0) 149 start = mid + 1; 150 else 151 return &ksyms->syms[mid]; 152 } 153 154 if (start >= 1 && ksyms->syms[start - 1].addr < key && 155 key < ksyms->syms[start].addr) 156 /* valid ksym */ 157 return &ksyms->syms[start - 1]; 158 159 /* out of range. return _stext */ 160 return &ksyms->syms[0]; 161 } 162 163 struct ksym *search_kallsyms_custom_local(struct ksyms *ksyms, const void *p, 164 ksym_search_cmp_t cmp_cb) 165 { 166 int start = 0, mid, end = ksyms->sym_cnt; 167 struct ksym *ks; 168 int result; 169 170 while (start < end) { 171 mid = start + (end - start) / 2; 172 ks = &ksyms->syms[mid]; 173 result = cmp_cb(p, ks); 174 if (result < 0) 175 end = mid; 176 else if (result > 0) 177 start = mid + 1; 178 else 179 return ks; 180 } 181 182 return NULL; 183 } 184 185 struct ksym *ksym_search(long key) 186 { 187 if (!ksyms) 188 return NULL; 189 return ksym_search_local(ksyms, key); 190 } 191 192 long ksym_get_addr_local(struct ksyms *ksyms, const char *name) 193 { 194 int i; 195 196 for (i = 0; i < ksyms->sym_cnt; i++) { 197 if (strcmp(ksyms->syms[i].name, name) == 0) 198 return ksyms->syms[i].addr; 199 } 200 201 return 0; 202 } 203 204 long ksym_get_addr(const char *name) 205 { 206 if (!ksyms) 207 return 0; 208 return ksym_get_addr_local(ksyms, name); 209 } 210 211 /* open kallsyms and read symbol addresses on the fly. Without caching all symbols, 212 * this is faster than load + find. 213 */ 214 int kallsyms_find(const char *sym, unsigned long long *addr) 215 { 216 char type, name[500], *match; 217 unsigned long long value; 218 int err = 0; 219 FILE *f; 220 221 f = fopen("/proc/kallsyms", "r"); 222 if (!f) 223 return -EINVAL; 224 225 while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) { 226 /* If CONFIG_LTO_CLANG_THIN is enabled, static variable/function 227 * symbols could be promoted to global due to cross-file inlining. 228 * For such cases, clang compiler will add .llvm.<hash> suffix 229 * to those symbols to avoid potential naming conflict. 230 * Let us ignore .llvm.<hash> suffix during symbol comparison. 231 */ 232 if (type == 'd') { 233 match = strstr(name, ".llvm."); 234 if (match) 235 *match = '\0'; 236 } 237 if (strcmp(name, sym) == 0) { 238 *addr = value; 239 goto out; 240 } 241 } 242 err = -ENOENT; 243 244 out: 245 fclose(f); 246 return err; 247 } 248 249 #ifdef PROCMAP_QUERY 250 int env_verbosity __weak = 0; 251 252 static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *start, size_t *offset, int *flags) 253 { 254 char path_buf[PATH_MAX], build_id_buf[20]; 255 struct procmap_query q; 256 int err; 257 258 memset(&q, 0, sizeof(q)); 259 q.size = sizeof(q); 260 q.query_flags = query_flags; 261 q.query_addr = (__u64)addr; 262 q.vma_name_addr = (__u64)path_buf; 263 q.vma_name_size = sizeof(path_buf); 264 q.build_id_addr = (__u64)build_id_buf; 265 q.build_id_size = sizeof(build_id_buf); 266 267 err = ioctl(fd, PROCMAP_QUERY, &q); 268 if (err < 0) { 269 err = -errno; 270 if (err == -ENOTTY) 271 return -EOPNOTSUPP; /* ioctl() not implemented yet */ 272 if (err == -ENOENT) 273 return -ESRCH; /* vma not found */ 274 return err; 275 } 276 277 if (env_verbosity >= 1) { 278 printf("VMA FOUND (addr %08lx): %08lx-%08lx %c%c%c%c %08lx %02x:%02x %ld %s (build ID: %s, %d bytes)\n", 279 (long)addr, (long)q.vma_start, (long)q.vma_end, 280 (q.vma_flags & PROCMAP_QUERY_VMA_READABLE) ? 'r' : '-', 281 (q.vma_flags & PROCMAP_QUERY_VMA_WRITABLE) ? 'w' : '-', 282 (q.vma_flags & PROCMAP_QUERY_VMA_EXECUTABLE) ? 'x' : '-', 283 (q.vma_flags & PROCMAP_QUERY_VMA_SHARED) ? 's' : 'p', 284 (long)q.vma_offset, q.dev_major, q.dev_minor, (long)q.inode, 285 q.vma_name_size ? path_buf : "", 286 q.build_id_size ? "YES" : "NO", 287 q.build_id_size); 288 } 289 290 *start = q.vma_start; 291 *offset = q.vma_offset; 292 *flags = q.vma_flags; 293 return 0; 294 } 295 #else 296 static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *start, size_t *offset, int *flags) 297 { 298 return -EOPNOTSUPP; 299 } 300 #endif 301 302 ssize_t get_uprobe_offset(const void *addr) 303 { 304 size_t start, base, end; 305 FILE *f; 306 char buf[256]; 307 int err, flags; 308 309 f = fopen("/proc/self/maps", "r"); 310 if (!f) 311 return -errno; 312 313 /* requested executable VMA only */ 314 err = procmap_query(fileno(f), addr, PROCMAP_QUERY_VMA_EXECUTABLE, &start, &base, &flags); 315 if (err == -EOPNOTSUPP) { 316 bool found = false; 317 318 while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) { 319 if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) { 320 found = true; 321 break; 322 } 323 } 324 if (!found) { 325 fclose(f); 326 return -ESRCH; 327 } 328 } else if (err) { 329 fclose(f); 330 return err; 331 } 332 fclose(f); 333 334 #if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2 335 336 #define OP_RT_RA_MASK 0xffff0000UL 337 #define LIS_R2 0x3c400000UL 338 #define ADDIS_R2_R12 0x3c4c0000UL 339 #define ADDI_R2_R2 0x38420000UL 340 341 /* 342 * A PPC64 ABIv2 function may have a local and a global entry 343 * point. We need to use the local entry point when patching 344 * functions, so identify and step over the global entry point 345 * sequence. 346 * 347 * The global entry point sequence is always of the form: 348 * 349 * addis r2,r12,XXXX 350 * addi r2,r2,XXXX 351 * 352 * A linker optimisation may convert the addis to lis: 353 * 354 * lis r2,XXXX 355 * addi r2,r2,XXXX 356 */ 357 { 358 const __u32 *insn = (const __u32 *)(uintptr_t)addr; 359 360 if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) || 361 ((*insn & OP_RT_RA_MASK) == LIS_R2)) && 362 ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2)) 363 return (uintptr_t)(insn + 2) - start + base; 364 } 365 #endif 366 return (uintptr_t)addr - start + base; 367 } 368 369 ssize_t get_rel_offset(uintptr_t addr) 370 { 371 size_t start, end, offset; 372 char buf[256]; 373 FILE *f; 374 int err, flags; 375 376 f = fopen("/proc/self/maps", "r"); 377 if (!f) 378 return -errno; 379 380 err = procmap_query(fileno(f), (const void *)addr, 0, &start, &offset, &flags); 381 if (err == 0) { 382 fclose(f); 383 return (size_t)addr - start + offset; 384 } else if (err != -EOPNOTSUPP) { 385 fclose(f); 386 return err; 387 } else if (err) { 388 while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &offset) == 4) { 389 if (addr >= start && addr < end) { 390 fclose(f); 391 return (size_t)addr - start + offset; 392 } 393 } 394 } 395 396 fclose(f); 397 return -EINVAL; 398 } 399 400 static int 401 parse_build_id_buf(const void *note_start, Elf32_Word note_size, char *build_id) 402 { 403 Elf32_Word note_offs = 0; 404 405 while (note_offs + sizeof(Elf32_Nhdr) < note_size) { 406 Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs); 407 408 if (nhdr->n_type == 3 && nhdr->n_namesz == sizeof("GNU") && 409 !strcmp((char *)(nhdr + 1), "GNU") && nhdr->n_descsz > 0 && 410 nhdr->n_descsz <= BPF_BUILD_ID_SIZE) { 411 memcpy(build_id, note_start + note_offs + 412 ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), nhdr->n_descsz); 413 memset(build_id + nhdr->n_descsz, 0, BPF_BUILD_ID_SIZE - nhdr->n_descsz); 414 return (int) nhdr->n_descsz; 415 } 416 417 note_offs = note_offs + sizeof(Elf32_Nhdr) + 418 ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4); 419 } 420 421 return -ENOENT; 422 } 423 424 /* Reads binary from *path* file and returns it in the *build_id* buffer 425 * with *size* which is expected to be at least BPF_BUILD_ID_SIZE bytes. 426 * Returns size of build id on success. On error the error value is 427 * returned. 428 */ 429 int read_build_id(const char *path, char *build_id, size_t size) 430 { 431 int fd, err = -EINVAL; 432 Elf *elf = NULL; 433 GElf_Ehdr ehdr; 434 size_t max, i; 435 436 if (size < BPF_BUILD_ID_SIZE) 437 return -EINVAL; 438 439 fd = open(path, O_RDONLY | O_CLOEXEC); 440 if (fd < 0) 441 return -errno; 442 443 (void)elf_version(EV_CURRENT); 444 445 elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); 446 if (!elf) 447 goto out; 448 if (elf_kind(elf) != ELF_K_ELF) 449 goto out; 450 if (!gelf_getehdr(elf, &ehdr)) 451 goto out; 452 453 for (i = 0; i < ehdr.e_phnum; i++) { 454 GElf_Phdr mem, *phdr; 455 char *data; 456 457 phdr = gelf_getphdr(elf, i, &mem); 458 if (!phdr) 459 goto out; 460 if (phdr->p_type != PT_NOTE) 461 continue; 462 data = elf_rawfile(elf, &max); 463 if (!data) 464 goto out; 465 if (phdr->p_offset + phdr->p_memsz > max) 466 goto out; 467 err = parse_build_id_buf(data + phdr->p_offset, phdr->p_memsz, build_id); 468 if (err > 0) 469 break; 470 } 471 472 out: 473 if (elf) 474 elf_end(elf); 475 close(fd); 476 return err; 477 } 478 479 int read_trace_pipe_iter(void (*cb)(const char *str, void *data), void *data, int iter) 480 { 481 size_t buflen, n; 482 char *buf = NULL; 483 FILE *fp = NULL; 484 485 if (access(TRACEFS_PIPE, F_OK) == 0) 486 fp = fopen(TRACEFS_PIPE, "r"); 487 else 488 fp = fopen(DEBUGFS_PIPE, "r"); 489 if (!fp) 490 return -1; 491 492 /* We do not want to wait forever when iter is specified. */ 493 if (iter) 494 fcntl(fileno(fp), F_SETFL, O_NONBLOCK); 495 496 while ((n = getline(&buf, &buflen, fp) >= 0) || errno == EAGAIN) { 497 if (n > 0) 498 cb(buf, data); 499 if (iter && !(--iter)) 500 break; 501 } 502 503 free(buf); 504 if (fp) 505 fclose(fp); 506 return 0; 507 } 508 509 static void trace_pipe_cb(const char *str, void *data) 510 { 511 printf("%s", str); 512 } 513 514 void read_trace_pipe(void) 515 { 516 read_trace_pipe_iter(trace_pipe_cb, NULL, 0); 517 } 518