1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2025 - Google LLC 4 * Author: Vincent Donnefort <vdonnefort@google.com> 5 */ 6 7 #include <linux/kstrtox.h> 8 #include <linux/lockdep.h> 9 #include <linux/mutex.h> 10 #include <linux/tracefs.h> 11 #include <linux/trace_remote.h> 12 #include <linux/trace_seq.h> 13 #include <linux/types.h> 14 15 #include "trace.h" 16 17 #define TRACEFS_DIR "remotes" 18 #define TRACEFS_MODE_WRITE 0640 19 #define TRACEFS_MODE_READ 0440 20 21 enum tri_type { 22 TRI_CONSUMING, 23 TRI_NONCONSUMING, 24 }; 25 26 struct trace_remote_iterator { 27 struct trace_remote *remote; 28 struct trace_seq seq; 29 struct delayed_work poll_work; 30 unsigned long lost_events; 31 u64 ts; 32 struct ring_buffer_iter *rb_iter; 33 struct ring_buffer_iter **rb_iters; 34 struct remote_event_hdr *evt; 35 int cpu; 36 int evt_cpu; 37 loff_t pos; 38 enum tri_type type; 39 }; 40 41 struct trace_remote { 42 struct trace_remote_callbacks *cbs; 43 void *priv; 44 struct trace_buffer *trace_buffer; 45 struct trace_buffer_desc *trace_buffer_desc; 46 struct dentry *dentry; 47 struct eventfs_inode *eventfs; 48 struct remote_event *events; 49 unsigned long nr_events; 50 unsigned long trace_buffer_size; 51 struct ring_buffer_remote rb_remote; 52 struct mutex lock; 53 struct rw_semaphore reader_lock; 54 struct rw_semaphore *pcpu_reader_locks; 55 unsigned int nr_readers; 56 unsigned int poll_ms; 57 bool tracing_on; 58 }; 59 60 static bool trace_remote_loaded(struct trace_remote *remote) 61 { 62 return !!remote->trace_buffer; 63 } 64 65 static int trace_remote_load(struct trace_remote *remote) 66 { 67 struct ring_buffer_remote *rb_remote = &remote->rb_remote; 68 struct trace_buffer_desc *desc; 69 70 lockdep_assert_held(&remote->lock); 71 72 if (trace_remote_loaded(remote)) 73 return 0; 74 75 desc = remote->cbs->load_trace_buffer(remote->trace_buffer_size, remote->priv); 76 if (IS_ERR(desc)) 77 return PTR_ERR(desc); 78 79 rb_remote->desc = desc; 80 rb_remote->swap_reader_page = remote->cbs->swap_reader_page; 81 rb_remote->priv = remote->priv; 82 rb_remote->reset = remote->cbs->reset; 83 remote->trace_buffer = ring_buffer_alloc_remote(rb_remote); 84 if (!remote->trace_buffer) { 85 remote->cbs->unload_trace_buffer(desc, remote->priv); 86 return -ENOMEM; 87 } 88 89 remote->trace_buffer_desc = desc; 90 91 return 0; 92 } 93 94 static void trace_remote_try_unload(struct trace_remote *remote) 95 { 96 lockdep_assert_held(&remote->lock); 97 98 if (!trace_remote_loaded(remote)) 99 return; 100 101 /* The buffer is being read or writable */ 102 if (remote->nr_readers || remote->tracing_on) 103 return; 104 105 /* The buffer has readable data */ 106 if (!ring_buffer_empty(remote->trace_buffer)) 107 return; 108 109 ring_buffer_free(remote->trace_buffer); 110 remote->trace_buffer = NULL; 111 remote->cbs->unload_trace_buffer(remote->trace_buffer_desc, remote->priv); 112 } 113 114 static int trace_remote_enable_tracing(struct trace_remote *remote) 115 { 116 int ret; 117 118 lockdep_assert_held(&remote->lock); 119 120 if (remote->tracing_on) 121 return 0; 122 123 ret = trace_remote_load(remote); 124 if (ret) 125 return ret; 126 127 ret = remote->cbs->enable_tracing(true, remote->priv); 128 if (ret) { 129 trace_remote_try_unload(remote); 130 return ret; 131 } 132 133 remote->tracing_on = true; 134 135 return 0; 136 } 137 138 static int trace_remote_disable_tracing(struct trace_remote *remote) 139 { 140 int ret; 141 142 lockdep_assert_held(&remote->lock); 143 144 if (!remote->tracing_on) 145 return 0; 146 147 ret = remote->cbs->enable_tracing(false, remote->priv); 148 if (ret) 149 return ret; 150 151 ring_buffer_poll_remote(remote->trace_buffer, RING_BUFFER_ALL_CPUS); 152 remote->tracing_on = false; 153 trace_remote_try_unload(remote); 154 155 return 0; 156 } 157 158 static void trace_remote_reset(struct trace_remote *remote, int cpu) 159 { 160 lockdep_assert_held(&remote->lock); 161 162 if (!trace_remote_loaded(remote)) 163 return; 164 165 if (cpu == RING_BUFFER_ALL_CPUS) 166 ring_buffer_reset(remote->trace_buffer); 167 else 168 ring_buffer_reset_cpu(remote->trace_buffer, cpu); 169 170 trace_remote_try_unload(remote); 171 } 172 173 static ssize_t 174 tracing_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) 175 { 176 struct seq_file *seq = filp->private_data; 177 struct trace_remote *remote = seq->private; 178 unsigned long val; 179 int ret; 180 181 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 182 if (ret) 183 return ret; 184 185 guard(mutex)(&remote->lock); 186 187 ret = val ? trace_remote_enable_tracing(remote) : trace_remote_disable_tracing(remote); 188 if (ret) 189 return ret; 190 191 return cnt; 192 } 193 static int tracing_on_show(struct seq_file *s, void *unused) 194 { 195 struct trace_remote *remote = s->private; 196 197 seq_printf(s, "%d\n", remote->tracing_on); 198 199 return 0; 200 } 201 DEFINE_SHOW_STORE_ATTRIBUTE(tracing_on); 202 203 static ssize_t buffer_size_kb_write(struct file *filp, const char __user *ubuf, size_t cnt, 204 loff_t *ppos) 205 { 206 struct seq_file *seq = filp->private_data; 207 struct trace_remote *remote = seq->private; 208 unsigned long val; 209 int ret; 210 211 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 212 if (ret) 213 return ret; 214 215 /* KiB to Bytes */ 216 if (!val || check_shl_overflow(val, 10, &val)) 217 return -EINVAL; 218 219 guard(mutex)(&remote->lock); 220 221 if (trace_remote_loaded(remote)) 222 return -EBUSY; 223 224 remote->trace_buffer_size = val; 225 226 return cnt; 227 } 228 229 static int buffer_size_kb_show(struct seq_file *s, void *unused) 230 { 231 struct trace_remote *remote = s->private; 232 233 seq_printf(s, "%lu (%s)\n", remote->trace_buffer_size >> 10, 234 trace_remote_loaded(remote) ? "loaded" : "unloaded"); 235 236 return 0; 237 } 238 DEFINE_SHOW_STORE_ATTRIBUTE(buffer_size_kb); 239 240 static int trace_remote_get(struct trace_remote *remote, int cpu) 241 { 242 int ret; 243 244 if (remote->nr_readers == UINT_MAX) 245 return -EBUSY; 246 247 ret = trace_remote_load(remote); 248 if (ret) 249 return ret; 250 251 if (cpu != RING_BUFFER_ALL_CPUS && !remote->pcpu_reader_locks) { 252 int lock_cpu; 253 254 remote->pcpu_reader_locks = kcalloc(nr_cpu_ids, sizeof(*remote->pcpu_reader_locks), 255 GFP_KERNEL); 256 if (!remote->pcpu_reader_locks) { 257 trace_remote_try_unload(remote); 258 return -ENOMEM; 259 } 260 261 for_each_possible_cpu(lock_cpu) 262 init_rwsem(&remote->pcpu_reader_locks[lock_cpu]); 263 } 264 265 remote->nr_readers++; 266 267 return 0; 268 } 269 270 static void trace_remote_put(struct trace_remote *remote) 271 { 272 if (WARN_ON(!remote->nr_readers)) 273 return; 274 275 remote->nr_readers--; 276 if (remote->nr_readers) 277 return; 278 279 kfree(remote->pcpu_reader_locks); 280 remote->pcpu_reader_locks = NULL; 281 282 trace_remote_try_unload(remote); 283 } 284 285 static void __poll_remote(struct work_struct *work) 286 { 287 struct delayed_work *dwork = to_delayed_work(work); 288 struct trace_remote_iterator *iter; 289 290 iter = container_of(dwork, struct trace_remote_iterator, poll_work); 291 ring_buffer_poll_remote(iter->remote->trace_buffer, iter->cpu); 292 schedule_delayed_work((struct delayed_work *)work, 293 msecs_to_jiffies(iter->remote->poll_ms)); 294 } 295 296 static void __free_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu) 297 { 298 if (cpu != RING_BUFFER_ALL_CPUS) { 299 ring_buffer_read_finish(iter->rb_iter); 300 return; 301 } 302 303 for_each_possible_cpu(cpu) { 304 if (iter->rb_iters[cpu]) 305 ring_buffer_read_finish(iter->rb_iters[cpu]); 306 } 307 308 kfree(iter->rb_iters); 309 } 310 311 static int __alloc_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu) 312 { 313 if (cpu != RING_BUFFER_ALL_CPUS) { 314 iter->rb_iter = ring_buffer_read_start(iter->remote->trace_buffer, cpu, GFP_KERNEL); 315 316 return iter->rb_iter ? 0 : -ENOMEM; 317 } 318 319 iter->rb_iters = kcalloc(nr_cpu_ids, sizeof(*iter->rb_iters), GFP_KERNEL); 320 if (!iter->rb_iters) 321 return -ENOMEM; 322 323 for_each_possible_cpu(cpu) { 324 iter->rb_iters[cpu] = ring_buffer_read_start(iter->remote->trace_buffer, cpu, 325 GFP_KERNEL); 326 if (!iter->rb_iters[cpu]) { 327 __free_ring_buffer_iter(iter, RING_BUFFER_ALL_CPUS); 328 return -ENOMEM; 329 } 330 } 331 332 return 0; 333 } 334 335 static struct trace_remote_iterator 336 *trace_remote_iter(struct trace_remote *remote, int cpu, enum tri_type type) 337 { 338 struct trace_remote_iterator *iter = NULL; 339 int ret; 340 341 lockdep_assert_held(&remote->lock); 342 343 if (type == TRI_NONCONSUMING && !trace_remote_loaded(remote)) 344 return NULL; 345 346 ret = trace_remote_get(remote, cpu); 347 if (ret) 348 return ERR_PTR(ret); 349 350 /* Test the CPU */ 351 ret = ring_buffer_poll_remote(remote->trace_buffer, cpu); 352 if (ret) 353 goto err; 354 355 iter = kzalloc_obj(*iter); 356 if (iter) { 357 iter->remote = remote; 358 iter->cpu = cpu; 359 iter->type = type; 360 trace_seq_init(&iter->seq); 361 362 switch (type) { 363 case TRI_CONSUMING: 364 INIT_DELAYED_WORK(&iter->poll_work, __poll_remote); 365 schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms)); 366 break; 367 case TRI_NONCONSUMING: 368 ret = __alloc_ring_buffer_iter(iter, cpu); 369 break; 370 } 371 372 if (ret) 373 goto err; 374 375 return iter; 376 } 377 ret = -ENOMEM; 378 379 err: 380 kfree(iter); 381 trace_remote_put(remote); 382 383 return ERR_PTR(ret); 384 } 385 386 static void trace_remote_iter_free(struct trace_remote_iterator *iter) 387 { 388 struct trace_remote *remote; 389 390 if (!iter) 391 return; 392 393 remote = iter->remote; 394 395 lockdep_assert_held(&remote->lock); 396 397 switch (iter->type) { 398 case TRI_CONSUMING: 399 cancel_delayed_work_sync(&iter->poll_work); 400 break; 401 case TRI_NONCONSUMING: 402 __free_ring_buffer_iter(iter, iter->cpu); 403 break; 404 } 405 406 kfree(iter); 407 trace_remote_put(remote); 408 } 409 410 static void trace_remote_iter_read_start(struct trace_remote_iterator *iter) 411 { 412 struct trace_remote *remote = iter->remote; 413 int cpu = iter->cpu; 414 415 /* Acquire global reader lock */ 416 if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING) 417 down_write(&remote->reader_lock); 418 else 419 down_read(&remote->reader_lock); 420 421 if (cpu == RING_BUFFER_ALL_CPUS) 422 return; 423 424 /* 425 * No need for the remote lock here, iter holds a reference on 426 * remote->nr_readers 427 */ 428 429 /* Get the per-CPU one */ 430 if (WARN_ON_ONCE(!remote->pcpu_reader_locks)) 431 return; 432 433 if (iter->type == TRI_CONSUMING) 434 down_write(&remote->pcpu_reader_locks[cpu]); 435 else 436 down_read(&remote->pcpu_reader_locks[cpu]); 437 } 438 439 static void trace_remote_iter_read_finished(struct trace_remote_iterator *iter) 440 { 441 struct trace_remote *remote = iter->remote; 442 int cpu = iter->cpu; 443 444 /* Release per-CPU reader lock */ 445 if (cpu != RING_BUFFER_ALL_CPUS) { 446 /* 447 * No need for the remote lock here, iter holds a reference on 448 * remote->nr_readers 449 */ 450 if (iter->type == TRI_CONSUMING) 451 up_write(&remote->pcpu_reader_locks[cpu]); 452 else 453 up_read(&remote->pcpu_reader_locks[cpu]); 454 } 455 456 /* Release global reader lock */ 457 if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING) 458 up_write(&remote->reader_lock); 459 else 460 up_read(&remote->reader_lock); 461 } 462 463 static struct ring_buffer_iter *__get_rb_iter(struct trace_remote_iterator *iter, int cpu) 464 { 465 return iter->cpu != RING_BUFFER_ALL_CPUS ? iter->rb_iter : iter->rb_iters[cpu]; 466 } 467 468 static struct ring_buffer_event * 469 __peek_event(struct trace_remote_iterator *iter, int cpu, u64 *ts, unsigned long *lost_events) 470 { 471 struct ring_buffer_event *rb_evt; 472 struct ring_buffer_iter *rb_iter; 473 474 switch (iter->type) { 475 case TRI_CONSUMING: 476 return ring_buffer_peek(iter->remote->trace_buffer, cpu, ts, lost_events); 477 case TRI_NONCONSUMING: 478 rb_iter = __get_rb_iter(iter, cpu); 479 rb_evt = ring_buffer_iter_peek(rb_iter, ts); 480 if (!rb_evt) 481 return NULL; 482 483 *lost_events = ring_buffer_iter_dropped(rb_iter); 484 485 return rb_evt; 486 } 487 488 return NULL; 489 } 490 491 static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter) 492 { 493 struct trace_buffer *trace_buffer = iter->remote->trace_buffer; 494 struct ring_buffer_event *rb_evt; 495 int cpu = iter->cpu; 496 497 if (cpu != RING_BUFFER_ALL_CPUS) { 498 if (ring_buffer_empty_cpu(trace_buffer, cpu)) 499 return false; 500 501 rb_evt = __peek_event(iter, cpu, &iter->ts, &iter->lost_events); 502 if (!rb_evt) 503 return false; 504 505 iter->evt_cpu = cpu; 506 iter->evt = ring_buffer_event_data(rb_evt); 507 return true; 508 } 509 510 iter->ts = U64_MAX; 511 for_each_possible_cpu(cpu) { 512 unsigned long lost_events; 513 u64 ts; 514 515 if (ring_buffer_empty_cpu(trace_buffer, cpu)) 516 continue; 517 518 rb_evt = __peek_event(iter, cpu, &ts, &lost_events); 519 if (!rb_evt) 520 continue; 521 522 if (ts >= iter->ts) 523 continue; 524 525 iter->ts = ts; 526 iter->evt_cpu = cpu; 527 iter->evt = ring_buffer_event_data(rb_evt); 528 iter->lost_events = lost_events; 529 } 530 531 return iter->ts != U64_MAX; 532 } 533 534 static void trace_remote_iter_move(struct trace_remote_iterator *iter) 535 { 536 struct trace_buffer *trace_buffer = iter->remote->trace_buffer; 537 538 switch (iter->type) { 539 case TRI_CONSUMING: 540 ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL); 541 break; 542 case TRI_NONCONSUMING: 543 ring_buffer_iter_advance(__get_rb_iter(iter, iter->evt_cpu)); 544 break; 545 } 546 } 547 548 static struct remote_event *trace_remote_find_event(struct trace_remote *remote, unsigned short id); 549 550 static int trace_remote_iter_print_event(struct trace_remote_iterator *iter) 551 { 552 struct remote_event *evt; 553 unsigned long usecs_rem; 554 u64 ts = iter->ts; 555 556 if (iter->lost_events) 557 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", 558 iter->evt_cpu, iter->lost_events); 559 560 do_div(ts, 1000); 561 usecs_rem = do_div(ts, USEC_PER_SEC); 562 563 trace_seq_printf(&iter->seq, "[%03d]\t%5llu.%06lu: ", iter->evt_cpu, 564 ts, usecs_rem); 565 566 evt = trace_remote_find_event(iter->remote, iter->evt->id); 567 if (!evt) 568 trace_seq_printf(&iter->seq, "UNKNOWN id=%d\n", iter->evt->id); 569 else 570 evt->print(iter->evt, &iter->seq); 571 572 return trace_seq_has_overflowed(&iter->seq) ? -EOVERFLOW : 0; 573 } 574 575 static int trace_pipe_open(struct inode *inode, struct file *filp) 576 { 577 struct trace_remote *remote = inode->i_private; 578 struct trace_remote_iterator *iter; 579 int cpu = tracing_get_cpu(inode); 580 581 guard(mutex)(&remote->lock); 582 583 iter = trace_remote_iter(remote, cpu, TRI_CONSUMING); 584 if (IS_ERR(iter)) 585 return PTR_ERR(iter); 586 587 filp->private_data = iter; 588 589 return IS_ERR(iter) ? PTR_ERR(iter) : 0; 590 } 591 592 static int trace_pipe_release(struct inode *inode, struct file *filp) 593 { 594 struct trace_remote_iterator *iter = filp->private_data; 595 struct trace_remote *remote = iter->remote; 596 597 guard(mutex)(&remote->lock); 598 599 trace_remote_iter_free(iter); 600 601 return 0; 602 } 603 604 static ssize_t trace_pipe_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 605 { 606 struct trace_remote_iterator *iter = filp->private_data; 607 struct trace_buffer *trace_buffer = iter->remote->trace_buffer; 608 int ret; 609 610 copy_to_user: 611 ret = trace_seq_to_user(&iter->seq, ubuf, cnt); 612 if (ret != -EBUSY) 613 return ret; 614 615 trace_seq_init(&iter->seq); 616 617 ret = ring_buffer_wait(trace_buffer, iter->cpu, 0, NULL, NULL); 618 if (ret < 0) 619 return ret; 620 621 trace_remote_iter_read_start(iter); 622 623 while (trace_remote_iter_read_event(iter)) { 624 int prev_len = iter->seq.seq.len; 625 626 if (trace_remote_iter_print_event(iter)) { 627 iter->seq.seq.len = prev_len; 628 break; 629 } 630 631 trace_remote_iter_move(iter); 632 } 633 634 trace_remote_iter_read_finished(iter); 635 636 goto copy_to_user; 637 } 638 639 static const struct file_operations trace_pipe_fops = { 640 .open = trace_pipe_open, 641 .read = trace_pipe_read, 642 .release = trace_pipe_release, 643 }; 644 645 static void *trace_next(struct seq_file *m, void *v, loff_t *pos) 646 { 647 struct trace_remote_iterator *iter = m->private; 648 649 ++*pos; 650 651 if (!iter || !trace_remote_iter_read_event(iter)) 652 return NULL; 653 654 trace_remote_iter_move(iter); 655 iter->pos++; 656 657 return iter; 658 } 659 660 static void *trace_start(struct seq_file *m, loff_t *pos) 661 { 662 struct trace_remote_iterator *iter = m->private; 663 loff_t i; 664 665 if (!iter) 666 return NULL; 667 668 trace_remote_iter_read_start(iter); 669 670 if (!*pos) { 671 iter->pos = -1; 672 return trace_next(m, NULL, &i); 673 } 674 675 i = iter->pos; 676 while (i < *pos) { 677 iter = trace_next(m, NULL, &i); 678 if (!iter) 679 return NULL; 680 } 681 682 return iter; 683 } 684 685 static int trace_show(struct seq_file *m, void *v) 686 { 687 struct trace_remote_iterator *iter = v; 688 689 trace_seq_init(&iter->seq); 690 691 if (trace_remote_iter_print_event(iter)) { 692 seq_printf(m, "[EVENT %d PRINT TOO BIG]\n", iter->evt->id); 693 return 0; 694 } 695 696 return trace_print_seq(m, &iter->seq); 697 } 698 699 static void trace_stop(struct seq_file *m, void *v) 700 { 701 struct trace_remote_iterator *iter = m->private; 702 703 if (iter) 704 trace_remote_iter_read_finished(iter); 705 } 706 707 static const struct seq_operations trace_sops = { 708 .start = trace_start, 709 .next = trace_next, 710 .show = trace_show, 711 .stop = trace_stop, 712 }; 713 714 static int trace_open(struct inode *inode, struct file *filp) 715 { 716 struct trace_remote *remote = inode->i_private; 717 struct trace_remote_iterator *iter = NULL; 718 int cpu = tracing_get_cpu(inode); 719 int ret; 720 721 if (!(filp->f_mode & FMODE_READ)) 722 return 0; 723 724 guard(mutex)(&remote->lock); 725 726 iter = trace_remote_iter(remote, cpu, TRI_NONCONSUMING); 727 if (IS_ERR(iter)) 728 return PTR_ERR(iter); 729 730 ret = seq_open(filp, &trace_sops); 731 if (ret) { 732 trace_remote_iter_free(iter); 733 return ret; 734 } 735 736 ((struct seq_file *)filp->private_data)->private = (void *)iter; 737 738 return 0; 739 } 740 741 static int trace_release(struct inode *inode, struct file *filp) 742 { 743 struct trace_remote_iterator *iter; 744 745 if (!(filp->f_mode & FMODE_READ)) 746 return 0; 747 748 iter = ((struct seq_file *)filp->private_data)->private; 749 seq_release(inode, filp); 750 751 if (!iter) 752 return 0; 753 754 guard(mutex)(&iter->remote->lock); 755 756 trace_remote_iter_free(iter); 757 758 return 0; 759 } 760 761 static ssize_t trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) 762 { 763 struct inode *inode = file_inode(filp); 764 struct trace_remote *remote = inode->i_private; 765 int cpu = tracing_get_cpu(inode); 766 767 guard(mutex)(&remote->lock); 768 769 trace_remote_reset(remote, cpu); 770 771 return cnt; 772 } 773 774 static const struct file_operations trace_fops = { 775 .open = trace_open, 776 .write = trace_write, 777 .read = seq_read, 778 .read_iter = seq_read_iter, 779 .release = trace_release, 780 }; 781 782 static int trace_remote_init_tracefs(const char *name, struct trace_remote *remote) 783 { 784 struct dentry *remote_d, *percpu_d, *d; 785 static struct dentry *root; 786 static DEFINE_MUTEX(lock); 787 bool root_inited = false; 788 int cpu; 789 790 guard(mutex)(&lock); 791 792 if (!root) { 793 root = tracefs_create_dir(TRACEFS_DIR, NULL); 794 if (!root) { 795 pr_err("Failed to create tracefs dir "TRACEFS_DIR"\n"); 796 return -ENOMEM; 797 } 798 root_inited = true; 799 } 800 801 remote_d = tracefs_create_dir(name, root); 802 if (!remote_d) { 803 pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/\n", name); 804 goto err; 805 } 806 807 d = trace_create_file("tracing_on", TRACEFS_MODE_WRITE, remote_d, remote, &tracing_on_fops); 808 if (!d) 809 goto err; 810 811 d = trace_create_file("buffer_size_kb", TRACEFS_MODE_WRITE, remote_d, remote, 812 &buffer_size_kb_fops); 813 if (!d) 814 goto err; 815 816 d = trace_create_file("trace_pipe", TRACEFS_MODE_READ, remote_d, remote, &trace_pipe_fops); 817 if (!d) 818 goto err; 819 820 d = trace_create_file("trace", TRACEFS_MODE_WRITE, remote_d, remote, &trace_fops); 821 if (!d) 822 goto err; 823 824 percpu_d = tracefs_create_dir("per_cpu", remote_d); 825 if (!percpu_d) { 826 pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/per_cpu/\n", name); 827 goto err; 828 } 829 830 for_each_possible_cpu(cpu) { 831 struct dentry *cpu_d; 832 char cpu_name[16]; 833 834 snprintf(cpu_name, sizeof(cpu_name), "cpu%d", cpu); 835 cpu_d = tracefs_create_dir(cpu_name, percpu_d); 836 if (!cpu_d) { 837 pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/percpu/cpu%d\n", 838 name, cpu); 839 goto err; 840 } 841 842 d = trace_create_cpu_file("trace_pipe", TRACEFS_MODE_READ, cpu_d, remote, cpu, 843 &trace_pipe_fops); 844 if (!d) 845 goto err; 846 847 d = trace_create_cpu_file("trace", TRACEFS_MODE_WRITE, cpu_d, remote, cpu, 848 &trace_fops); 849 if (!d) 850 goto err; 851 } 852 853 remote->dentry = remote_d; 854 855 return 0; 856 857 err: 858 if (root_inited) { 859 tracefs_remove(root); 860 root = NULL; 861 } else { 862 tracefs_remove(remote_d); 863 } 864 865 return -ENOMEM; 866 } 867 868 static int trace_remote_register_events(const char *remote_name, struct trace_remote *remote, 869 struct remote_event *events, size_t nr_events); 870 871 /** 872 * trace_remote_register() - Register a Tracefs remote 873 * @name: Name of the remote, used for the Tracefs remotes/ directory. 874 * @cbs: Set of callbacks used to control the remote. 875 * @priv: Private data, passed to each callback from @cbs. 876 * @events: Array of events. &remote_event.name and &remote_event.id must be 877 * filled by the caller. 878 * @nr_events: Number of events in the @events array. 879 * 880 * A trace remote is an entity, outside of the kernel (most likely firmware or 881 * hypervisor) capable of writing events into a Tracefs compatible ring-buffer. 882 * The kernel would then act as a reader. 883 * 884 * The registered remote will be found under the Tracefs directory 885 * remotes/<name>. 886 * 887 * Return: 0 on success, negative error code on failure. 888 */ 889 int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv, 890 struct remote_event *events, size_t nr_events) 891 { 892 struct trace_remote *remote; 893 int ret; 894 895 remote = kzalloc_obj(*remote); 896 if (!remote) 897 return -ENOMEM; 898 899 remote->cbs = cbs; 900 remote->priv = priv; 901 remote->trace_buffer_size = 7 << 10; 902 remote->poll_ms = 100; 903 mutex_init(&remote->lock); 904 init_rwsem(&remote->reader_lock); 905 906 if (trace_remote_init_tracefs(name, remote)) { 907 kfree(remote); 908 return -ENOMEM; 909 } 910 911 ret = trace_remote_register_events(name, remote, events, nr_events); 912 if (ret) { 913 pr_err("Failed to register events for trace remote '%s' (%d)\n", 914 name, ret); 915 return ret; 916 } 917 918 ret = cbs->init ? cbs->init(remote->dentry, priv) : 0; 919 if (ret) 920 pr_err("Init failed for trace remote '%s' (%d)\n", name, ret); 921 922 return ret; 923 } 924 EXPORT_SYMBOL_GPL(trace_remote_register); 925 926 /** 927 * trace_remote_free_buffer() - Free trace buffer allocated with trace_remote_alloc_buffer() 928 * @desc: Descriptor of the per-CPU ring-buffers, originally filled by 929 * trace_remote_alloc_buffer() 930 * 931 * Most likely called from &trace_remote_callbacks.unload_trace_buffer. 932 */ 933 void trace_remote_free_buffer(struct trace_buffer_desc *desc) 934 { 935 struct ring_buffer_desc *rb_desc; 936 int cpu; 937 938 for_each_ring_buffer_desc(rb_desc, cpu, desc) { 939 unsigned int id; 940 941 free_page(rb_desc->meta_va); 942 943 for (id = 0; id < rb_desc->nr_page_va; id++) 944 free_page(rb_desc->page_va[id]); 945 } 946 } 947 EXPORT_SYMBOL_GPL(trace_remote_free_buffer); 948 949 /** 950 * trace_remote_alloc_buffer() - Dynamically allocate a trace buffer 951 * @desc: Uninitialized trace_buffer_desc 952 * @desc_size: Size of the trace_buffer_desc. Must be at least equal to 953 * trace_buffer_desc_size() 954 * @buffer_size: Size in bytes of each per-CPU ring-buffer 955 * @cpumask: CPUs to allocate a ring-buffer for 956 * 957 * Helper to dynamically allocate a set of pages (enough to cover @buffer_size) 958 * for each CPU from @cpumask and fill @desc. Most likely called from 959 * &trace_remote_callbacks.load_trace_buffer. 960 * 961 * Return: 0 on success, negative error code on failure. 962 */ 963 int trace_remote_alloc_buffer(struct trace_buffer_desc *desc, size_t desc_size, size_t buffer_size, 964 const struct cpumask *cpumask) 965 { 966 unsigned int nr_pages = max(DIV_ROUND_UP(buffer_size, PAGE_SIZE), 2UL) + 1; 967 void *desc_end = desc + desc_size; 968 struct ring_buffer_desc *rb_desc; 969 int cpu, ret = -ENOMEM; 970 971 if (desc_size < struct_size(desc, __data, 0)) 972 return -EINVAL; 973 974 desc->nr_cpus = 0; 975 desc->struct_len = struct_size(desc, __data, 0); 976 977 rb_desc = (struct ring_buffer_desc *)&desc->__data[0]; 978 979 for_each_cpu(cpu, cpumask) { 980 unsigned int id; 981 982 if ((void *)rb_desc + struct_size(rb_desc, page_va, nr_pages) > desc_end) { 983 ret = -EINVAL; 984 goto err; 985 } 986 987 rb_desc->cpu = cpu; 988 rb_desc->nr_page_va = 0; 989 rb_desc->meta_va = (unsigned long)__get_free_page(GFP_KERNEL); 990 if (!rb_desc->meta_va) 991 goto err; 992 993 for (id = 0; id < nr_pages; id++) { 994 rb_desc->page_va[id] = (unsigned long)__get_free_page(GFP_KERNEL); 995 if (!rb_desc->page_va[id]) 996 goto err; 997 998 rb_desc->nr_page_va++; 999 } 1000 desc->nr_cpus++; 1001 desc->struct_len += offsetof(struct ring_buffer_desc, page_va); 1002 desc->struct_len += struct_size(rb_desc, page_va, rb_desc->nr_page_va); 1003 rb_desc = __next_ring_buffer_desc(rb_desc); 1004 } 1005 1006 return 0; 1007 1008 err: 1009 trace_remote_free_buffer(desc); 1010 return ret; 1011 } 1012 EXPORT_SYMBOL_GPL(trace_remote_alloc_buffer); 1013 1014 static int 1015 trace_remote_enable_event(struct trace_remote *remote, struct remote_event *evt, bool enable) 1016 { 1017 int ret; 1018 1019 lockdep_assert_held(&remote->lock); 1020 1021 if (evt->enabled == enable) 1022 return 0; 1023 1024 ret = remote->cbs->enable_event(evt->id, enable, remote->priv); 1025 if (ret) 1026 return ret; 1027 1028 evt->enabled = enable; 1029 1030 return 0; 1031 } 1032 1033 static int remote_event_enable_show(struct seq_file *s, void *unused) 1034 { 1035 struct remote_event *evt = s->private; 1036 1037 seq_printf(s, "%d\n", evt->enabled); 1038 1039 return 0; 1040 } 1041 1042 static ssize_t remote_event_enable_write(struct file *filp, const char __user *ubuf, 1043 size_t count, loff_t *ppos) 1044 { 1045 struct seq_file *seq = filp->private_data; 1046 struct remote_event *evt = seq->private; 1047 struct trace_remote *remote = evt->remote; 1048 u8 enable; 1049 int ret; 1050 1051 ret = kstrtou8_from_user(ubuf, count, 10, &enable); 1052 if (ret) 1053 return ret; 1054 1055 guard(mutex)(&remote->lock); 1056 1057 ret = trace_remote_enable_event(remote, evt, enable); 1058 if (ret) 1059 return ret; 1060 1061 return count; 1062 } 1063 DEFINE_SHOW_STORE_ATTRIBUTE(remote_event_enable); 1064 1065 static int remote_event_id_show(struct seq_file *s, void *unused) 1066 { 1067 struct remote_event *evt = s->private; 1068 1069 seq_printf(s, "%d\n", evt->id); 1070 1071 return 0; 1072 } 1073 DEFINE_SHOW_ATTRIBUTE(remote_event_id); 1074 1075 static int remote_event_format_show(struct seq_file *s, void *unused) 1076 { 1077 size_t offset = sizeof(struct remote_event_hdr); 1078 struct remote_event *evt = s->private; 1079 struct trace_event_fields *field; 1080 1081 seq_printf(s, "name: %s\n", evt->name); 1082 seq_printf(s, "ID: %d\n", evt->id); 1083 seq_puts(s, 1084 "format:\n\tfield:unsigned short common_type;\toffset:0;\tsize:2;\tsigned:0;\n\n"); 1085 1086 field = &evt->fields[0]; 1087 while (field->name) { 1088 seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%u;\tsigned:%d;\n", 1089 field->type, field->name, offset, field->size, 1090 field->is_signed); 1091 offset += field->size; 1092 field++; 1093 } 1094 1095 if (field != &evt->fields[0]) 1096 seq_puts(s, "\n"); 1097 1098 seq_printf(s, "print fmt: %s\n", evt->print_fmt); 1099 1100 return 0; 1101 } 1102 DEFINE_SHOW_ATTRIBUTE(remote_event_format); 1103 1104 static int remote_event_callback(const char *name, umode_t *mode, void **data, 1105 const struct file_operations **fops) 1106 { 1107 if (!strcmp(name, "enable")) { 1108 *mode = TRACEFS_MODE_WRITE; 1109 *fops = &remote_event_enable_fops; 1110 return 1; 1111 } 1112 1113 if (!strcmp(name, "id")) { 1114 *mode = TRACEFS_MODE_READ; 1115 *fops = &remote_event_id_fops; 1116 return 1; 1117 } 1118 1119 if (!strcmp(name, "format")) { 1120 *mode = TRACEFS_MODE_READ; 1121 *fops = &remote_event_format_fops; 1122 return 1; 1123 } 1124 1125 return 0; 1126 } 1127 1128 static ssize_t remote_events_dir_enable_write(struct file *filp, const char __user *ubuf, 1129 size_t count, loff_t *ppos) 1130 { 1131 struct trace_remote *remote = file_inode(filp)->i_private; 1132 int i, ret; 1133 u8 enable; 1134 1135 ret = kstrtou8_from_user(ubuf, count, 10, &enable); 1136 if (ret) 1137 return ret; 1138 1139 guard(mutex)(&remote->lock); 1140 1141 for (i = 0; i < remote->nr_events; i++) { 1142 struct remote_event *evt = &remote->events[i]; 1143 1144 trace_remote_enable_event(remote, evt, enable); 1145 } 1146 1147 return count; 1148 } 1149 1150 static ssize_t remote_events_dir_enable_read(struct file *filp, char __user *ubuf, size_t cnt, 1151 loff_t *ppos) 1152 { 1153 struct trace_remote *remote = file_inode(filp)->i_private; 1154 const char enabled_char[] = {'0', '1', 'X'}; 1155 char enabled_str[] = " \n"; 1156 int i, enabled = -1; 1157 1158 guard(mutex)(&remote->lock); 1159 1160 for (i = 0; i < remote->nr_events; i++) { 1161 struct remote_event *evt = &remote->events[i]; 1162 1163 if (enabled == -1) { 1164 enabled = evt->enabled; 1165 } else if (enabled != evt->enabled) { 1166 enabled = 2; 1167 break; 1168 } 1169 } 1170 1171 enabled_str[0] = enabled_char[enabled == -1 ? 0 : enabled]; 1172 1173 return simple_read_from_buffer(ubuf, cnt, ppos, enabled_str, 2); 1174 } 1175 1176 static const struct file_operations remote_events_dir_enable_fops = { 1177 .write = remote_events_dir_enable_write, 1178 .read = remote_events_dir_enable_read, 1179 }; 1180 1181 static ssize_t 1182 remote_events_dir_header_page_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 1183 { 1184 struct trace_seq *s; 1185 int ret; 1186 1187 s = kmalloc(sizeof(*s), GFP_KERNEL); 1188 if (!s) 1189 return -ENOMEM; 1190 1191 trace_seq_init(s); 1192 1193 ring_buffer_print_page_header(NULL, s); 1194 ret = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, trace_seq_used(s)); 1195 kfree(s); 1196 1197 return ret; 1198 } 1199 1200 static const struct file_operations remote_events_dir_header_page_fops = { 1201 .read = remote_events_dir_header_page_read, 1202 }; 1203 1204 static ssize_t 1205 remote_events_dir_header_event_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 1206 { 1207 struct trace_seq *s; 1208 int ret; 1209 1210 s = kmalloc(sizeof(*s), GFP_KERNEL); 1211 if (!s) 1212 return -ENOMEM; 1213 1214 trace_seq_init(s); 1215 1216 ring_buffer_print_entry_header(s); 1217 ret = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, trace_seq_used(s)); 1218 kfree(s); 1219 1220 return ret; 1221 } 1222 1223 static const struct file_operations remote_events_dir_header_event_fops = { 1224 .read = remote_events_dir_header_event_read, 1225 }; 1226 1227 static int remote_events_dir_callback(const char *name, umode_t *mode, void **data, 1228 const struct file_operations **fops) 1229 { 1230 if (!strcmp(name, "enable")) { 1231 *mode = TRACEFS_MODE_WRITE; 1232 *fops = &remote_events_dir_enable_fops; 1233 return 1; 1234 } 1235 1236 if (!strcmp(name, "header_page")) { 1237 *mode = TRACEFS_MODE_READ; 1238 *fops = &remote_events_dir_header_page_fops; 1239 return 1; 1240 } 1241 1242 if (!strcmp(name, "header_event")) { 1243 *mode = TRACEFS_MODE_READ; 1244 *fops = &remote_events_dir_header_event_fops; 1245 return 1; 1246 } 1247 1248 return 0; 1249 } 1250 1251 static int trace_remote_init_eventfs(const char *remote_name, struct trace_remote *remote, 1252 struct remote_event *evt) 1253 { 1254 struct eventfs_inode *eventfs = remote->eventfs; 1255 static struct eventfs_entry dir_entries[] = { 1256 { 1257 .name = "enable", 1258 .callback = remote_events_dir_callback, 1259 }, { 1260 .name = "header_page", 1261 .callback = remote_events_dir_callback, 1262 }, { 1263 .name = "header_event", 1264 .callback = remote_events_dir_callback, 1265 } 1266 }; 1267 static struct eventfs_entry entries[] = { 1268 { 1269 .name = "enable", 1270 .callback = remote_event_callback, 1271 }, { 1272 .name = "id", 1273 .callback = remote_event_callback, 1274 }, { 1275 .name = "format", 1276 .callback = remote_event_callback, 1277 } 1278 }; 1279 bool eventfs_create = false; 1280 1281 if (!eventfs) { 1282 eventfs = eventfs_create_events_dir("events", remote->dentry, dir_entries, 1283 ARRAY_SIZE(dir_entries), remote); 1284 if (IS_ERR(eventfs)) 1285 return PTR_ERR(eventfs); 1286 1287 /* 1288 * Create similar hierarchy as local events even if a single system is supported at 1289 * the moment 1290 */ 1291 eventfs = eventfs_create_dir(remote_name, eventfs, NULL, 0, NULL); 1292 if (IS_ERR(eventfs)) 1293 return PTR_ERR(eventfs); 1294 1295 remote->eventfs = eventfs; 1296 eventfs_create = true; 1297 } 1298 1299 eventfs = eventfs_create_dir(evt->name, eventfs, entries, ARRAY_SIZE(entries), evt); 1300 if (IS_ERR(eventfs)) { 1301 if (eventfs_create) { 1302 eventfs_remove_events_dir(remote->eventfs); 1303 remote->eventfs = NULL; 1304 } 1305 return PTR_ERR(eventfs); 1306 } 1307 1308 return 0; 1309 } 1310 1311 static int trace_remote_attach_events(struct trace_remote *remote, struct remote_event *events, 1312 size_t nr_events) 1313 { 1314 int i; 1315 1316 for (i = 0; i < nr_events; i++) { 1317 struct remote_event *evt = &events[i]; 1318 1319 if (evt->remote) 1320 return -EEXIST; 1321 1322 evt->remote = remote; 1323 1324 /* We need events to be sorted for efficient lookup */ 1325 if (i && evt->id <= events[i - 1].id) 1326 return -EINVAL; 1327 } 1328 1329 remote->events = events; 1330 remote->nr_events = nr_events; 1331 1332 return 0; 1333 } 1334 1335 static int trace_remote_register_events(const char *remote_name, struct trace_remote *remote, 1336 struct remote_event *events, size_t nr_events) 1337 { 1338 int i, ret; 1339 1340 ret = trace_remote_attach_events(remote, events, nr_events); 1341 if (ret) 1342 return ret; 1343 1344 for (i = 0; i < nr_events; i++) { 1345 struct remote_event *evt = &events[i]; 1346 1347 ret = trace_remote_init_eventfs(remote_name, remote, evt); 1348 if (ret) 1349 pr_warn("Failed to init eventfs for event '%s' (%d)", 1350 evt->name, ret); 1351 } 1352 1353 return 0; 1354 } 1355 1356 static int __cmp_events(const void *key, const void *data) 1357 { 1358 const struct remote_event *evt = data; 1359 int id = (int)((long)key); 1360 1361 return id - (int)evt->id; 1362 } 1363 1364 static struct remote_event *trace_remote_find_event(struct trace_remote *remote, unsigned short id) 1365 { 1366 return bsearch((const void *)(unsigned long)id, remote->events, remote->nr_events, 1367 sizeof(*remote->events), __cmp_events); 1368 } 1369