1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2025 - Google LLC 4 * Author: Vincent Donnefort <vdonnefort@google.com> 5 */ 6 7 #include <linux/kstrtox.h> 8 #include <linux/lockdep.h> 9 #include <linux/mutex.h> 10 #include <linux/tracefs.h> 11 #include <linux/trace_remote.h> 12 #include <linux/trace_seq.h> 13 #include <linux/types.h> 14 15 #include "trace.h" 16 17 #define TRACEFS_DIR "remotes" 18 #define TRACEFS_MODE_WRITE 0640 19 #define TRACEFS_MODE_READ 0440 20 21 enum tri_type { 22 TRI_CONSUMING, 23 TRI_NONCONSUMING, 24 }; 25 26 struct trace_remote_iterator { 27 struct trace_remote *remote; 28 struct trace_seq seq; 29 struct delayed_work poll_work; 30 unsigned long lost_events; 31 u64 ts; 32 struct ring_buffer_iter *rb_iter; 33 struct ring_buffer_iter **rb_iters; 34 struct remote_event_hdr *evt; 35 int cpu; 36 int evt_cpu; 37 loff_t pos; 38 enum tri_type type; 39 }; 40 41 struct trace_remote { 42 struct trace_remote_callbacks *cbs; 43 void *priv; 44 struct trace_buffer *trace_buffer; 45 struct trace_buffer_desc *trace_buffer_desc; 46 struct dentry *dentry; 47 struct eventfs_inode *eventfs; 48 struct remote_event *events; 49 unsigned long nr_events; 50 unsigned long trace_buffer_size; 51 struct ring_buffer_remote rb_remote; 52 struct mutex lock; 53 struct rw_semaphore reader_lock; 54 struct rw_semaphore *pcpu_reader_locks; 55 unsigned int nr_readers; 56 unsigned int poll_ms; 57 bool tracing_on; 58 }; 59 60 static bool trace_remote_loaded(struct trace_remote *remote) 61 { 62 return !!remote->trace_buffer; 63 } 64 65 static int trace_remote_load(struct trace_remote *remote) 66 { 67 struct ring_buffer_remote *rb_remote = &remote->rb_remote; 68 struct trace_buffer_desc *desc; 69 70 lockdep_assert_held(&remote->lock); 71 72 if (trace_remote_loaded(remote)) 73 return 0; 74 75 desc = remote->cbs->load_trace_buffer(remote->trace_buffer_size, remote->priv); 76 if (IS_ERR(desc)) 77 return PTR_ERR(desc); 78 79 rb_remote->desc = desc; 80 rb_remote->swap_reader_page = remote->cbs->swap_reader_page; 81 rb_remote->priv = remote->priv; 82 rb_remote->reset = remote->cbs->reset; 83 remote->trace_buffer = ring_buffer_alloc_remote(rb_remote); 84 if (!remote->trace_buffer) { 85 remote->cbs->unload_trace_buffer(desc, remote->priv); 86 return -ENOMEM; 87 } 88 89 remote->trace_buffer_desc = desc; 90 91 return 0; 92 } 93 94 static void trace_remote_try_unload(struct trace_remote *remote) 95 { 96 lockdep_assert_held(&remote->lock); 97 98 if (!trace_remote_loaded(remote)) 99 return; 100 101 /* The buffer is being read or writable */ 102 if (remote->nr_readers || remote->tracing_on) 103 return; 104 105 /* The buffer has readable data */ 106 if (!ring_buffer_empty(remote->trace_buffer)) 107 return; 108 109 ring_buffer_free(remote->trace_buffer); 110 remote->trace_buffer = NULL; 111 remote->cbs->unload_trace_buffer(remote->trace_buffer_desc, remote->priv); 112 } 113 114 static int trace_remote_enable_tracing(struct trace_remote *remote) 115 { 116 int ret; 117 118 lockdep_assert_held(&remote->lock); 119 120 if (remote->tracing_on) 121 return 0; 122 123 ret = trace_remote_load(remote); 124 if (ret) 125 return ret; 126 127 ret = remote->cbs->enable_tracing(true, remote->priv); 128 if (ret) { 129 trace_remote_try_unload(remote); 130 return ret; 131 } 132 133 remote->tracing_on = true; 134 135 return 0; 136 } 137 138 static int trace_remote_disable_tracing(struct trace_remote *remote) 139 { 140 int ret; 141 142 lockdep_assert_held(&remote->lock); 143 144 if (!remote->tracing_on) 145 return 0; 146 147 ret = remote->cbs->enable_tracing(false, remote->priv); 148 if (ret) 149 return ret; 150 151 ring_buffer_poll_remote(remote->trace_buffer, RING_BUFFER_ALL_CPUS); 152 remote->tracing_on = false; 153 trace_remote_try_unload(remote); 154 155 return 0; 156 } 157 158 static void trace_remote_reset(struct trace_remote *remote, int cpu) 159 { 160 lockdep_assert_held(&remote->lock); 161 162 if (!trace_remote_loaded(remote)) 163 return; 164 165 if (cpu == RING_BUFFER_ALL_CPUS) 166 ring_buffer_reset(remote->trace_buffer); 167 else 168 ring_buffer_reset_cpu(remote->trace_buffer, cpu); 169 170 trace_remote_try_unload(remote); 171 } 172 173 static ssize_t 174 tracing_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) 175 { 176 struct seq_file *seq = filp->private_data; 177 struct trace_remote *remote = seq->private; 178 unsigned long val; 179 int ret; 180 181 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 182 if (ret) 183 return ret; 184 185 guard(mutex)(&remote->lock); 186 187 ret = val ? trace_remote_enable_tracing(remote) : trace_remote_disable_tracing(remote); 188 if (ret) 189 return ret; 190 191 return cnt; 192 } 193 static int tracing_on_show(struct seq_file *s, void *unused) 194 { 195 struct trace_remote *remote = s->private; 196 197 seq_printf(s, "%d\n", remote->tracing_on); 198 199 return 0; 200 } 201 DEFINE_SHOW_STORE_ATTRIBUTE(tracing_on); 202 203 static ssize_t buffer_size_kb_write(struct file *filp, const char __user *ubuf, size_t cnt, 204 loff_t *ppos) 205 { 206 struct seq_file *seq = filp->private_data; 207 struct trace_remote *remote = seq->private; 208 unsigned long val; 209 int ret; 210 211 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 212 if (ret) 213 return ret; 214 215 /* KiB to Bytes */ 216 if (!val || check_shl_overflow(val, 10, &val)) 217 return -EINVAL; 218 219 guard(mutex)(&remote->lock); 220 221 if (trace_remote_loaded(remote)) 222 return -EBUSY; 223 224 remote->trace_buffer_size = val; 225 226 return cnt; 227 } 228 229 static int buffer_size_kb_show(struct seq_file *s, void *unused) 230 { 231 struct trace_remote *remote = s->private; 232 233 seq_printf(s, "%lu (%s)\n", remote->trace_buffer_size >> 10, 234 trace_remote_loaded(remote) ? "loaded" : "unloaded"); 235 236 return 0; 237 } 238 DEFINE_SHOW_STORE_ATTRIBUTE(buffer_size_kb); 239 240 static int trace_remote_get(struct trace_remote *remote, int cpu) 241 { 242 int ret; 243 244 if (remote->nr_readers == UINT_MAX) 245 return -EBUSY; 246 247 ret = trace_remote_load(remote); 248 if (ret) 249 return ret; 250 251 if (cpu != RING_BUFFER_ALL_CPUS && !remote->pcpu_reader_locks) { 252 int lock_cpu; 253 254 remote->pcpu_reader_locks = kcalloc(nr_cpu_ids, sizeof(*remote->pcpu_reader_locks), 255 GFP_KERNEL); 256 if (!remote->pcpu_reader_locks) { 257 trace_remote_try_unload(remote); 258 return -ENOMEM; 259 } 260 261 for_each_possible_cpu(lock_cpu) 262 init_rwsem(&remote->pcpu_reader_locks[lock_cpu]); 263 } 264 265 remote->nr_readers++; 266 267 return 0; 268 } 269 270 static void trace_remote_put(struct trace_remote *remote) 271 { 272 if (WARN_ON(!remote->nr_readers)) 273 return; 274 275 remote->nr_readers--; 276 if (remote->nr_readers) 277 return; 278 279 kfree(remote->pcpu_reader_locks); 280 remote->pcpu_reader_locks = NULL; 281 282 trace_remote_try_unload(remote); 283 } 284 285 static bool trace_remote_has_cpu(struct trace_remote *remote, int cpu) 286 { 287 if (cpu == RING_BUFFER_ALL_CPUS) 288 return true; 289 290 return ring_buffer_poll_remote(remote->trace_buffer, cpu) == 0; 291 } 292 293 static void __poll_remote(struct work_struct *work) 294 { 295 struct delayed_work *dwork = to_delayed_work(work); 296 struct trace_remote_iterator *iter; 297 298 iter = container_of(dwork, struct trace_remote_iterator, poll_work); 299 ring_buffer_poll_remote(iter->remote->trace_buffer, iter->cpu); 300 schedule_delayed_work((struct delayed_work *)work, 301 msecs_to_jiffies(iter->remote->poll_ms)); 302 } 303 304 static void __free_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu) 305 { 306 if (cpu != RING_BUFFER_ALL_CPUS) { 307 ring_buffer_read_finish(iter->rb_iter); 308 return; 309 } 310 311 for_each_possible_cpu(cpu) { 312 if (iter->rb_iters[cpu]) 313 ring_buffer_read_finish(iter->rb_iters[cpu]); 314 } 315 316 kfree(iter->rb_iters); 317 } 318 319 static int __alloc_ring_buffer_iter(struct trace_remote_iterator *iter, int cpu) 320 { 321 if (cpu != RING_BUFFER_ALL_CPUS) { 322 iter->rb_iter = ring_buffer_read_start(iter->remote->trace_buffer, cpu, GFP_KERNEL); 323 324 return iter->rb_iter ? 0 : -ENOMEM; 325 } 326 327 iter->rb_iters = kcalloc(nr_cpu_ids, sizeof(*iter->rb_iters), GFP_KERNEL); 328 if (!iter->rb_iters) 329 return -ENOMEM; 330 331 for_each_possible_cpu(cpu) { 332 iter->rb_iters[cpu] = ring_buffer_read_start(iter->remote->trace_buffer, cpu, 333 GFP_KERNEL); 334 if (!iter->rb_iters[cpu]) { 335 /* This CPU isn't part of trace_buffer. Skip it */ 336 if (!trace_remote_has_cpu(iter->remote, cpu)) 337 continue; 338 339 __free_ring_buffer_iter(iter, RING_BUFFER_ALL_CPUS); 340 return -ENOMEM; 341 } 342 } 343 344 return 0; 345 } 346 347 static struct trace_remote_iterator 348 *trace_remote_iter(struct trace_remote *remote, int cpu, enum tri_type type) 349 { 350 struct trace_remote_iterator *iter = NULL; 351 int ret; 352 353 lockdep_assert_held(&remote->lock); 354 355 if (type == TRI_NONCONSUMING && !trace_remote_loaded(remote)) 356 return NULL; 357 358 ret = trace_remote_get(remote, cpu); 359 if (ret) 360 return ERR_PTR(ret); 361 362 if (!trace_remote_has_cpu(remote, cpu)) { 363 ret = -ENODEV; 364 goto err; 365 } 366 367 iter = kzalloc_obj(*iter); 368 if (iter) { 369 iter->remote = remote; 370 iter->cpu = cpu; 371 iter->type = type; 372 trace_seq_init(&iter->seq); 373 374 switch (type) { 375 case TRI_CONSUMING: 376 ring_buffer_poll_remote(remote->trace_buffer, cpu); 377 INIT_DELAYED_WORK(&iter->poll_work, __poll_remote); 378 schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms)); 379 break; 380 case TRI_NONCONSUMING: 381 ret = __alloc_ring_buffer_iter(iter, cpu); 382 break; 383 } 384 385 if (ret) 386 goto err; 387 388 return iter; 389 } 390 ret = -ENOMEM; 391 392 err: 393 kfree(iter); 394 trace_remote_put(remote); 395 396 return ERR_PTR(ret); 397 } 398 399 static void trace_remote_iter_free(struct trace_remote_iterator *iter) 400 { 401 struct trace_remote *remote; 402 403 if (!iter) 404 return; 405 406 remote = iter->remote; 407 408 lockdep_assert_held(&remote->lock); 409 410 switch (iter->type) { 411 case TRI_CONSUMING: 412 cancel_delayed_work_sync(&iter->poll_work); 413 break; 414 case TRI_NONCONSUMING: 415 __free_ring_buffer_iter(iter, iter->cpu); 416 break; 417 } 418 419 kfree(iter); 420 trace_remote_put(remote); 421 } 422 423 static void trace_remote_iter_read_start(struct trace_remote_iterator *iter) 424 { 425 struct trace_remote *remote = iter->remote; 426 int cpu = iter->cpu; 427 428 /* Acquire global reader lock */ 429 if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING) 430 down_write(&remote->reader_lock); 431 else 432 down_read(&remote->reader_lock); 433 434 if (cpu == RING_BUFFER_ALL_CPUS) 435 return; 436 437 /* 438 * No need for the remote lock here, iter holds a reference on 439 * remote->nr_readers 440 */ 441 442 /* Get the per-CPU one */ 443 if (WARN_ON_ONCE(!remote->pcpu_reader_locks)) 444 return; 445 446 if (iter->type == TRI_CONSUMING) 447 down_write(&remote->pcpu_reader_locks[cpu]); 448 else 449 down_read(&remote->pcpu_reader_locks[cpu]); 450 } 451 452 static void trace_remote_iter_read_finished(struct trace_remote_iterator *iter) 453 { 454 struct trace_remote *remote = iter->remote; 455 int cpu = iter->cpu; 456 457 /* Release per-CPU reader lock */ 458 if (cpu != RING_BUFFER_ALL_CPUS) { 459 /* 460 * No need for the remote lock here, iter holds a reference on 461 * remote->nr_readers 462 */ 463 if (iter->type == TRI_CONSUMING) 464 up_write(&remote->pcpu_reader_locks[cpu]); 465 else 466 up_read(&remote->pcpu_reader_locks[cpu]); 467 } 468 469 /* Release global reader lock */ 470 if (cpu == RING_BUFFER_ALL_CPUS && iter->type == TRI_CONSUMING) 471 up_write(&remote->reader_lock); 472 else 473 up_read(&remote->reader_lock); 474 } 475 476 static struct ring_buffer_iter *__get_rb_iter(struct trace_remote_iterator *iter, int cpu) 477 { 478 return iter->cpu != RING_BUFFER_ALL_CPUS ? iter->rb_iter : iter->rb_iters[cpu]; 479 } 480 481 static struct ring_buffer_event * 482 __peek_event(struct trace_remote_iterator *iter, int cpu, u64 *ts, unsigned long *lost_events) 483 { 484 struct ring_buffer_event *rb_evt; 485 struct ring_buffer_iter *rb_iter; 486 487 switch (iter->type) { 488 case TRI_CONSUMING: 489 return ring_buffer_peek(iter->remote->trace_buffer, cpu, ts, lost_events); 490 case TRI_NONCONSUMING: 491 rb_iter = __get_rb_iter(iter, cpu); 492 if (!rb_iter) 493 return NULL; 494 495 rb_evt = ring_buffer_iter_peek(rb_iter, ts); 496 if (!rb_evt) 497 return NULL; 498 499 *lost_events = ring_buffer_iter_dropped(rb_iter); 500 501 return rb_evt; 502 } 503 504 return NULL; 505 } 506 507 static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter) 508 { 509 struct trace_buffer *trace_buffer = iter->remote->trace_buffer; 510 struct ring_buffer_event *rb_evt; 511 int cpu = iter->cpu; 512 513 if (cpu != RING_BUFFER_ALL_CPUS) { 514 if (ring_buffer_empty_cpu(trace_buffer, cpu)) 515 return false; 516 517 rb_evt = __peek_event(iter, cpu, &iter->ts, &iter->lost_events); 518 if (!rb_evt) 519 return false; 520 521 iter->evt_cpu = cpu; 522 iter->evt = ring_buffer_event_data(rb_evt); 523 return true; 524 } 525 526 iter->ts = U64_MAX; 527 for_each_possible_cpu(cpu) { 528 unsigned long lost_events; 529 u64 ts; 530 531 if (ring_buffer_empty_cpu(trace_buffer, cpu)) 532 continue; 533 534 rb_evt = __peek_event(iter, cpu, &ts, &lost_events); 535 if (!rb_evt) 536 continue; 537 538 if (ts >= iter->ts) 539 continue; 540 541 iter->ts = ts; 542 iter->evt_cpu = cpu; 543 iter->evt = ring_buffer_event_data(rb_evt); 544 iter->lost_events = lost_events; 545 } 546 547 return iter->ts != U64_MAX; 548 } 549 550 static void trace_remote_iter_move(struct trace_remote_iterator *iter) 551 { 552 struct trace_buffer *trace_buffer = iter->remote->trace_buffer; 553 554 switch (iter->type) { 555 case TRI_CONSUMING: 556 ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL); 557 break; 558 case TRI_NONCONSUMING: 559 ring_buffer_iter_advance(__get_rb_iter(iter, iter->evt_cpu)); 560 break; 561 } 562 } 563 564 static struct remote_event *trace_remote_find_event(struct trace_remote *remote, unsigned short id); 565 566 static int trace_remote_iter_print_event(struct trace_remote_iterator *iter) 567 { 568 struct remote_event *evt; 569 unsigned long usecs_rem; 570 u64 ts = iter->ts; 571 572 if (iter->lost_events) 573 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", 574 iter->evt_cpu, iter->lost_events); 575 576 do_div(ts, 1000); 577 usecs_rem = do_div(ts, USEC_PER_SEC); 578 579 trace_seq_printf(&iter->seq, "[%03d]\t%5llu.%06lu: ", iter->evt_cpu, 580 ts, usecs_rem); 581 582 evt = trace_remote_find_event(iter->remote, iter->evt->id); 583 if (!evt) 584 trace_seq_printf(&iter->seq, "UNKNOWN id=%d\n", iter->evt->id); 585 else 586 evt->print(iter->evt, &iter->seq); 587 588 return trace_seq_has_overflowed(&iter->seq) ? -EOVERFLOW : 0; 589 } 590 591 static int trace_pipe_open(struct inode *inode, struct file *filp) 592 { 593 struct trace_remote *remote = inode->i_private; 594 struct trace_remote_iterator *iter; 595 int cpu = tracing_get_cpu(inode); 596 597 guard(mutex)(&remote->lock); 598 599 iter = trace_remote_iter(remote, cpu, TRI_CONSUMING); 600 if (IS_ERR(iter)) 601 return PTR_ERR(iter); 602 603 filp->private_data = iter; 604 605 return IS_ERR(iter) ? PTR_ERR(iter) : 0; 606 } 607 608 static int trace_pipe_release(struct inode *inode, struct file *filp) 609 { 610 struct trace_remote_iterator *iter = filp->private_data; 611 struct trace_remote *remote = iter->remote; 612 613 guard(mutex)(&remote->lock); 614 615 trace_remote_iter_free(iter); 616 617 return 0; 618 } 619 620 static ssize_t trace_pipe_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 621 { 622 struct trace_remote_iterator *iter = filp->private_data; 623 struct trace_buffer *trace_buffer = iter->remote->trace_buffer; 624 int ret; 625 626 copy_to_user: 627 ret = trace_seq_to_user(&iter->seq, ubuf, cnt); 628 if (ret != -EBUSY) 629 return ret; 630 631 trace_seq_init(&iter->seq); 632 633 ret = ring_buffer_wait(trace_buffer, iter->cpu, 0, NULL, NULL); 634 if (ret < 0) 635 return ret; 636 637 trace_remote_iter_read_start(iter); 638 639 while (trace_remote_iter_read_event(iter)) { 640 int prev_len = iter->seq.seq.len; 641 642 if (trace_remote_iter_print_event(iter)) { 643 iter->seq.seq.len = prev_len; 644 break; 645 } 646 647 trace_remote_iter_move(iter); 648 } 649 650 trace_remote_iter_read_finished(iter); 651 652 goto copy_to_user; 653 } 654 655 static const struct file_operations trace_pipe_fops = { 656 .open = trace_pipe_open, 657 .read = trace_pipe_read, 658 .release = trace_pipe_release, 659 }; 660 661 static void *trace_next(struct seq_file *m, void *v, loff_t *pos) 662 { 663 struct trace_remote_iterator *iter = m->private; 664 665 ++*pos; 666 667 if (!iter || !trace_remote_iter_read_event(iter)) 668 return NULL; 669 670 trace_remote_iter_move(iter); 671 iter->pos++; 672 673 return iter; 674 } 675 676 static void *trace_start(struct seq_file *m, loff_t *pos) 677 { 678 struct trace_remote_iterator *iter = m->private; 679 loff_t i; 680 681 if (!iter) 682 return NULL; 683 684 trace_remote_iter_read_start(iter); 685 686 if (!*pos) { 687 iter->pos = -1; 688 return trace_next(m, NULL, &i); 689 } 690 691 i = iter->pos; 692 while (i < *pos) { 693 iter = trace_next(m, NULL, &i); 694 if (!iter) 695 return NULL; 696 } 697 698 return iter; 699 } 700 701 static int trace_show(struct seq_file *m, void *v) 702 { 703 struct trace_remote_iterator *iter = v; 704 705 trace_seq_init(&iter->seq); 706 707 if (trace_remote_iter_print_event(iter)) { 708 seq_printf(m, "[EVENT %d PRINT TOO BIG]\n", iter->evt->id); 709 return 0; 710 } 711 712 return trace_print_seq(m, &iter->seq); 713 } 714 715 static void trace_stop(struct seq_file *m, void *v) 716 { 717 struct trace_remote_iterator *iter = m->private; 718 719 if (iter) 720 trace_remote_iter_read_finished(iter); 721 } 722 723 static const struct seq_operations trace_sops = { 724 .start = trace_start, 725 .next = trace_next, 726 .show = trace_show, 727 .stop = trace_stop, 728 }; 729 730 static int trace_open(struct inode *inode, struct file *filp) 731 { 732 struct trace_remote *remote = inode->i_private; 733 struct trace_remote_iterator *iter = NULL; 734 int cpu = tracing_get_cpu(inode); 735 int ret; 736 737 if (!(filp->f_mode & FMODE_READ)) 738 return 0; 739 740 guard(mutex)(&remote->lock); 741 742 iter = trace_remote_iter(remote, cpu, TRI_NONCONSUMING); 743 if (IS_ERR(iter)) 744 return PTR_ERR(iter); 745 746 ret = seq_open(filp, &trace_sops); 747 if (ret) { 748 trace_remote_iter_free(iter); 749 return ret; 750 } 751 752 ((struct seq_file *)filp->private_data)->private = (void *)iter; 753 754 return 0; 755 } 756 757 static int trace_release(struct inode *inode, struct file *filp) 758 { 759 struct trace_remote_iterator *iter; 760 761 if (!(filp->f_mode & FMODE_READ)) 762 return 0; 763 764 iter = ((struct seq_file *)filp->private_data)->private; 765 seq_release(inode, filp); 766 767 if (!iter) 768 return 0; 769 770 guard(mutex)(&iter->remote->lock); 771 772 trace_remote_iter_free(iter); 773 774 return 0; 775 } 776 777 static ssize_t trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) 778 { 779 struct inode *inode = file_inode(filp); 780 struct trace_remote *remote = inode->i_private; 781 int cpu = tracing_get_cpu(inode); 782 783 guard(mutex)(&remote->lock); 784 785 trace_remote_reset(remote, cpu); 786 787 return cnt; 788 } 789 790 static const struct file_operations trace_fops = { 791 .open = trace_open, 792 .write = trace_write, 793 .read = seq_read, 794 .read_iter = seq_read_iter, 795 .release = trace_release, 796 }; 797 798 static int trace_remote_init_tracefs(const char *name, struct trace_remote *remote) 799 { 800 struct dentry *remote_d, *percpu_d, *d; 801 static struct dentry *root; 802 static DEFINE_MUTEX(lock); 803 bool root_inited = false; 804 int cpu; 805 806 guard(mutex)(&lock); 807 808 if (!root) { 809 root = tracefs_create_dir(TRACEFS_DIR, NULL); 810 if (!root) { 811 pr_err("Failed to create tracefs dir "TRACEFS_DIR"\n"); 812 return -ENOMEM; 813 } 814 root_inited = true; 815 } 816 817 remote_d = tracefs_create_dir(name, root); 818 if (!remote_d) { 819 pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/\n", name); 820 goto err; 821 } 822 823 d = trace_create_file("tracing_on", TRACEFS_MODE_WRITE, remote_d, remote, &tracing_on_fops); 824 if (!d) 825 goto err; 826 827 d = trace_create_file("buffer_size_kb", TRACEFS_MODE_WRITE, remote_d, remote, 828 &buffer_size_kb_fops); 829 if (!d) 830 goto err; 831 832 d = trace_create_file("trace_pipe", TRACEFS_MODE_READ, remote_d, remote, &trace_pipe_fops); 833 if (!d) 834 goto err; 835 836 d = trace_create_file("trace", TRACEFS_MODE_WRITE, remote_d, remote, &trace_fops); 837 if (!d) 838 goto err; 839 840 percpu_d = tracefs_create_dir("per_cpu", remote_d); 841 if (!percpu_d) { 842 pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/per_cpu/\n", name); 843 goto err; 844 } 845 846 for_each_possible_cpu(cpu) { 847 struct dentry *cpu_d; 848 char cpu_name[16]; 849 850 snprintf(cpu_name, sizeof(cpu_name), "cpu%d", cpu); 851 cpu_d = tracefs_create_dir(cpu_name, percpu_d); 852 if (!cpu_d) { 853 pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/percpu/cpu%d\n", 854 name, cpu); 855 goto err; 856 } 857 858 d = trace_create_cpu_file("trace_pipe", TRACEFS_MODE_READ, cpu_d, remote, cpu, 859 &trace_pipe_fops); 860 if (!d) 861 goto err; 862 863 d = trace_create_cpu_file("trace", TRACEFS_MODE_WRITE, cpu_d, remote, cpu, 864 &trace_fops); 865 if (!d) 866 goto err; 867 } 868 869 remote->dentry = remote_d; 870 871 return 0; 872 873 err: 874 if (root_inited) { 875 tracefs_remove(root); 876 root = NULL; 877 } else { 878 tracefs_remove(remote_d); 879 } 880 881 return -ENOMEM; 882 } 883 884 static int trace_remote_register_events(const char *remote_name, struct trace_remote *remote, 885 struct remote_event *events, size_t nr_events); 886 887 /** 888 * trace_remote_register() - Register a Tracefs remote 889 * @name: Name of the remote, used for the Tracefs remotes/ directory. 890 * @cbs: Set of callbacks used to control the remote. 891 * @priv: Private data, passed to each callback from @cbs. 892 * @events: Array of events. &remote_event.name and &remote_event.id must be 893 * filled by the caller. 894 * @nr_events: Number of events in the @events array. 895 * 896 * A trace remote is an entity, outside of the kernel (most likely firmware or 897 * hypervisor) capable of writing events into a Tracefs compatible ring-buffer. 898 * The kernel would then act as a reader. 899 * 900 * The registered remote will be found under the Tracefs directory 901 * remotes/<name>. 902 * 903 * Return: 0 on success, negative error code on failure. 904 */ 905 int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv, 906 struct remote_event *events, size_t nr_events) 907 { 908 struct trace_remote *remote; 909 int ret; 910 911 remote = kzalloc_obj(*remote); 912 if (!remote) 913 return -ENOMEM; 914 915 remote->cbs = cbs; 916 remote->priv = priv; 917 remote->trace_buffer_size = 7 << 10; 918 remote->poll_ms = 100; 919 mutex_init(&remote->lock); 920 init_rwsem(&remote->reader_lock); 921 922 if (trace_remote_init_tracefs(name, remote)) { 923 kfree(remote); 924 return -ENOMEM; 925 } 926 927 ret = trace_remote_register_events(name, remote, events, nr_events); 928 if (ret) { 929 pr_err("Failed to register events for trace remote '%s' (%d)\n", 930 name, ret); 931 return ret; 932 } 933 934 ret = cbs->init ? cbs->init(remote->dentry, priv) : 0; 935 if (ret) 936 pr_err("Init failed for trace remote '%s' (%d)\n", name, ret); 937 938 return ret; 939 } 940 EXPORT_SYMBOL_GPL(trace_remote_register); 941 942 /** 943 * trace_remote_free_buffer() - Free trace buffer allocated with trace_remote_alloc_buffer() 944 * @desc: Descriptor of the per-CPU ring-buffers, originally filled by 945 * trace_remote_alloc_buffer() 946 * 947 * Most likely called from &trace_remote_callbacks.unload_trace_buffer. 948 */ 949 void trace_remote_free_buffer(struct trace_buffer_desc *desc) 950 { 951 struct ring_buffer_desc *rb_desc; 952 int cpu; 953 954 for_each_ring_buffer_desc(rb_desc, cpu, desc) { 955 unsigned int id; 956 957 free_page(rb_desc->meta_va); 958 959 for (id = 0; id < rb_desc->nr_page_va; id++) 960 free_page(rb_desc->page_va[id]); 961 } 962 } 963 EXPORT_SYMBOL_GPL(trace_remote_free_buffer); 964 965 /** 966 * trace_remote_alloc_buffer() - Dynamically allocate a trace buffer 967 * @desc: Uninitialized trace_buffer_desc 968 * @desc_size: Size of the trace_buffer_desc. Must be at least equal to 969 * trace_buffer_desc_size() 970 * @buffer_size: Size in bytes of each per-CPU ring-buffer 971 * @cpumask: CPUs to allocate a ring-buffer for 972 * 973 * Helper to dynamically allocate a set of pages (enough to cover @buffer_size) 974 * for each CPU from @cpumask and fill @desc. Most likely called from 975 * &trace_remote_callbacks.load_trace_buffer. 976 * 977 * Return: 0 on success, negative error code on failure. 978 */ 979 int trace_remote_alloc_buffer(struct trace_buffer_desc *desc, size_t desc_size, size_t buffer_size, 980 const struct cpumask *cpumask) 981 { 982 unsigned int nr_pages = max(DIV_ROUND_UP(buffer_size, PAGE_SIZE), 2UL) + 1; 983 void *desc_end = desc + desc_size; 984 struct ring_buffer_desc *rb_desc; 985 int cpu, ret = -ENOMEM; 986 987 if (desc_size < struct_size(desc, __data, 0)) 988 return -EINVAL; 989 990 desc->nr_cpus = 0; 991 desc->struct_len = struct_size(desc, __data, 0); 992 993 rb_desc = (struct ring_buffer_desc *)&desc->__data[0]; 994 995 for_each_cpu(cpu, cpumask) { 996 unsigned int id; 997 998 if ((void *)rb_desc + struct_size(rb_desc, page_va, nr_pages) > desc_end) { 999 ret = -EINVAL; 1000 goto err; 1001 } 1002 1003 rb_desc->cpu = cpu; 1004 rb_desc->nr_page_va = 0; 1005 rb_desc->meta_va = (unsigned long)__get_free_page(GFP_KERNEL); 1006 if (!rb_desc->meta_va) 1007 goto err; 1008 1009 for (id = 0; id < nr_pages; id++) { 1010 rb_desc->page_va[id] = (unsigned long)__get_free_page(GFP_KERNEL); 1011 if (!rb_desc->page_va[id]) 1012 goto err; 1013 1014 rb_desc->nr_page_va++; 1015 } 1016 desc->nr_cpus++; 1017 desc->struct_len += offsetof(struct ring_buffer_desc, page_va); 1018 desc->struct_len += struct_size(rb_desc, page_va, rb_desc->nr_page_va); 1019 rb_desc = __next_ring_buffer_desc(rb_desc); 1020 } 1021 1022 return 0; 1023 1024 err: 1025 trace_remote_free_buffer(desc); 1026 return ret; 1027 } 1028 EXPORT_SYMBOL_GPL(trace_remote_alloc_buffer); 1029 1030 static int 1031 trace_remote_enable_event(struct trace_remote *remote, struct remote_event *evt, bool enable) 1032 { 1033 int ret; 1034 1035 lockdep_assert_held(&remote->lock); 1036 1037 if (evt->enabled == enable) 1038 return 0; 1039 1040 ret = remote->cbs->enable_event(evt->id, enable, remote->priv); 1041 if (ret) 1042 return ret; 1043 1044 evt->enabled = enable; 1045 1046 return 0; 1047 } 1048 1049 static int remote_event_enable_show(struct seq_file *s, void *unused) 1050 { 1051 struct remote_event *evt = s->private; 1052 1053 seq_printf(s, "%d\n", evt->enabled); 1054 1055 return 0; 1056 } 1057 1058 static ssize_t remote_event_enable_write(struct file *filp, const char __user *ubuf, 1059 size_t count, loff_t *ppos) 1060 { 1061 struct seq_file *seq = filp->private_data; 1062 struct remote_event *evt = seq->private; 1063 struct trace_remote *remote = evt->remote; 1064 u8 enable; 1065 int ret; 1066 1067 ret = kstrtou8_from_user(ubuf, count, 10, &enable); 1068 if (ret) 1069 return ret; 1070 1071 guard(mutex)(&remote->lock); 1072 1073 ret = trace_remote_enable_event(remote, evt, enable); 1074 if (ret) 1075 return ret; 1076 1077 return count; 1078 } 1079 DEFINE_SHOW_STORE_ATTRIBUTE(remote_event_enable); 1080 1081 static int remote_event_id_show(struct seq_file *s, void *unused) 1082 { 1083 struct remote_event *evt = s->private; 1084 1085 seq_printf(s, "%d\n", evt->id); 1086 1087 return 0; 1088 } 1089 DEFINE_SHOW_ATTRIBUTE(remote_event_id); 1090 1091 static int remote_event_format_show(struct seq_file *s, void *unused) 1092 { 1093 size_t offset = sizeof(struct remote_event_hdr); 1094 struct remote_event *evt = s->private; 1095 struct trace_event_fields *field; 1096 1097 seq_printf(s, "name: %s\n", evt->name); 1098 seq_printf(s, "ID: %d\n", evt->id); 1099 seq_puts(s, 1100 "format:\n\tfield:unsigned short common_type;\toffset:0;\tsize:2;\tsigned:0;\n\n"); 1101 1102 field = &evt->fields[0]; 1103 while (field->name) { 1104 seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%u;\tsigned:%d;\n", 1105 field->type, field->name, offset, field->size, 1106 field->is_signed); 1107 offset += field->size; 1108 field++; 1109 } 1110 1111 if (field != &evt->fields[0]) 1112 seq_puts(s, "\n"); 1113 1114 seq_printf(s, "print fmt: %s\n", evt->print_fmt); 1115 1116 return 0; 1117 } 1118 DEFINE_SHOW_ATTRIBUTE(remote_event_format); 1119 1120 static int remote_event_callback(const char *name, umode_t *mode, void **data, 1121 const struct file_operations **fops) 1122 { 1123 if (!strcmp(name, "enable")) { 1124 *mode = TRACEFS_MODE_WRITE; 1125 *fops = &remote_event_enable_fops; 1126 return 1; 1127 } 1128 1129 if (!strcmp(name, "id")) { 1130 *mode = TRACEFS_MODE_READ; 1131 *fops = &remote_event_id_fops; 1132 return 1; 1133 } 1134 1135 if (!strcmp(name, "format")) { 1136 *mode = TRACEFS_MODE_READ; 1137 *fops = &remote_event_format_fops; 1138 return 1; 1139 } 1140 1141 return 0; 1142 } 1143 1144 static ssize_t remote_events_dir_enable_write(struct file *filp, const char __user *ubuf, 1145 size_t count, loff_t *ppos) 1146 { 1147 struct trace_remote *remote = file_inode(filp)->i_private; 1148 int i, ret; 1149 u8 enable; 1150 1151 ret = kstrtou8_from_user(ubuf, count, 10, &enable); 1152 if (ret) 1153 return ret; 1154 1155 guard(mutex)(&remote->lock); 1156 1157 for (i = 0; i < remote->nr_events; i++) { 1158 struct remote_event *evt = &remote->events[i]; 1159 1160 trace_remote_enable_event(remote, evt, enable); 1161 } 1162 1163 return count; 1164 } 1165 1166 static ssize_t remote_events_dir_enable_read(struct file *filp, char __user *ubuf, size_t cnt, 1167 loff_t *ppos) 1168 { 1169 struct trace_remote *remote = file_inode(filp)->i_private; 1170 const char enabled_char[] = {'0', '1', 'X'}; 1171 char enabled_str[] = " \n"; 1172 int i, enabled = -1; 1173 1174 guard(mutex)(&remote->lock); 1175 1176 for (i = 0; i < remote->nr_events; i++) { 1177 struct remote_event *evt = &remote->events[i]; 1178 1179 if (enabled == -1) { 1180 enabled = evt->enabled; 1181 } else if (enabled != evt->enabled) { 1182 enabled = 2; 1183 break; 1184 } 1185 } 1186 1187 enabled_str[0] = enabled_char[enabled == -1 ? 0 : enabled]; 1188 1189 return simple_read_from_buffer(ubuf, cnt, ppos, enabled_str, 2); 1190 } 1191 1192 static const struct file_operations remote_events_dir_enable_fops = { 1193 .write = remote_events_dir_enable_write, 1194 .read = remote_events_dir_enable_read, 1195 }; 1196 1197 static ssize_t 1198 remote_events_dir_header_page_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 1199 { 1200 struct trace_seq *s; 1201 int ret; 1202 1203 s = kmalloc(sizeof(*s), GFP_KERNEL); 1204 if (!s) 1205 return -ENOMEM; 1206 1207 trace_seq_init(s); 1208 1209 ring_buffer_print_page_header(NULL, s); 1210 ret = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, trace_seq_used(s)); 1211 kfree(s); 1212 1213 return ret; 1214 } 1215 1216 static const struct file_operations remote_events_dir_header_page_fops = { 1217 .read = remote_events_dir_header_page_read, 1218 }; 1219 1220 static ssize_t 1221 remote_events_dir_header_event_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 1222 { 1223 struct trace_seq *s; 1224 int ret; 1225 1226 s = kmalloc(sizeof(*s), GFP_KERNEL); 1227 if (!s) 1228 return -ENOMEM; 1229 1230 trace_seq_init(s); 1231 1232 ring_buffer_print_entry_header(s); 1233 ret = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, trace_seq_used(s)); 1234 kfree(s); 1235 1236 return ret; 1237 } 1238 1239 static const struct file_operations remote_events_dir_header_event_fops = { 1240 .read = remote_events_dir_header_event_read, 1241 }; 1242 1243 static int remote_events_dir_callback(const char *name, umode_t *mode, void **data, 1244 const struct file_operations **fops) 1245 { 1246 if (!strcmp(name, "enable")) { 1247 *mode = TRACEFS_MODE_WRITE; 1248 *fops = &remote_events_dir_enable_fops; 1249 return 1; 1250 } 1251 1252 if (!strcmp(name, "header_page")) { 1253 *mode = TRACEFS_MODE_READ; 1254 *fops = &remote_events_dir_header_page_fops; 1255 return 1; 1256 } 1257 1258 if (!strcmp(name, "header_event")) { 1259 *mode = TRACEFS_MODE_READ; 1260 *fops = &remote_events_dir_header_event_fops; 1261 return 1; 1262 } 1263 1264 return 0; 1265 } 1266 1267 static int trace_remote_init_eventfs(const char *remote_name, struct trace_remote *remote, 1268 struct remote_event *evt) 1269 { 1270 struct eventfs_inode *eventfs = remote->eventfs; 1271 static struct eventfs_entry dir_entries[] = { 1272 { 1273 .name = "enable", 1274 .callback = remote_events_dir_callback, 1275 }, { 1276 .name = "header_page", 1277 .callback = remote_events_dir_callback, 1278 }, { 1279 .name = "header_event", 1280 .callback = remote_events_dir_callback, 1281 } 1282 }; 1283 static struct eventfs_entry entries[] = { 1284 { 1285 .name = "enable", 1286 .callback = remote_event_callback, 1287 }, { 1288 .name = "id", 1289 .callback = remote_event_callback, 1290 }, { 1291 .name = "format", 1292 .callback = remote_event_callback, 1293 } 1294 }; 1295 bool eventfs_create = false; 1296 1297 if (!eventfs) { 1298 eventfs = eventfs_create_events_dir("events", remote->dentry, dir_entries, 1299 ARRAY_SIZE(dir_entries), remote); 1300 if (IS_ERR(eventfs)) 1301 return PTR_ERR(eventfs); 1302 1303 /* 1304 * Create similar hierarchy as local events even if a single system is supported at 1305 * the moment 1306 */ 1307 eventfs = eventfs_create_dir(remote_name, eventfs, NULL, 0, NULL); 1308 if (IS_ERR(eventfs)) 1309 return PTR_ERR(eventfs); 1310 1311 remote->eventfs = eventfs; 1312 eventfs_create = true; 1313 } 1314 1315 eventfs = eventfs_create_dir(evt->name, eventfs, entries, ARRAY_SIZE(entries), evt); 1316 if (IS_ERR(eventfs)) { 1317 if (eventfs_create) { 1318 eventfs_remove_events_dir(remote->eventfs); 1319 remote->eventfs = NULL; 1320 } 1321 return PTR_ERR(eventfs); 1322 } 1323 1324 return 0; 1325 } 1326 1327 static int trace_remote_attach_events(struct trace_remote *remote, struct remote_event *events, 1328 size_t nr_events) 1329 { 1330 int i; 1331 1332 for (i = 0; i < nr_events; i++) { 1333 struct remote_event *evt = &events[i]; 1334 1335 if (evt->remote) 1336 return -EEXIST; 1337 1338 evt->remote = remote; 1339 1340 /* We need events to be sorted for efficient lookup */ 1341 if (i && evt->id <= events[i - 1].id) 1342 return -EINVAL; 1343 } 1344 1345 remote->events = events; 1346 remote->nr_events = nr_events; 1347 1348 return 0; 1349 } 1350 1351 static int trace_remote_register_events(const char *remote_name, struct trace_remote *remote, 1352 struct remote_event *events, size_t nr_events) 1353 { 1354 int i, ret; 1355 1356 ret = trace_remote_attach_events(remote, events, nr_events); 1357 if (ret) 1358 return ret; 1359 1360 for (i = 0; i < nr_events; i++) { 1361 struct remote_event *evt = &events[i]; 1362 1363 ret = trace_remote_init_eventfs(remote_name, remote, evt); 1364 if (ret) 1365 pr_warn("Failed to init eventfs for event '%s' (%d)", 1366 evt->name, ret); 1367 } 1368 1369 return 0; 1370 } 1371 1372 static int __cmp_events(const void *key, const void *data) 1373 { 1374 const struct remote_event *evt = data; 1375 int id = (int)((long)key); 1376 1377 return id - (int)evt->id; 1378 } 1379 1380 static struct remote_event *trace_remote_find_event(struct trace_remote *remote, unsigned short id) 1381 { 1382 return bsearch((const void *)(unsigned long)id, remote->events, remote->nr_events, 1383 sizeof(*remote->events), __cmp_events); 1384 } 1385