// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2025 - Google LLC
 * Author: Vincent Donnefort <vdonnefort@google.com>
 */

#include <linux/kstrtox.h>
#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/tracefs.h>
#include <linux/trace_remote.h>
#include <linux/trace_seq.h>
#include <linux/types.h>

#include "trace.h"

#define TRACEFS_DIR		"remotes"
#define TRACEFS_MODE_WRITE	0640
#define TRACEFS_MODE_READ	0440

struct trace_remote_iterator {
	struct trace_remote	*remote;
	struct trace_seq	seq;
	struct delayed_work	poll_work;
	unsigned long		lost_events;
	u64			ts;
	int			cpu;
	int			evt_cpu;
};

struct trace_remote {
	struct trace_remote_callbacks	*cbs;
	void				*priv;
	struct trace_buffer		*trace_buffer;
	struct trace_buffer_desc	*trace_buffer_desc;
	unsigned long			trace_buffer_size;
	struct ring_buffer_remote	rb_remote;
	struct mutex			lock;
	unsigned int			nr_readers;
	unsigned int			poll_ms;
	bool				tracing_on;
};

static bool trace_remote_loaded(struct trace_remote *remote)
{
	return !!remote->trace_buffer;
}

static int trace_remote_load(struct trace_remote *remote)
{
	struct ring_buffer_remote *rb_remote = &remote->rb_remote;
	struct trace_buffer_desc *desc;

	lockdep_assert_held(&remote->lock);

	if (trace_remote_loaded(remote))
		return 0;

	desc = remote->cbs->load_trace_buffer(remote->trace_buffer_size, remote->priv);
	if (IS_ERR(desc))
		return PTR_ERR(desc);

	rb_remote->desc = desc;
	rb_remote->swap_reader_page = remote->cbs->swap_reader_page;
	rb_remote->priv = remote->priv;
	rb_remote->reset = remote->cbs->reset;
	remote->trace_buffer = ring_buffer_alloc_remote(rb_remote);
	if (!remote->trace_buffer) {
		remote->cbs->unload_trace_buffer(desc, remote->priv);
		return -ENOMEM;
	}

	remote->trace_buffer_desc = desc;

	return 0;
}

static void trace_remote_try_unload(struct trace_remote *remote)
{
	lockdep_assert_held(&remote->lock);

	if (!trace_remote_loaded(remote))
		return;

	/* The buffer is being read or writable */
	if (remote->nr_readers || remote->tracing_on)
		return;

	/* The buffer has readable data */
	if (!ring_buffer_empty(remote->trace_buffer))
		return;

	ring_buffer_free(remote->trace_buffer);
	remote->trace_buffer = NULL;
	remote->cbs->unload_trace_buffer(remote->trace_buffer_desc, remote->priv);
}

static int trace_remote_enable_tracing(struct trace_remote *remote)
{
	int ret;

	lockdep_assert_held(&remote->lock);

	if (remote->tracing_on)
		return 0;

	ret = trace_remote_load(remote);
	if (ret)
		return ret;

	ret = remote->cbs->enable_tracing(true, remote->priv);
	if (ret) {
		trace_remote_try_unload(remote);
		return ret;
	}

	remote->tracing_on = true;

	return 0;
}

static int trace_remote_disable_tracing(struct trace_remote *remote)
{
	int ret;

	lockdep_assert_held(&remote->lock);

	if (!remote->tracing_on)
		return 0;

	ret = remote->cbs->enable_tracing(false, remote->priv);
	if (ret)
		return ret;

	ring_buffer_poll_remote(remote->trace_buffer, RING_BUFFER_ALL_CPUS);
	remote->tracing_on = false;
	trace_remote_try_unload(remote);

	return 0;
}

static void trace_remote_reset(struct trace_remote *remote, int cpu)
{
	lockdep_assert_held(&remote->lock);

	if (!trace_remote_loaded(remote))
		return;

	if (cpu == RING_BUFFER_ALL_CPUS)
		ring_buffer_reset(remote->trace_buffer);
	else
		ring_buffer_reset_cpu(remote->trace_buffer, cpu);

	trace_remote_try_unload(remote);
}

static ssize_t
tracing_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct trace_remote *remote = filp->private_data;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	guard(mutex)(&remote->lock);

	ret = val ? trace_remote_enable_tracing(remote) : trace_remote_disable_tracing(remote);
	if (ret)
		return ret;

	return cnt;
}

static int tracing_on_show(struct seq_file *s, void *unused)
{
	struct trace_remote *remote = s->private;

	seq_printf(s, "%d\n", remote->tracing_on);

	return 0;
}
DEFINE_SHOW_STORE_ATTRIBUTE(tracing_on);

static ssize_t buffer_size_kb_write(struct file *filp, const char __user *ubuf, size_t cnt,
				    loff_t *ppos)
{
	struct trace_remote *remote = filp->private_data;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	/* KiB to Bytes */
	if (!val || check_shl_overflow(val, 10, &val))
		return -EINVAL;

	guard(mutex)(&remote->lock);

	if (trace_remote_loaded(remote))
		return -EBUSY;

	remote->trace_buffer_size = val;

	return cnt;
}

static int buffer_size_kb_show(struct seq_file *s, void *unused)
{
	struct trace_remote *remote = s->private;

	seq_printf(s, "%lu (%s)\n", remote->trace_buffer_size >> 10,
		   trace_remote_loaded(remote) ? "loaded" : "unloaded");

	return 0;
}
DEFINE_SHOW_STORE_ATTRIBUTE(buffer_size_kb);

static int trace_remote_get(struct trace_remote *remote, int cpu)
{
	int ret;

	if (remote->nr_readers == UINT_MAX)
		return -EBUSY;

	ret = trace_remote_load(remote);
	if (ret)
		return ret;

	remote->nr_readers++;

	return 0;
}

static void trace_remote_put(struct trace_remote *remote)
{
	if (WARN_ON(!remote->nr_readers))
		return;

	remote->nr_readers--;
	if (remote->nr_readers)
		return;

	trace_remote_try_unload(remote);
}

static void __poll_remote(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct trace_remote_iterator *iter;

	iter = container_of(dwork, struct trace_remote_iterator, poll_work);
	ring_buffer_poll_remote(iter->remote->trace_buffer, iter->cpu);
	schedule_delayed_work(dwork, msecs_to_jiffies(iter->remote->poll_ms));
}

static struct trace_remote_iterator *trace_remote_iter(struct trace_remote *remote, int cpu)
{
	struct trace_remote_iterator *iter = NULL;
	int ret;

	lockdep_assert_held(&remote->lock);

	ret = trace_remote_get(remote, cpu);
	if (ret)
		return ERR_PTR(ret);

	/* Test the CPU */
	ret = ring_buffer_poll_remote(remote->trace_buffer, cpu);
	if (ret)
		goto err;

	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
	if (iter) {
		iter->remote = remote;
		iter->cpu = cpu;
		trace_seq_init(&iter->seq);
		INIT_DELAYED_WORK(&iter->poll_work, __poll_remote);
		schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms));

		return iter;
	}
	ret = -ENOMEM;

err:
	kfree(iter);
	trace_remote_put(remote);

	return ERR_PTR(ret);
}

static void trace_remote_iter_free(struct trace_remote_iterator *iter)
{
	struct trace_remote *remote;

	if (!iter)
		return;

	remote = iter->remote;

	lockdep_assert_held(&remote->lock);

	/* Stop the self-rearming poll work before freeing the iterator it references */
	cancel_delayed_work_sync(&iter->poll_work);
	kfree(iter);
	trace_remote_put(remote);
}

static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter)
{
	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
	int cpu = iter->cpu;

	if (cpu != RING_BUFFER_ALL_CPUS) {
		if (ring_buffer_empty_cpu(trace_buffer, cpu))
			return false;

		if (!ring_buffer_peek(trace_buffer, cpu, &iter->ts, &iter->lost_events))
			return false;

		iter->evt_cpu = cpu;
		return true;
	}

	iter->ts = U64_MAX;
	for_each_possible_cpu(cpu) {
		unsigned long lost_events;
		u64 ts;

		if (ring_buffer_empty_cpu(trace_buffer, cpu))
			continue;

		if (!ring_buffer_peek(trace_buffer, cpu, &ts, &lost_events))
			continue;

		if (ts >= iter->ts)
			continue;

		iter->ts = ts;
		iter->evt_cpu = cpu;
		iter->lost_events = lost_events;
	}

	return iter->ts != U64_MAX;
}

static int trace_remote_iter_print_event(struct trace_remote_iterator *iter)
{
	unsigned long usecs_rem;
	u64 ts = iter->ts;

	if (iter->lost_events)
		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
				 iter->evt_cpu, iter->lost_events);

	do_div(ts, 1000);
	usecs_rem = do_div(ts, USEC_PER_SEC);

	trace_seq_printf(&iter->seq, "[%03d]\t%5llu.%06lu: ", iter->evt_cpu,
			 ts, usecs_rem);

	return trace_seq_has_overflowed(&iter->seq) ? -EOVERFLOW : 0;
}

static int trace_pipe_open(struct inode *inode, struct file *filp)
{
	struct trace_remote *remote = inode->i_private;
	struct trace_remote_iterator *iter;
	int cpu = RING_BUFFER_ALL_CPUS;

	if (inode->i_cdev)
		cpu = (long)inode->i_cdev - 1;

	guard(mutex)(&remote->lock);
	iter = trace_remote_iter(remote, cpu);
	filp->private_data = iter;

	return IS_ERR(iter) ? PTR_ERR(iter) : 0;
}

static int trace_pipe_release(struct inode *inode, struct file *filp)
{
	struct trace_remote_iterator *iter = filp->private_data;
	struct trace_remote *remote = iter->remote;

	guard(mutex)(&remote->lock);

	trace_remote_iter_free(iter);

	return 0;
}

static ssize_t trace_pipe_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct trace_remote_iterator *iter = filp->private_data;
	struct trace_buffer *trace_buffer = iter->remote->trace_buffer;
	int ret;

copy_to_user:
	ret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (ret != -EBUSY)
		return ret;

	trace_seq_init(&iter->seq);

	ret = ring_buffer_wait(trace_buffer, iter->cpu, 0, NULL, NULL);
	if (ret < 0)
		return ret;

	while (trace_remote_iter_read_event(iter)) {
		int prev_len = iter->seq.seq.len;

		if (trace_remote_iter_print_event(iter)) {
			iter->seq.seq.len = prev_len;
			break;
		}

		ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL);
	}

	goto copy_to_user;
}

static const struct file_operations trace_pipe_fops = {
	.open		= trace_pipe_open,
	.read		= trace_pipe_read,
	.release	= trace_pipe_release,
};

static ssize_t trace_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct trace_remote *remote = inode->i_private;
	int cpu = RING_BUFFER_ALL_CPUS;

	if (inode->i_cdev)
		cpu = (long)inode->i_cdev - 1;

	guard(mutex)(&remote->lock);

	trace_remote_reset(remote, cpu);

	return cnt;
}

static const struct file_operations trace_fops = {
	.write		= trace_write,
};

static int trace_remote_init_tracefs(const char *name, struct trace_remote *remote)
{
	struct dentry *remote_d, *percpu_d, *d;
	static struct dentry *root;
	static DEFINE_MUTEX(lock);
	bool root_inited = false;
	int cpu;

	guard(mutex)(&lock);

	if (!root) {
		root = tracefs_create_dir(TRACEFS_DIR, NULL);
		if (!root) {
			pr_err("Failed to create tracefs dir "TRACEFS_DIR"\n");
			return -ENOMEM;
		}
		root_inited = true;
	}

	remote_d = tracefs_create_dir(name, root);
	if (!remote_d) {
		pr_err("Failed to create tracefs dir "TRACEFS_DIR"/%s/\n", name);
		goto err;
	}

	d = trace_create_file("tracing_on", TRACEFS_MODE_WRITE, remote_d, remote, &tracing_on_fops);
	if (!d)
		goto err;

	d = trace_create_file("buffer_size_kb", TRACEFS_MODE_WRITE, remote_d, remote,
			      &buffer_size_kb_fops);
	if (!d)
		goto err;

	d = trace_create_file("trace_pipe", TRACEFS_MODE_READ, remote_d, remote, &trace_pipe_fops);
	if (!d)
		goto err;

	d = trace_create_file("trace", TRACEFS_MODE_WRITE, remote_d, remote, &trace_fops);
	if (!d)
		goto err;

	percpu_d = tracefs_create_dir("per_cpu", remote_d);
	if (!percpu_d) {
		pr_err("Failed to create tracefs dir "TRACEFS_DIR"/%s/per_cpu/\n", name);
		goto err;
	}

	for_each_possible_cpu(cpu) {
		struct dentry *cpu_d;
		char cpu_name[16];

		snprintf(cpu_name, sizeof(cpu_name), "cpu%d", cpu);
		cpu_d = tracefs_create_dir(cpu_name, percpu_d);
		if (!cpu_d) {
			pr_err("Failed to create tracefs dir "TRACEFS_DIR"/%s/per_cpu/cpu%d\n",
			       name, cpu);
			goto err;
		}

		d = trace_create_cpu_file("trace_pipe", TRACEFS_MODE_READ, cpu_d, remote, cpu,
					  &trace_pipe_fops);
		if (!d)
			goto err;

		d = trace_create_cpu_file("trace", TRACEFS_MODE_WRITE, cpu_d, remote, cpu,
					  &trace_fops);
		if (!d)
			goto err;
	}

	return 0;

err:
	if (root_inited) {
		tracefs_remove(root);
		root = NULL;
	} else {
		tracefs_remove(remote_d);
	}

	return -ENOMEM;
}

/**
 * trace_remote_register() - Register a Tracefs remote
 * @name: Name of the remote, used for the Tracefs remotes/ directory.
 * @cbs: Set of callbacks used to control the remote.
 * @priv: Private data, passed to each callback from @cbs.
 *
 * A trace remote is an entity outside of the kernel (most likely firmware or a
 * hypervisor) capable of writing events into a Tracefs compatible ring-buffer.
 * The kernel then acts as a reader.
 *
 * The registered remote can be found under the Tracefs directory
 * remotes/<name>/.
 *
 * Return: 0 on success, negative error code on failure.
 */
int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv)
{
	struct trace_remote *remote;

	remote = kzalloc(sizeof(*remote), GFP_KERNEL);
	if (!remote)
		return -ENOMEM;

	remote->cbs = cbs;
	remote->priv = priv;
	remote->trace_buffer_size = 7 << 10;
	remote->poll_ms = 100;
	mutex_init(&remote->lock);

	if (trace_remote_init_tracefs(name, remote)) {
		kfree(remote);
		return -ENOMEM;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(trace_remote_register);

/**
 * trace_remote_free_buffer() - Free a trace buffer allocated with trace_remote_alloc_buffer()
 * @desc: Descriptor of the per-CPU ring-buffers, originally filled by
 *        trace_remote_alloc_buffer()
 *
 * Most likely called from &trace_remote_callbacks.unload_trace_buffer.
 */
void trace_remote_free_buffer(struct trace_buffer_desc *desc)
{
	struct ring_buffer_desc *rb_desc;
	int cpu;

	for_each_ring_buffer_desc(rb_desc, cpu, desc) {
		unsigned int id;

		free_page(rb_desc->meta_va);

		for (id = 0; id < rb_desc->nr_page_va; id++)
			free_page(rb_desc->page_va[id]);
	}
}
EXPORT_SYMBOL_GPL(trace_remote_free_buffer);

/**
 * trace_remote_alloc_buffer() - Dynamically allocate a trace buffer
 * @desc: Uninitialized trace_buffer_desc
 * @desc_size: Size of the trace_buffer_desc. Must be at least equal to
 *             trace_buffer_desc_size()
 * @buffer_size: Size in bytes of each per-CPU ring-buffer
 * @cpumask: CPUs to allocate a ring-buffer for
 *
 * Helper to dynamically allocate a set of pages (enough to cover @buffer_size)
 * for each CPU from @cpumask and fill @desc. Most likely called from
 * &trace_remote_callbacks.load_trace_buffer.
 *
 * Return: 0 on success, negative error code on failure.
 */
int trace_remote_alloc_buffer(struct trace_buffer_desc *desc, size_t desc_size, size_t buffer_size,
			      const struct cpumask *cpumask)
{
	unsigned int nr_pages = max(DIV_ROUND_UP(buffer_size, PAGE_SIZE), 2UL) + 1;
	void *desc_end = (void *)desc + desc_size;
	struct ring_buffer_desc *rb_desc;
	int cpu, ret = -ENOMEM;

	if (desc_size < struct_size(desc, __data, 0))
		return -EINVAL;

	desc->nr_cpus = 0;
	desc->struct_len = struct_size(desc, __data, 0);

	rb_desc = (struct ring_buffer_desc *)&desc->__data[0];

	for_each_cpu(cpu, cpumask) {
		unsigned int id;

		if ((void *)rb_desc + struct_size(rb_desc, page_va, nr_pages) > desc_end) {
			ret = -EINVAL;
			goto err;
		}

		rb_desc->cpu = cpu;
		rb_desc->nr_page_va = 0;
		rb_desc->meta_va = (unsigned long)__get_free_page(GFP_KERNEL);
		if (!rb_desc->meta_va)
			goto err;

		for (id = 0; id < nr_pages; id++) {
			rb_desc->page_va[id] = (unsigned long)__get_free_page(GFP_KERNEL);
			if (!rb_desc->page_va[id])
				goto err;

			rb_desc->nr_page_va++;
		}
		desc->nr_cpus++;
		desc->struct_len += struct_size(rb_desc, page_va, rb_desc->nr_page_va);
		rb_desc = __next_ring_buffer_desc(rb_desc);
	}

	return 0;

err:
	trace_remote_free_buffer(desc);
	return ret;
}
EXPORT_SYMBOL_GPL(trace_remote_alloc_buffer);
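
/*
 * Illustrative sketch (kept in a comment, not part of the code above): how a
 * backend might register a remote. The example_*() helpers and the "example"
 * name are hypothetical, and the swap_reader_page/reset callbacks are omitted
 * for brevity; only trace_remote_register(), trace_remote_alloc_buffer(),
 * trace_remote_free_buffer() and the trace_remote_callbacks members used here
 * come from this file. The descriptor sizing assumes a helper along the lines
 * of the trace_buffer_desc_size() mentioned in the kernel-doc above; check
 * <linux/trace_remote.h> for the exact prototypes.
 *
 *	static struct trace_buffer_desc *example_load_trace_buffer(unsigned long size, void *priv)
 *	{
 *		size_t desc_size = trace_buffer_desc_size(size, num_possible_cpus());
 *		struct trace_buffer_desc *desc = kzalloc(desc_size, GFP_KERNEL);
 *
 *		if (!desc)
 *			return ERR_PTR(-ENOMEM);
 *
 *		if (trace_remote_alloc_buffer(desc, desc_size, size, cpu_possible_mask)) {
 *			kfree(desc);
 *			return ERR_PTR(-ENOMEM);
 *		}
 *
 *		return desc;	// hand the described pages over to the remote writer here
 *	}
 *
 *	static void example_unload_trace_buffer(struct trace_buffer_desc *desc, void *priv)
 *	{
 *		trace_remote_free_buffer(desc);	// free the per-CPU pages...
 *		kfree(desc);			// ...then the descriptor itself
 *	}
 *
 *	static int example_enable_tracing(bool enable, void *priv)
 *	{
 *		return 0;	// tell the remote writer to start or stop here
 *	}
 *
 *	static struct trace_remote_callbacks example_cbs = {
 *		.load_trace_buffer	= example_load_trace_buffer,
 *		.unload_trace_buffer	= example_unload_trace_buffer,
 *		.enable_tracing		= example_enable_tracing,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return trace_remote_register("example", &example_cbs, NULL);
 *	}
 *
 * Once registered, the remote is driven entirely from tracefs, e.g.
 * remotes/example/tracing_on, remotes/example/buffer_size_kb and
 * remotes/example/trace_pipe.
 */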