1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2025 - Google LLC 4 * Author: Vincent Donnefort <vdonnefort@google.com> 5 */ 6 7 #include <linux/kstrtox.h> 8 #include <linux/lockdep.h> 9 #include <linux/mutex.h> 10 #include <linux/tracefs.h> 11 #include <linux/trace_remote.h> 12 #include <linux/trace_seq.h> 13 #include <linux/types.h> 14 15 #include "trace.h" 16 17 #define TRACEFS_DIR "remotes" 18 #define TRACEFS_MODE_WRITE 0640 19 #define TRACEFS_MODE_READ 0440 20 21 struct trace_remote_iterator { 22 struct trace_remote *remote; 23 struct trace_seq seq; 24 struct delayed_work poll_work; 25 unsigned long lost_events; 26 u64 ts; 27 int cpu; 28 int evt_cpu; 29 }; 30 31 struct trace_remote { 32 struct trace_remote_callbacks *cbs; 33 void *priv; 34 struct trace_buffer *trace_buffer; 35 struct trace_buffer_desc *trace_buffer_desc; 36 unsigned long trace_buffer_size; 37 struct ring_buffer_remote rb_remote; 38 struct mutex lock; 39 unsigned int nr_readers; 40 unsigned int poll_ms; 41 bool tracing_on; 42 }; 43 44 static bool trace_remote_loaded(struct trace_remote *remote) 45 { 46 return !!remote->trace_buffer; 47 } 48 49 static int trace_remote_load(struct trace_remote *remote) 50 { 51 struct ring_buffer_remote *rb_remote = &remote->rb_remote; 52 struct trace_buffer_desc *desc; 53 54 lockdep_assert_held(&remote->lock); 55 56 if (trace_remote_loaded(remote)) 57 return 0; 58 59 desc = remote->cbs->load_trace_buffer(remote->trace_buffer_size, remote->priv); 60 if (IS_ERR(desc)) 61 return PTR_ERR(desc); 62 63 rb_remote->desc = desc; 64 rb_remote->swap_reader_page = remote->cbs->swap_reader_page; 65 rb_remote->priv = remote->priv; 66 remote->trace_buffer = ring_buffer_alloc_remote(rb_remote); 67 if (!remote->trace_buffer) { 68 remote->cbs->unload_trace_buffer(desc, remote->priv); 69 return -ENOMEM; 70 } 71 72 remote->trace_buffer_desc = desc; 73 74 return 0; 75 } 76 77 static void trace_remote_try_unload(struct trace_remote *remote) 78 { 79 lockdep_assert_held(&remote->lock); 80 81 if (!trace_remote_loaded(remote)) 82 return; 83 84 /* The buffer is being read or writable */ 85 if (remote->nr_readers || remote->tracing_on) 86 return; 87 88 /* The buffer has readable data */ 89 if (!ring_buffer_empty(remote->trace_buffer)) 90 return; 91 92 ring_buffer_free(remote->trace_buffer); 93 remote->trace_buffer = NULL; 94 remote->cbs->unload_trace_buffer(remote->trace_buffer_desc, remote->priv); 95 } 96 97 static int trace_remote_enable_tracing(struct trace_remote *remote) 98 { 99 int ret; 100 101 lockdep_assert_held(&remote->lock); 102 103 if (remote->tracing_on) 104 return 0; 105 106 ret = trace_remote_load(remote); 107 if (ret) 108 return ret; 109 110 ret = remote->cbs->enable_tracing(true, remote->priv); 111 if (ret) { 112 trace_remote_try_unload(remote); 113 return ret; 114 } 115 116 remote->tracing_on = true; 117 118 return 0; 119 } 120 121 static int trace_remote_disable_tracing(struct trace_remote *remote) 122 { 123 int ret; 124 125 lockdep_assert_held(&remote->lock); 126 127 if (!remote->tracing_on) 128 return 0; 129 130 ret = remote->cbs->enable_tracing(false, remote->priv); 131 if (ret) 132 return ret; 133 134 ring_buffer_poll_remote(remote->trace_buffer, RING_BUFFER_ALL_CPUS); 135 remote->tracing_on = false; 136 trace_remote_try_unload(remote); 137 138 return 0; 139 } 140 141 static ssize_t 142 tracing_on_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) 143 { 144 struct trace_remote *remote = filp->private_data; 145 unsigned long val; 146 int ret; 147 148 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 149 if (ret) 150 return ret; 151 152 guard(mutex)(&remote->lock); 153 154 ret = val ? trace_remote_enable_tracing(remote) : trace_remote_disable_tracing(remote); 155 if (ret) 156 return ret; 157 158 return cnt; 159 } 160 static int tracing_on_show(struct seq_file *s, void *unused) 161 { 162 struct trace_remote *remote = s->private; 163 164 seq_printf(s, "%d\n", remote->tracing_on); 165 166 return 0; 167 } 168 DEFINE_SHOW_STORE_ATTRIBUTE(tracing_on); 169 170 static ssize_t buffer_size_kb_write(struct file *filp, const char __user *ubuf, size_t cnt, 171 loff_t *ppos) 172 { 173 struct trace_remote *remote = filp->private_data; 174 unsigned long val; 175 int ret; 176 177 ret = kstrtoul_from_user(ubuf, cnt, 10, &val); 178 if (ret) 179 return ret; 180 181 /* KiB to Bytes */ 182 if (!val || check_shl_overflow(val, 10, &val)) 183 return -EINVAL; 184 185 guard(mutex)(&remote->lock); 186 187 if (trace_remote_loaded(remote)) 188 return -EBUSY; 189 190 remote->trace_buffer_size = val; 191 192 return cnt; 193 } 194 195 static int buffer_size_kb_show(struct seq_file *s, void *unused) 196 { 197 struct trace_remote *remote = s->private; 198 199 seq_printf(s, "%lu (%s)\n", remote->trace_buffer_size >> 10, 200 trace_remote_loaded(remote) ? "loaded" : "unloaded"); 201 202 return 0; 203 } 204 DEFINE_SHOW_STORE_ATTRIBUTE(buffer_size_kb); 205 206 static int trace_remote_get(struct trace_remote *remote, int cpu) 207 { 208 int ret; 209 210 if (remote->nr_readers == UINT_MAX) 211 return -EBUSY; 212 213 ret = trace_remote_load(remote); 214 if (ret) 215 return ret; 216 217 remote->nr_readers++; 218 219 return 0; 220 } 221 222 static void trace_remote_put(struct trace_remote *remote) 223 { 224 if (WARN_ON(!remote->nr_readers)) 225 return; 226 227 remote->nr_readers--; 228 if (remote->nr_readers) 229 return; 230 231 trace_remote_try_unload(remote); 232 } 233 234 static void __poll_remote(struct work_struct *work) 235 { 236 struct delayed_work *dwork = to_delayed_work(work); 237 struct trace_remote_iterator *iter; 238 239 iter = container_of(dwork, struct trace_remote_iterator, poll_work); 240 ring_buffer_poll_remote(iter->remote->trace_buffer, iter->cpu); 241 schedule_delayed_work((struct delayed_work *)work, 242 msecs_to_jiffies(iter->remote->poll_ms)); 243 } 244 245 static struct trace_remote_iterator *trace_remote_iter(struct trace_remote *remote, int cpu) 246 { 247 struct trace_remote_iterator *iter = NULL; 248 int ret; 249 250 lockdep_assert_held(&remote->lock); 251 252 253 ret = trace_remote_get(remote, cpu); 254 if (ret) 255 return ERR_PTR(ret); 256 257 /* Test the CPU */ 258 ret = ring_buffer_poll_remote(remote->trace_buffer, cpu); 259 if (ret) 260 goto err; 261 262 iter = kzalloc_obj(*iter); 263 if (iter) { 264 iter->remote = remote; 265 iter->cpu = cpu; 266 trace_seq_init(&iter->seq); 267 INIT_DELAYED_WORK(&iter->poll_work, __poll_remote); 268 schedule_delayed_work(&iter->poll_work, msecs_to_jiffies(remote->poll_ms)); 269 270 return iter; 271 } 272 ret = -ENOMEM; 273 274 err: 275 kfree(iter); 276 trace_remote_put(remote); 277 278 return ERR_PTR(ret); 279 } 280 281 static void trace_remote_iter_free(struct trace_remote_iterator *iter) 282 { 283 struct trace_remote *remote; 284 285 if (!iter) 286 return; 287 288 remote = iter->remote; 289 290 lockdep_assert_held(&remote->lock); 291 292 kfree(iter); 293 trace_remote_put(remote); 294 } 295 296 static bool trace_remote_iter_read_event(struct trace_remote_iterator *iter) 297 { 298 struct trace_buffer *trace_buffer = iter->remote->trace_buffer; 299 int cpu = iter->cpu; 300 301 if (cpu != RING_BUFFER_ALL_CPUS) { 302 if (ring_buffer_empty_cpu(trace_buffer, cpu)) 303 return false; 304 305 if (!ring_buffer_peek(trace_buffer, cpu, &iter->ts, &iter->lost_events)) 306 return false; 307 308 iter->evt_cpu = cpu; 309 return true; 310 } 311 312 iter->ts = U64_MAX; 313 for_each_possible_cpu(cpu) { 314 unsigned long lost_events; 315 u64 ts; 316 317 if (ring_buffer_empty_cpu(trace_buffer, cpu)) 318 continue; 319 320 if (!ring_buffer_peek(trace_buffer, cpu, &ts, &lost_events)) 321 continue; 322 323 if (ts >= iter->ts) 324 continue; 325 326 iter->ts = ts; 327 iter->evt_cpu = cpu; 328 iter->lost_events = lost_events; 329 } 330 331 return iter->ts != U64_MAX; 332 } 333 334 static int trace_remote_iter_print_event(struct trace_remote_iterator *iter) 335 { 336 unsigned long usecs_rem; 337 u64 ts = iter->ts; 338 339 if (iter->lost_events) 340 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", 341 iter->evt_cpu, iter->lost_events); 342 343 do_div(ts, 1000); 344 usecs_rem = do_div(ts, USEC_PER_SEC); 345 346 trace_seq_printf(&iter->seq, "[%03d]\t%5llu.%06lu: ", iter->evt_cpu, 347 ts, usecs_rem); 348 349 return trace_seq_has_overflowed(&iter->seq) ? -EOVERFLOW : 0; 350 } 351 352 static int trace_pipe_open(struct inode *inode, struct file *filp) 353 { 354 struct trace_remote *remote = inode->i_private; 355 struct trace_remote_iterator *iter; 356 int cpu = RING_BUFFER_ALL_CPUS; 357 358 if (inode->i_cdev) 359 cpu = (long)inode->i_cdev - 1; 360 361 guard(mutex)(&remote->lock); 362 iter = trace_remote_iter(remote, cpu); 363 filp->private_data = iter; 364 365 return IS_ERR(iter) ? PTR_ERR(iter) : 0; 366 } 367 368 static int trace_pipe_release(struct inode *inode, struct file *filp) 369 { 370 struct trace_remote_iterator *iter = filp->private_data; 371 struct trace_remote *remote = iter->remote; 372 373 guard(mutex)(&remote->lock); 374 375 trace_remote_iter_free(iter); 376 377 return 0; 378 } 379 380 static ssize_t trace_pipe_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) 381 { 382 struct trace_remote_iterator *iter = filp->private_data; 383 struct trace_buffer *trace_buffer = iter->remote->trace_buffer; 384 int ret; 385 386 copy_to_user: 387 ret = trace_seq_to_user(&iter->seq, ubuf, cnt); 388 if (ret != -EBUSY) 389 return ret; 390 391 trace_seq_init(&iter->seq); 392 393 ret = ring_buffer_wait(trace_buffer, iter->cpu, 0, NULL, NULL); 394 if (ret < 0) 395 return ret; 396 397 while (trace_remote_iter_read_event(iter)) { 398 int prev_len = iter->seq.seq.len; 399 400 if (trace_remote_iter_print_event(iter)) { 401 iter->seq.seq.len = prev_len; 402 break; 403 } 404 405 ring_buffer_consume(trace_buffer, iter->evt_cpu, NULL, NULL); 406 } 407 408 goto copy_to_user; 409 } 410 411 static const struct file_operations trace_pipe_fops = { 412 .open = trace_pipe_open, 413 .read = trace_pipe_read, 414 .release = trace_pipe_release, 415 }; 416 417 static int trace_remote_init_tracefs(const char *name, struct trace_remote *remote) 418 { 419 struct dentry *remote_d, *percpu_d, *d; 420 static struct dentry *root; 421 static DEFINE_MUTEX(lock); 422 bool root_inited = false; 423 int cpu; 424 425 guard(mutex)(&lock); 426 427 if (!root) { 428 root = tracefs_create_dir(TRACEFS_DIR, NULL); 429 if (!root) { 430 pr_err("Failed to create tracefs dir "TRACEFS_DIR"\n"); 431 return -ENOMEM; 432 } 433 root_inited = true; 434 } 435 436 remote_d = tracefs_create_dir(name, root); 437 if (!remote_d) { 438 pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/\n", name); 439 goto err; 440 } 441 442 d = trace_create_file("tracing_on", TRACEFS_MODE_WRITE, remote_d, remote, &tracing_on_fops); 443 if (!d) 444 goto err; 445 446 d = trace_create_file("buffer_size_kb", TRACEFS_MODE_WRITE, remote_d, remote, 447 &buffer_size_kb_fops); 448 if (!d) 449 goto err; 450 451 d = trace_create_file("trace_pipe", TRACEFS_MODE_READ, remote_d, remote, &trace_pipe_fops); 452 if (!d) 453 goto err; 454 455 percpu_d = tracefs_create_dir("per_cpu", remote_d); 456 if (!percpu_d) { 457 pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/per_cpu/\n", name); 458 goto err; 459 } 460 461 for_each_possible_cpu(cpu) { 462 struct dentry *cpu_d; 463 char cpu_name[16]; 464 465 snprintf(cpu_name, sizeof(cpu_name), "cpu%d", cpu); 466 cpu_d = tracefs_create_dir(cpu_name, percpu_d); 467 if (!cpu_d) { 468 pr_err("Failed to create tracefs dir "TRACEFS_DIR"%s/percpu/cpu%d\n", 469 name, cpu); 470 goto err; 471 } 472 473 d = trace_create_cpu_file("trace_pipe", TRACEFS_MODE_READ, cpu_d, remote, cpu, 474 &trace_pipe_fops); 475 if (!d) 476 goto err; 477 } 478 479 return 0; 480 481 err: 482 if (root_inited) { 483 tracefs_remove(root); 484 root = NULL; 485 } else { 486 tracefs_remove(remote_d); 487 } 488 489 return -ENOMEM; 490 } 491 492 /** 493 * trace_remote_register() - Register a Tracefs remote 494 * @name: Name of the remote, used for the Tracefs remotes/ directory. 495 * @cbs: Set of callbacks used to control the remote. 496 * @priv: Private data, passed to each callback from @cbs. 497 * @events: Array of events. &remote_event.name and &remote_event.id must be 498 * filled by the caller. 499 * @nr_events: Number of events in the @events array. 500 * 501 * A trace remote is an entity, outside of the kernel (most likely firmware or 502 * hypervisor) capable of writing events into a Tracefs compatible ring-buffer. 503 * The kernel would then act as a reader. 504 * 505 * The registered remote will be found under the Tracefs directory 506 * remotes/<name>. 507 * 508 * Return: 0 on success, negative error code on failure. 509 */ 510 int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv) 511 { 512 struct trace_remote *remote; 513 514 remote = kzalloc_obj(*remote); 515 if (!remote) 516 return -ENOMEM; 517 518 remote->cbs = cbs; 519 remote->priv = priv; 520 remote->trace_buffer_size = 7 << 10; 521 remote->poll_ms = 100; 522 mutex_init(&remote->lock); 523 524 if (trace_remote_init_tracefs(name, remote)) { 525 kfree(remote); 526 return -ENOMEM; 527 } 528 529 return 0; 530 } 531 EXPORT_SYMBOL_GPL(trace_remote_register); 532 533 /** 534 * trace_remote_free_buffer() - Free trace buffer allocated with trace_remote_alloc_buffer() 535 * @desc: Descriptor of the per-CPU ring-buffers, originally filled by 536 * trace_remote_alloc_buffer() 537 * 538 * Most likely called from &trace_remote_callbacks.unload_trace_buffer. 539 */ 540 void trace_remote_free_buffer(struct trace_buffer_desc *desc) 541 { 542 struct ring_buffer_desc *rb_desc; 543 int cpu; 544 545 for_each_ring_buffer_desc(rb_desc, cpu, desc) { 546 unsigned int id; 547 548 free_page(rb_desc->meta_va); 549 550 for (id = 0; id < rb_desc->nr_page_va; id++) 551 free_page(rb_desc->page_va[id]); 552 } 553 } 554 EXPORT_SYMBOL_GPL(trace_remote_free_buffer); 555 556 /** 557 * trace_remote_alloc_buffer() - Dynamically allocate a trace buffer 558 * @desc: Uninitialized trace_buffer_desc 559 * @desc_size: Size of the trace_buffer_desc. Must be at least equal to 560 * trace_buffer_desc_size() 561 * @buffer_size: Size in bytes of each per-CPU ring-buffer 562 * @cpumask: CPUs to allocate a ring-buffer for 563 * 564 * Helper to dynamically allocate a set of pages (enough to cover @buffer_size) 565 * for each CPU from @cpumask and fill @desc. Most likely called from 566 * &trace_remote_callbacks.load_trace_buffer. 567 * 568 * Return: 0 on success, negative error code on failure. 569 */ 570 int trace_remote_alloc_buffer(struct trace_buffer_desc *desc, size_t desc_size, size_t buffer_size, 571 const struct cpumask *cpumask) 572 { 573 unsigned int nr_pages = max(DIV_ROUND_UP(buffer_size, PAGE_SIZE), 2UL) + 1; 574 void *desc_end = desc + desc_size; 575 struct ring_buffer_desc *rb_desc; 576 int cpu, ret = -ENOMEM; 577 578 if (desc_size < struct_size(desc, __data, 0)) 579 return -EINVAL; 580 581 desc->nr_cpus = 0; 582 desc->struct_len = struct_size(desc, __data, 0); 583 584 rb_desc = (struct ring_buffer_desc *)&desc->__data[0]; 585 586 for_each_cpu(cpu, cpumask) { 587 unsigned int id; 588 589 if ((void *)rb_desc + struct_size(rb_desc, page_va, nr_pages) > desc_end) { 590 ret = -EINVAL; 591 goto err; 592 } 593 594 rb_desc->cpu = cpu; 595 rb_desc->nr_page_va = 0; 596 rb_desc->meta_va = (unsigned long)__get_free_page(GFP_KERNEL); 597 if (!rb_desc->meta_va) 598 goto err; 599 600 for (id = 0; id < nr_pages; id++) { 601 rb_desc->page_va[id] = (unsigned long)__get_free_page(GFP_KERNEL); 602 if (!rb_desc->page_va[id]) 603 goto err; 604 605 rb_desc->nr_page_va++; 606 } 607 desc->nr_cpus++; 608 desc->struct_len += offsetof(struct ring_buffer_desc, page_va); 609 desc->struct_len += struct_size(rb_desc, page_va, rb_desc->nr_page_va); 610 rb_desc = __next_ring_buffer_desc(rb_desc); 611 } 612 613 return 0; 614 615 err: 616 trace_remote_free_buffer(desc); 617 return ret; 618 } 619 EXPORT_SYMBOL_GPL(trace_remote_alloc_buffer); 620