1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2025 - Google LLC 4 * Author: Vincent Donnefort <vdonnefort@google.com> 5 */ 6 7 #include <linux/atomic.h> 8 #include <linux/simple_ring_buffer.h> 9 10 #include <asm/barrier.h> 11 #include <asm/local.h> 12 13 enum simple_rb_link_type { 14 SIMPLE_RB_LINK_NORMAL = 0, 15 SIMPLE_RB_LINK_HEAD = 1, 16 SIMPLE_RB_LINK_HEAD_MOVING 17 }; 18 19 #define SIMPLE_RB_LINK_MASK ~(SIMPLE_RB_LINK_HEAD | SIMPLE_RB_LINK_HEAD_MOVING) 20 21 static void simple_bpage_set_head_link(struct simple_buffer_page *bpage) 22 { 23 unsigned long link = (unsigned long)bpage->link.next; 24 25 link &= SIMPLE_RB_LINK_MASK; 26 link |= SIMPLE_RB_LINK_HEAD; 27 28 /* 29 * Paired with simple_rb_find_head() to order access between the head 30 * link and overrun. It ensures we always report an up-to-date value 31 * after swapping the reader page. 32 */ 33 smp_store_release(&bpage->link.next, (struct list_head *)link); 34 } 35 36 static bool simple_bpage_unset_head_link(struct simple_buffer_page *bpage, 37 struct simple_buffer_page *dst, 38 enum simple_rb_link_type new_type) 39 { 40 unsigned long *link = (unsigned long *)(&bpage->link.next); 41 unsigned long old = (*link & SIMPLE_RB_LINK_MASK) | SIMPLE_RB_LINK_HEAD; 42 unsigned long new = (unsigned long)(&dst->link) | new_type; 43 44 return try_cmpxchg(link, &old, new); 45 } 46 47 static void simple_bpage_set_normal_link(struct simple_buffer_page *bpage) 48 { 49 unsigned long link = (unsigned long)bpage->link.next; 50 51 WRITE_ONCE(bpage->link.next, (struct list_head *)(link & SIMPLE_RB_LINK_MASK)); 52 } 53 54 static struct simple_buffer_page *simple_bpage_from_link(struct list_head *link) 55 { 56 unsigned long ptr = (unsigned long)link & SIMPLE_RB_LINK_MASK; 57 58 return container_of((struct list_head *)ptr, struct simple_buffer_page, link); 59 } 60 61 static struct simple_buffer_page *simple_bpage_next_page(struct simple_buffer_page *bpage) 62 { 63 return simple_bpage_from_link(bpage->link.next); 64 } 65 66 static void simple_bpage_reset(struct simple_buffer_page *bpage) 67 { 68 bpage->write = 0; 69 bpage->entries = 0; 70 71 local_set(&bpage->page->commit, 0); 72 } 73 74 static void simple_bpage_init(struct simple_buffer_page *bpage, void *page) 75 { 76 INIT_LIST_HEAD(&bpage->link); 77 bpage->page = (struct buffer_data_page *)page; 78 79 simple_bpage_reset(bpage); 80 } 81 82 #define simple_rb_meta_inc(__meta, __inc) \ 83 WRITE_ONCE((__meta), (__meta + __inc)) 84 85 static bool simple_rb_loaded(struct simple_rb_per_cpu *cpu_buffer) 86 { 87 return !!cpu_buffer->bpages; 88 } 89 90 static int simple_rb_find_head(struct simple_rb_per_cpu *cpu_buffer) 91 { 92 int retry = cpu_buffer->nr_pages * 2; 93 struct simple_buffer_page *head; 94 95 head = cpu_buffer->head_page; 96 97 while (retry--) { 98 unsigned long link; 99 100 spin: 101 /* See smp_store_release in simple_bpage_set_head_link() */ 102 link = (unsigned long)smp_load_acquire(&head->link.prev->next); 103 104 switch (link & ~SIMPLE_RB_LINK_MASK) { 105 /* Found the head */ 106 case SIMPLE_RB_LINK_HEAD: 107 cpu_buffer->head_page = head; 108 return 0; 109 /* The writer caught the head, we can spin, that won't be long */ 110 case SIMPLE_RB_LINK_HEAD_MOVING: 111 goto spin; 112 } 113 114 head = simple_bpage_next_page(head); 115 } 116 117 return -EBUSY; 118 } 119 120 /** 121 * simple_ring_buffer_swap_reader_page - Swap ring-buffer head with the reader 122 * @cpu_buffer: A simple_rb_per_cpu 123 * 124 * This function enables consuming reading. It ensures the current head page will not be overwritten 125 * and can be safely read. 126 * 127 * Returns 0 on success, -ENODEV if @cpu_buffer was unloaded or -EBUSY if we failed to catch the 128 * head page. 129 */ 130 int simple_ring_buffer_swap_reader_page(struct simple_rb_per_cpu *cpu_buffer) 131 { 132 struct simple_buffer_page *last, *head, *reader; 133 unsigned long overrun; 134 int retry = 8; 135 int ret; 136 137 if (!simple_rb_loaded(cpu_buffer)) 138 return -ENODEV; 139 140 reader = cpu_buffer->reader_page; 141 142 do { 143 /* Run after the writer to find the head */ 144 ret = simple_rb_find_head(cpu_buffer); 145 if (ret) 146 return ret; 147 148 head = cpu_buffer->head_page; 149 150 /* Connect the reader page around the header page */ 151 reader->link.next = head->link.next; 152 reader->link.prev = head->link.prev; 153 154 /* The last page before the head */ 155 last = simple_bpage_from_link(head->link.prev); 156 157 /* The reader page points to the new header page */ 158 simple_bpage_set_head_link(reader); 159 160 overrun = cpu_buffer->meta->overrun; 161 } while (!simple_bpage_unset_head_link(last, reader, SIMPLE_RB_LINK_NORMAL) && retry--); 162 163 if (!retry) 164 return -EINVAL; 165 166 cpu_buffer->head_page = simple_bpage_from_link(reader->link.next); 167 cpu_buffer->head_page->link.prev = &reader->link; 168 cpu_buffer->reader_page = head; 169 cpu_buffer->meta->reader.lost_events = overrun - cpu_buffer->last_overrun; 170 cpu_buffer->meta->reader.id = cpu_buffer->reader_page->id; 171 cpu_buffer->last_overrun = overrun; 172 173 return 0; 174 } 175 EXPORT_SYMBOL_GPL(simple_ring_buffer_swap_reader_page); 176 177 static struct simple_buffer_page *simple_rb_move_tail(struct simple_rb_per_cpu *cpu_buffer) 178 { 179 struct simple_buffer_page *tail, *new_tail; 180 181 tail = cpu_buffer->tail_page; 182 new_tail = simple_bpage_next_page(tail); 183 184 if (simple_bpage_unset_head_link(tail, new_tail, SIMPLE_RB_LINK_HEAD_MOVING)) { 185 /* 186 * Oh no! we've caught the head. There is none anymore and 187 * swap_reader will spin until we set the new one. Overrun must 188 * be written first, to make sure we report the correct number 189 * of lost events. 190 */ 191 simple_rb_meta_inc(cpu_buffer->meta->overrun, new_tail->entries); 192 simple_rb_meta_inc(cpu_buffer->meta->pages_lost, 1); 193 194 simple_bpage_set_head_link(new_tail); 195 simple_bpage_set_normal_link(tail); 196 } 197 198 simple_bpage_reset(new_tail); 199 cpu_buffer->tail_page = new_tail; 200 201 simple_rb_meta_inc(cpu_buffer->meta->pages_touched, 1); 202 203 return new_tail; 204 } 205 206 static unsigned long rb_event_size(unsigned long length) 207 { 208 struct ring_buffer_event *event; 209 210 return length + RB_EVNT_HDR_SIZE + sizeof(event->array[0]); 211 } 212 213 static struct ring_buffer_event * 214 rb_event_add_ts_extend(struct ring_buffer_event *event, u64 delta) 215 { 216 event->type_len = RINGBUF_TYPE_TIME_EXTEND; 217 event->time_delta = delta & TS_MASK; 218 event->array[0] = delta >> TS_SHIFT; 219 220 return (struct ring_buffer_event *)((unsigned long)event + 8); 221 } 222 223 static struct ring_buffer_event * 224 simple_rb_reserve_next(struct simple_rb_per_cpu *cpu_buffer, unsigned long length, u64 timestamp) 225 { 226 unsigned long ts_ext_size = 0, event_size = rb_event_size(length); 227 struct simple_buffer_page *tail = cpu_buffer->tail_page; 228 struct ring_buffer_event *event; 229 u32 write, prev_write; 230 u64 time_delta; 231 232 time_delta = timestamp - cpu_buffer->write_stamp; 233 234 if (test_time_stamp(time_delta)) 235 ts_ext_size = 8; 236 237 prev_write = tail->write; 238 write = prev_write + event_size + ts_ext_size; 239 240 if (unlikely(write > (PAGE_SIZE - BUF_PAGE_HDR_SIZE))) 241 tail = simple_rb_move_tail(cpu_buffer); 242 243 if (!tail->entries) { 244 tail->page->time_stamp = timestamp; 245 time_delta = 0; 246 ts_ext_size = 0; 247 write = event_size; 248 prev_write = 0; 249 } 250 251 tail->write = write; 252 tail->entries++; 253 254 cpu_buffer->write_stamp = timestamp; 255 256 event = (struct ring_buffer_event *)(tail->page->data + prev_write); 257 if (ts_ext_size) { 258 event = rb_event_add_ts_extend(event, time_delta); 259 time_delta = 0; 260 } 261 262 event->type_len = 0; 263 event->time_delta = time_delta; 264 event->array[0] = event_size - RB_EVNT_HDR_SIZE; 265 266 return event; 267 } 268 269 /** 270 * simple_ring_buffer_reserve - Reserve an entry in @cpu_buffer 271 * @cpu_buffer: A simple_rb_per_cpu 272 * @length: Size of the entry in bytes 273 * @timestamp: Timestamp of the entry 274 * 275 * Returns the address of the entry where to write data or NULL 276 */ 277 void *simple_ring_buffer_reserve(struct simple_rb_per_cpu *cpu_buffer, unsigned long length, 278 u64 timestamp) 279 { 280 struct ring_buffer_event *rb_event; 281 282 if (cmpxchg(&cpu_buffer->status, SIMPLE_RB_READY, SIMPLE_RB_WRITING) != SIMPLE_RB_READY) 283 return NULL; 284 285 rb_event = simple_rb_reserve_next(cpu_buffer, length, timestamp); 286 287 return &rb_event->array[1]; 288 } 289 EXPORT_SYMBOL_GPL(simple_ring_buffer_reserve); 290 291 /** 292 * simple_ring_buffer_commit - Commit the entry reserved with simple_ring_buffer_reserve() 293 * @cpu_buffer: The simple_rb_per_cpu where the entry has been reserved 294 */ 295 void simple_ring_buffer_commit(struct simple_rb_per_cpu *cpu_buffer) 296 { 297 local_set(&cpu_buffer->tail_page->page->commit, 298 cpu_buffer->tail_page->write); 299 simple_rb_meta_inc(cpu_buffer->meta->entries, 1); 300 301 /* 302 * Paired with simple_rb_enable_tracing() to ensure data is 303 * written to the ring-buffer before teardown. 304 */ 305 smp_store_release(&cpu_buffer->status, SIMPLE_RB_READY); 306 } 307 EXPORT_SYMBOL_GPL(simple_ring_buffer_commit); 308 309 static u32 simple_rb_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable) 310 { 311 u32 prev_status; 312 313 if (enable) 314 return cmpxchg(&cpu_buffer->status, SIMPLE_RB_UNAVAILABLE, SIMPLE_RB_READY); 315 316 /* Wait for the buffer to be released */ 317 do { 318 prev_status = cmpxchg_acquire(&cpu_buffer->status, 319 SIMPLE_RB_READY, 320 SIMPLE_RB_UNAVAILABLE); 321 } while (prev_status == SIMPLE_RB_WRITING); 322 323 return prev_status; 324 } 325 326 /** 327 * simple_ring_buffer_reset - Reset @cpu_buffer 328 * @cpu_buffer: A simple_rb_per_cpu 329 * 330 * This will not clear the content of the data, only reset counters and pointers 331 * 332 * Returns 0 on success or -ENODEV if @cpu_buffer was unloaded. 333 */ 334 int simple_ring_buffer_reset(struct simple_rb_per_cpu *cpu_buffer) 335 { 336 struct simple_buffer_page *bpage; 337 u32 prev_status; 338 int ret; 339 340 if (!simple_rb_loaded(cpu_buffer)) 341 return -ENODEV; 342 343 prev_status = simple_rb_enable_tracing(cpu_buffer, false); 344 345 ret = simple_rb_find_head(cpu_buffer); 346 if (ret) 347 return ret; 348 349 bpage = cpu_buffer->tail_page = cpu_buffer->head_page; 350 do { 351 simple_bpage_reset(bpage); 352 bpage = simple_bpage_next_page(bpage); 353 } while (bpage != cpu_buffer->head_page); 354 355 simple_bpage_reset(cpu_buffer->reader_page); 356 357 cpu_buffer->last_overrun = 0; 358 cpu_buffer->write_stamp = 0; 359 360 cpu_buffer->meta->reader.read = 0; 361 cpu_buffer->meta->reader.lost_events = 0; 362 cpu_buffer->meta->entries = 0; 363 cpu_buffer->meta->overrun = 0; 364 cpu_buffer->meta->read = 0; 365 cpu_buffer->meta->pages_lost = 0; 366 cpu_buffer->meta->pages_touched = 0; 367 368 if (prev_status == SIMPLE_RB_READY) 369 simple_rb_enable_tracing(cpu_buffer, true); 370 371 return 0; 372 } 373 EXPORT_SYMBOL_GPL(simple_ring_buffer_reset); 374 375 int simple_ring_buffer_init_mm(struct simple_rb_per_cpu *cpu_buffer, 376 struct simple_buffer_page *bpages, 377 const struct ring_buffer_desc *desc, 378 void *(*load_page)(unsigned long va), 379 void (*unload_page)(void *va)) 380 { 381 struct simple_buffer_page *bpage = bpages; 382 int ret = 0; 383 void *page; 384 int i; 385 386 /* At least 1 reader page and two pages in the ring-buffer */ 387 if (desc->nr_page_va < 3) 388 return -EINVAL; 389 390 memset(cpu_buffer, 0, sizeof(*cpu_buffer)); 391 392 cpu_buffer->meta = load_page(desc->meta_va); 393 if (!cpu_buffer->meta) 394 return -EINVAL; 395 396 memset(cpu_buffer->meta, 0, sizeof(*cpu_buffer->meta)); 397 cpu_buffer->meta->meta_page_size = PAGE_SIZE; 398 cpu_buffer->meta->nr_subbufs = cpu_buffer->nr_pages; 399 400 /* The reader page is not part of the ring initially */ 401 page = load_page(desc->page_va[0]); 402 if (!page) { 403 unload_page(cpu_buffer->meta); 404 return -EINVAL; 405 } 406 407 simple_bpage_init(bpage, page); 408 bpage->id = 0; 409 410 cpu_buffer->nr_pages = 1; 411 412 cpu_buffer->reader_page = bpage; 413 cpu_buffer->tail_page = bpage + 1; 414 cpu_buffer->head_page = bpage + 1; 415 416 for (i = 1; i < desc->nr_page_va; i++) { 417 page = load_page(desc->page_va[i]); 418 if (!page) { 419 ret = -EINVAL; 420 break; 421 } 422 423 simple_bpage_init(++bpage, page); 424 425 bpage->link.next = &(bpage + 1)->link; 426 bpage->link.prev = &(bpage - 1)->link; 427 bpage->id = i; 428 429 cpu_buffer->nr_pages = i + 1; 430 } 431 432 if (ret) { 433 for (i--; i >= 0; i--) 434 unload_page((void *)desc->page_va[i]); 435 unload_page(cpu_buffer->meta); 436 437 return ret; 438 } 439 440 /* Close the ring */ 441 bpage->link.next = &cpu_buffer->tail_page->link; 442 cpu_buffer->tail_page->link.prev = &bpage->link; 443 444 /* The last init'ed page points to the head page */ 445 simple_bpage_set_head_link(bpage); 446 447 cpu_buffer->bpages = bpages; 448 449 return 0; 450 } 451 452 static void *__load_page(unsigned long page) 453 { 454 return (void *)page; 455 } 456 457 static void __unload_page(void *page) { } 458 459 /** 460 * simple_ring_buffer_init - Init @cpu_buffer based on @desc 461 * @cpu_buffer: A simple_rb_per_cpu buffer to init, allocated by the caller. 462 * @bpages: Array of simple_buffer_pages, with as many elements as @desc->nr_page_va 463 * @desc: A ring_buffer_desc 464 * 465 * Returns 0 on success or -EINVAL if the content of @desc is invalid 466 */ 467 int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_buffer_page *bpages, 468 const struct ring_buffer_desc *desc) 469 { 470 return simple_ring_buffer_init_mm(cpu_buffer, bpages, desc, __load_page, __unload_page); 471 } 472 EXPORT_SYMBOL_GPL(simple_ring_buffer_init); 473 474 void simple_ring_buffer_unload_mm(struct simple_rb_per_cpu *cpu_buffer, 475 void (*unload_page)(void *)) 476 { 477 int p; 478 479 if (!simple_rb_loaded(cpu_buffer)) 480 return; 481 482 simple_rb_enable_tracing(cpu_buffer, false); 483 484 unload_page(cpu_buffer->meta); 485 for (p = 0; p < cpu_buffer->nr_pages; p++) 486 unload_page(cpu_buffer->bpages[p].page); 487 488 cpu_buffer->bpages = NULL; 489 } 490 491 /** 492 * simple_ring_buffer_unload - Prepare @cpu_buffer for deletion 493 * @cpu_buffer: A simple_rb_per_cpu that will be deleted. 494 */ 495 void simple_ring_buffer_unload(struct simple_rb_per_cpu *cpu_buffer) 496 { 497 return simple_ring_buffer_unload_mm(cpu_buffer, __unload_page); 498 } 499 EXPORT_SYMBOL_GPL(simple_ring_buffer_unload); 500 501 /** 502 * simple_ring_buffer_enable_tracing - Enable or disable writing to @cpu_buffer 503 * @cpu_buffer: A simple_rb_per_cpu 504 * @enable: True to enable tracing, False to disable it 505 * 506 * Returns 0 on success or -ENODEV if @cpu_buffer was unloaded 507 */ 508 int simple_ring_buffer_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable) 509 { 510 if (!simple_rb_loaded(cpu_buffer)) 511 return -ENODEV; 512 513 simple_rb_enable_tracing(cpu_buffer, enable); 514 515 return 0; 516 } 517 EXPORT_SYMBOL_GPL(simple_ring_buffer_enable_tracing); 518