// SPDX-License-Identifier: GPL-2.0+
/*
 * Read-Copy Update module-based scalability-test facility
 *
 * Copyright (C) IBM Corporation, 2015
 *
 * Authors: Paul E. McKenney <paulmck@linux.ibm.com>
 */

#define pr_fmt(fmt) fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <uapi/linux/sched/types.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/freezer.h>
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/stat.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <asm/byteorder.h>
#include <linux/torture.h>
#include <linux/vmalloc.h>
#include <linux/rcupdate_trace.h>
#include <linux/sched/debug.h>

#include "rcu.h"

MODULE_DESCRIPTION("Read-Copy Update module-based scalability-test facility");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");

#define SCALE_FLAG "-scale:"
#define SCALEOUT_STRING(s) \
	pr_alert("%s" SCALE_FLAG " %s\n", scale_type, s)
#define VERBOSE_SCALEOUT_STRING(s) \
	do { if (verbose) pr_alert("%s" SCALE_FLAG " %s\n", scale_type, s); } while (0)
#define SCALEOUT_ERRSTRING(s) \
	pr_alert("%s" SCALE_FLAG "!!! %s\n", scale_type, s)

/*
 * The intended use cases for the nreaders and nwriters module parameters
 * are as follows:
 *
 * 1.	Specify only the nr_cpus kernel boot parameter.  This will
 *	set both nreaders and nwriters to the value specified by
 *	nr_cpus for a mixed reader/writer test.
 *
 * 2.	Specify the nr_cpus kernel boot parameter, but set
 *	rcuscale.nreaders to zero.  This will set nwriters to the
 *	value specified by nr_cpus for an update-only test.
 *
 * 3.	Specify the nr_cpus kernel boot parameter, but set
 *	rcuscale.nwriters to zero.  This will set nreaders to the
 *	value specified by nr_cpus for a read-only test.
 *
 * Various other use cases may of course be specified.
 *
 * Note that this test's readers are intended only as a test load for
 * the writers.  The reader scalability statistics will be overly
 * pessimistic due to the per-critical-section interrupt disabling,
 * test-end checks, and the pair of calls through pointers.
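 *
 * For example, booting with "nr_cpus=8 rcuscale.nwriters=0" (use case 3
 * above) results in a read-only test with eight reader kthreads, assuming
 * that all eight CPUs come online.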
 */

torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives");
torture_param(int, gp_async_max, 1000, "Max # outstanding waits per writer");
torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
torture_param(int, minruntime, 0, "Minimum run time (s)");
torture_param(int, nreaders, -1, "Number of RCU reader threads");
torture_param(int, nwriters, -1, "Number of RCU updater threads");
torture_param(int, shutdown_secs, !IS_MODULE(CONFIG_RCU_SCALE_TEST) * 300,
	      "Shutdown at end of scalability tests or at specified timeout (s).");
torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
torture_param(int, writer_holdoff_jiffies, 0, "Holdoff (jiffies) between GPs, zero to disable");
torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() scale test?");
torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
torture_param(int, kfree_by_call_rcu, 0, "Use call_rcu() to emulate kfree_rcu()?");

static char *scale_type = "rcu";
module_param(scale_type, charp, 0444);
MODULE_PARM_DESC(scale_type, "Type of RCU to scalability-test (rcu, srcu, ...)");

// Structure definitions for custom fixed-per-task allocator.
struct writer_mblock {
	struct rcu_head wmb_rh;
	struct llist_node wmb_node;
	struct writer_freelist *wmb_wfl;
};

struct writer_freelist {
	struct llist_head ws_lhg;
	atomic_t ws_inflight;
	struct llist_head ____cacheline_internodealigned_in_smp ws_lhp;
	struct writer_mblock *ws_mblocks;
};

static int nrealreaders;
static int nrealwriters;
static struct task_struct **writer_tasks;
static struct task_struct **reader_tasks;

static u64 **writer_durations;
static bool *writer_done;
static struct writer_freelist *writer_freelists;
static int *writer_n_durations;
static atomic_t n_rcu_scale_reader_started;
static atomic_t n_rcu_scale_writer_started;
static atomic_t n_rcu_scale_writer_finished;
static u64 t_rcu_scale_writer_started;
static u64 t_rcu_scale_writer_finished;
static unsigned long b_rcu_gp_test_started;
static unsigned long b_rcu_gp_test_finished;

#define MAX_MEAS 10000
#define MIN_MEAS 100

/*
 * Operations vector for selecting different types of tests.
 */

struct rcu_scale_ops {
	int ptype;
	void (*init)(void);
	void (*cleanup)(void);
	int (*readlock)(void);
	void (*readunlock)(int idx);
	unsigned long (*get_gp_seq)(void);
	unsigned long (*gp_diff)(unsigned long new, unsigned long old);
	unsigned long (*exp_completed)(void);
	void (*async)(struct rcu_head *head, rcu_callback_t func);
	void (*gp_barrier)(void);
	void (*sync)(void);
	void (*exp_sync)(void);
	struct task_struct *(*rso_gp_kthread)(void);
	void (*stats)(void);
	const char *name;
};

static struct rcu_scale_ops *cur_ops;

/*
 * Definitions for rcu scalability testing.
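 * The rcu_ops structure below simply maps each hook onto the corresponding
 * vanilla RCU primitive (rcu_read_lock(), synchronize_rcu(), and so on).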
 */

static int rcu_scale_read_lock(void) __acquires(RCU)
{
	rcu_read_lock();
	return 0;
}

static void rcu_scale_read_unlock(int idx) __releases(RCU)
{
	rcu_read_unlock();
}

static unsigned long __maybe_unused rcu_no_completed(void)
{
	return 0;
}

static void rcu_sync_scale_init(void)
{
}

static struct rcu_scale_ops rcu_ops = {
	.ptype = RCU_FLAVOR,
	.init = rcu_sync_scale_init,
	.readlock = rcu_scale_read_lock,
	.readunlock = rcu_scale_read_unlock,
	.get_gp_seq = rcu_get_gp_seq,
	.gp_diff = rcu_seq_diff,
	.exp_completed = rcu_exp_batches_completed,
	.async = call_rcu_hurry,
	.gp_barrier = rcu_barrier,
	.sync = synchronize_rcu,
	.exp_sync = synchronize_rcu_expedited,
	.name = "rcu"
};

/*
 * Definitions for srcu scalability testing.
 */

DEFINE_STATIC_SRCU(srcu_ctl_scale);
static struct srcu_struct *srcu_ctlp = &srcu_ctl_scale;

static int srcu_scale_read_lock(void) __acquires(srcu_ctlp)
{
	return srcu_read_lock(srcu_ctlp);
}

static void srcu_scale_read_unlock(int idx) __releases(srcu_ctlp)
{
	srcu_read_unlock(srcu_ctlp, idx);
}

static unsigned long srcu_scale_completed(void)
{
	return srcu_batches_completed(srcu_ctlp);
}

static void srcu_call_rcu(struct rcu_head *head, rcu_callback_t func)
{
	call_srcu(srcu_ctlp, head, func);
}

static void srcu_rcu_barrier(void)
{
	srcu_barrier(srcu_ctlp);
}

static void srcu_scale_synchronize(void)
{
	synchronize_srcu(srcu_ctlp);
}

static void srcu_scale_stats(void)
{
	srcu_torture_stats_print(srcu_ctlp, scale_type, SCALE_FLAG);
}

static void srcu_scale_synchronize_expedited(void)
{
	synchronize_srcu_expedited(srcu_ctlp);
}

static struct rcu_scale_ops srcu_ops = {
	.ptype = SRCU_FLAVOR,
	.init = rcu_sync_scale_init,
	.readlock = srcu_scale_read_lock,
	.readunlock = srcu_scale_read_unlock,
	.get_gp_seq = srcu_scale_completed,
	.gp_diff = rcu_seq_diff,
	.exp_completed = srcu_scale_completed,
	.async = srcu_call_rcu,
	.gp_barrier = srcu_rcu_barrier,
	.sync = srcu_scale_synchronize,
	.exp_sync = srcu_scale_synchronize_expedited,
	.stats = srcu_scale_stats,
	.name = "srcu"
};

static struct srcu_struct srcud;

static void srcu_sync_scale_init(void)
{
	srcu_ctlp = &srcud;
	init_srcu_struct(srcu_ctlp);
}

static void srcu_sync_scale_cleanup(void)
{
	cleanup_srcu_struct(srcu_ctlp);
}

static struct rcu_scale_ops srcud_ops = {
	.ptype = SRCU_FLAVOR,
	.init = srcu_sync_scale_init,
	.cleanup = srcu_sync_scale_cleanup,
	.readlock = srcu_scale_read_lock,
	.readunlock = srcu_scale_read_unlock,
	.get_gp_seq = srcu_scale_completed,
	.gp_diff = rcu_seq_diff,
	.exp_completed = srcu_scale_completed,
	.async = srcu_call_rcu,
	.gp_barrier = srcu_rcu_barrier,
	.sync = srcu_scale_synchronize,
	.exp_sync = srcu_scale_synchronize_expedited,
	.stats = srcu_scale_stats,
	.name = "srcud"
};

#ifdef CONFIG_TASKS_RCU

/*
 * Definitions for RCU-tasks scalability testing.
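 * Tasks RCU has no explicit read-side markers, so the readlock and
 * readunlock hooks below are intentionally empty.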
 */

static int tasks_scale_read_lock(void)
{
	return 0;
}

static void tasks_scale_read_unlock(int idx)
{
}

static void rcu_tasks_scale_stats(void)
{
	rcu_tasks_torture_stats_print(scale_type, SCALE_FLAG);
}

static struct rcu_scale_ops tasks_ops = {
	.ptype = RCU_TASKS_FLAVOR,
	.init = rcu_sync_scale_init,
	.readlock = tasks_scale_read_lock,
	.readunlock = tasks_scale_read_unlock,
	.get_gp_seq = rcu_no_completed,
	.gp_diff = rcu_seq_diff,
	.async = call_rcu_tasks,
	.gp_barrier = rcu_barrier_tasks,
	.sync = synchronize_rcu_tasks,
	.exp_sync = synchronize_rcu_tasks,
	.rso_gp_kthread = get_rcu_tasks_gp_kthread,
	.stats = IS_ENABLED(CONFIG_TINY_RCU) ? NULL : rcu_tasks_scale_stats,
	.name = "tasks"
};

#define TASKS_OPS &tasks_ops,

#else // #ifdef CONFIG_TASKS_RCU

#define TASKS_OPS

#endif // #else // #ifdef CONFIG_TASKS_RCU

#ifdef CONFIG_TASKS_RUDE_RCU

/*
 * Definitions for RCU-tasks-rude scalability testing.
 */

static int tasks_rude_scale_read_lock(void)
{
	return 0;
}

static void tasks_rude_scale_read_unlock(int idx)
{
}

static void rcu_tasks_rude_scale_stats(void)
{
	rcu_tasks_rude_torture_stats_print(scale_type, SCALE_FLAG);
}

static struct rcu_scale_ops tasks_rude_ops = {
	.ptype = RCU_TASKS_RUDE_FLAVOR,
	.init = rcu_sync_scale_init,
	.readlock = tasks_rude_scale_read_lock,
	.readunlock = tasks_rude_scale_read_unlock,
	.get_gp_seq = rcu_no_completed,
	.gp_diff = rcu_seq_diff,
	.sync = synchronize_rcu_tasks_rude,
	.exp_sync = synchronize_rcu_tasks_rude,
	.rso_gp_kthread = get_rcu_tasks_rude_gp_kthread,
	.stats = IS_ENABLED(CONFIG_TINY_RCU) ? NULL : rcu_tasks_rude_scale_stats,
	.name = "tasks-rude"
};

#define TASKS_RUDE_OPS &tasks_rude_ops,

#else // #ifdef CONFIG_TASKS_RUDE_RCU

#define TASKS_RUDE_OPS

#endif // #else // #ifdef CONFIG_TASKS_RUDE_RCU

#ifdef CONFIG_TASKS_TRACE_RCU

/*
 * Definitions for RCU-tasks-trace scalability testing.
 */

static int tasks_trace_scale_read_lock(void)
{
	rcu_read_lock_trace();
	return 0;
}

static void tasks_trace_scale_read_unlock(int idx)
{
	rcu_read_unlock_trace();
}

static struct rcu_scale_ops tasks_tracing_ops = {
	.ptype = RCU_TASKS_FLAVOR,
	.init = rcu_sync_scale_init,
	.readlock = tasks_trace_scale_read_lock,
	.readunlock = tasks_trace_scale_read_unlock,
	.get_gp_seq = rcu_no_completed,
	.gp_diff = rcu_seq_diff,
	.async = call_rcu_tasks_trace,
	.gp_barrier = rcu_barrier_tasks_trace,
	.sync = synchronize_rcu_tasks_trace,
	.exp_sync = synchronize_rcu_tasks_trace,
	.name = "tasks-tracing"
};

#define TASKS_TRACING_OPS &tasks_tracing_ops,

#else // #ifdef CONFIG_TASKS_TRACE_RCU

#define TASKS_TRACING_OPS

#endif // #else // #ifdef CONFIG_TASKS_TRACE_RCU

static unsigned long rcuscale_seq_diff(unsigned long new, unsigned long old)
{
	if (!cur_ops->gp_diff)
		return new - old;
	return cur_ops->gp_diff(new, old);
}

/*
 * If scalability tests complete, wait for shutdown to commence.
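 * Each call first does cond_resched_tasks_rcu_qs(), which supplies both a
 * voluntary context switch and a Tasks RCU quiescent state.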
 */
static void rcu_scale_wait_shutdown(void)
{
	cond_resched_tasks_rcu_qs();
	if (atomic_read(&n_rcu_scale_writer_finished) < nrealwriters)
		return;
	while (!torture_must_stop())
		schedule_timeout_uninterruptible(1);
}

/*
 * RCU scalability reader kthread.  Repeatedly does an empty RCU read-side
 * critical section, minimizing update-side interference.  However, the
 * point of this test is not to evaluate reader scalability, but instead
 * to serve as a test load for update-side scalability testing.
 */
static int
rcu_scale_reader(void *arg)
{
	unsigned long flags;
	int idx;
	long me = (long)arg;

	VERBOSE_SCALEOUT_STRING("rcu_scale_reader task started");
	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
	set_user_nice(current, MAX_NICE);
	atomic_inc(&n_rcu_scale_reader_started);

	do {
		local_irq_save(flags);
		idx = cur_ops->readlock();
		cur_ops->readunlock(idx);
		local_irq_restore(flags);
		rcu_scale_wait_shutdown();
	} while (!torture_must_stop());
	torture_kthread_stopping("rcu_scale_reader");
	return 0;
}

/*
 * Allocate a writer_mblock structure for the specified rcu_scale_writer
 * task.
 */
static struct writer_mblock *rcu_scale_alloc(long me)
{
	struct llist_node *llnp;
	struct writer_freelist *wflp;
	struct writer_mblock *wmbp;

	if (WARN_ON_ONCE(!writer_freelists))
		return NULL;
	wflp = &writer_freelists[me];
	if (llist_empty(&wflp->ws_lhp)) {
		// ->ws_lhp is private to its rcu_scale_writer task.
		wmbp = container_of(llist_del_all(&wflp->ws_lhg), struct writer_mblock, wmb_node);
		wflp->ws_lhp.first = &wmbp->wmb_node;
	}
	llnp = llist_del_first(&wflp->ws_lhp);
	if (!llnp)
		return NULL;
	return container_of(llnp, struct writer_mblock, wmb_node);
}

/*
 * Free a writer_mblock structure to its rcu_scale_writer task.
 */
static void rcu_scale_free(struct writer_mblock *wmbp)
{
	struct writer_freelist *wflp;

	if (!wmbp)
		return;
	wflp = wmbp->wmb_wfl;
	llist_add(&wmbp->wmb_node, &wflp->ws_lhg);
}

/*
 * Callback function for asynchronous grace periods from rcu_scale_writer().
 */
static void rcu_scale_async_cb(struct rcu_head *rhp)
{
	struct writer_mblock *wmbp = container_of(rhp, struct writer_mblock, wmb_rh);
	struct writer_freelist *wflp = wmbp->wmb_wfl;

	atomic_dec(&wflp->ws_inflight);
	rcu_scale_free(wmbp);
}

static void rcu_scale_cleanup(void);

/*
 * RCU scale writer kthread.  Repeatedly does a grace period.
 */
static int
rcu_scale_writer(void *arg)
{
	int i = 0;
	int i_max;
	unsigned long jdone;
	long me = (long)arg;
	bool selfreport = false;
	bool started = false, done = false, alldone = false;
	u64 t;
	DEFINE_TORTURE_RANDOM(tr);
	u64 *wdp;
	u64 *wdpp = writer_durations[me];
	struct writer_freelist *wflp = &writer_freelists[me];
	struct writer_mblock *wmbp = NULL;

	VERBOSE_SCALEOUT_STRING("rcu_scale_writer task started");
	WARN_ON(!wdpp);
	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
	current->flags |= PF_NO_SETAFFINITY;
	sched_set_fifo_low(current);

	if (holdoff)
		schedule_timeout_idle(holdoff * HZ);

	/*
	 * Wait until rcu_end_inkernel_boot() is called for normal GP tests
	 * so that RCU is not always expedited for normal GP tests.
	 * The system_state test is approximate, but works well in practice.
	 */
	while (!gp_exp && system_state != SYSTEM_RUNNING)
		schedule_timeout_uninterruptible(1);

	t = ktime_get_mono_fast_ns();
	if (atomic_inc_return(&n_rcu_scale_writer_started) >= nrealwriters) {
		t_rcu_scale_writer_started = t;
		if (gp_exp) {
			b_rcu_gp_test_started =
				cur_ops->exp_completed() / 2;
		} else {
			b_rcu_gp_test_started = cur_ops->get_gp_seq();
		}
	}

	jdone = jiffies + minruntime * HZ;
	do {
		bool gp_succeeded = false;

		if (writer_holdoff)
			udelay(writer_holdoff);
		if (writer_holdoff_jiffies)
			schedule_timeout_idle(torture_random(&tr) % writer_holdoff_jiffies + 1);
		wdp = &wdpp[i];
		*wdp = ktime_get_mono_fast_ns();
		if (gp_async && !WARN_ON_ONCE(!cur_ops->async)) {
			if (!wmbp)
				wmbp = rcu_scale_alloc(me);
			if (wmbp && atomic_read(&wflp->ws_inflight) < gp_async_max) {
				atomic_inc(&wflp->ws_inflight);
				cur_ops->async(&wmbp->wmb_rh, rcu_scale_async_cb);
				wmbp = NULL;
				gp_succeeded = true;
			} else if (!kthread_should_stop()) {
				cur_ops->gp_barrier();
			} else {
				rcu_scale_free(wmbp); /* Because we are stopping. */
				wmbp = NULL;
			}
		} else if (gp_exp) {
			cur_ops->exp_sync();
			gp_succeeded = true;
		} else {
			cur_ops->sync();
			gp_succeeded = true;
		}
		t = ktime_get_mono_fast_ns();
		*wdp = t - *wdp;
		i_max = i;
		if (!started &&
		    atomic_read(&n_rcu_scale_writer_started) >= nrealwriters)
			started = true;
		if (!done && i >= MIN_MEAS && time_after(jiffies, jdone)) {
			done = true;
			WRITE_ONCE(writer_done[me], true);
			sched_set_normal(current, 0);
			pr_alert("%s%s rcu_scale_writer %ld has %d measurements\n",
				 scale_type, SCALE_FLAG, me, MIN_MEAS);
			if (atomic_inc_return(&n_rcu_scale_writer_finished) >=
			    nrealwriters) {
				schedule_timeout_interruptible(10);
				rcu_ftrace_dump(DUMP_ALL);
				SCALEOUT_STRING("Test complete");
				t_rcu_scale_writer_finished = t;
				if (gp_exp) {
					b_rcu_gp_test_finished =
						cur_ops->exp_completed() / 2;
				} else {
					b_rcu_gp_test_finished =
						cur_ops->get_gp_seq();
				}
				if (shutdown_secs) {
					writer_tasks[me] = NULL;
					smp_mb(); /* Assign before wake. */
					rcu_scale_cleanup();
					kernel_power_off();
				}
			}
		}
		if (done && !alldone &&
		    atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters)
			alldone = true;
		if (done && !alldone && time_after(jiffies, jdone + HZ * 60)) {
			static atomic_t dumped;
			int i;

			if (!atomic_xchg(&dumped, 1)) {
				for (i = 0; i < nrealwriters; i++) {
					if (writer_done[i])
						continue;
					pr_info("%s: Task %ld flags writer %d:\n", __func__, me, i);
					sched_show_task(writer_tasks[i]);
				}
				if (cur_ops->stats)
					cur_ops->stats();
			}
		}
		if (!selfreport && time_after(jiffies, jdone + HZ * (70 + me))) {
			pr_info("%s: Writer %ld self-report: started %d done %d/%d->%d i %d jdone %lu.\n",
				__func__, me, started, done, writer_done[me], atomic_read(&n_rcu_scale_writer_finished), i, jiffies - jdone);
			selfreport = true;
		}
		if (gp_succeeded && started && !alldone && i < MAX_MEAS - 1)
			i++;
		rcu_scale_wait_shutdown();
	} while (!torture_must_stop());
	if (gp_async && cur_ops->async) {
		rcu_scale_free(wmbp);
		cur_ops->gp_barrier();
	}
	writer_n_durations[me] = i_max + 1;
	torture_kthread_stopping("rcu_scale_writer");
	return 0;
}

static void
rcu_scale_print_module_parms(struct rcu_scale_ops *cur_ops, const char *tag)
{
	pr_alert("%s" SCALE_FLAG
		 "--- %s: gp_async=%d gp_async_max=%d gp_exp=%d holdoff=%d minruntime=%d nreaders=%d nwriters=%d writer_holdoff=%d writer_holdoff_jiffies=%d verbose=%d shutdown_secs=%d\n",
		 scale_type, tag, gp_async, gp_async_max, gp_exp, holdoff, minruntime, nrealreaders, nrealwriters, writer_holdoff, writer_holdoff_jiffies, verbose, shutdown_secs);
}

/*
 * Return the number if non-negative.  If -1, the number of CPUs.
 * If less than -1, that much less than the number of CPUs, but
 * at least one.
 */
static int compute_real(int n)
{
	int nr;

	if (n >= 0) {
		nr = n;
	} else {
		nr = num_online_cpus() + 1 + n;
		if (nr <= 0)
			nr = 1;
	}
	return nr;
}

/*
 * kfree_rcu() scalability tests: Start a kfree_rcu() loop on all CPUs, run it
 * for a given number of iterations, and measure the total time and the number
 * of grace periods needed for all iterations to complete.
 */

torture_param(int, kfree_nthreads, -1, "Number of threads running loops of kfree_rcu().");
torture_param(int, kfree_alloc_num, 8000, "Number of allocations and frees done in an iteration.");
torture_param(int, kfree_loops, 10, "Number of loops doing kfree_alloc_num allocations and frees.");
torture_param(bool, kfree_rcu_test_double, false, "Do we run a kfree_rcu() double-argument scale test?");
torture_param(bool, kfree_rcu_test_single, false, "Do we run a kfree_rcu() single-argument scale test?");

static struct task_struct **kfree_reader_tasks;
static int kfree_nrealthreads;
static atomic_t n_kfree_scale_thread_started;
static atomic_t n_kfree_scale_thread_ended;
static struct task_struct *kthread_tp;
static u64 kthread_stime;

struct kfree_obj {
	char kfree_obj[8];
	struct rcu_head rh;
};

/* Used if doing RCU-kfree'ing via call_rcu(). */
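/*
 * When kfree_by_call_rcu is set, kfree_scale_thread() queues each object
 * with call_rcu() and this callback does the actual kfree(), which (with
 * CONFIG_RCU_LAZY enabled) exercises the lazy call_rcu() code path.
 */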
static void kfree_call_rcu(struct rcu_head *rh)
{
	struct kfree_obj *obj = container_of(rh, struct kfree_obj, rh);

	kfree(obj);
}

static void kfree_scale_cleanup(void);

static int
kfree_scale_thread(void *arg)
{
	int i, loop = 0;
	long me = (long)arg;
	struct kfree_obj *alloc_ptr;
	u64 start_time, end_time;
	long long mem_begin, mem_during = 0;
	bool kfree_rcu_test_both;
	DEFINE_TORTURE_RANDOM(tr);

	VERBOSE_SCALEOUT_STRING("kfree_scale_thread task started");
	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
	set_user_nice(current, MAX_NICE);
	kfree_rcu_test_both = (kfree_rcu_test_single == kfree_rcu_test_double);

	start_time = ktime_get_mono_fast_ns();

	if (atomic_inc_return(&n_kfree_scale_thread_started) >= kfree_nrealthreads) {
		if (gp_exp)
			b_rcu_gp_test_started = cur_ops->exp_completed() / 2;
		else
			b_rcu_gp_test_started = cur_ops->get_gp_seq();
	}

	do {
		if (!mem_during) {
			mem_during = mem_begin = si_mem_available();
		} else if (loop % (kfree_loops / 4) == 0) {
			mem_during = (mem_during + si_mem_available()) / 2;
		}

		for (i = 0; i < kfree_alloc_num; i++) {
			alloc_ptr = kzalloc_objs(struct kfree_obj, kfree_mult);
			if (!alloc_ptr)
				return -ENOMEM;

			if (kfree_by_call_rcu) {
				call_rcu(&(alloc_ptr->rh), kfree_call_rcu);
				continue;
			}

			// By default kfree_rcu_test_single and kfree_rcu_test_double are
			// initialized to false. If both have the same value (false or true)
			// both are randomly tested, otherwise only the one with value true
			// is tested.
			if ((kfree_rcu_test_single && !kfree_rcu_test_double) ||
			    (kfree_rcu_test_both && torture_random(&tr) & 0x800))
				kfree_rcu_mightsleep(alloc_ptr);
			else
				kfree_rcu(alloc_ptr, rh);
		}

		cond_resched();
	} while (!torture_must_stop() && ++loop < kfree_loops);

	if (atomic_inc_return(&n_kfree_scale_thread_ended) >= kfree_nrealthreads) {
		end_time = ktime_get_mono_fast_ns();

		if (gp_exp)
			b_rcu_gp_test_finished = cur_ops->exp_completed() / 2;
		else
			b_rcu_gp_test_finished = cur_ops->get_gp_seq();

		pr_alert("Total time taken by all kfree'ers: %llu ns, loops: %d, batches: %ld, memory footprint: %lldMB\n",
			 (unsigned long long)(end_time - start_time), kfree_loops,
			 rcuscale_seq_diff(b_rcu_gp_test_finished, b_rcu_gp_test_started),
			 PAGES_TO_MB(mem_begin - mem_during));

		if (shutdown_secs) {
			kfree_reader_tasks[me] = NULL;
			smp_mb(); /* Assign before wake. */
			kfree_scale_cleanup();
			kernel_power_off();
		}
	}

	torture_kthread_stopping("kfree_scale_thread");
	return 0;
}

static void
kfree_scale_cleanup(void)
{
	int i;

	if (torture_cleanup_begin())
		return;

	if (kfree_reader_tasks) {
		for (i = 0; i < kfree_nrealthreads; i++)
			torture_stop_kthread(kfree_scale_thread,
					     kfree_reader_tasks[i]);
		kfree(kfree_reader_tasks);
		kfree_reader_tasks = NULL;
	}

	torture_cleanup_end();
}

// Used if doing RCU-kfree'ing via call_rcu().
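// The state below backs a quick self-test run from kfree_scale_init():
// the callback records the jiffies value at which it was invoked so that
// the init code can verify that lazy call_rcu() callbacks really were
// delayed by the expected amount.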
static unsigned long jiffies_at_lazy_cb;
static struct rcu_head lazy_test1_rh;
static int rcu_lazy_test1_cb_called;
static void call_rcu_lazy_test1(struct rcu_head *rh)
{
	jiffies_at_lazy_cb = jiffies;
	WRITE_ONCE(rcu_lazy_test1_cb_called, 1);
}

static int __init
kfree_scale_init(void)
{
	int firsterr = 0;
	long i;
	unsigned long jif_start;
	unsigned long orig_jif;

	pr_alert("%s" SCALE_FLAG
		 "--- kfree_rcu_test: kfree_mult=%d kfree_by_call_rcu=%d kfree_nthreads=%d kfree_alloc_num=%d kfree_loops=%d kfree_rcu_test_double=%d kfree_rcu_test_single=%d\n",
		 scale_type, kfree_mult, kfree_by_call_rcu, kfree_nthreads, kfree_alloc_num, kfree_loops, kfree_rcu_test_double, kfree_rcu_test_single);

	// Also, do a quick self-test to ensure laziness is as much as
	// expected.
	if (kfree_by_call_rcu && !IS_ENABLED(CONFIG_RCU_LAZY)) {
		pr_alert("CONFIG_RCU_LAZY is disabled, falling back to kfree_rcu() for delayed RCU kfree'ing\n");
		kfree_by_call_rcu = 0;
	}

	if (kfree_by_call_rcu) {
		/* do a test to check the timeout. */
		orig_jif = rcu_get_jiffies_lazy_flush();

		rcu_set_jiffies_lazy_flush(2 * HZ);
		rcu_barrier();

		jif_start = jiffies;
		jiffies_at_lazy_cb = 0;
		call_rcu(&lazy_test1_rh, call_rcu_lazy_test1);

		smp_cond_load_relaxed(&rcu_lazy_test1_cb_called, VAL == 1);

		rcu_set_jiffies_lazy_flush(orig_jif);

		if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start < 2 * HZ)) {
			pr_alert("ERROR: call_rcu() CBs are not being lazy as expected!\n");
			firsterr = -1;
			goto unwind;
		}

		if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start > 3 * HZ)) {
			pr_alert("ERROR: call_rcu() CBs are being too lazy!\n");
			firsterr = -1;
			goto unwind;
		}
	}

	kfree_nrealthreads = compute_real(kfree_nthreads);
	/* Start up the kthreads. */
	if (shutdown_secs) {
		firsterr = torture_shutdown_init(shutdown_secs, kfree_scale_cleanup);
		if (torture_init_error(firsterr))
			goto unwind;
	}

	pr_alert("kfree object size=%zu, kfree_by_call_rcu=%d\n",
		 kfree_mult * sizeof(struct kfree_obj),
		 kfree_by_call_rcu);

	kfree_reader_tasks = kzalloc_objs(kfree_reader_tasks[0],
					  kfree_nrealthreads);
	if (kfree_reader_tasks == NULL) {
		firsterr = -ENOMEM;
		goto unwind;
	}

	for (i = 0; i < kfree_nrealthreads; i++) {
		firsterr = torture_create_kthread(kfree_scale_thread, (void *)i,
						  kfree_reader_tasks[i]);
		if (torture_init_error(firsterr))
			goto unwind;
	}

	while (atomic_read(&n_kfree_scale_thread_started) < kfree_nrealthreads)
		schedule_timeout_uninterruptible(1);

	torture_init_end();
	return 0;

unwind:
	torture_init_end();
	kfree_scale_cleanup();
	return firsterr;
}

static void
rcu_scale_cleanup(void)
{
	int i;
	int j;
	int ngps = 0;
	u64 *wdp;
	u64 *wdpp;

	/*
	 * Would like warning at start, but everything is expedited
	 * during the mid-boot phase, so have to wait till the end.
	 */
	if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp)
		SCALEOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
	if (rcu_gp_is_normal() && gp_exp)
		SCALEOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
	if (gp_exp && gp_async)
		SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");

	// If built-in, just report all of the GP kthread's CPU time.
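	// Otherwise, kthread_stime was sampled at rcu_scale_init() time, so
	// the difference computed below covers only the test itself.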
	if (IS_BUILTIN(CONFIG_RCU_SCALE_TEST) && !kthread_tp && cur_ops->rso_gp_kthread)
		kthread_tp = cur_ops->rso_gp_kthread();
	if (kthread_tp) {
		u32 ns;
		u64 us;

		kthread_stime = kthread_tp->stime - kthread_stime;
		us = div_u64_rem(kthread_stime, 1000, &ns);
		pr_info("rcu_scale: Grace-period kthread CPU time: %llu.%03u us\n", us, ns);
		show_rcu_gp_kthreads();
	}
	if (kfree_rcu_test) {
		kfree_scale_cleanup();
		return;
	}

	if (torture_cleanup_begin())
		return;
	if (!cur_ops) {
		torture_cleanup_end();
		return;
	}

	if (reader_tasks) {
		for (i = 0; i < nrealreaders; i++)
			torture_stop_kthread(rcu_scale_reader,
					     reader_tasks[i]);
		kfree(reader_tasks);
		reader_tasks = NULL;
	}

	if (writer_tasks) {
		for (i = 0; i < nrealwriters; i++) {
			torture_stop_kthread(rcu_scale_writer,
					     writer_tasks[i]);
			if (!writer_n_durations)
				continue;
			j = writer_n_durations[i];
			pr_alert("%s%s writer %d gps: %d\n",
				 scale_type, SCALE_FLAG, i, j);
			ngps += j;
		}
		pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n",
			 scale_type, SCALE_FLAG,
			 t_rcu_scale_writer_started, t_rcu_scale_writer_finished,
			 t_rcu_scale_writer_finished -
			 t_rcu_scale_writer_started,
			 ngps,
			 rcuscale_seq_diff(b_rcu_gp_test_finished,
					   b_rcu_gp_test_started));
		for (i = 0; i < nrealwriters; i++) {
			if (!writer_durations)
				break;
			if (!writer_n_durations)
				continue;
			wdpp = writer_durations[i];
			if (!wdpp)
				continue;
			for (j = 0; j < writer_n_durations[i]; j++) {
				wdp = &wdpp[j];
				pr_alert("%s%s %4d writer-duration: %5d %llu\n",
					 scale_type, SCALE_FLAG,
					 i, j, *wdp);
				if (j % 100 == 0)
					schedule_timeout_uninterruptible(1);
			}
			kfree(writer_durations[i]);
			if (writer_freelists) {
				int ctr = 0;
				struct llist_node *llnp;
				struct writer_freelist *wflp = &writer_freelists[i];

				if (wflp->ws_mblocks) {
					llist_for_each(llnp, wflp->ws_lhg.first)
						ctr++;
					llist_for_each(llnp, wflp->ws_lhp.first)
						ctr++;
					WARN_ONCE(ctr != gp_async_max,
						  "%s: ctr = %d gp_async_max = %d\n",
						  __func__, ctr, gp_async_max);
					kfree(wflp->ws_mblocks);
				}
			}
		}
		kfree(writer_tasks);
		writer_tasks = NULL;
		kfree(writer_durations);
		writer_durations = NULL;
		kfree(writer_n_durations);
		writer_n_durations = NULL;
		kfree(writer_done);
		writer_done = NULL;
		kfree(writer_freelists);
		writer_freelists = NULL;
	}

	/* Do torture-type-specific cleanup operations. */
	if (cur_ops->cleanup != NULL)
		cur_ops->cleanup();

	torture_cleanup_end();
}

static int __init
rcu_scale_init(void)
{
	int firsterr = 0;
	long i;
	long j;
	static struct rcu_scale_ops *scale_ops[] = {
		&rcu_ops, &srcu_ops, &srcud_ops, TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS
	};

	if (!torture_init_begin(scale_type, verbose))
		return -EBUSY;

	/* Process args and announce that the scalability'er is on the job. */
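	/* An unrecognized scale_type is rejected after listing the valid choices. */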
	for (i = 0; i < ARRAY_SIZE(scale_ops); i++) {
		cur_ops = scale_ops[i];
		if (strcmp(scale_type, cur_ops->name) == 0)
			break;
	}
	if (i == ARRAY_SIZE(scale_ops)) {
		pr_alert("rcu-scale: invalid scale type: \"%s\"\n", scale_type);
		pr_alert("rcu-scale types:");
		for (i = 0; i < ARRAY_SIZE(scale_ops); i++)
			pr_cont(" %s", scale_ops[i]->name);
		pr_cont("\n");
		firsterr = -EINVAL;
		cur_ops = NULL;
		goto unwind;
	}
	if (cur_ops->init)
		cur_ops->init();

	if (cur_ops->rso_gp_kthread) {
		kthread_tp = cur_ops->rso_gp_kthread();
		if (kthread_tp)
			kthread_stime = kthread_tp->stime;
	}
	if (kfree_rcu_test)
		return kfree_scale_init();

	nrealwriters = compute_real(nwriters);
	nrealreaders = compute_real(nreaders);
	atomic_set(&n_rcu_scale_reader_started, 0);
	atomic_set(&n_rcu_scale_writer_started, 0);
	atomic_set(&n_rcu_scale_writer_finished, 0);
	rcu_scale_print_module_parms(cur_ops, "Start of test");

	/* Start up the kthreads. */

	if (shutdown_secs) {
		firsterr = torture_shutdown_init(shutdown_secs, rcu_scale_cleanup);
		if (torture_init_error(firsterr))
			goto unwind;
	}
	reader_tasks = kzalloc_objs(reader_tasks[0], nrealreaders);
	if (reader_tasks == NULL) {
		SCALEOUT_ERRSTRING("out of memory");
		firsterr = -ENOMEM;
		goto unwind;
	}
	for (i = 0; i < nrealreaders; i++) {
		firsterr = torture_create_kthread(rcu_scale_reader, (void *)i,
						  reader_tasks[i]);
		if (torture_init_error(firsterr))
			goto unwind;
	}
	while (atomic_read(&n_rcu_scale_reader_started) < nrealreaders)
		schedule_timeout_uninterruptible(1);
	writer_tasks = kzalloc_objs(writer_tasks[0], nrealwriters);
	writer_durations = kcalloc(nrealwriters, sizeof(*writer_durations), GFP_KERNEL);
	writer_n_durations = kzalloc_objs(*writer_n_durations, nrealwriters);
	writer_done = kzalloc_objs(writer_done[0], nrealwriters);
	if (gp_async) {
		if (gp_async_max <= 0) {
			pr_warn("%s: gp_async_max = %d must be greater than zero.\n",
				__func__, gp_async_max);
			WARN_ON_ONCE(IS_BUILTIN(CONFIG_RCU_TORTURE_TEST));
			firsterr = -EINVAL;
			goto unwind;
		}
		writer_freelists = kzalloc_objs(writer_freelists[0],
						nrealwriters);
	}
	if (!writer_tasks || !writer_durations || !writer_n_durations || !writer_done ||
	    (gp_async && !writer_freelists)) {
		SCALEOUT_ERRSTRING("out of memory");
		firsterr = -ENOMEM;
		goto unwind;
	}
	for (i = 0; i < nrealwriters; i++) {
		writer_durations[i] =
			kcalloc(MAX_MEAS, sizeof(*writer_durations[i]),
				GFP_KERNEL);
		if (!writer_durations[i]) {
			firsterr = -ENOMEM;
			goto unwind;
		}
		if (writer_freelists) {
			struct writer_freelist *wflp = &writer_freelists[i];

			init_llist_head(&wflp->ws_lhg);
			init_llist_head(&wflp->ws_lhp);
			wflp->ws_mblocks = kzalloc_objs(wflp->ws_mblocks[0],
							gp_async_max);
			if (!wflp->ws_mblocks) {
				firsterr = -ENOMEM;
				goto unwind;
			}
			for (j = 0; j < gp_async_max; j++) {
				struct writer_mblock *wmbp = &wflp->ws_mblocks[j];

				wmbp->wmb_wfl = wflp;
				llist_add(&wmbp->wmb_node, &wflp->ws_lhp);
			}
		}
		firsterr = torture_create_kthread(rcu_scale_writer, (void *)i,
						  writer_tasks[i]);
		if (torture_init_error(firsterr))
			goto unwind;
	}
	torture_init_end();
	return 0;

unwind:
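	/*
	 * Error exit: tear down whatever has been set up so far.  When
	 * shutdown_secs is set (the default for built-in runs), also
	 * power off the system.
	 */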
	torture_init_end();
	rcu_scale_cleanup();
	if (shutdown_secs) {
		WARN_ON(!IS_MODULE(CONFIG_RCU_SCALE_TEST));
		kernel_power_off();
	}
	return firsterr;
}

module_init(rcu_scale_init);
module_exit(rcu_scale_cleanup);
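/*
 * Example invocation (illustrative parameter values) when built as a module:
 *
 *	modprobe rcuscale scale_type=srcu nwriters=4 minruntime=30
 *
 * Results are reported via pr_alert() once the last writer finishes.
 */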