1 // SPDX-License-Identifier: GPL-2.0+ 2 // 3 // Scalability test comparing RCU vs other mechanisms 4 // for acquiring references on objects. 5 // 6 // Copyright (C) Google, 2020. 7 // 8 // Author: Joel Fernandes <joel@joelfernandes.org> 9 10 #define pr_fmt(fmt) fmt 11 12 #include <linux/atomic.h> 13 #include <linux/bitops.h> 14 #include <linux/completion.h> 15 #include <linux/cpu.h> 16 #include <linux/delay.h> 17 #include <linux/err.h> 18 #include <linux/init.h> 19 #include <linux/interrupt.h> 20 #include <linux/kthread.h> 21 #include <linux/kernel.h> 22 #include <linux/mm.h> 23 #include <linux/module.h> 24 #include <linux/moduleparam.h> 25 #include <linux/notifier.h> 26 #include <linux/percpu.h> 27 #include <linux/rcupdate.h> 28 #include <linux/rcupdate_trace.h> 29 #include <linux/reboot.h> 30 #include <linux/sched.h> 31 #include <linux/seq_buf.h> 32 #include <linux/spinlock.h> 33 #include <linux/smp.h> 34 #include <linux/stat.h> 35 #include <linux/srcu.h> 36 #include <linux/slab.h> 37 #include <linux/torture.h> 38 #include <linux/types.h> 39 #include <linux/sched/clock.h> 40 41 #include "rcu.h" 42 43 #define SCALE_FLAG "-ref-scale: " 44 45 #define SCALEOUT(s, x...) \ 46 pr_alert("%s" SCALE_FLAG s, scale_type, ## x) 47 48 #define VERBOSE_SCALEOUT(s, x...) \ 49 do { \ 50 if (verbose) \ 51 pr_alert("%s" SCALE_FLAG s "\n", scale_type, ## x); \ 52 } while (0) 53 54 static atomic_t verbose_batch_ctr; 55 56 #define VERBOSE_SCALEOUT_BATCH(s, x...) \ 57 do { \ 58 if (verbose && \ 59 (verbose_batched <= 0 || \ 60 !(atomic_inc_return(&verbose_batch_ctr) % verbose_batched))) { \ 61 schedule_timeout_uninterruptible(1); \ 62 pr_alert("%s" SCALE_FLAG s "\n", scale_type, ## x); \ 63 } \ 64 } while (0) 65 66 #define SCALEOUT_ERRSTRING(s, x...) pr_alert("%s" SCALE_FLAG "!!! 
" s "\n", scale_type, ## x) 67 68 MODULE_DESCRIPTION("Scalability test for object reference mechanisms"); 69 MODULE_LICENSE("GPL"); 70 MODULE_AUTHOR("Joel Fernandes (Google) <joel@joelfernandes.org>"); 71 72 static char *scale_type = "rcu"; 73 module_param(scale_type, charp, 0444); 74 MODULE_PARM_DESC(scale_type, "Type of test (rcu, srcu, refcnt, rwsem, rwlock."); 75 76 torture_param(int, verbose, 0, "Enable verbose debugging printk()s"); 77 torture_param(int, verbose_batched, 0, "Batch verbose debugging printk()s"); 78 79 // Number of seconds to extend warm-up and cool-down for multiple guest OSes 80 torture_param(long, guest_os_delay, 0, 81 "Number of seconds to extend warm-up/cool-down for multiple guest OSes."); 82 // Wait until there are multiple CPUs before starting test. 83 torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0, 84 "Holdoff time before test start (s)"); 85 // Number of typesafe_lookup structures, that is, the degree of concurrency. 86 torture_param(long, lookup_instances, 0, "Number of typesafe_lookup structures."); 87 // Number of loops per experiment, all readers execute operations concurrently. 88 torture_param(long, loops, 10000, "Number of loops per experiment."); 89 // Number of readers, with -1 defaulting to about 75% of the CPUs. 90 torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs."); 91 // Number of runs. 92 torture_param(int, nruns, 30, "Number of experiments to run."); 93 // Reader delay in nanoseconds, 0 for no delay. 
torture_param(int, readdelay, 0, "Read-side delay in nanoseconds.");

// Default for the "shutdown" parameter: off when built as a module,
// on otherwise.
#ifdef MODULE
# define REFSCALE_SHUTDOWN 0
#else
# define REFSCALE_SHUTDOWN 1
#endif

torture_param(bool, shutdown, REFSCALE_SHUTDOWN,
	      "Shutdown at end of scalability tests.");

// Per-reader state, one element of reader_tasks[] per reader kthread.
struct reader_task {
	struct task_struct *task;	// The reader kthread itself.
	int start_reader;		// Set by main_func() to start an experiment,
					// cleared by the reader once it has started.
	wait_queue_head_t wq;		// Reader sleeps here between experiments.
	u64 last_duration_ns;		// Time taken by the most recent measured pass.
};

// Kthread that waits on shutdown_wq for shutdown_start, then cleans up
// and powers off.
static struct task_struct *shutdown_task;
static wait_queue_head_t shutdown_wq;

// Orchestrating kthread; readers wake main_wq when an experiment is done.
static struct task_struct *main_task;
static wait_queue_head_t main_wq;
// Set non-zero by main_func() to release the shutdown kthread.
static int shutdown_start;

// Array of nreaders per-reader structures, allocated in ref_scale_init().
static struct reader_task *reader_tasks;

// Number of readers that are part of the current experiment.
static atomic_t nreaders_exp;

// Use to wait for all threads to start.
static atomic_t n_init;
// Per-experiment check-in counters, each reset to nreaders by main_func()
// and decremented once by every reader at the corresponding phase.
static atomic_t n_started;
static atomic_t n_warmedup;
static atomic_t n_cooleddown;

// Track which experiment is currently running.
static int exp_idx;

// Operations vector for selecting different types of tests.
134 struct ref_scale_ops { 135 bool (*init)(void); 136 void (*cleanup)(void); 137 void (*readsection)(const int nloops); 138 void (*delaysection)(const int nloops, const int udl, const int ndl); 139 const char *name; 140 }; 141 142 static const struct ref_scale_ops *cur_ops; 143 144 static void un_delay(const int udl, const int ndl) 145 { 146 if (udl) 147 udelay(udl); 148 if (ndl) 149 ndelay(ndl); 150 } 151 152 static void ref_rcu_read_section(const int nloops) 153 { 154 int i; 155 156 for (i = nloops; i >= 0; i--) { 157 rcu_read_lock(); 158 rcu_read_unlock(); 159 } 160 } 161 162 static void ref_rcu_delay_section(const int nloops, const int udl, const int ndl) 163 { 164 int i; 165 166 for (i = nloops; i >= 0; i--) { 167 rcu_read_lock(); 168 un_delay(udl, ndl); 169 rcu_read_unlock(); 170 } 171 } 172 173 static bool rcu_sync_scale_init(void) 174 { 175 return true; 176 } 177 178 static const struct ref_scale_ops rcu_ops = { 179 .init = rcu_sync_scale_init, 180 .readsection = ref_rcu_read_section, 181 .delaysection = ref_rcu_delay_section, 182 .name = "rcu" 183 }; 184 185 // Definitions for SRCU ref scale testing. 
186 DEFINE_STATIC_SRCU(srcu_refctl_scale); 187 static struct srcu_struct *srcu_ctlp = &srcu_refctl_scale; 188 189 static void srcu_ref_scale_read_section(const int nloops) 190 { 191 int i; 192 int idx; 193 194 for (i = nloops; i >= 0; i--) { 195 idx = srcu_read_lock(srcu_ctlp); 196 srcu_read_unlock(srcu_ctlp, idx); 197 } 198 } 199 200 static void srcu_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 201 { 202 int i; 203 int idx; 204 205 for (i = nloops; i >= 0; i--) { 206 idx = srcu_read_lock(srcu_ctlp); 207 un_delay(udl, ndl); 208 srcu_read_unlock(srcu_ctlp, idx); 209 } 210 } 211 212 static const struct ref_scale_ops srcu_ops = { 213 .init = rcu_sync_scale_init, 214 .readsection = srcu_ref_scale_read_section, 215 .delaysection = srcu_ref_scale_delay_section, 216 .name = "srcu" 217 }; 218 219 static void srcu_lite_ref_scale_read_section(const int nloops) 220 { 221 int i; 222 int idx; 223 224 for (i = nloops; i >= 0; i--) { 225 idx = srcu_read_lock_lite(srcu_ctlp); 226 srcu_read_unlock_lite(srcu_ctlp, idx); 227 } 228 } 229 230 static void srcu_lite_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 231 { 232 int i; 233 int idx; 234 235 for (i = nloops; i >= 0; i--) { 236 idx = srcu_read_lock_lite(srcu_ctlp); 237 un_delay(udl, ndl); 238 srcu_read_unlock_lite(srcu_ctlp, idx); 239 } 240 } 241 242 static const struct ref_scale_ops srcu_lite_ops = { 243 .init = rcu_sync_scale_init, 244 .readsection = srcu_lite_ref_scale_read_section, 245 .delaysection = srcu_lite_ref_scale_delay_section, 246 .name = "srcu-lite" 247 }; 248 249 #ifdef CONFIG_TASKS_RCU 250 251 // Definitions for RCU Tasks ref scale testing: Empty read markers. 252 // These definitions also work for RCU Rude readers. 
253 static void rcu_tasks_ref_scale_read_section(const int nloops) 254 { 255 int i; 256 257 for (i = nloops; i >= 0; i--) 258 continue; 259 } 260 261 static void rcu_tasks_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 262 { 263 int i; 264 265 for (i = nloops; i >= 0; i--) 266 un_delay(udl, ndl); 267 } 268 269 static const struct ref_scale_ops rcu_tasks_ops = { 270 .init = rcu_sync_scale_init, 271 .readsection = rcu_tasks_ref_scale_read_section, 272 .delaysection = rcu_tasks_ref_scale_delay_section, 273 .name = "rcu-tasks" 274 }; 275 276 #define RCU_TASKS_OPS &rcu_tasks_ops, 277 278 #else // #ifdef CONFIG_TASKS_RCU 279 280 #define RCU_TASKS_OPS 281 282 #endif // #else // #ifdef CONFIG_TASKS_RCU 283 284 #ifdef CONFIG_TASKS_TRACE_RCU 285 286 // Definitions for RCU Tasks Trace ref scale testing. 287 static void rcu_trace_ref_scale_read_section(const int nloops) 288 { 289 int i; 290 291 for (i = nloops; i >= 0; i--) { 292 rcu_read_lock_trace(); 293 rcu_read_unlock_trace(); 294 } 295 } 296 297 static void rcu_trace_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 298 { 299 int i; 300 301 for (i = nloops; i >= 0; i--) { 302 rcu_read_lock_trace(); 303 un_delay(udl, ndl); 304 rcu_read_unlock_trace(); 305 } 306 } 307 308 static const struct ref_scale_ops rcu_trace_ops = { 309 .init = rcu_sync_scale_init, 310 .readsection = rcu_trace_ref_scale_read_section, 311 .delaysection = rcu_trace_ref_scale_delay_section, 312 .name = "rcu-trace" 313 }; 314 315 #define RCU_TRACE_OPS &rcu_trace_ops, 316 317 #else // #ifdef CONFIG_TASKS_TRACE_RCU 318 319 #define RCU_TRACE_OPS 320 321 #endif // #else // #ifdef CONFIG_TASKS_TRACE_RCU 322 323 // Definitions for reference count 324 static atomic_t refcnt; 325 326 static void ref_refcnt_section(const int nloops) 327 { 328 int i; 329 330 for (i = nloops; i >= 0; i--) { 331 atomic_inc(&refcnt); 332 atomic_dec(&refcnt); 333 } 334 } 335 336 static void ref_refcnt_delay_section(const int nloops, const 
int udl, const int ndl) 337 { 338 int i; 339 340 for (i = nloops; i >= 0; i--) { 341 atomic_inc(&refcnt); 342 un_delay(udl, ndl); 343 atomic_dec(&refcnt); 344 } 345 } 346 347 static const struct ref_scale_ops refcnt_ops = { 348 .init = rcu_sync_scale_init, 349 .readsection = ref_refcnt_section, 350 .delaysection = ref_refcnt_delay_section, 351 .name = "refcnt" 352 }; 353 354 // Definitions for rwlock 355 static rwlock_t test_rwlock; 356 357 static bool ref_rwlock_init(void) 358 { 359 rwlock_init(&test_rwlock); 360 return true; 361 } 362 363 static void ref_rwlock_section(const int nloops) 364 { 365 int i; 366 367 for (i = nloops; i >= 0; i--) { 368 read_lock(&test_rwlock); 369 read_unlock(&test_rwlock); 370 } 371 } 372 373 static void ref_rwlock_delay_section(const int nloops, const int udl, const int ndl) 374 { 375 int i; 376 377 for (i = nloops; i >= 0; i--) { 378 read_lock(&test_rwlock); 379 un_delay(udl, ndl); 380 read_unlock(&test_rwlock); 381 } 382 } 383 384 static const struct ref_scale_ops rwlock_ops = { 385 .init = ref_rwlock_init, 386 .readsection = ref_rwlock_section, 387 .delaysection = ref_rwlock_delay_section, 388 .name = "rwlock" 389 }; 390 391 // Definitions for rwsem 392 static struct rw_semaphore test_rwsem; 393 394 static bool ref_rwsem_init(void) 395 { 396 init_rwsem(&test_rwsem); 397 return true; 398 } 399 400 static void ref_rwsem_section(const int nloops) 401 { 402 int i; 403 404 for (i = nloops; i >= 0; i--) { 405 down_read(&test_rwsem); 406 up_read(&test_rwsem); 407 } 408 } 409 410 static void ref_rwsem_delay_section(const int nloops, const int udl, const int ndl) 411 { 412 int i; 413 414 for (i = nloops; i >= 0; i--) { 415 down_read(&test_rwsem); 416 un_delay(udl, ndl); 417 up_read(&test_rwsem); 418 } 419 } 420 421 static const struct ref_scale_ops rwsem_ops = { 422 .init = ref_rwsem_init, 423 .readsection = ref_rwsem_section, 424 .delaysection = ref_rwsem_delay_section, 425 .name = "rwsem" 426 }; 427 428 // Definitions for global spinlock 
429 static DEFINE_RAW_SPINLOCK(test_lock); 430 431 static void ref_lock_section(const int nloops) 432 { 433 int i; 434 435 preempt_disable(); 436 for (i = nloops; i >= 0; i--) { 437 raw_spin_lock(&test_lock); 438 raw_spin_unlock(&test_lock); 439 } 440 preempt_enable(); 441 } 442 443 static void ref_lock_delay_section(const int nloops, const int udl, const int ndl) 444 { 445 int i; 446 447 preempt_disable(); 448 for (i = nloops; i >= 0; i--) { 449 raw_spin_lock(&test_lock); 450 un_delay(udl, ndl); 451 raw_spin_unlock(&test_lock); 452 } 453 preempt_enable(); 454 } 455 456 static const struct ref_scale_ops lock_ops = { 457 .readsection = ref_lock_section, 458 .delaysection = ref_lock_delay_section, 459 .name = "lock" 460 }; 461 462 // Definitions for global irq-save spinlock 463 464 static void ref_lock_irq_section(const int nloops) 465 { 466 unsigned long flags; 467 int i; 468 469 preempt_disable(); 470 for (i = nloops; i >= 0; i--) { 471 raw_spin_lock_irqsave(&test_lock, flags); 472 raw_spin_unlock_irqrestore(&test_lock, flags); 473 } 474 preempt_enable(); 475 } 476 477 static void ref_lock_irq_delay_section(const int nloops, const int udl, const int ndl) 478 { 479 unsigned long flags; 480 int i; 481 482 preempt_disable(); 483 for (i = nloops; i >= 0; i--) { 484 raw_spin_lock_irqsave(&test_lock, flags); 485 un_delay(udl, ndl); 486 raw_spin_unlock_irqrestore(&test_lock, flags); 487 } 488 preempt_enable(); 489 } 490 491 static const struct ref_scale_ops lock_irq_ops = { 492 .readsection = ref_lock_irq_section, 493 .delaysection = ref_lock_irq_delay_section, 494 .name = "lock-irq" 495 }; 496 497 // Definitions acquire-release. 
498 static DEFINE_PER_CPU(unsigned long, test_acqrel); 499 500 static void ref_acqrel_section(const int nloops) 501 { 502 unsigned long x; 503 int i; 504 505 preempt_disable(); 506 for (i = nloops; i >= 0; i--) { 507 x = smp_load_acquire(this_cpu_ptr(&test_acqrel)); 508 smp_store_release(this_cpu_ptr(&test_acqrel), x + 1); 509 } 510 preempt_enable(); 511 } 512 513 static void ref_acqrel_delay_section(const int nloops, const int udl, const int ndl) 514 { 515 unsigned long x; 516 int i; 517 518 preempt_disable(); 519 for (i = nloops; i >= 0; i--) { 520 x = smp_load_acquire(this_cpu_ptr(&test_acqrel)); 521 un_delay(udl, ndl); 522 smp_store_release(this_cpu_ptr(&test_acqrel), x + 1); 523 } 524 preempt_enable(); 525 } 526 527 static const struct ref_scale_ops acqrel_ops = { 528 .readsection = ref_acqrel_section, 529 .delaysection = ref_acqrel_delay_section, 530 .name = "acqrel" 531 }; 532 533 static volatile u64 stopopts; 534 535 static void ref_sched_clock_section(const int nloops) 536 { 537 u64 x = 0; 538 int i; 539 540 preempt_disable(); 541 for (i = nloops; i >= 0; i--) 542 x += sched_clock(); 543 preempt_enable(); 544 stopopts = x; 545 } 546 547 static void ref_sched_clock_delay_section(const int nloops, const int udl, const int ndl) 548 { 549 u64 x = 0; 550 int i; 551 552 preempt_disable(); 553 for (i = nloops; i >= 0; i--) { 554 x += sched_clock(); 555 un_delay(udl, ndl); 556 } 557 preempt_enable(); 558 stopopts = x; 559 } 560 561 static const struct ref_scale_ops sched_clock_ops = { 562 .readsection = ref_sched_clock_section, 563 .delaysection = ref_sched_clock_delay_section, 564 .name = "sched-clock" 565 }; 566 567 568 static void ref_clock_section(const int nloops) 569 { 570 u64 x = 0; 571 int i; 572 573 preempt_disable(); 574 for (i = nloops; i >= 0; i--) 575 x += ktime_get_real_fast_ns(); 576 preempt_enable(); 577 stopopts = x; 578 } 579 580 static void ref_clock_delay_section(const int nloops, const int udl, const int ndl) 581 { 582 u64 x = 0; 583 int i; 
584 585 preempt_disable(); 586 for (i = nloops; i >= 0; i--) { 587 x += ktime_get_real_fast_ns(); 588 un_delay(udl, ndl); 589 } 590 preempt_enable(); 591 stopopts = x; 592 } 593 594 static const struct ref_scale_ops clock_ops = { 595 .readsection = ref_clock_section, 596 .delaysection = ref_clock_delay_section, 597 .name = "clock" 598 }; 599 600 static void ref_jiffies_section(const int nloops) 601 { 602 u64 x = 0; 603 int i; 604 605 preempt_disable(); 606 for (i = nloops; i >= 0; i--) 607 x += jiffies; 608 preempt_enable(); 609 stopopts = x; 610 } 611 612 static void ref_jiffies_delay_section(const int nloops, const int udl, const int ndl) 613 { 614 u64 x = 0; 615 int i; 616 617 preempt_disable(); 618 for (i = nloops; i >= 0; i--) { 619 x += jiffies; 620 un_delay(udl, ndl); 621 } 622 preempt_enable(); 623 stopopts = x; 624 } 625 626 static const struct ref_scale_ops jiffies_ops = { 627 .readsection = ref_jiffies_section, 628 .delaysection = ref_jiffies_delay_section, 629 .name = "jiffies" 630 }; 631 632 //////////////////////////////////////////////////////////////////////// 633 // 634 // Methods leveraging SLAB_TYPESAFE_BY_RCU. 635 // 636 637 // Item to look up in a typesafe manner. Array of pointers to these. 638 struct refscale_typesafe { 639 atomic_t rts_refctr; // Used by all flavors 640 spinlock_t rts_lock; 641 seqlock_t rts_seqlock; 642 unsigned int a; 643 unsigned int b; 644 }; 645 646 static struct kmem_cache *typesafe_kmem_cachep; 647 static struct refscale_typesafe **rtsarray; 648 static long rtsarray_size; 649 static DEFINE_TORTURE_RANDOM_PERCPU(refscale_rand); 650 static bool (*rts_acquire)(struct refscale_typesafe *rtsp, unsigned int *start); 651 static bool (*rts_release)(struct refscale_typesafe *rtsp, unsigned int start); 652 653 // Conditionally acquire an explicit in-structure reference count. 
654 static bool typesafe_ref_acquire(struct refscale_typesafe *rtsp, unsigned int *start) 655 { 656 return atomic_inc_not_zero(&rtsp->rts_refctr); 657 } 658 659 // Unconditionally release an explicit in-structure reference count. 660 static bool typesafe_ref_release(struct refscale_typesafe *rtsp, unsigned int start) 661 { 662 if (!atomic_dec_return(&rtsp->rts_refctr)) { 663 WRITE_ONCE(rtsp->a, rtsp->a + 1); 664 kmem_cache_free(typesafe_kmem_cachep, rtsp); 665 } 666 return true; 667 } 668 669 // Unconditionally acquire an explicit in-structure spinlock. 670 static bool typesafe_lock_acquire(struct refscale_typesafe *rtsp, unsigned int *start) 671 { 672 spin_lock(&rtsp->rts_lock); 673 return true; 674 } 675 676 // Unconditionally release an explicit in-structure spinlock. 677 static bool typesafe_lock_release(struct refscale_typesafe *rtsp, unsigned int start) 678 { 679 spin_unlock(&rtsp->rts_lock); 680 return true; 681 } 682 683 // Unconditionally acquire an explicit in-structure sequence lock. 684 static bool typesafe_seqlock_acquire(struct refscale_typesafe *rtsp, unsigned int *start) 685 { 686 *start = read_seqbegin(&rtsp->rts_seqlock); 687 return true; 688 } 689 690 // Conditionally release an explicit in-structure sequence lock. Return 691 // true if this release was successful, that is, if no retry is required. 692 static bool typesafe_seqlock_release(struct refscale_typesafe *rtsp, unsigned int start) 693 { 694 return !read_seqretry(&rtsp->rts_seqlock, start); 695 } 696 697 // Do a read-side critical section with the specified delay in 698 // microseconds and nanoseconds inserted so as to increase probability 699 // of failure. 
700 static void typesafe_delay_section(const int nloops, const int udl, const int ndl) 701 { 702 unsigned int a; 703 unsigned int b; 704 int i; 705 long idx; 706 struct refscale_typesafe *rtsp; 707 unsigned int start; 708 709 for (i = nloops; i >= 0; i--) { 710 preempt_disable(); 711 idx = torture_random(this_cpu_ptr(&refscale_rand)) % rtsarray_size; 712 preempt_enable(); 713 retry: 714 rcu_read_lock(); 715 rtsp = rcu_dereference(rtsarray[idx]); 716 a = READ_ONCE(rtsp->a); 717 if (!rts_acquire(rtsp, &start)) { 718 rcu_read_unlock(); 719 goto retry; 720 } 721 if (a != READ_ONCE(rtsp->a)) { 722 (void)rts_release(rtsp, start); 723 rcu_read_unlock(); 724 goto retry; 725 } 726 un_delay(udl, ndl); 727 b = READ_ONCE(rtsp->a); 728 // Remember, seqlock read-side release can fail. 729 if (!rts_release(rtsp, start)) { 730 rcu_read_unlock(); 731 goto retry; 732 } 733 WARN_ONCE(a != b, "Re-read of ->a changed from %u to %u.\n", a, b); 734 b = rtsp->b; 735 rcu_read_unlock(); 736 WARN_ON_ONCE(a * a != b); 737 } 738 } 739 740 // Because the acquisition and release methods are expensive, there 741 // is no point in optimizing away the un_delay() function's two checks. 742 // Thus simply define typesafe_read_section() as a simple wrapper around 743 // typesafe_delay_section(). 744 static void typesafe_read_section(const int nloops) 745 { 746 typesafe_delay_section(nloops, 0, 0); 747 } 748 749 // Allocate and initialize one refscale_typesafe structure. 750 static struct refscale_typesafe *typesafe_alloc_one(void) 751 { 752 struct refscale_typesafe *rtsp; 753 754 rtsp = kmem_cache_alloc(typesafe_kmem_cachep, GFP_KERNEL); 755 if (!rtsp) 756 return NULL; 757 atomic_set(&rtsp->rts_refctr, 1); 758 WRITE_ONCE(rtsp->a, rtsp->a + 1); 759 WRITE_ONCE(rtsp->b, rtsp->a * rtsp->a); 760 return rtsp; 761 } 762 763 // Slab-allocator constructor for refscale_typesafe structures created 764 // out of a new slab of system memory. 
765 static void refscale_typesafe_ctor(void *rtsp_in) 766 { 767 struct refscale_typesafe *rtsp = rtsp_in; 768 769 spin_lock_init(&rtsp->rts_lock); 770 seqlock_init(&rtsp->rts_seqlock); 771 preempt_disable(); 772 rtsp->a = torture_random(this_cpu_ptr(&refscale_rand)); 773 preempt_enable(); 774 } 775 776 static const struct ref_scale_ops typesafe_ref_ops; 777 static const struct ref_scale_ops typesafe_lock_ops; 778 static const struct ref_scale_ops typesafe_seqlock_ops; 779 780 // Initialize for a typesafe test. 781 static bool typesafe_init(void) 782 { 783 long idx; 784 long si = lookup_instances; 785 786 typesafe_kmem_cachep = kmem_cache_create("refscale_typesafe", 787 sizeof(struct refscale_typesafe), sizeof(void *), 788 SLAB_TYPESAFE_BY_RCU, refscale_typesafe_ctor); 789 if (!typesafe_kmem_cachep) 790 return false; 791 if (si < 0) 792 si = -si * nr_cpu_ids; 793 else if (si == 0) 794 si = nr_cpu_ids; 795 rtsarray_size = si; 796 rtsarray = kcalloc(si, sizeof(*rtsarray), GFP_KERNEL); 797 if (!rtsarray) 798 return false; 799 for (idx = 0; idx < rtsarray_size; idx++) { 800 rtsarray[idx] = typesafe_alloc_one(); 801 if (!rtsarray[idx]) 802 return false; 803 } 804 if (cur_ops == &typesafe_ref_ops) { 805 rts_acquire = typesafe_ref_acquire; 806 rts_release = typesafe_ref_release; 807 } else if (cur_ops == &typesafe_lock_ops) { 808 rts_acquire = typesafe_lock_acquire; 809 rts_release = typesafe_lock_release; 810 } else if (cur_ops == &typesafe_seqlock_ops) { 811 rts_acquire = typesafe_seqlock_acquire; 812 rts_release = typesafe_seqlock_release; 813 } else { 814 WARN_ON_ONCE(1); 815 return false; 816 } 817 return true; 818 } 819 820 // Clean up after a typesafe test. 
821 static void typesafe_cleanup(void) 822 { 823 long idx; 824 825 if (rtsarray) { 826 for (idx = 0; idx < rtsarray_size; idx++) 827 kmem_cache_free(typesafe_kmem_cachep, rtsarray[idx]); 828 kfree(rtsarray); 829 rtsarray = NULL; 830 rtsarray_size = 0; 831 } 832 kmem_cache_destroy(typesafe_kmem_cachep); 833 typesafe_kmem_cachep = NULL; 834 rts_acquire = NULL; 835 rts_release = NULL; 836 } 837 838 // The typesafe_init() function distinguishes these structures by address. 839 static const struct ref_scale_ops typesafe_ref_ops = { 840 .init = typesafe_init, 841 .cleanup = typesafe_cleanup, 842 .readsection = typesafe_read_section, 843 .delaysection = typesafe_delay_section, 844 .name = "typesafe_ref" 845 }; 846 847 static const struct ref_scale_ops typesafe_lock_ops = { 848 .init = typesafe_init, 849 .cleanup = typesafe_cleanup, 850 .readsection = typesafe_read_section, 851 .delaysection = typesafe_delay_section, 852 .name = "typesafe_lock" 853 }; 854 855 static const struct ref_scale_ops typesafe_seqlock_ops = { 856 .init = typesafe_init, 857 .cleanup = typesafe_cleanup, 858 .readsection = typesafe_read_section, 859 .delaysection = typesafe_delay_section, 860 .name = "typesafe_seqlock" 861 }; 862 863 static void rcu_scale_one_reader(void) 864 { 865 if (readdelay <= 0) 866 cur_ops->readsection(loops); 867 else 868 cur_ops->delaysection(loops, readdelay / 1000, readdelay % 1000); 869 } 870 871 // Warm up cache, or, if needed run a series of rcu_scale_one_reader() 872 // to allow multiple rcuscale guest OSes to collect mutually valid data. 873 static void rcu_scale_warm_cool(void) 874 { 875 unsigned long jdone = jiffies + (guest_os_delay > 0 ? guest_os_delay * HZ : -1); 876 877 do { 878 rcu_scale_one_reader(); 879 cond_resched(); 880 } while (time_before(jiffies, jdone)); 881 } 882 883 // Reader kthread. Repeatedly does empty RCU read-side 884 // critical section, minimizing update-side interference. 
885 static int 886 ref_scale_reader(void *arg) 887 { 888 unsigned long flags; 889 long me = (long)arg; 890 struct reader_task *rt = &(reader_tasks[me]); 891 u64 start; 892 s64 duration; 893 894 VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: task started", me); 895 WARN_ON_ONCE(set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids))); 896 set_user_nice(current, MAX_NICE); 897 atomic_inc(&n_init); 898 if (holdoff) 899 schedule_timeout_interruptible(holdoff * HZ); 900 repeat: 901 VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: waiting to start next experiment on cpu %d", me, raw_smp_processor_id()); 902 903 // Wait for signal that this reader can start. 904 wait_event(rt->wq, (atomic_read(&nreaders_exp) && smp_load_acquire(&rt->start_reader)) || 905 torture_must_stop()); 906 907 if (torture_must_stop()) 908 goto end; 909 910 // Make sure that the CPU is affinitized appropriately during testing. 911 WARN_ON_ONCE(raw_smp_processor_id() != me % nr_cpu_ids); 912 913 WRITE_ONCE(rt->start_reader, 0); 914 if (!atomic_dec_return(&n_started)) 915 while (atomic_read_acquire(&n_started)) 916 cpu_relax(); 917 918 VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: experiment %d started", me, exp_idx); 919 920 921 // To reduce noise, do an initial cache-warming invocation, check 922 // in, and then keep warming until everyone has checked in. 923 rcu_scale_one_reader(); 924 if (!atomic_dec_return(&n_warmedup)) 925 while (atomic_read_acquire(&n_warmedup)) 926 rcu_scale_one_reader(); 927 // Also keep interrupts disabled. This also has the effect 928 // of preventing entries into slow path for rcu_read_unlock(). 929 local_irq_save(flags); 930 start = ktime_get_mono_fast_ns(); 931 932 rcu_scale_one_reader(); 933 934 duration = ktime_get_mono_fast_ns() - start; 935 local_irq_restore(flags); 936 937 rt->last_duration_ns = WARN_ON_ONCE(duration < 0) ? 0 : duration; 938 // To reduce runtime-skew noise, do maintain-load invocations until 939 // everyone is done. 
940 if (!atomic_dec_return(&n_cooleddown)) 941 while (atomic_read_acquire(&n_cooleddown)) 942 rcu_scale_one_reader(); 943 944 if (atomic_dec_and_test(&nreaders_exp)) 945 wake_up(&main_wq); 946 947 VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: experiment %d ended, (readers remaining=%d)", 948 me, exp_idx, atomic_read(&nreaders_exp)); 949 950 if (!torture_must_stop()) 951 goto repeat; 952 end: 953 torture_kthread_stopping("ref_scale_reader"); 954 return 0; 955 } 956 957 static void reset_readers(void) 958 { 959 int i; 960 struct reader_task *rt; 961 962 for (i = 0; i < nreaders; i++) { 963 rt = &(reader_tasks[i]); 964 965 rt->last_duration_ns = 0; 966 } 967 } 968 969 // Print the results of each reader and return the sum of all their durations. 970 static u64 process_durations(int n) 971 { 972 int i; 973 struct reader_task *rt; 974 struct seq_buf s; 975 char *buf; 976 u64 sum = 0; 977 978 buf = kmalloc(800 + 64, GFP_KERNEL); 979 if (!buf) 980 return 0; 981 seq_buf_init(&s, buf, 800 + 64); 982 983 seq_buf_printf(&s, "Experiment #%d (Format: <THREAD-NUM>:<Total loop time in ns>)", 984 exp_idx); 985 986 for (i = 0; i < n && !torture_must_stop(); i++) { 987 rt = &(reader_tasks[i]); 988 989 if (i % 5 == 0) 990 seq_buf_putc(&s, '\n'); 991 992 if (seq_buf_used(&s) >= 800) { 993 pr_alert("%s", seq_buf_str(&s)); 994 seq_buf_clear(&s); 995 } 996 997 seq_buf_printf(&s, "%d: %llu\t", i, rt->last_duration_ns); 998 999 sum += rt->last_duration_ns; 1000 } 1001 pr_alert("%s\n", seq_buf_str(&s)); 1002 1003 kfree(buf); 1004 return sum; 1005 } 1006 1007 // The main_func is the main orchestrator, it performs a bunch of 1008 // experiments. For every experiment, it orders all the readers 1009 // involved to start and waits for them to finish the experiment. It 1010 // then reads their timestamps and starts the next experiment. Each 1011 // experiment progresses from 1 concurrent reader to N of them at which 1012 // point all the timestamps are printed. 
1013 static int main_func(void *arg) 1014 { 1015 int exp, r; 1016 char buf1[64]; 1017 char *buf; 1018 u64 *result_avg; 1019 1020 set_cpus_allowed_ptr(current, cpumask_of(nreaders % nr_cpu_ids)); 1021 set_user_nice(current, MAX_NICE); 1022 1023 VERBOSE_SCALEOUT("main_func task started"); 1024 result_avg = kzalloc(nruns * sizeof(*result_avg), GFP_KERNEL); 1025 buf = kzalloc(800 + 64, GFP_KERNEL); 1026 if (!result_avg || !buf) { 1027 SCALEOUT_ERRSTRING("out of memory"); 1028 goto oom_exit; 1029 } 1030 if (holdoff) 1031 schedule_timeout_interruptible(holdoff * HZ); 1032 1033 // Wait for all threads to start. 1034 atomic_inc(&n_init); 1035 while (atomic_read(&n_init) < nreaders + 1) 1036 schedule_timeout_uninterruptible(1); 1037 1038 // Start exp readers up per experiment 1039 rcu_scale_warm_cool(); 1040 for (exp = 0; exp < nruns && !torture_must_stop(); exp++) { 1041 if (torture_must_stop()) 1042 goto end; 1043 1044 reset_readers(); 1045 atomic_set(&nreaders_exp, nreaders); 1046 atomic_set(&n_started, nreaders); 1047 atomic_set(&n_warmedup, nreaders); 1048 atomic_set(&n_cooleddown, nreaders); 1049 1050 exp_idx = exp; 1051 1052 for (r = 0; r < nreaders; r++) { 1053 smp_store_release(&reader_tasks[r].start_reader, 1); 1054 wake_up(&reader_tasks[r].wq); 1055 } 1056 1057 VERBOSE_SCALEOUT("main_func: experiment started, waiting for %d readers", 1058 nreaders); 1059 1060 wait_event(main_wq, 1061 !atomic_read(&nreaders_exp) || torture_must_stop()); 1062 1063 VERBOSE_SCALEOUT("main_func: experiment ended"); 1064 1065 if (torture_must_stop()) 1066 goto end; 1067 1068 result_avg[exp] = div_u64(1000 * process_durations(nreaders), nreaders * loops); 1069 } 1070 rcu_scale_warm_cool(); 1071 1072 // Print the average of all experiments 1073 SCALEOUT("END OF TEST. 
Calculating average duration per loop (nanoseconds)...\n"); 1074 1075 pr_alert("Runs\tTime(ns)\n"); 1076 for (exp = 0; exp < nruns; exp++) { 1077 u64 avg; 1078 u32 rem; 1079 1080 avg = div_u64_rem(result_avg[exp], 1000, &rem); 1081 sprintf(buf1, "%d\t%llu.%03u\n", exp + 1, avg, rem); 1082 strcat(buf, buf1); 1083 if (strlen(buf) >= 800) { 1084 pr_alert("%s", buf); 1085 buf[0] = 0; 1086 } 1087 } 1088 1089 pr_alert("%s", buf); 1090 1091 oom_exit: 1092 // This will shutdown everything including us. 1093 if (shutdown) { 1094 shutdown_start = 1; 1095 wake_up(&shutdown_wq); 1096 } 1097 1098 // Wait for torture to stop us 1099 while (!torture_must_stop()) 1100 schedule_timeout_uninterruptible(1); 1101 1102 end: 1103 torture_kthread_stopping("main_func"); 1104 kfree(result_avg); 1105 kfree(buf); 1106 return 0; 1107 } 1108 1109 static void 1110 ref_scale_print_module_parms(const struct ref_scale_ops *cur_ops, const char *tag) 1111 { 1112 pr_alert("%s" SCALE_FLAG 1113 "--- %s: verbose=%d verbose_batched=%d shutdown=%d holdoff=%d lookup_instances=%ld loops=%ld nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag, 1114 verbose, verbose_batched, shutdown, holdoff, lookup_instances, loops, nreaders, nruns, readdelay); 1115 } 1116 1117 static void 1118 ref_scale_cleanup(void) 1119 { 1120 int i; 1121 1122 if (torture_cleanup_begin()) 1123 return; 1124 1125 if (!cur_ops) { 1126 torture_cleanup_end(); 1127 return; 1128 } 1129 1130 if (reader_tasks) { 1131 for (i = 0; i < nreaders; i++) 1132 torture_stop_kthread("ref_scale_reader", 1133 reader_tasks[i].task); 1134 } 1135 kfree(reader_tasks); 1136 1137 torture_stop_kthread("main_task", main_task); 1138 kfree(main_task); 1139 1140 // Do scale-type-specific cleanup operations. 1141 if (cur_ops->cleanup != NULL) 1142 cur_ops->cleanup(); 1143 1144 torture_cleanup_end(); 1145 } 1146 1147 // Shutdown kthread. Just waits to be awakened, then shuts down system. 
1148 static int 1149 ref_scale_shutdown(void *arg) 1150 { 1151 wait_event_idle(shutdown_wq, shutdown_start); 1152 1153 smp_mb(); // Wake before output. 1154 ref_scale_cleanup(); 1155 kernel_power_off(); 1156 1157 return -EINVAL; 1158 } 1159 1160 static int __init 1161 ref_scale_init(void) 1162 { 1163 long i; 1164 int firsterr = 0; 1165 static const struct ref_scale_ops *scale_ops[] = { 1166 &rcu_ops, &srcu_ops, &srcu_lite_ops, RCU_TRACE_OPS RCU_TASKS_OPS 1167 &refcnt_ops, &rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops, 1168 &acqrel_ops, &sched_clock_ops, &clock_ops, &jiffies_ops, 1169 &typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops, 1170 }; 1171 1172 if (!torture_init_begin(scale_type, verbose)) 1173 return -EBUSY; 1174 1175 for (i = 0; i < ARRAY_SIZE(scale_ops); i++) { 1176 cur_ops = scale_ops[i]; 1177 if (strcmp(scale_type, cur_ops->name) == 0) 1178 break; 1179 } 1180 if (i == ARRAY_SIZE(scale_ops)) { 1181 pr_alert("rcu-scale: invalid scale type: \"%s\"\n", scale_type); 1182 pr_alert("rcu-scale types:"); 1183 for (i = 0; i < ARRAY_SIZE(scale_ops); i++) 1184 pr_cont(" %s", scale_ops[i]->name); 1185 pr_cont("\n"); 1186 firsterr = -EINVAL; 1187 cur_ops = NULL; 1188 goto unwind; 1189 } 1190 if (cur_ops->init) 1191 if (!cur_ops->init()) { 1192 firsterr = -EUCLEAN; 1193 goto unwind; 1194 } 1195 1196 ref_scale_print_module_parms(cur_ops, "Start of test"); 1197 1198 // Shutdown task 1199 if (shutdown) { 1200 init_waitqueue_head(&shutdown_wq); 1201 firsterr = torture_create_kthread(ref_scale_shutdown, NULL, 1202 shutdown_task); 1203 if (torture_init_error(firsterr)) 1204 goto unwind; 1205 schedule_timeout_uninterruptible(1); 1206 } 1207 1208 // Reader tasks (default to ~75% of online CPUs). 
1209 if (nreaders < 0) 1210 nreaders = (num_online_cpus() >> 1) + (num_online_cpus() >> 2); 1211 if (WARN_ONCE(loops <= 0, "%s: loops = %ld, adjusted to 1\n", __func__, loops)) 1212 loops = 1; 1213 if (WARN_ONCE(nreaders <= 0, "%s: nreaders = %d, adjusted to 1\n", __func__, nreaders)) 1214 nreaders = 1; 1215 if (WARN_ONCE(nruns <= 0, "%s: nruns = %d, adjusted to 1\n", __func__, nruns)) 1216 nruns = 1; 1217 reader_tasks = kcalloc(nreaders, sizeof(reader_tasks[0]), 1218 GFP_KERNEL); 1219 if (!reader_tasks) { 1220 SCALEOUT_ERRSTRING("out of memory"); 1221 firsterr = -ENOMEM; 1222 goto unwind; 1223 } 1224 1225 VERBOSE_SCALEOUT("Starting %d reader threads", nreaders); 1226 1227 for (i = 0; i < nreaders; i++) { 1228 init_waitqueue_head(&reader_tasks[i].wq); 1229 firsterr = torture_create_kthread(ref_scale_reader, (void *)i, 1230 reader_tasks[i].task); 1231 if (torture_init_error(firsterr)) 1232 goto unwind; 1233 } 1234 1235 // Main Task 1236 init_waitqueue_head(&main_wq); 1237 firsterr = torture_create_kthread(main_func, NULL, main_task); 1238 if (torture_init_error(firsterr)) 1239 goto unwind; 1240 1241 torture_init_end(); 1242 return 0; 1243 1244 unwind: 1245 torture_init_end(); 1246 ref_scale_cleanup(); 1247 if (shutdown) { 1248 WARN_ON(!IS_MODULE(CONFIG_RCU_REF_SCALE_TEST)); 1249 kernel_power_off(); 1250 } 1251 return firsterr; 1252 } 1253 1254 module_init(ref_scale_init); 1255 module_exit(ref_scale_cleanup); 1256