1 // SPDX-License-Identifier: GPL-2.0+ 2 // 3 // Scalability test comparing RCU vs other mechanisms 4 // for acquiring references on objects. 5 // 6 // Copyright (C) Google, 2020. 7 // 8 // Author: Joel Fernandes <joel@joelfernandes.org> 9 10 #define pr_fmt(fmt) fmt 11 12 #include <linux/atomic.h> 13 #include <linux/bitops.h> 14 #include <linux/completion.h> 15 #include <linux/cpu.h> 16 #include <linux/delay.h> 17 #include <linux/err.h> 18 #include <linux/init.h> 19 #include <linux/interrupt.h> 20 #include <linux/kthread.h> 21 #include <linux/kernel.h> 22 #include <linux/mm.h> 23 #include <linux/module.h> 24 #include <linux/moduleparam.h> 25 #include <linux/notifier.h> 26 #include <linux/percpu.h> 27 #include <linux/rcupdate.h> 28 #include <linux/rcupdate_trace.h> 29 #include <linux/reboot.h> 30 #include <linux/sched.h> 31 #include <linux/seq_buf.h> 32 #include <linux/spinlock.h> 33 #include <linux/smp.h> 34 #include <linux/stat.h> 35 #include <linux/srcu.h> 36 #include <linux/slab.h> 37 #include <linux/torture.h> 38 #include <linux/types.h> 39 40 #include "rcu.h" 41 42 #define SCALE_FLAG "-ref-scale: " 43 44 #define SCALEOUT(s, x...) \ 45 pr_alert("%s" SCALE_FLAG s, scale_type, ## x) 46 47 #define VERBOSE_SCALEOUT(s, x...) \ 48 do { \ 49 if (verbose) \ 50 pr_alert("%s" SCALE_FLAG s "\n", scale_type, ## x); \ 51 } while (0) 52 53 static atomic_t verbose_batch_ctr; 54 55 #define VERBOSE_SCALEOUT_BATCH(s, x...) \ 56 do { \ 57 if (verbose && \ 58 (verbose_batched <= 0 || \ 59 !(atomic_inc_return(&verbose_batch_ctr) % verbose_batched))) { \ 60 schedule_timeout_uninterruptible(1); \ 61 pr_alert("%s" SCALE_FLAG s "\n", scale_type, ## x); \ 62 } \ 63 } while (0) 64 65 #define SCALEOUT_ERRSTRING(s, x...) pr_alert("%s" SCALE_FLAG "!!! " s "\n", scale_type, ## x) 66 67 MODULE_DESCRIPTION("Scalability test for object reference mechanisms"); 68 MODULE_LICENSE("GPL"); 69 MODULE_AUTHOR("Joel Fernandes (Google) <joel@joelfernandes.org>"); 70 71 static char *scale_type = "rcu"; 72 module_param(scale_type, charp, 0444); 73 MODULE_PARM_DESC(scale_type, "Type of test (rcu, srcu, refcnt, rwsem, rwlock."); 74 75 torture_param(int, verbose, 0, "Enable verbose debugging printk()s"); 76 torture_param(int, verbose_batched, 0, "Batch verbose debugging printk()s"); 77 78 // Number of seconds to extend warm-up and cool-down for multiple guest OSes 79 torture_param(long, guest_os_delay, 0, 80 "Number of seconds to extend warm-up/cool-down for multiple guest OSes."); 81 // Wait until there are multiple CPUs before starting test. 82 torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0, 83 "Holdoff time before test start (s)"); 84 // Number of typesafe_lookup structures, that is, the degree of concurrency. 85 torture_param(long, lookup_instances, 0, "Number of typesafe_lookup structures."); 86 // Number of loops per experiment, all readers execute operations concurrently. 87 torture_param(long, loops, 10000, "Number of loops per experiment."); 88 // Number of readers, with -1 defaulting to about 75% of the CPUs. 89 torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs."); 90 // Number of runs. 91 torture_param(int, nruns, 30, "Number of experiments to run."); 92 // Reader delay in nanoseconds, 0 for no delay. 93 torture_param(int, readdelay, 0, "Read-side delay in nanoseconds."); 94 95 #ifdef MODULE 96 # define REFSCALE_SHUTDOWN 0 97 #else 98 # define REFSCALE_SHUTDOWN 1 99 #endif 100 101 torture_param(bool, shutdown, REFSCALE_SHUTDOWN, 102 "Shutdown at end of scalability tests."); 103 104 struct reader_task { 105 struct task_struct *task; 106 int start_reader; 107 wait_queue_head_t wq; 108 u64 last_duration_ns; 109 }; 110 111 static struct task_struct *shutdown_task; 112 static wait_queue_head_t shutdown_wq; 113 114 static struct task_struct *main_task; 115 static wait_queue_head_t main_wq; 116 static int shutdown_start; 117 118 static struct reader_task *reader_tasks; 119 120 // Number of readers that are part of the current experiment. 121 static atomic_t nreaders_exp; 122 123 // Use to wait for all threads to start. 124 static atomic_t n_init; 125 static atomic_t n_started; 126 static atomic_t n_warmedup; 127 static atomic_t n_cooleddown; 128 129 // Track which experiment is currently running. 130 static int exp_idx; 131 132 // Operations vector for selecting different types of tests. 133 struct ref_scale_ops { 134 bool (*init)(void); 135 void (*cleanup)(void); 136 void (*readsection)(const int nloops); 137 void (*delaysection)(const int nloops, const int udl, const int ndl); 138 const char *name; 139 }; 140 141 static const struct ref_scale_ops *cur_ops; 142 143 static void un_delay(const int udl, const int ndl) 144 { 145 if (udl) 146 udelay(udl); 147 if (ndl) 148 ndelay(ndl); 149 } 150 151 static void ref_rcu_read_section(const int nloops) 152 { 153 int i; 154 155 for (i = nloops; i >= 0; i--) { 156 rcu_read_lock(); 157 rcu_read_unlock(); 158 } 159 } 160 161 static void ref_rcu_delay_section(const int nloops, const int udl, const int ndl) 162 { 163 int i; 164 165 for (i = nloops; i >= 0; i--) { 166 rcu_read_lock(); 167 un_delay(udl, ndl); 168 rcu_read_unlock(); 169 } 170 } 171 172 static bool rcu_sync_scale_init(void) 173 { 174 return true; 175 } 176 177 static const struct ref_scale_ops rcu_ops = { 178 .init = rcu_sync_scale_init, 179 .readsection = ref_rcu_read_section, 180 .delaysection = ref_rcu_delay_section, 181 .name = "rcu" 182 }; 183 184 // Definitions for SRCU ref scale testing. 185 DEFINE_STATIC_SRCU(srcu_refctl_scale); 186 static struct srcu_struct *srcu_ctlp = &srcu_refctl_scale; 187 188 static void srcu_ref_scale_read_section(const int nloops) 189 { 190 int i; 191 int idx; 192 193 for (i = nloops; i >= 0; i--) { 194 idx = srcu_read_lock(srcu_ctlp); 195 srcu_read_unlock(srcu_ctlp, idx); 196 } 197 } 198 199 static void srcu_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 200 { 201 int i; 202 int idx; 203 204 for (i = nloops; i >= 0; i--) { 205 idx = srcu_read_lock(srcu_ctlp); 206 un_delay(udl, ndl); 207 srcu_read_unlock(srcu_ctlp, idx); 208 } 209 } 210 211 static const struct ref_scale_ops srcu_ops = { 212 .init = rcu_sync_scale_init, 213 .readsection = srcu_ref_scale_read_section, 214 .delaysection = srcu_ref_scale_delay_section, 215 .name = "srcu" 216 }; 217 218 static void srcu_lite_ref_scale_read_section(const int nloops) 219 { 220 int i; 221 int idx; 222 223 for (i = nloops; i >= 0; i--) { 224 idx = srcu_read_lock_lite(srcu_ctlp); 225 srcu_read_unlock_lite(srcu_ctlp, idx); 226 } 227 } 228 229 static void srcu_lite_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 230 { 231 int i; 232 int idx; 233 234 for (i = nloops; i >= 0; i--) { 235 idx = srcu_read_lock_lite(srcu_ctlp); 236 un_delay(udl, ndl); 237 srcu_read_unlock_lite(srcu_ctlp, idx); 238 } 239 } 240 241 static const struct ref_scale_ops srcu_lite_ops = { 242 .init = rcu_sync_scale_init, 243 .readsection = srcu_lite_ref_scale_read_section, 244 .delaysection = srcu_lite_ref_scale_delay_section, 245 .name = "srcu-lite" 246 }; 247 248 #ifdef CONFIG_TASKS_RCU 249 250 // Definitions for RCU Tasks ref scale testing: Empty read markers. 251 // These definitions also work for RCU Rude readers. 252 static void rcu_tasks_ref_scale_read_section(const int nloops) 253 { 254 int i; 255 256 for (i = nloops; i >= 0; i--) 257 continue; 258 } 259 260 static void rcu_tasks_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 261 { 262 int i; 263 264 for (i = nloops; i >= 0; i--) 265 un_delay(udl, ndl); 266 } 267 268 static const struct ref_scale_ops rcu_tasks_ops = { 269 .init = rcu_sync_scale_init, 270 .readsection = rcu_tasks_ref_scale_read_section, 271 .delaysection = rcu_tasks_ref_scale_delay_section, 272 .name = "rcu-tasks" 273 }; 274 275 #define RCU_TASKS_OPS &rcu_tasks_ops, 276 277 #else // #ifdef CONFIG_TASKS_RCU 278 279 #define RCU_TASKS_OPS 280 281 #endif // #else // #ifdef CONFIG_TASKS_RCU 282 283 #ifdef CONFIG_TASKS_TRACE_RCU 284 285 // Definitions for RCU Tasks Trace ref scale testing. 286 static void rcu_trace_ref_scale_read_section(const int nloops) 287 { 288 int i; 289 290 for (i = nloops; i >= 0; i--) { 291 rcu_read_lock_trace(); 292 rcu_read_unlock_trace(); 293 } 294 } 295 296 static void rcu_trace_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 297 { 298 int i; 299 300 for (i = nloops; i >= 0; i--) { 301 rcu_read_lock_trace(); 302 un_delay(udl, ndl); 303 rcu_read_unlock_trace(); 304 } 305 } 306 307 static const struct ref_scale_ops rcu_trace_ops = { 308 .init = rcu_sync_scale_init, 309 .readsection = rcu_trace_ref_scale_read_section, 310 .delaysection = rcu_trace_ref_scale_delay_section, 311 .name = "rcu-trace" 312 }; 313 314 #define RCU_TRACE_OPS &rcu_trace_ops, 315 316 #else // #ifdef CONFIG_TASKS_TRACE_RCU 317 318 #define RCU_TRACE_OPS 319 320 #endif // #else // #ifdef CONFIG_TASKS_TRACE_RCU 321 322 // Definitions for reference count 323 static atomic_t refcnt; 324 325 static void ref_refcnt_section(const int nloops) 326 { 327 int i; 328 329 for (i = nloops; i >= 0; i--) { 330 atomic_inc(&refcnt); 331 atomic_dec(&refcnt); 332 } 333 } 334 335 static void ref_refcnt_delay_section(const int nloops, const int udl, const int ndl) 336 { 337 int i; 338 339 for (i = nloops; i >= 0; i--) { 340 atomic_inc(&refcnt); 341 un_delay(udl, ndl); 342 atomic_dec(&refcnt); 343 } 344 } 345 346 static const struct ref_scale_ops refcnt_ops = { 347 .init = rcu_sync_scale_init, 348 .readsection = ref_refcnt_section, 349 .delaysection = ref_refcnt_delay_section, 350 .name = "refcnt" 351 }; 352 353 // Definitions for rwlock 354 static rwlock_t test_rwlock; 355 356 static bool ref_rwlock_init(void) 357 { 358 rwlock_init(&test_rwlock); 359 return true; 360 } 361 362 static void ref_rwlock_section(const int nloops) 363 { 364 int i; 365 366 for (i = nloops; i >= 0; i--) { 367 read_lock(&test_rwlock); 368 read_unlock(&test_rwlock); 369 } 370 } 371 372 static void ref_rwlock_delay_section(const int nloops, const int udl, const int ndl) 373 { 374 int i; 375 376 for (i = nloops; i >= 0; i--) { 377 read_lock(&test_rwlock); 378 un_delay(udl, ndl); 379 read_unlock(&test_rwlock); 380 } 381 } 382 383 static const struct ref_scale_ops rwlock_ops = { 384 .init = ref_rwlock_init, 385 .readsection = ref_rwlock_section, 386 .delaysection = ref_rwlock_delay_section, 387 .name = "rwlock" 388 }; 389 390 // Definitions for rwsem 391 static struct rw_semaphore test_rwsem; 392 393 static bool ref_rwsem_init(void) 394 { 395 init_rwsem(&test_rwsem); 396 return true; 397 } 398 399 static void ref_rwsem_section(const int nloops) 400 { 401 int i; 402 403 for (i = nloops; i >= 0; i--) { 404 down_read(&test_rwsem); 405 up_read(&test_rwsem); 406 } 407 } 408 409 static void ref_rwsem_delay_section(const int nloops, const int udl, const int ndl) 410 { 411 int i; 412 413 for (i = nloops; i >= 0; i--) { 414 down_read(&test_rwsem); 415 un_delay(udl, ndl); 416 up_read(&test_rwsem); 417 } 418 } 419 420 static const struct ref_scale_ops rwsem_ops = { 421 .init = ref_rwsem_init, 422 .readsection = ref_rwsem_section, 423 .delaysection = ref_rwsem_delay_section, 424 .name = "rwsem" 425 }; 426 427 // Definitions for global spinlock 428 static DEFINE_RAW_SPINLOCK(test_lock); 429 430 static void ref_lock_section(const int nloops) 431 { 432 int i; 433 434 preempt_disable(); 435 for (i = nloops; i >= 0; i--) { 436 raw_spin_lock(&test_lock); 437 raw_spin_unlock(&test_lock); 438 } 439 preempt_enable(); 440 } 441 442 static void ref_lock_delay_section(const int nloops, const int udl, const int ndl) 443 { 444 int i; 445 446 preempt_disable(); 447 for (i = nloops; i >= 0; i--) { 448 raw_spin_lock(&test_lock); 449 un_delay(udl, ndl); 450 raw_spin_unlock(&test_lock); 451 } 452 preempt_enable(); 453 } 454 455 static const struct ref_scale_ops lock_ops = { 456 .readsection = ref_lock_section, 457 .delaysection = ref_lock_delay_section, 458 .name = "lock" 459 }; 460 461 // Definitions for global irq-save spinlock 462 463 static void ref_lock_irq_section(const int nloops) 464 { 465 unsigned long flags; 466 int i; 467 468 preempt_disable(); 469 for (i = nloops; i >= 0; i--) { 470 raw_spin_lock_irqsave(&test_lock, flags); 471 raw_spin_unlock_irqrestore(&test_lock, flags); 472 } 473 preempt_enable(); 474 } 475 476 static void ref_lock_irq_delay_section(const int nloops, const int udl, const int ndl) 477 { 478 unsigned long flags; 479 int i; 480 481 preempt_disable(); 482 for (i = nloops; i >= 0; i--) { 483 raw_spin_lock_irqsave(&test_lock, flags); 484 un_delay(udl, ndl); 485 raw_spin_unlock_irqrestore(&test_lock, flags); 486 } 487 preempt_enable(); 488 } 489 490 static const struct ref_scale_ops lock_irq_ops = { 491 .readsection = ref_lock_irq_section, 492 .delaysection = ref_lock_irq_delay_section, 493 .name = "lock-irq" 494 }; 495 496 // Definitions acquire-release. 497 static DEFINE_PER_CPU(unsigned long, test_acqrel); 498 499 static void ref_acqrel_section(const int nloops) 500 { 501 unsigned long x; 502 int i; 503 504 preempt_disable(); 505 for (i = nloops; i >= 0; i--) { 506 x = smp_load_acquire(this_cpu_ptr(&test_acqrel)); 507 smp_store_release(this_cpu_ptr(&test_acqrel), x + 1); 508 } 509 preempt_enable(); 510 } 511 512 static void ref_acqrel_delay_section(const int nloops, const int udl, const int ndl) 513 { 514 unsigned long x; 515 int i; 516 517 preempt_disable(); 518 for (i = nloops; i >= 0; i--) { 519 x = smp_load_acquire(this_cpu_ptr(&test_acqrel)); 520 un_delay(udl, ndl); 521 smp_store_release(this_cpu_ptr(&test_acqrel), x + 1); 522 } 523 preempt_enable(); 524 } 525 526 static const struct ref_scale_ops acqrel_ops = { 527 .readsection = ref_acqrel_section, 528 .delaysection = ref_acqrel_delay_section, 529 .name = "acqrel" 530 }; 531 532 static volatile u64 stopopts; 533 534 static void ref_clock_section(const int nloops) 535 { 536 u64 x = 0; 537 int i; 538 539 preempt_disable(); 540 for (i = nloops; i >= 0; i--) 541 x += ktime_get_real_fast_ns(); 542 preempt_enable(); 543 stopopts = x; 544 } 545 546 static void ref_clock_delay_section(const int nloops, const int udl, const int ndl) 547 { 548 u64 x = 0; 549 int i; 550 551 preempt_disable(); 552 for (i = nloops; i >= 0; i--) { 553 x += ktime_get_real_fast_ns(); 554 un_delay(udl, ndl); 555 } 556 preempt_enable(); 557 stopopts = x; 558 } 559 560 static const struct ref_scale_ops clock_ops = { 561 .readsection = ref_clock_section, 562 .delaysection = ref_clock_delay_section, 563 .name = "clock" 564 }; 565 566 static void ref_jiffies_section(const int nloops) 567 { 568 u64 x = 0; 569 int i; 570 571 preempt_disable(); 572 for (i = nloops; i >= 0; i--) 573 x += jiffies; 574 preempt_enable(); 575 stopopts = x; 576 } 577 578 static void ref_jiffies_delay_section(const int nloops, const int udl, const int ndl) 579 { 580 u64 x = 0; 581 int i; 582 583 preempt_disable(); 584 for (i = nloops; i >= 0; i--) { 585 x += jiffies; 586 un_delay(udl, ndl); 587 } 588 preempt_enable(); 589 stopopts = x; 590 } 591 592 static const struct ref_scale_ops jiffies_ops = { 593 .readsection = ref_jiffies_section, 594 .delaysection = ref_jiffies_delay_section, 595 .name = "jiffies" 596 }; 597 598 //////////////////////////////////////////////////////////////////////// 599 // 600 // Methods leveraging SLAB_TYPESAFE_BY_RCU. 601 // 602 603 // Item to look up in a typesafe manner. Array of pointers to these. 604 struct refscale_typesafe { 605 atomic_t rts_refctr; // Used by all flavors 606 spinlock_t rts_lock; 607 seqlock_t rts_seqlock; 608 unsigned int a; 609 unsigned int b; 610 }; 611 612 static struct kmem_cache *typesafe_kmem_cachep; 613 static struct refscale_typesafe **rtsarray; 614 static long rtsarray_size; 615 static DEFINE_TORTURE_RANDOM_PERCPU(refscale_rand); 616 static bool (*rts_acquire)(struct refscale_typesafe *rtsp, unsigned int *start); 617 static bool (*rts_release)(struct refscale_typesafe *rtsp, unsigned int start); 618 619 // Conditionally acquire an explicit in-structure reference count. 620 static bool typesafe_ref_acquire(struct refscale_typesafe *rtsp, unsigned int *start) 621 { 622 return atomic_inc_not_zero(&rtsp->rts_refctr); 623 } 624 625 // Unconditionally release an explicit in-structure reference count. 626 static bool typesafe_ref_release(struct refscale_typesafe *rtsp, unsigned int start) 627 { 628 if (!atomic_dec_return(&rtsp->rts_refctr)) { 629 WRITE_ONCE(rtsp->a, rtsp->a + 1); 630 kmem_cache_free(typesafe_kmem_cachep, rtsp); 631 } 632 return true; 633 } 634 635 // Unconditionally acquire an explicit in-structure spinlock. 636 static bool typesafe_lock_acquire(struct refscale_typesafe *rtsp, unsigned int *start) 637 { 638 spin_lock(&rtsp->rts_lock); 639 return true; 640 } 641 642 // Unconditionally release an explicit in-structure spinlock. 643 static bool typesafe_lock_release(struct refscale_typesafe *rtsp, unsigned int start) 644 { 645 spin_unlock(&rtsp->rts_lock); 646 return true; 647 } 648 649 // Unconditionally acquire an explicit in-structure sequence lock. 650 static bool typesafe_seqlock_acquire(struct refscale_typesafe *rtsp, unsigned int *start) 651 { 652 *start = read_seqbegin(&rtsp->rts_seqlock); 653 return true; 654 } 655 656 // Conditionally release an explicit in-structure sequence lock. Return 657 // true if this release was successful, that is, if no retry is required. 658 static bool typesafe_seqlock_release(struct refscale_typesafe *rtsp, unsigned int start) 659 { 660 return !read_seqretry(&rtsp->rts_seqlock, start); 661 } 662 663 // Do a read-side critical section with the specified delay in 664 // microseconds and nanoseconds inserted so as to increase probability 665 // of failure. 666 static void typesafe_delay_section(const int nloops, const int udl, const int ndl) 667 { 668 unsigned int a; 669 unsigned int b; 670 int i; 671 long idx; 672 struct refscale_typesafe *rtsp; 673 unsigned int start; 674 675 for (i = nloops; i >= 0; i--) { 676 preempt_disable(); 677 idx = torture_random(this_cpu_ptr(&refscale_rand)) % rtsarray_size; 678 preempt_enable(); 679 retry: 680 rcu_read_lock(); 681 rtsp = rcu_dereference(rtsarray[idx]); 682 a = READ_ONCE(rtsp->a); 683 if (!rts_acquire(rtsp, &start)) { 684 rcu_read_unlock(); 685 goto retry; 686 } 687 if (a != READ_ONCE(rtsp->a)) { 688 (void)rts_release(rtsp, start); 689 rcu_read_unlock(); 690 goto retry; 691 } 692 un_delay(udl, ndl); 693 b = READ_ONCE(rtsp->a); 694 // Remember, seqlock read-side release can fail. 695 if (!rts_release(rtsp, start)) { 696 rcu_read_unlock(); 697 goto retry; 698 } 699 WARN_ONCE(a != b, "Re-read of ->a changed from %u to %u.\n", a, b); 700 b = rtsp->b; 701 rcu_read_unlock(); 702 WARN_ON_ONCE(a * a != b); 703 } 704 } 705 706 // Because the acquisition and release methods are expensive, there 707 // is no point in optimizing away the un_delay() function's two checks. 708 // Thus simply define typesafe_read_section() as a simple wrapper around 709 // typesafe_delay_section(). 710 static void typesafe_read_section(const int nloops) 711 { 712 typesafe_delay_section(nloops, 0, 0); 713 } 714 715 // Allocate and initialize one refscale_typesafe structure. 716 static struct refscale_typesafe *typesafe_alloc_one(void) 717 { 718 struct refscale_typesafe *rtsp; 719 720 rtsp = kmem_cache_alloc(typesafe_kmem_cachep, GFP_KERNEL); 721 if (!rtsp) 722 return NULL; 723 atomic_set(&rtsp->rts_refctr, 1); 724 WRITE_ONCE(rtsp->a, rtsp->a + 1); 725 WRITE_ONCE(rtsp->b, rtsp->a * rtsp->a); 726 return rtsp; 727 } 728 729 // Slab-allocator constructor for refscale_typesafe structures created 730 // out of a new slab of system memory. 731 static void refscale_typesafe_ctor(void *rtsp_in) 732 { 733 struct refscale_typesafe *rtsp = rtsp_in; 734 735 spin_lock_init(&rtsp->rts_lock); 736 seqlock_init(&rtsp->rts_seqlock); 737 preempt_disable(); 738 rtsp->a = torture_random(this_cpu_ptr(&refscale_rand)); 739 preempt_enable(); 740 } 741 742 static const struct ref_scale_ops typesafe_ref_ops; 743 static const struct ref_scale_ops typesafe_lock_ops; 744 static const struct ref_scale_ops typesafe_seqlock_ops; 745 746 // Initialize for a typesafe test. 747 static bool typesafe_init(void) 748 { 749 long idx; 750 long si = lookup_instances; 751 752 typesafe_kmem_cachep = kmem_cache_create("refscale_typesafe", 753 sizeof(struct refscale_typesafe), sizeof(void *), 754 SLAB_TYPESAFE_BY_RCU, refscale_typesafe_ctor); 755 if (!typesafe_kmem_cachep) 756 return false; 757 if (si < 0) 758 si = -si * nr_cpu_ids; 759 else if (si == 0) 760 si = nr_cpu_ids; 761 rtsarray_size = si; 762 rtsarray = kcalloc(si, sizeof(*rtsarray), GFP_KERNEL); 763 if (!rtsarray) 764 return false; 765 for (idx = 0; idx < rtsarray_size; idx++) { 766 rtsarray[idx] = typesafe_alloc_one(); 767 if (!rtsarray[idx]) 768 return false; 769 } 770 if (cur_ops == &typesafe_ref_ops) { 771 rts_acquire = typesafe_ref_acquire; 772 rts_release = typesafe_ref_release; 773 } else if (cur_ops == &typesafe_lock_ops) { 774 rts_acquire = typesafe_lock_acquire; 775 rts_release = typesafe_lock_release; 776 } else if (cur_ops == &typesafe_seqlock_ops) { 777 rts_acquire = typesafe_seqlock_acquire; 778 rts_release = typesafe_seqlock_release; 779 } else { 780 WARN_ON_ONCE(1); 781 return false; 782 } 783 return true; 784 } 785 786 // Clean up after a typesafe test. 787 static void typesafe_cleanup(void) 788 { 789 long idx; 790 791 if (rtsarray) { 792 for (idx = 0; idx < rtsarray_size; idx++) 793 kmem_cache_free(typesafe_kmem_cachep, rtsarray[idx]); 794 kfree(rtsarray); 795 rtsarray = NULL; 796 rtsarray_size = 0; 797 } 798 kmem_cache_destroy(typesafe_kmem_cachep); 799 typesafe_kmem_cachep = NULL; 800 rts_acquire = NULL; 801 rts_release = NULL; 802 } 803 804 // The typesafe_init() function distinguishes these structures by address. 805 static const struct ref_scale_ops typesafe_ref_ops = { 806 .init = typesafe_init, 807 .cleanup = typesafe_cleanup, 808 .readsection = typesafe_read_section, 809 .delaysection = typesafe_delay_section, 810 .name = "typesafe_ref" 811 }; 812 813 static const struct ref_scale_ops typesafe_lock_ops = { 814 .init = typesafe_init, 815 .cleanup = typesafe_cleanup, 816 .readsection = typesafe_read_section, 817 .delaysection = typesafe_delay_section, 818 .name = "typesafe_lock" 819 }; 820 821 static const struct ref_scale_ops typesafe_seqlock_ops = { 822 .init = typesafe_init, 823 .cleanup = typesafe_cleanup, 824 .readsection = typesafe_read_section, 825 .delaysection = typesafe_delay_section, 826 .name = "typesafe_seqlock" 827 }; 828 829 static void rcu_scale_one_reader(void) 830 { 831 if (readdelay <= 0) 832 cur_ops->readsection(loops); 833 else 834 cur_ops->delaysection(loops, readdelay / 1000, readdelay % 1000); 835 } 836 837 // Warm up cache, or, if needed run a series of rcu_scale_one_reader() 838 // to allow multiple rcuscale guest OSes to collect mutually valid data. 839 static void rcu_scale_warm_cool(void) 840 { 841 unsigned long jdone = jiffies + (guest_os_delay > 0 ? guest_os_delay * HZ : -1); 842 843 do { 844 rcu_scale_one_reader(); 845 cond_resched(); 846 } while (time_before(jiffies, jdone)); 847 } 848 849 // Reader kthread. Repeatedly does empty RCU read-side 850 // critical section, minimizing update-side interference. 851 static int 852 ref_scale_reader(void *arg) 853 { 854 unsigned long flags; 855 long me = (long)arg; 856 struct reader_task *rt = &(reader_tasks[me]); 857 u64 start; 858 s64 duration; 859 860 VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: task started", me); 861 WARN_ON_ONCE(set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids))); 862 set_user_nice(current, MAX_NICE); 863 atomic_inc(&n_init); 864 if (holdoff) 865 schedule_timeout_interruptible(holdoff * HZ); 866 repeat: 867 VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: waiting to start next experiment on cpu %d", me, raw_smp_processor_id()); 868 869 // Wait for signal that this reader can start. 870 wait_event(rt->wq, (atomic_read(&nreaders_exp) && smp_load_acquire(&rt->start_reader)) || 871 torture_must_stop()); 872 873 if (torture_must_stop()) 874 goto end; 875 876 // Make sure that the CPU is affinitized appropriately during testing. 877 WARN_ON_ONCE(raw_smp_processor_id() != me % nr_cpu_ids); 878 879 WRITE_ONCE(rt->start_reader, 0); 880 if (!atomic_dec_return(&n_started)) 881 while (atomic_read_acquire(&n_started)) 882 cpu_relax(); 883 884 VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: experiment %d started", me, exp_idx); 885 886 887 // To reduce noise, do an initial cache-warming invocation, check 888 // in, and then keep warming until everyone has checked in. 889 rcu_scale_one_reader(); 890 if (!atomic_dec_return(&n_warmedup)) 891 while (atomic_read_acquire(&n_warmedup)) 892 rcu_scale_one_reader(); 893 // Also keep interrupts disabled. This also has the effect 894 // of preventing entries into slow path for rcu_read_unlock(). 895 local_irq_save(flags); 896 start = ktime_get_mono_fast_ns(); 897 898 rcu_scale_one_reader(); 899 900 duration = ktime_get_mono_fast_ns() - start; 901 local_irq_restore(flags); 902 903 rt->last_duration_ns = WARN_ON_ONCE(duration < 0) ? 0 : duration; 904 // To reduce runtime-skew noise, do maintain-load invocations until 905 // everyone is done. 906 if (!atomic_dec_return(&n_cooleddown)) 907 while (atomic_read_acquire(&n_cooleddown)) 908 rcu_scale_one_reader(); 909 910 if (atomic_dec_and_test(&nreaders_exp)) 911 wake_up(&main_wq); 912 913 VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: experiment %d ended, (readers remaining=%d)", 914 me, exp_idx, atomic_read(&nreaders_exp)); 915 916 if (!torture_must_stop()) 917 goto repeat; 918 end: 919 torture_kthread_stopping("ref_scale_reader"); 920 return 0; 921 } 922 923 static void reset_readers(void) 924 { 925 int i; 926 struct reader_task *rt; 927 928 for (i = 0; i < nreaders; i++) { 929 rt = &(reader_tasks[i]); 930 931 rt->last_duration_ns = 0; 932 } 933 } 934 935 // Print the results of each reader and return the sum of all their durations. 936 static u64 process_durations(int n) 937 { 938 int i; 939 struct reader_task *rt; 940 struct seq_buf s; 941 char *buf; 942 u64 sum = 0; 943 944 buf = kmalloc(800 + 64, GFP_KERNEL); 945 if (!buf) 946 return 0; 947 seq_buf_init(&s, buf, 800 + 64); 948 949 seq_buf_printf(&s, "Experiment #%d (Format: <THREAD-NUM>:<Total loop time in ns>)", 950 exp_idx); 951 952 for (i = 0; i < n && !torture_must_stop(); i++) { 953 rt = &(reader_tasks[i]); 954 955 if (i % 5 == 0) 956 seq_buf_putc(&s, '\n'); 957 958 if (seq_buf_used(&s) >= 800) { 959 pr_alert("%s", seq_buf_str(&s)); 960 seq_buf_clear(&s); 961 } 962 963 seq_buf_printf(&s, "%d: %llu\t", i, rt->last_duration_ns); 964 965 sum += rt->last_duration_ns; 966 } 967 pr_alert("%s\n", seq_buf_str(&s)); 968 969 kfree(buf); 970 return sum; 971 } 972 973 // The main_func is the main orchestrator, it performs a bunch of 974 // experiments. For every experiment, it orders all the readers 975 // involved to start and waits for them to finish the experiment. It 976 // then reads their timestamps and starts the next experiment. Each 977 // experiment progresses from 1 concurrent reader to N of them at which 978 // point all the timestamps are printed. 979 static int main_func(void *arg) 980 { 981 int exp, r; 982 char buf1[64]; 983 char *buf; 984 u64 *result_avg; 985 986 set_cpus_allowed_ptr(current, cpumask_of(nreaders % nr_cpu_ids)); 987 set_user_nice(current, MAX_NICE); 988 989 VERBOSE_SCALEOUT("main_func task started"); 990 result_avg = kzalloc(nruns * sizeof(*result_avg), GFP_KERNEL); 991 buf = kzalloc(800 + 64, GFP_KERNEL); 992 if (!result_avg || !buf) { 993 SCALEOUT_ERRSTRING("out of memory"); 994 goto oom_exit; 995 } 996 if (holdoff) 997 schedule_timeout_interruptible(holdoff * HZ); 998 999 // Wait for all threads to start. 1000 atomic_inc(&n_init); 1001 while (atomic_read(&n_init) < nreaders + 1) 1002 schedule_timeout_uninterruptible(1); 1003 1004 // Start exp readers up per experiment 1005 rcu_scale_warm_cool(); 1006 for (exp = 0; exp < nruns && !torture_must_stop(); exp++) { 1007 if (torture_must_stop()) 1008 goto end; 1009 1010 reset_readers(); 1011 atomic_set(&nreaders_exp, nreaders); 1012 atomic_set(&n_started, nreaders); 1013 atomic_set(&n_warmedup, nreaders); 1014 atomic_set(&n_cooleddown, nreaders); 1015 1016 exp_idx = exp; 1017 1018 for (r = 0; r < nreaders; r++) { 1019 smp_store_release(&reader_tasks[r].start_reader, 1); 1020 wake_up(&reader_tasks[r].wq); 1021 } 1022 1023 VERBOSE_SCALEOUT("main_func: experiment started, waiting for %d readers", 1024 nreaders); 1025 1026 wait_event(main_wq, 1027 !atomic_read(&nreaders_exp) || torture_must_stop()); 1028 1029 VERBOSE_SCALEOUT("main_func: experiment ended"); 1030 1031 if (torture_must_stop()) 1032 goto end; 1033 1034 result_avg[exp] = div_u64(1000 * process_durations(nreaders), nreaders * loops); 1035 } 1036 rcu_scale_warm_cool(); 1037 1038 // Print the average of all experiments 1039 SCALEOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n"); 1040 1041 pr_alert("Runs\tTime(ns)\n"); 1042 for (exp = 0; exp < nruns; exp++) { 1043 u64 avg; 1044 u32 rem; 1045 1046 avg = div_u64_rem(result_avg[exp], 1000, &rem); 1047 sprintf(buf1, "%d\t%llu.%03u\n", exp + 1, avg, rem); 1048 strcat(buf, buf1); 1049 if (strlen(buf) >= 800) { 1050 pr_alert("%s", buf); 1051 buf[0] = 0; 1052 } 1053 } 1054 1055 pr_alert("%s", buf); 1056 1057 oom_exit: 1058 // This will shutdown everything including us. 1059 if (shutdown) { 1060 shutdown_start = 1; 1061 wake_up(&shutdown_wq); 1062 } 1063 1064 // Wait for torture to stop us 1065 while (!torture_must_stop()) 1066 schedule_timeout_uninterruptible(1); 1067 1068 end: 1069 torture_kthread_stopping("main_func"); 1070 kfree(result_avg); 1071 kfree(buf); 1072 return 0; 1073 } 1074 1075 static void 1076 ref_scale_print_module_parms(const struct ref_scale_ops *cur_ops, const char *tag) 1077 { 1078 pr_alert("%s" SCALE_FLAG 1079 "--- %s: verbose=%d verbose_batched=%d shutdown=%d holdoff=%d lookup_instances=%ld loops=%ld nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag, 1080 verbose, verbose_batched, shutdown, holdoff, lookup_instances, loops, nreaders, nruns, readdelay); 1081 } 1082 1083 static void 1084 ref_scale_cleanup(void) 1085 { 1086 int i; 1087 1088 if (torture_cleanup_begin()) 1089 return; 1090 1091 if (!cur_ops) { 1092 torture_cleanup_end(); 1093 return; 1094 } 1095 1096 if (reader_tasks) { 1097 for (i = 0; i < nreaders; i++) 1098 torture_stop_kthread("ref_scale_reader", 1099 reader_tasks[i].task); 1100 } 1101 kfree(reader_tasks); 1102 1103 torture_stop_kthread("main_task", main_task); 1104 kfree(main_task); 1105 1106 // Do scale-type-specific cleanup operations. 1107 if (cur_ops->cleanup != NULL) 1108 cur_ops->cleanup(); 1109 1110 torture_cleanup_end(); 1111 } 1112 1113 // Shutdown kthread. Just waits to be awakened, then shuts down system. 1114 static int 1115 ref_scale_shutdown(void *arg) 1116 { 1117 wait_event_idle(shutdown_wq, shutdown_start); 1118 1119 smp_mb(); // Wake before output. 1120 ref_scale_cleanup(); 1121 kernel_power_off(); 1122 1123 return -EINVAL; 1124 } 1125 1126 static int __init 1127 ref_scale_init(void) 1128 { 1129 long i; 1130 int firsterr = 0; 1131 static const struct ref_scale_ops *scale_ops[] = { 1132 &rcu_ops, &srcu_ops, &srcu_lite_ops, RCU_TRACE_OPS RCU_TASKS_OPS 1133 &refcnt_ops, &rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, 1134 &clock_ops, &jiffies_ops, &typesafe_ref_ops, &typesafe_lock_ops, 1135 &typesafe_seqlock_ops, 1136 }; 1137 1138 if (!torture_init_begin(scale_type, verbose)) 1139 return -EBUSY; 1140 1141 for (i = 0; i < ARRAY_SIZE(scale_ops); i++) { 1142 cur_ops = scale_ops[i]; 1143 if (strcmp(scale_type, cur_ops->name) == 0) 1144 break; 1145 } 1146 if (i == ARRAY_SIZE(scale_ops)) { 1147 pr_alert("rcu-scale: invalid scale type: \"%s\"\n", scale_type); 1148 pr_alert("rcu-scale types:"); 1149 for (i = 0; i < ARRAY_SIZE(scale_ops); i++) 1150 pr_cont(" %s", scale_ops[i]->name); 1151 pr_cont("\n"); 1152 firsterr = -EINVAL; 1153 cur_ops = NULL; 1154 goto unwind; 1155 } 1156 if (cur_ops->init) 1157 if (!cur_ops->init()) { 1158 firsterr = -EUCLEAN; 1159 goto unwind; 1160 } 1161 1162 ref_scale_print_module_parms(cur_ops, "Start of test"); 1163 1164 // Shutdown task 1165 if (shutdown) { 1166 init_waitqueue_head(&shutdown_wq); 1167 firsterr = torture_create_kthread(ref_scale_shutdown, NULL, 1168 shutdown_task); 1169 if (torture_init_error(firsterr)) 1170 goto unwind; 1171 schedule_timeout_uninterruptible(1); 1172 } 1173 1174 // Reader tasks (default to ~75% of online CPUs). 1175 if (nreaders < 0) 1176 nreaders = (num_online_cpus() >> 1) + (num_online_cpus() >> 2); 1177 if (WARN_ONCE(loops <= 0, "%s: loops = %ld, adjusted to 1\n", __func__, loops)) 1178 loops = 1; 1179 if (WARN_ONCE(nreaders <= 0, "%s: nreaders = %d, adjusted to 1\n", __func__, nreaders)) 1180 nreaders = 1; 1181 if (WARN_ONCE(nruns <= 0, "%s: nruns = %d, adjusted to 1\n", __func__, nruns)) 1182 nruns = 1; 1183 reader_tasks = kcalloc(nreaders, sizeof(reader_tasks[0]), 1184 GFP_KERNEL); 1185 if (!reader_tasks) { 1186 SCALEOUT_ERRSTRING("out of memory"); 1187 firsterr = -ENOMEM; 1188 goto unwind; 1189 } 1190 1191 VERBOSE_SCALEOUT("Starting %d reader threads", nreaders); 1192 1193 for (i = 0; i < nreaders; i++) { 1194 init_waitqueue_head(&reader_tasks[i].wq); 1195 firsterr = torture_create_kthread(ref_scale_reader, (void *)i, 1196 reader_tasks[i].task); 1197 if (torture_init_error(firsterr)) 1198 goto unwind; 1199 } 1200 1201 // Main Task 1202 init_waitqueue_head(&main_wq); 1203 firsterr = torture_create_kthread(main_func, NULL, main_task); 1204 if (torture_init_error(firsterr)) 1205 goto unwind; 1206 1207 torture_init_end(); 1208 return 0; 1209 1210 unwind: 1211 torture_init_end(); 1212 ref_scale_cleanup(); 1213 if (shutdown) { 1214 WARN_ON(!IS_MODULE(CONFIG_RCU_REF_SCALE_TEST)); 1215 kernel_power_off(); 1216 } 1217 return firsterr; 1218 } 1219 1220 module_init(ref_scale_init); 1221 module_exit(ref_scale_cleanup); 1222