// SPDX-License-Identifier: GPL-2.0+
//
// Scalability test comparing RCU vs other mechanisms
// for acquiring references on objects.
//
// Copyright (C) Google, 2020.
//
// Author: Joel Fernandes <joel@joelfernandes.org>

#define pr_fmt(fmt) fmt

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kthread.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/rcupdate_trace.h>
#include <linux/reboot.h>
#include <linux/sched.h>
#include <linux/seq_buf.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/stat.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/torture.h>
#include <linux/types.h>
#include <linux/sched/clock.h>

#include "rcu.h"

#define SCALE_FLAG "-ref-scale: "

#define SCALEOUT(s, x...) \
	pr_alert("%s" SCALE_FLAG s, scale_type, ## x)

#define VERBOSE_SCALEOUT(s, x...) \
	do { \
		if (verbose) \
			pr_alert("%s" SCALE_FLAG s "\n", scale_type, ## x); \
	} while (0)

static atomic_t verbose_batch_ctr;

#define VERBOSE_SCALEOUT_BATCH(s, x...)						\
do {										\
	if (verbose &&								\
	    (verbose_batched <= 0 ||						\
	     !(atomic_inc_return(&verbose_batch_ctr) % verbose_batched))) {	\
		schedule_timeout_uninterruptible(1);				\
		pr_alert("%s" SCALE_FLAG s "\n", scale_type, ## x);		\
	}									\
} while (0)

#define SCALEOUT_ERRSTRING(s, x...) pr_alert("%s" SCALE_FLAG "!!! " s "\n", scale_type, ## x)

MODULE_DESCRIPTION("Scalability test for object reference mechanisms");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joel Fernandes (Google) <joel@joelfernandes.org>");

static char *scale_type = "rcu";
module_param(scale_type, charp, 0444);
MODULE_PARM_DESC(scale_type, "Type of test (rcu, srcu, refcnt, rwsem, rwlock).");

torture_param(int, verbose, 0, "Enable verbose debugging printk()s");
torture_param(int, verbose_batched, 0, "Batch verbose debugging printk()s");

// Number of seconds to extend warm-up and cool-down for multiple guest OSes
torture_param(long, guest_os_delay, 0,
	      "Number of seconds to extend warm-up/cool-down for multiple guest OSes.");
// Wait until there are multiple CPUs before starting test.
torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0,
	      "Holdoff time before test start (s)");
// Number of typesafe_lookup structures, that is, the degree of concurrency.
torture_param(long, lookup_instances, 0, "Number of typesafe_lookup structures.");
// Number of loops per experiment, all readers execute operations concurrently.
torture_param(int, loops, 10000, "Number of loops per experiment.");
// Number of readers, with -1 defaulting to about 75% of the CPUs.
torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs.");
// Number of runs.
torture_param(int, nruns, 30, "Number of experiments to run.");
// Reader delay in nanoseconds, 0 for no delay.
torture_param(int, readdelay, 0, "Read-side delay in nanoseconds.");

#ifdef MODULE
# define REFSCALE_SHUTDOWN 0
#else
# define REFSCALE_SHUTDOWN 1
#endif

torture_param(bool, shutdown, REFSCALE_SHUTDOWN,
	      "Shutdown at end of scalability tests.");

struct reader_task {
	struct task_struct *task;
	int start_reader;
	wait_queue_head_t wq;
	u64 last_duration_ns;
};

static struct task_struct *shutdown_task;
static wait_queue_head_t shutdown_wq;

static struct task_struct *main_task;
static wait_queue_head_t main_wq;
static int shutdown_start;

static struct reader_task *reader_tasks;

// Number of readers that are part of the current experiment.
static atomic_t nreaders_exp;

// Used to wait for all threads to start.
static atomic_t n_init;
static atomic_t n_started;
static atomic_t n_warmedup;
static atomic_t n_cooleddown;

// Track which experiment is currently running.
static int exp_idx;

// Operations vector for selecting different types of tests.
struct ref_scale_ops {
	bool (*init)(void);
	void (*cleanup)(void);
	void (*readsection)(const int nloops);
	void (*delaysection)(const int nloops, const int udl, const int ndl);
	bool enable_irqs;
	const char *name;
};

static const struct ref_scale_ops *cur_ops;

static void un_delay(const int udl, const int ndl)
{
	if (udl)
		udelay(udl);
	if (ndl)
		ndelay(ndl);
}

static void ref_rcu_read_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		rcu_read_lock();
		rcu_read_unlock();
	}
}

static void ref_rcu_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		rcu_read_lock();
		un_delay(udl, ndl);
		rcu_read_unlock();
	}
}

static bool rcu_sync_scale_init(void)
{
	return true;
}

static const struct ref_scale_ops rcu_ops = {
	.init = rcu_sync_scale_init,
	.readsection = ref_rcu_read_section,
	.delaysection = ref_rcu_delay_section,
	.name = "rcu"
};

// Definitions for SRCU ref scale testing.
DEFINE_STATIC_SRCU(srcu_refctl_scale);
DEFINE_STATIC_SRCU_FAST(srcu_fast_refctl_scale);
DEFINE_STATIC_SRCU_FAST_UPDOWN(srcu_fast_updown_refctl_scale);
static struct srcu_struct *srcu_ctlp = &srcu_refctl_scale;

static void srcu_ref_scale_read_section(const int nloops)
{
	int i;
	int idx;

	for (i = nloops; i >= 0; i--) {
		idx = srcu_read_lock(srcu_ctlp);
		srcu_read_unlock(srcu_ctlp, idx);
	}
}

static void srcu_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;
	int idx;

	for (i = nloops; i >= 0; i--) {
		idx = srcu_read_lock(srcu_ctlp);
		un_delay(udl, ndl);
		srcu_read_unlock(srcu_ctlp, idx);
	}
}

static const struct ref_scale_ops srcu_ops = {
	.init = rcu_sync_scale_init,
	.readsection = srcu_ref_scale_read_section,
	.delaysection = srcu_ref_scale_delay_section,
	.name = "srcu"
};

static bool srcu_fast_sync_scale_init(void)
{
	srcu_ctlp = &srcu_fast_refctl_scale;
	return true;
}

static void srcu_fast_ref_scale_read_section(const int nloops)
{
	int i;
	struct srcu_ctr __percpu *scp;

	for (i = nloops; i >= 0; i--) {
		scp = srcu_read_lock_fast(srcu_ctlp);
		srcu_read_unlock_fast(srcu_ctlp, scp);
	}
}

static void srcu_fast_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;
	struct srcu_ctr __percpu *scp;

	for (i = nloops; i >= 0; i--) {
		scp = srcu_read_lock_fast(srcu_ctlp);
		un_delay(udl, ndl);
		srcu_read_unlock_fast(srcu_ctlp, scp);
	}
}

static const struct ref_scale_ops srcu_fast_ops = {
	.init = srcu_fast_sync_scale_init,
	.readsection = srcu_fast_ref_scale_read_section,
	.delaysection = srcu_fast_ref_scale_delay_section,
	.name = "srcu-fast"
};

static bool srcu_fast_updown_sync_scale_init(void)
{
	srcu_ctlp = &srcu_fast_updown_refctl_scale;
	return true;
}

static void srcu_fast_updown_ref_scale_read_section(const int nloops)
{
	int i;
	struct srcu_ctr __percpu *scp;

	for (i = nloops; i >= 0; i--) {
		scp = srcu_read_lock_fast_updown(srcu_ctlp);
		srcu_read_unlock_fast_updown(srcu_ctlp, scp);
	}
}

static void srcu_fast_updown_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;
	struct srcu_ctr __percpu *scp;

	for (i = nloops; i >= 0; i--) {
		scp = srcu_read_lock_fast_updown(srcu_ctlp);
		un_delay(udl, ndl);
		srcu_read_unlock_fast_updown(srcu_ctlp, scp);
	}
}

static const struct ref_scale_ops srcu_fast_updown_ops = {
	.init = srcu_fast_updown_sync_scale_init,
	.readsection = srcu_fast_updown_ref_scale_read_section,
	.delaysection = srcu_fast_updown_ref_scale_delay_section,
	.name = "srcu-fast-updown"
};

#ifdef CONFIG_TASKS_RCU

// Definitions for RCU Tasks ref scale testing: Empty read markers.
// These definitions also work for RCU Rude readers.
static void rcu_tasks_ref_scale_read_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--)
		continue;
}

static void rcu_tasks_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--)
		un_delay(udl, ndl);
}

static const struct ref_scale_ops rcu_tasks_ops = {
	.init = rcu_sync_scale_init,
	.readsection = rcu_tasks_ref_scale_read_section,
	.delaysection = rcu_tasks_ref_scale_delay_section,
	.name = "rcu-tasks"
};

#define RCU_TASKS_OPS &rcu_tasks_ops,

#else // #ifdef CONFIG_TASKS_RCU

#define RCU_TASKS_OPS

#endif // #else // #ifdef CONFIG_TASKS_RCU

#ifdef CONFIG_TASKS_TRACE_RCU

// Definitions for RCU Tasks Trace ref scale testing.
static void rcu_trace_ref_scale_read_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		rcu_read_lock_trace();
		rcu_read_unlock_trace();
	}
}

static void rcu_trace_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		rcu_read_lock_trace();
		un_delay(udl, ndl);
		rcu_read_unlock_trace();
	}
}

static const struct ref_scale_ops rcu_trace_ops = {
	.init = rcu_sync_scale_init,
	.readsection = rcu_trace_ref_scale_read_section,
	.delaysection = rcu_trace_ref_scale_delay_section,
	.name = "rcu-trace"
};

#define RCU_TRACE_OPS &rcu_trace_ops,

#else // #ifdef CONFIG_TASKS_TRACE_RCU

#define RCU_TRACE_OPS

#endif // #else // #ifdef CONFIG_TASKS_TRACE_RCU

// Definitions for reference count.
static atomic_t refcnt;

// Definitions for acquire-release.
static DEFINE_PER_CPU(unsigned long, test_acqrel);

static void ref_refcnt_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		atomic_inc(&refcnt);
		atomic_dec(&refcnt);
	}
}

static void ref_refcnt_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		atomic_inc(&refcnt);
		un_delay(udl, ndl);
		atomic_dec(&refcnt);
	}
}

static const struct ref_scale_ops refcnt_ops = {
	.init = rcu_sync_scale_init,
	.readsection = ref_refcnt_section,
	.delaysection = ref_refcnt_delay_section,
	.name = "refcnt"
};

static void ref_percpuinc_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		this_cpu_inc(test_acqrel);
		this_cpu_dec(test_acqrel);
	}
}

static void ref_percpuinc_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		this_cpu_inc(test_acqrel);
		un_delay(udl, ndl);
		this_cpu_dec(test_acqrel);
	}
}

static const struct ref_scale_ops percpuinc_ops = {
	.init = rcu_sync_scale_init,
	.readsection = ref_percpuinc_section,
	.delaysection = ref_percpuinc_delay_section,
	.name = "percpuinc"
};

// Note that this can lose counts in preemptible kernels.
static void ref_incpercpu_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		unsigned long *tap = this_cpu_ptr(&test_acqrel);

		WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
		WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
	}
}

static void ref_incpercpu_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		unsigned long *tap = this_cpu_ptr(&test_acqrel);

		WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
		un_delay(udl, ndl);
		WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
	}
}

static const struct ref_scale_ops incpercpu_ops = {
	.init = rcu_sync_scale_init,
	.readsection = ref_incpercpu_section,
	.delaysection = ref_incpercpu_delay_section,
	.name = "incpercpu"
};

static void ref_incpercpupreempt_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		unsigned long *tap;

		preempt_disable();
		tap = this_cpu_ptr(&test_acqrel);
		WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
		WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
		preempt_enable();
	}
}

static void ref_incpercpupreempt_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		unsigned long *tap;

		preempt_disable();
		tap = this_cpu_ptr(&test_acqrel);
		WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
		un_delay(udl, ndl);
		WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
		preempt_enable();
	}
}

static const struct ref_scale_ops incpercpupreempt_ops = {
	.init = rcu_sync_scale_init,
	.readsection = ref_incpercpupreempt_section,
	.delaysection = ref_incpercpupreempt_delay_section,
	.name = "incpercpupreempt"
};

static void ref_incpercpubh_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		unsigned long *tap;

		local_bh_disable();
		tap = this_cpu_ptr(&test_acqrel);
		WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
		WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
		local_bh_enable();
	}
}

static void ref_incpercpubh_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		unsigned long *tap;

		local_bh_disable();
		tap = this_cpu_ptr(&test_acqrel);
		WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
		un_delay(udl, ndl);
		WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
		local_bh_enable();
	}
}

static const struct ref_scale_ops incpercpubh_ops = {
	.init = rcu_sync_scale_init,
	.readsection = ref_incpercpubh_section,
	.delaysection = ref_incpercpubh_delay_section,
	.enable_irqs = true,
	.name = "incpercpubh"
};

static void ref_incpercpuirqsave_section(const int nloops)
{
	int i;
	unsigned long flags;

	for (i = nloops; i >= 0; i--) {
		unsigned long *tap;

		local_irq_save(flags);
		tap = this_cpu_ptr(&test_acqrel);
		WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
		WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
		local_irq_restore(flags);
	}
}

static void ref_incpercpuirqsave_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;
	unsigned long flags;

	for (i = nloops; i >= 0; i--) {
		unsigned long *tap;

		local_irq_save(flags);
		tap = this_cpu_ptr(&test_acqrel);
		WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
		un_delay(udl, ndl);
		WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
		local_irq_restore(flags);
	}
}

static const struct ref_scale_ops incpercpuirqsave_ops = {
	.init = rcu_sync_scale_init,
	.readsection = ref_incpercpuirqsave_section,
	.delaysection = ref_incpercpuirqsave_delay_section,
	.name = "incpercpuirqsave"
};

// Definitions for rwlock
static rwlock_t test_rwlock;

static bool ref_rwlock_init(void)
{
	rwlock_init(&test_rwlock);
	return true;
}

static void ref_rwlock_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		read_lock(&test_rwlock);
		read_unlock(&test_rwlock);
	}
}

static void ref_rwlock_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		read_lock(&test_rwlock);
		un_delay(udl, ndl);
		read_unlock(&test_rwlock);
	}
}

static const struct ref_scale_ops rwlock_ops = {
	.init = ref_rwlock_init,
	.readsection = ref_rwlock_section,
	.delaysection = ref_rwlock_delay_section,
	.name = "rwlock"
};

// Definitions for rwsem
static struct rw_semaphore test_rwsem;

static bool ref_rwsem_init(void)
{
	init_rwsem(&test_rwsem);
	return true;
}

static void ref_rwsem_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		down_read(&test_rwsem);
		up_read(&test_rwsem);
	}
}

static void ref_rwsem_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--) {
		down_read(&test_rwsem);
		un_delay(udl, ndl);
		up_read(&test_rwsem);
	}
}

static const struct ref_scale_ops rwsem_ops = {
	.init = ref_rwsem_init,
	.readsection = ref_rwsem_section,
	.delaysection = ref_rwsem_delay_section,
	.name = "rwsem"
};

// Definitions for global spinlock
static DEFINE_RAW_SPINLOCK(test_lock);

static void ref_lock_section(const int nloops)
{
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		raw_spin_lock(&test_lock);
		raw_spin_unlock(&test_lock);
	}
	preempt_enable();
}

static void ref_lock_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		raw_spin_lock(&test_lock);
		un_delay(udl, ndl);
		raw_spin_unlock(&test_lock);
	}
	preempt_enable();
}

static const struct ref_scale_ops lock_ops = {
	.readsection = ref_lock_section,
	.delaysection = ref_lock_delay_section,
	.name = "lock"
};

// Definitions for global irq-save spinlock

static void ref_lock_irq_section(const int nloops)
{
	unsigned long flags;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		raw_spin_lock_irqsave(&test_lock, flags);
		raw_spin_unlock_irqrestore(&test_lock, flags);
	}
	preempt_enable();
}

static void ref_lock_irq_delay_section(const int nloops, const int udl, const int ndl)
{
	unsigned long flags;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		raw_spin_lock_irqsave(&test_lock, flags);
		un_delay(udl, ndl);
		raw_spin_unlock_irqrestore(&test_lock, flags);
	}
	preempt_enable();
}

static const struct ref_scale_ops lock_irq_ops = {
	.readsection = ref_lock_irq_section,
	.delaysection = ref_lock_irq_delay_section,
	.name = "lock-irq"
};

static void ref_acqrel_section(const int nloops)
{
	unsigned long x;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		x = smp_load_acquire(this_cpu_ptr(&test_acqrel));
		smp_store_release(this_cpu_ptr(&test_acqrel), x + 1);
	}
	preempt_enable();
}

static void ref_acqrel_delay_section(const int nloops, const int udl, const int ndl)
{
	unsigned long x;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		x = smp_load_acquire(this_cpu_ptr(&test_acqrel));
		un_delay(udl, ndl);
		smp_store_release(this_cpu_ptr(&test_acqrel), x + 1);
	}
	preempt_enable();
}

static const struct ref_scale_ops acqrel_ops = {
	.readsection = ref_acqrel_section,
	.delaysection = ref_acqrel_delay_section,
	.name = "acqrel"
};

static volatile u64 stopopts;

static void ref_sched_clock_section(const int nloops)
{
	u64 x = 0;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--)
		x += sched_clock();
	preempt_enable();
	stopopts = x;
}

static void ref_sched_clock_delay_section(const int nloops, const int udl, const int ndl)
{
	u64 x = 0;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		x += sched_clock();
		un_delay(udl, ndl);
	}
	preempt_enable();
	stopopts = x;
}

static const struct ref_scale_ops sched_clock_ops = {
	.readsection = ref_sched_clock_section,
	.delaysection = ref_sched_clock_delay_section,
	.name = "sched-clock"
};

static void ref_clock_section(const int nloops)
{
	u64 x = 0;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--)
		x += ktime_get_real_fast_ns();
	preempt_enable();
	stopopts = x;
}

static void ref_clock_delay_section(const int nloops, const int udl, const int ndl)
{
	u64 x = 0;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		x += ktime_get_real_fast_ns();
		un_delay(udl, ndl);
	}
	preempt_enable();
	stopopts = x;
}

static const struct ref_scale_ops clock_ops = {
	.readsection = ref_clock_section,
	.delaysection = ref_clock_delay_section,
	.name = "clock"
};

static void ref_jiffies_section(const int nloops)
{
	u64 x = 0;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--)
		x += jiffies;
	preempt_enable();
	stopopts = x;
}

static void ref_jiffies_delay_section(const int nloops, const int udl, const int ndl)
{
	u64 x = 0;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		x += jiffies;
		un_delay(udl, ndl);
	}
	preempt_enable();
	stopopts = x;
}

static const struct ref_scale_ops jiffies_ops = {
	.readsection = ref_jiffies_section,
	.delaysection = ref_jiffies_delay_section,
	.name = "jiffies"
};

static void ref_preempt_section(const int nloops)
{
	int i;

	migrate_disable();
	for (i = nloops; i >= 0; i--) {
		preempt_disable();
		preempt_enable();
	}
	migrate_enable();
}

static void ref_preempt_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	migrate_disable();
	for (i = nloops; i >= 0; i--) {
		preempt_disable();
		un_delay(udl, ndl);
		preempt_enable();
	}
	migrate_enable();
}

static const struct ref_scale_ops preempt_ops = {
	.readsection = ref_preempt_section,
	.delaysection = ref_preempt_delay_section,
	.name = "preempt"
};
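
/*
 * Each synchronization flavor above follows the same recipe: a
 * readsection() function that repeatedly enters and leaves an empty
 * read-side critical section, a delaysection() variant that calls
 * un_delay() inside that critical section, and a const ref_scale_ops
 * structure giving the flavor a name.  As a purely illustrative sketch
 * (hypothetical, not wired into this module), a "migrate" flavor built
 * on migrate_disable()/migrate_enable() could look as follows; it would
 * also need an entry in the scale_ops[] array in ref_scale_init():
 *
 *	static void ref_migrate_section(const int nloops)
 *	{
 *		int i;
 *
 *		// Measure only the cost of the migration-disable markers.
 *		for (i = nloops; i >= 0; i--) {
 *			migrate_disable();
 *			migrate_enable();
 *		}
 *	}
 *
 *	static void ref_migrate_delay_section(const int nloops, const int udl, const int ndl)
 *	{
 *		int i;
 *
 *		for (i = nloops; i >= 0; i--) {
 *			migrate_disable();
 *			un_delay(udl, ndl);
 *			migrate_enable();
 *		}
 *	}
 *
 *	static const struct ref_scale_ops migrate_ops = {
 *		.readsection = ref_migrate_section,
 *		.delaysection = ref_migrate_delay_section,
 *		.name = "migrate"
 *	};
 */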

static void ref_bh_section(const int nloops)
{
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		local_bh_disable();
		local_bh_enable();
	}
	preempt_enable();
}

static void ref_bh_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		local_bh_disable();
		un_delay(udl, ndl);
		local_bh_enable();
	}
	preempt_enable();
}

static const struct ref_scale_ops bh_ops = {
	.readsection = ref_bh_section,
	.delaysection = ref_bh_delay_section,
	.enable_irqs = true,
	.name = "bh"
};

static void ref_irq_section(const int nloops)
{
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		local_irq_disable();
		local_irq_enable();
	}
	preempt_enable();
}

static void ref_irq_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		local_irq_disable();
		un_delay(udl, ndl);
		local_irq_enable();
	}
	preempt_enable();
}

static const struct ref_scale_ops irq_ops = {
	.readsection = ref_irq_section,
	.delaysection = ref_irq_delay_section,
	.name = "irq"
};

static void ref_irqsave_section(const int nloops)
{
	unsigned long flags;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		local_irq_save(flags);
		local_irq_restore(flags);
	}
	preempt_enable();
}

static void ref_irqsave_delay_section(const int nloops, const int udl, const int ndl)
{
	unsigned long flags;
	int i;

	preempt_disable();
	for (i = nloops; i >= 0; i--) {
		local_irq_save(flags);
		un_delay(udl, ndl);
		local_irq_restore(flags);
	}
	preempt_enable();
}

static const struct ref_scale_ops irqsave_ops = {
	.readsection = ref_irqsave_section,
	.delaysection = ref_irqsave_delay_section,
	.name = "irqsave"
};

////////////////////////////////////////////////////////////////////////
//
// Methods leveraging SLAB_TYPESAFE_BY_RCU.
//

// Item to look up in a typesafe manner.  Array of pointers to these.
struct refscale_typesafe {
	atomic_t rts_refctr;  // Used by all flavors
	spinlock_t rts_lock;
	seqlock_t rts_seqlock;
	unsigned int a;
	unsigned int b;
};

static struct kmem_cache *typesafe_kmem_cachep;
static struct refscale_typesafe **rtsarray;
static long rtsarray_size;
static DEFINE_TORTURE_RANDOM_PERCPU(refscale_rand);
static bool (*rts_acquire)(struct refscale_typesafe *rtsp, unsigned int *start);
static bool (*rts_release)(struct refscale_typesafe *rtsp, unsigned int start);

// Conditionally acquire an explicit in-structure reference count.
static bool typesafe_ref_acquire(struct refscale_typesafe *rtsp, unsigned int *start)
{
	return atomic_inc_not_zero(&rtsp->rts_refctr);
}

// Unconditionally release an explicit in-structure reference count.
static bool typesafe_ref_release(struct refscale_typesafe *rtsp, unsigned int start)
{
	if (!atomic_dec_return(&rtsp->rts_refctr)) {
		WRITE_ONCE(rtsp->a, rtsp->a + 1);
		kmem_cache_free(typesafe_kmem_cachep, rtsp);
	}
	return true;
}

// Unconditionally acquire an explicit in-structure spinlock.
static bool typesafe_lock_acquire(struct refscale_typesafe *rtsp, unsigned int *start)
{
	spin_lock(&rtsp->rts_lock);
	return true;
}

// Unconditionally release an explicit in-structure spinlock.
static bool typesafe_lock_release(struct refscale_typesafe *rtsp, unsigned int start)
{
	spin_unlock(&rtsp->rts_lock);
	return true;
}

// Unconditionally acquire an explicit in-structure sequence lock.
static bool typesafe_seqlock_acquire(struct refscale_typesafe *rtsp, unsigned int *start)
{
	*start = read_seqbegin(&rtsp->rts_seqlock);
	return true;
}

// Conditionally release an explicit in-structure sequence lock.  Return
// true if this release was successful, that is, if no retry is required.
static bool typesafe_seqlock_release(struct refscale_typesafe *rtsp, unsigned int start)
{
	return !read_seqretry(&rtsp->rts_seqlock, start);
}

// Do a read-side critical section with the specified delay in
// microseconds and nanoseconds inserted so as to increase probability
// of failure.
static void typesafe_delay_section(const int nloops, const int udl, const int ndl)
{
	unsigned int a;
	unsigned int b;
	int i;
	long idx;
	struct refscale_typesafe *rtsp;
	unsigned int start;

	for (i = nloops; i >= 0; i--) {
		preempt_disable();
		idx = torture_random(this_cpu_ptr(&refscale_rand)) % rtsarray_size;
		preempt_enable();
retry:
		rcu_read_lock();
		rtsp = rcu_dereference(rtsarray[idx]);
		a = READ_ONCE(rtsp->a);
		if (!rts_acquire(rtsp, &start)) {
			rcu_read_unlock();
			goto retry;
		}
		if (a != READ_ONCE(rtsp->a)) {
			(void)rts_release(rtsp, start);
			rcu_read_unlock();
			goto retry;
		}
		un_delay(udl, ndl);
		b = READ_ONCE(rtsp->a);
		// Remember, seqlock read-side release can fail.
		if (!rts_release(rtsp, start)) {
			rcu_read_unlock();
			goto retry;
		}
		WARN_ONCE(a != b, "Re-read of ->a changed from %u to %u.\n", a, b);
		b = rtsp->b;
		rcu_read_unlock();
		WARN_ON_ONCE(a * a != b);
	}
}

// Because the acquisition and release methods are expensive, there
// is no point in optimizing away the un_delay() function's two checks.
// Thus simply define typesafe_read_section() as a simple wrapper around
// typesafe_delay_section().
static void typesafe_read_section(const int nloops)
{
	typesafe_delay_section(nloops, 0, 0);
}

// Allocate and initialize one refscale_typesafe structure.
static struct refscale_typesafe *typesafe_alloc_one(void)
{
	struct refscale_typesafe *rtsp;

	rtsp = kmem_cache_alloc(typesafe_kmem_cachep, GFP_KERNEL);
	if (!rtsp)
		return NULL;
	atomic_set(&rtsp->rts_refctr, 1);
	WRITE_ONCE(rtsp->a, rtsp->a + 1);
	WRITE_ONCE(rtsp->b, rtsp->a * rtsp->a);
	return rtsp;
}

// Slab-allocator constructor for refscale_typesafe structures created
// out of a new slab of system memory.
static void refscale_typesafe_ctor(void *rtsp_in)
{
	struct refscale_typesafe *rtsp = rtsp_in;

	spin_lock_init(&rtsp->rts_lock);
	seqlock_init(&rtsp->rts_seqlock);
	preempt_disable();
	rtsp->a = torture_random(this_cpu_ptr(&refscale_rand));
	preempt_enable();
}

static const struct ref_scale_ops typesafe_ref_ops;
static const struct ref_scale_ops typesafe_lock_ops;
static const struct ref_scale_ops typesafe_seqlock_ops;

// Initialize for a typesafe test.
static bool typesafe_init(void)
{
	long idx;
	long si = lookup_instances;

	typesafe_kmem_cachep = kmem_cache_create("refscale_typesafe",
						 sizeof(struct refscale_typesafe), sizeof(void *),
						 SLAB_TYPESAFE_BY_RCU, refscale_typesafe_ctor);
	if (!typesafe_kmem_cachep)
		return false;
	if (si < 0)
		si = -si * nr_cpu_ids;
	else if (si == 0)
		si = nr_cpu_ids;
	rtsarray_size = si;
	rtsarray = kcalloc(si, sizeof(*rtsarray), GFP_KERNEL);
	if (!rtsarray)
		return false;
	for (idx = 0; idx < rtsarray_size; idx++) {
		rtsarray[idx] = typesafe_alloc_one();
		if (!rtsarray[idx])
			return false;
	}
	if (cur_ops == &typesafe_ref_ops) {
		rts_acquire = typesafe_ref_acquire;
		rts_release = typesafe_ref_release;
	} else if (cur_ops == &typesafe_lock_ops) {
		rts_acquire = typesafe_lock_acquire;
		rts_release = typesafe_lock_release;
	} else if (cur_ops == &typesafe_seqlock_ops) {
		rts_acquire = typesafe_seqlock_acquire;
		rts_release = typesafe_seqlock_release;
	} else {
		WARN_ON_ONCE(1);
		return false;
	}
	return true;
}

// Clean up after a typesafe test.
static void typesafe_cleanup(void)
{
	long idx;

	if (rtsarray) {
		for (idx = 0; idx < rtsarray_size; idx++)
			kmem_cache_free(typesafe_kmem_cachep, rtsarray[idx]);
		kfree(rtsarray);
		rtsarray = NULL;
		rtsarray_size = 0;
	}
	kmem_cache_destroy(typesafe_kmem_cachep);
	typesafe_kmem_cachep = NULL;
	rts_acquire = NULL;
	rts_release = NULL;
}

// The typesafe_init() function distinguishes these structures by address.
static const struct ref_scale_ops typesafe_ref_ops = {
	.init = typesafe_init,
	.cleanup = typesafe_cleanup,
	.readsection = typesafe_read_section,
	.delaysection = typesafe_delay_section,
	.name = "typesafe_ref"
};

static const struct ref_scale_ops typesafe_lock_ops = {
	.init = typesafe_init,
	.cleanup = typesafe_cleanup,
	.readsection = typesafe_read_section,
	.delaysection = typesafe_delay_section,
	.name = "typesafe_lock"
};

static const struct ref_scale_ops typesafe_seqlock_ops = {
	.init = typesafe_init,
	.cleanup = typesafe_cleanup,
	.readsection = typesafe_read_section,
	.delaysection = typesafe_delay_section,
	.name = "typesafe_seqlock"
};

static void rcu_scale_one_reader(void)
{
	if (readdelay <= 0)
		cur_ops->readsection(loops);
	else
		cur_ops->delaysection(loops, readdelay / 1000, readdelay % 1000);
}

// Warm up cache, or, if needed, run a series of rcu_scale_one_reader() calls
// to allow multiple refscale guest OSes to collect mutually valid data.
static void rcu_scale_warm_cool(void)
{
	unsigned long jdone = jiffies + (guest_os_delay > 0 ?
					 guest_os_delay * HZ : -1);

	do {
		rcu_scale_one_reader();
		cond_resched();
	} while (time_before(jiffies, jdone));
}

// Reader kthread.  Repeatedly does empty RCU read-side
// critical section, minimizing update-side interference.
static int
ref_scale_reader(void *arg)
{
	unsigned long flags;
	long me = (long)arg;
	struct reader_task *rt = &(reader_tasks[me]);
	u64 start;
	s64 duration;

	VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: task started", me);
	WARN_ON_ONCE(set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)));
	set_user_nice(current, MAX_NICE);
	atomic_inc(&n_init);
	if (holdoff)
		schedule_timeout_interruptible(holdoff * HZ);
repeat:
	VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: waiting to start next experiment on cpu %d", me, raw_smp_processor_id());

	// Wait for signal that this reader can start.
	wait_event(rt->wq, (atomic_read(&nreaders_exp) && smp_load_acquire(&rt->start_reader)) ||
		   torture_must_stop());

	if (torture_must_stop())
		goto end;

	// Make sure that the CPU is affinitized appropriately during testing.
	WARN_ON_ONCE(raw_smp_processor_id() != me % nr_cpu_ids);

	WRITE_ONCE(rt->start_reader, 0);
	if (!atomic_dec_return(&n_started))
		while (atomic_read_acquire(&n_started))
			cpu_relax();

	VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: experiment %d started", me, exp_idx);

	// To reduce noise, do an initial cache-warming invocation, check
	// in, and then keep warming until everyone has checked in.
	rcu_scale_one_reader();
	if (!atomic_dec_return(&n_warmedup))
		while (atomic_read_acquire(&n_warmedup))
			rcu_scale_one_reader();
	// Also keep interrupts disabled when it is safe to do so, which
	// it is not for local_bh_enable().  This also has the effect of
	// preventing entries into slow path for rcu_read_unlock().
	if (!cur_ops->enable_irqs)
		local_irq_save(flags);
	start = ktime_get_mono_fast_ns();

	rcu_scale_one_reader();

	duration = ktime_get_mono_fast_ns() - start;
	if (!cur_ops->enable_irqs)
		local_irq_restore(flags);

	rt->last_duration_ns = WARN_ON_ONCE(duration < 0) ? 0 : duration;
	// To reduce runtime-skew noise, do maintain-load invocations until
	// everyone is done.
	if (!atomic_dec_return(&n_cooleddown))
		while (atomic_read_acquire(&n_cooleddown))
			rcu_scale_one_reader();

	if (atomic_dec_and_test(&nreaders_exp))
		wake_up(&main_wq);

	VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: experiment %d ended, (readers remaining=%d)",
			       me, exp_idx, atomic_read(&nreaders_exp));

	if (!torture_must_stop())
		goto repeat;
end:
	torture_kthread_stopping("ref_scale_reader");
	return 0;
}

static void reset_readers(void)
{
	int i;
	struct reader_task *rt;

	for (i = 0; i < nreaders; i++) {
		rt = &(reader_tasks[i]);

		rt->last_duration_ns = 0;
	}
}

// Print the results of each reader and return the sum of all their durations.
static u64 process_durations(int n)
{
	int i;
	struct reader_task *rt;
	struct seq_buf s;
	char *buf;
	u64 sum = 0;

	buf = kmalloc(800 + 64, GFP_KERNEL);
	if (!buf)
		return 0;
	seq_buf_init(&s, buf, 800 + 64);

	seq_buf_printf(&s, "Experiment #%d (Format: <THREAD-NUM>:<Total loop time in ns>)",
		       exp_idx);

	for (i = 0; i < n && !torture_must_stop(); i++) {
		rt = &(reader_tasks[i]);

		if (i % 5 == 0)
			seq_buf_putc(&s, '\n');

		if (seq_buf_used(&s) >= 800) {
			pr_alert("%s", seq_buf_str(&s));
			seq_buf_clear(&s);
		}

		seq_buf_printf(&s, "%d: %llu\t", i, rt->last_duration_ns);

		sum += rt->last_duration_ns;
	}
	pr_alert("%s\n", seq_buf_str(&s));

	kfree(buf);
	return sum;
}

// The main_func is the main orchestrator; it performs a series of
// experiments.  For every experiment, it orders all the readers
// involved to start and waits for them to finish the experiment.  It
// then reads their timestamps and starts the next experiment.  Once all
// nruns experiments are done, the per-experiment averages are printed.
static int main_func(void *arg)
{
	int exp, r;
	char buf1[64];
	char *buf;
	u64 *result_avg;

	set_cpus_allowed_ptr(current, cpumask_of(nreaders % nr_cpu_ids));
	set_user_nice(current, MAX_NICE);

	VERBOSE_SCALEOUT("main_func task started");
	result_avg = kcalloc(nruns, sizeof(*result_avg), GFP_KERNEL);
	buf = kzalloc(800 + 64, GFP_KERNEL);
	if (!result_avg || !buf) {
		SCALEOUT_ERRSTRING("out of memory");
		goto oom_exit;
	}
	if (holdoff)
		schedule_timeout_interruptible(holdoff * HZ);

	// Wait for all threads to start.
	atomic_inc(&n_init);
	while (atomic_read(&n_init) < nreaders + 1)
		schedule_timeout_uninterruptible(1);

	// Start exp readers up per experiment
	rcu_scale_warm_cool();
	for (exp = 0; exp < nruns && !torture_must_stop(); exp++) {
		if (torture_must_stop())
			goto end;

		reset_readers();
		atomic_set(&nreaders_exp, nreaders);
		atomic_set(&n_started, nreaders);
		atomic_set(&n_warmedup, nreaders);
		atomic_set(&n_cooleddown, nreaders);

		exp_idx = exp;

		for (r = 0; r < nreaders; r++) {
			smp_store_release(&reader_tasks[r].start_reader, 1);
			wake_up(&reader_tasks[r].wq);
		}

		VERBOSE_SCALEOUT("main_func: experiment started, waiting for %d readers",
				 nreaders);

		wait_event(main_wq,
			   !atomic_read(&nreaders_exp) || torture_must_stop());

		VERBOSE_SCALEOUT("main_func: experiment ended");

		if (torture_must_stop())
			goto end;

		result_avg[exp] = div_u64(1000 * process_durations(nreaders), nreaders * loops);
	}
	rcu_scale_warm_cool();

	// Print the average of all experiments
	SCALEOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n");

	pr_alert("Runs\tTime(ns)\n");
	for (exp = 0; exp < nruns; exp++) {
		u64 avg;
		u32 rem;

		avg = div_u64_rem(result_avg[exp], 1000, &rem);
		sprintf(buf1, "%d\t%llu.%03u\n", exp + 1, avg, rem);
		strcat(buf, buf1);
		if (strlen(buf) >= 800) {
			pr_alert("%s", buf);
			buf[0] = 0;
		}
	}

	pr_alert("%s", buf);

oom_exit:
	// This will shut down everything, including us.
	if (shutdown) {
		shutdown_start = 1;
		wake_up(&shutdown_wq);
	}

	// Wait for torture to stop us
	while (!torture_must_stop())
		schedule_timeout_uninterruptible(1);

end:
	torture_kthread_stopping("main_func");
	kfree(result_avg);
	kfree(buf);
	return 0;
}

static void
ref_scale_print_module_parms(const struct ref_scale_ops *cur_ops, const char *tag)
{
	pr_alert("%s" SCALE_FLAG
		 "--- %s: verbose=%d verbose_batched=%d shutdown=%d holdoff=%d lookup_instances=%ld loops=%d nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag,
		 verbose, verbose_batched, shutdown, holdoff, lookup_instances, loops, nreaders, nruns, readdelay);
}

static void
ref_scale_cleanup(void)
{
	int i;

	if (torture_cleanup_begin())
		return;

	if (!cur_ops) {
		torture_cleanup_end();
		return;
	}

	if (reader_tasks) {
		for (i = 0; i < nreaders; i++)
			torture_stop_kthread("ref_scale_reader",
					     reader_tasks[i].task);
	}
	kfree(reader_tasks);
	reader_tasks = NULL;

	torture_stop_kthread("main_task", main_task);

	// Do scale-type-specific cleanup operations.
	if (cur_ops->cleanup != NULL)
		cur_ops->cleanup();

	torture_cleanup_end();
}

// Shutdown kthread.  Just waits to be awakened, then shuts down system.
static int
ref_scale_shutdown(void *arg)
{
	wait_event_idle(shutdown_wq, shutdown_start);

	smp_mb(); // Wake before output.
	ref_scale_cleanup();
	kernel_power_off();

	return -EINVAL;
}

static int __init
ref_scale_init(void)
{
	long i;
	int firsterr = 0;
	static const struct ref_scale_ops *scale_ops[] = {
		&rcu_ops, &srcu_ops, &srcu_fast_ops, &srcu_fast_updown_ops,
		RCU_TRACE_OPS RCU_TASKS_OPS
		&refcnt_ops, &percpuinc_ops, &incpercpu_ops, &incpercpupreempt_ops,
		&incpercpubh_ops, &incpercpuirqsave_ops,
		&rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops,
		&sched_clock_ops, &clock_ops, &jiffies_ops,
		&preempt_ops, &bh_ops, &irq_ops, &irqsave_ops,
		&typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops,
	};

	if (!torture_init_begin(scale_type, verbose))
		return -EBUSY;

	for (i = 0; i < ARRAY_SIZE(scale_ops); i++) {
		cur_ops = scale_ops[i];
		if (strcmp(scale_type, cur_ops->name) == 0)
			break;
	}
	if (i == ARRAY_SIZE(scale_ops)) {
		pr_alert("rcu-scale: invalid scale type: \"%s\"\n", scale_type);
		pr_alert("rcu-scale types:");
		for (i = 0; i < ARRAY_SIZE(scale_ops); i++)
			pr_cont(" %s", scale_ops[i]->name);
		pr_cont("\n");
		firsterr = -EINVAL;
		cur_ops = NULL;
		goto unwind;
	}
	if (cur_ops->init)
		if (!cur_ops->init()) {
			firsterr = -EUCLEAN;
			goto unwind;
		}

	ref_scale_print_module_parms(cur_ops, "Start of test");

	// Shutdown task
	if (shutdown) {
		init_waitqueue_head(&shutdown_wq);
		firsterr = torture_create_kthread(ref_scale_shutdown, NULL,
						  shutdown_task);
		if (torture_init_error(firsterr))
			goto unwind;
		schedule_timeout_uninterruptible(1);
	}

	// Reader tasks (default to ~75% of online CPUs).
	if (nreaders < 0)
		nreaders = (num_online_cpus() >> 1) + (num_online_cpus() >> 2);
	if (WARN_ONCE(loops <= 0, "%s: loops = %d, adjusted to 1\n", __func__, loops))
		loops = 1;
	if (WARN_ONCE(nreaders <= 0, "%s: nreaders = %d, adjusted to 1\n", __func__, nreaders))
		nreaders = 1;
	if (WARN_ONCE(nruns <= 0, "%s: nruns = %d, adjusted to 1\n", __func__, nruns))
		nruns = 1;
	if (WARN_ONCE(loops > INT_MAX / nreaders,
		      "%s: nreaders * loops will overflow, adjusted loops to %d",
		      __func__, INT_MAX / nreaders))
		loops = INT_MAX / nreaders;
	reader_tasks = kcalloc(nreaders, sizeof(reader_tasks[0]),
			       GFP_KERNEL);
	if (!reader_tasks) {
		SCALEOUT_ERRSTRING("out of memory");
		firsterr = -ENOMEM;
		goto unwind;
	}

	VERBOSE_SCALEOUT("Starting %d reader threads", nreaders);

	for (i = 0; i < nreaders; i++) {
		init_waitqueue_head(&reader_tasks[i].wq);
		firsterr = torture_create_kthread(ref_scale_reader, (void *)i,
						  reader_tasks[i].task);
		if (torture_init_error(firsterr))
			goto unwind;
	}

	// Main Task
	init_waitqueue_head(&main_wq);
	firsterr = torture_create_kthread(main_func, NULL, main_task);
	if (torture_init_error(firsterr))
		goto unwind;

	torture_init_end();
	return 0;

unwind:
	torture_init_end();
	ref_scale_cleanup();
	if (shutdown) {
		WARN_ON(!IS_MODULE(CONFIG_RCU_REF_SCALE_TEST));
		kernel_power_off();
	}
	return firsterr;
}

module_init(ref_scale_init);
module_exit(ref_scale_cleanup);
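
/*
 * Example invocation (illustrative only; the parameter values below are
 * arbitrary):
 *
 *	modprobe refscale scale_type=srcu nreaders=8 nruns=10 loops=100000 readdelay=0
 *
 * For each of the nruns experiments, main_func() prints one average
 * read-side duration per loop, in nanoseconds, under the "Runs Time(ns)"
 * banner emitted via pr_alert().
 */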