1 // SPDX-License-Identifier: GPL-2.0+ 2 // 3 // Scalability test comparing RCU vs other mechanisms 4 // for acquiring references on objects. 5 // 6 // Copyright (C) Google, 2020. 7 // 8 // Author: Joel Fernandes <joel@joelfernandes.org> 9 10 #define pr_fmt(fmt) fmt 11 12 #include <linux/atomic.h> 13 #include <linux/bitops.h> 14 #include <linux/completion.h> 15 #include <linux/cpu.h> 16 #include <linux/delay.h> 17 #include <linux/err.h> 18 #include <linux/init.h> 19 #include <linux/interrupt.h> 20 #include <linux/kthread.h> 21 #include <linux/kernel.h> 22 #include <linux/mm.h> 23 #include <linux/module.h> 24 #include <linux/moduleparam.h> 25 #include <linux/notifier.h> 26 #include <linux/percpu.h> 27 #include <linux/rcupdate.h> 28 #include <linux/rcupdate_trace.h> 29 #include <linux/reboot.h> 30 #include <linux/sched.h> 31 #include <linux/seq_buf.h> 32 #include <linux/spinlock.h> 33 #include <linux/smp.h> 34 #include <linux/stat.h> 35 #include <linux/srcu.h> 36 #include <linux/slab.h> 37 #include <linux/torture.h> 38 #include <linux/types.h> 39 #include <linux/sched/clock.h> 40 41 #include "rcu.h" 42 43 #define SCALE_FLAG "-ref-scale: " 44 45 #define SCALEOUT(s, x...) \ 46 pr_alert("%s" SCALE_FLAG s, scale_type, ## x) 47 48 #define VERBOSE_SCALEOUT(s, x...) \ 49 do { \ 50 if (verbose) \ 51 pr_alert("%s" SCALE_FLAG s "\n", scale_type, ## x); \ 52 } while (0) 53 54 static atomic_t verbose_batch_ctr; 55 56 #define VERBOSE_SCALEOUT_BATCH(s, x...) \ 57 do { \ 58 if (verbose && \ 59 (verbose_batched <= 0 || \ 60 !(atomic_inc_return(&verbose_batch_ctr) % verbose_batched))) { \ 61 schedule_timeout_uninterruptible(1); \ 62 pr_alert("%s" SCALE_FLAG s "\n", scale_type, ## x); \ 63 } \ 64 } while (0) 65 66 #define SCALEOUT_ERRSTRING(s, x...) pr_alert("%s" SCALE_FLAG "!!! 
" s "\n", scale_type, ## x) 67 68 MODULE_DESCRIPTION("Scalability test for object reference mechanisms"); 69 MODULE_LICENSE("GPL"); 70 MODULE_AUTHOR("Joel Fernandes (Google) <joel@joelfernandes.org>"); 71 72 static char *scale_type = "rcu"; 73 module_param(scale_type, charp, 0444); 74 MODULE_PARM_DESC(scale_type, "Type of test (rcu, srcu, refcnt, rwsem, rwlock."); 75 76 torture_param(int, verbose, 0, "Enable verbose debugging printk()s"); 77 torture_param(int, verbose_batched, 0, "Batch verbose debugging printk()s"); 78 79 // Number of seconds to extend warm-up and cool-down for multiple guest OSes 80 torture_param(long, guest_os_delay, 0, 81 "Number of seconds to extend warm-up/cool-down for multiple guest OSes."); 82 // Wait until there are multiple CPUs before starting test. 83 torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0, 84 "Holdoff time before test start (s)"); 85 // Number of typesafe_lookup structures, that is, the degree of concurrency. 86 torture_param(long, lookup_instances, 0, "Number of typesafe_lookup structures."); 87 // Number of loops per experiment, all readers execute operations concurrently. 88 torture_param(int, loops, 10000, "Number of loops per experiment."); 89 // Number of readers, with -1 defaulting to about 75% of the CPUs. 90 torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs."); 91 // Number of runs. 92 torture_param(int, nruns, 30, "Number of experiments to run."); 93 // Reader delay in nanoseconds, 0 for no delay. 94 torture_param(int, readdelay, 0, "Read-side delay in nanoseconds."); 95 // Maximum shutdown delay in seconds, or zero for no shutdown. 
96 torture_param(int, shutdown_secs, !IS_MODULE(CONFIG_REPRO_TEST) * 300, 97 "Shutdown at end of scalability tests or at specified timeout (s)."); 98 99 struct reader_task { 100 struct task_struct *task; 101 int start_reader; 102 wait_queue_head_t wq; 103 u64 last_duration_ns; 104 }; 105 106 static struct task_struct *main_task; 107 static wait_queue_head_t main_wq; 108 109 static struct reader_task *reader_tasks; 110 111 // Number of readers that are part of the current experiment. 112 static atomic_t nreaders_exp; 113 114 // Use to wait for all threads to start. 115 static atomic_t n_init; 116 static atomic_t n_started; 117 static atomic_t n_warmedup; 118 static atomic_t n_cooleddown; 119 120 // Track which experiment is currently running. 121 static int exp_idx; 122 123 // Operations vector for selecting different types of tests. 124 struct ref_scale_ops { 125 bool (*init)(void); 126 void (*cleanup)(void); 127 void (*readsection)(const int nloops); 128 void (*delaysection)(const int nloops, const int udl, const int ndl); 129 bool enable_irqs; 130 const char *name; 131 }; 132 133 static const struct ref_scale_ops *cur_ops; 134 135 static void un_delay(const int udl, const int ndl) 136 { 137 if (udl) 138 udelay(udl); 139 if (ndl) 140 ndelay(ndl); 141 } 142 143 static void ref_rcu_read_section(const int nloops) 144 { 145 int i; 146 147 for (i = nloops; i >= 0; i--) { 148 rcu_read_lock(); 149 rcu_read_unlock(); 150 } 151 } 152 153 static void ref_rcu_delay_section(const int nloops, const int udl, const int ndl) 154 { 155 int i; 156 157 for (i = nloops; i >= 0; i--) { 158 rcu_read_lock(); 159 un_delay(udl, ndl); 160 rcu_read_unlock(); 161 } 162 } 163 164 static bool rcu_sync_scale_init(void) 165 { 166 return true; 167 } 168 169 static const struct ref_scale_ops rcu_ops = { 170 .init = rcu_sync_scale_init, 171 .readsection = ref_rcu_read_section, 172 .delaysection = ref_rcu_delay_section, 173 .name = "rcu" 174 }; 175 176 // Definitions for SRCU ref scale testing. 
177 DEFINE_STATIC_SRCU(srcu_refctl_scale); 178 DEFINE_STATIC_SRCU_FAST(srcu_fast_refctl_scale); 179 DEFINE_STATIC_SRCU_FAST_UPDOWN(srcu_fast_updown_refctl_scale); 180 static struct srcu_struct *srcu_ctlp = &srcu_refctl_scale; 181 182 static void srcu_ref_scale_read_section(const int nloops) 183 { 184 int i; 185 int idx; 186 187 for (i = nloops; i >= 0; i--) { 188 idx = srcu_read_lock(srcu_ctlp); 189 srcu_read_unlock(srcu_ctlp, idx); 190 } 191 } 192 193 static void srcu_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 194 { 195 int i; 196 int idx; 197 198 for (i = nloops; i >= 0; i--) { 199 idx = srcu_read_lock(srcu_ctlp); 200 un_delay(udl, ndl); 201 srcu_read_unlock(srcu_ctlp, idx); 202 } 203 } 204 205 static const struct ref_scale_ops srcu_ops = { 206 .init = rcu_sync_scale_init, 207 .readsection = srcu_ref_scale_read_section, 208 .delaysection = srcu_ref_scale_delay_section, 209 .name = "srcu" 210 }; 211 212 static bool srcu_fast_sync_scale_init(void) 213 { 214 srcu_ctlp = &srcu_fast_refctl_scale; 215 return true; 216 } 217 218 static void srcu_fast_ref_scale_read_section(const int nloops) 219 { 220 int i; 221 struct srcu_ctr __percpu *scp; 222 223 for (i = nloops; i >= 0; i--) { 224 scp = srcu_read_lock_fast(srcu_ctlp); 225 srcu_read_unlock_fast(srcu_ctlp, scp); 226 } 227 } 228 229 static void srcu_fast_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 230 { 231 int i; 232 struct srcu_ctr __percpu *scp; 233 234 for (i = nloops; i >= 0; i--) { 235 scp = srcu_read_lock_fast(srcu_ctlp); 236 un_delay(udl, ndl); 237 srcu_read_unlock_fast(srcu_ctlp, scp); 238 } 239 } 240 241 static const struct ref_scale_ops srcu_fast_ops = { 242 .init = srcu_fast_sync_scale_init, 243 .readsection = srcu_fast_ref_scale_read_section, 244 .delaysection = srcu_fast_ref_scale_delay_section, 245 .name = "srcu-fast" 246 }; 247 248 static bool srcu_fast_updown_sync_scale_init(void) 249 { 250 srcu_ctlp = &srcu_fast_updown_refctl_scale; 251 return 
true; 252 } 253 254 static void srcu_fast_updown_ref_scale_read_section(const int nloops) 255 { 256 int i; 257 struct srcu_ctr __percpu *scp; 258 259 for (i = nloops; i >= 0; i--) { 260 scp = srcu_read_lock_fast_updown(srcu_ctlp); 261 srcu_read_unlock_fast_updown(srcu_ctlp, scp); 262 } 263 } 264 265 static void srcu_fast_updown_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 266 { 267 int i; 268 struct srcu_ctr __percpu *scp; 269 270 for (i = nloops; i >= 0; i--) { 271 scp = srcu_read_lock_fast_updown(srcu_ctlp); 272 un_delay(udl, ndl); 273 srcu_read_unlock_fast_updown(srcu_ctlp, scp); 274 } 275 } 276 277 static const struct ref_scale_ops srcu_fast_updown_ops = { 278 .init = srcu_fast_updown_sync_scale_init, 279 .readsection = srcu_fast_updown_ref_scale_read_section, 280 .delaysection = srcu_fast_updown_ref_scale_delay_section, 281 .name = "srcu-fast-updown" 282 }; 283 284 #ifdef CONFIG_TASKS_RCU 285 286 // Definitions for RCU Tasks ref scale testing: Empty read markers. 287 // These definitions also work for RCU Rude readers. 288 static void rcu_tasks_ref_scale_read_section(const int nloops) 289 { 290 int i; 291 292 for (i = nloops; i >= 0; i--) 293 continue; 294 } 295 296 static void rcu_tasks_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 297 { 298 int i; 299 300 for (i = nloops; i >= 0; i--) 301 un_delay(udl, ndl); 302 } 303 304 static const struct ref_scale_ops rcu_tasks_ops = { 305 .init = rcu_sync_scale_init, 306 .readsection = rcu_tasks_ref_scale_read_section, 307 .delaysection = rcu_tasks_ref_scale_delay_section, 308 .name = "rcu-tasks" 309 }; 310 311 #define RCU_TASKS_OPS &rcu_tasks_ops, 312 313 #else // #ifdef CONFIG_TASKS_RCU 314 315 #define RCU_TASKS_OPS 316 317 #endif // #else // #ifdef CONFIG_TASKS_RCU 318 319 #ifdef CONFIG_TASKS_TRACE_RCU 320 321 // Definitions for RCU Tasks Trace ref scale testing. 
322 static void rcu_trace_ref_scale_read_section(const int nloops) 323 { 324 int i; 325 326 for (i = nloops; i >= 0; i--) { 327 rcu_read_lock_trace(); 328 rcu_read_unlock_trace(); 329 } 330 } 331 332 static void rcu_trace_ref_scale_delay_section(const int nloops, const int udl, const int ndl) 333 { 334 int i; 335 336 for (i = nloops; i >= 0; i--) { 337 rcu_read_lock_trace(); 338 un_delay(udl, ndl); 339 rcu_read_unlock_trace(); 340 } 341 } 342 343 static const struct ref_scale_ops rcu_trace_ops = { 344 .init = rcu_sync_scale_init, 345 .readsection = rcu_trace_ref_scale_read_section, 346 .delaysection = rcu_trace_ref_scale_delay_section, 347 .name = "rcu-trace" 348 }; 349 350 #define RCU_TRACE_OPS &rcu_trace_ops, 351 352 #else // #ifdef CONFIG_TASKS_TRACE_RCU 353 354 #define RCU_TRACE_OPS 355 356 #endif // #else // #ifdef CONFIG_TASKS_TRACE_RCU 357 358 // Definitions for reference count 359 static atomic_t refcnt; 360 361 // Definitions acquire-release. 362 static DEFINE_PER_CPU(unsigned long, test_acqrel); 363 364 static void ref_refcnt_section(const int nloops) 365 { 366 int i; 367 368 for (i = nloops; i >= 0; i--) { 369 atomic_inc(&refcnt); 370 atomic_dec(&refcnt); 371 } 372 } 373 374 static void ref_refcnt_delay_section(const int nloops, const int udl, const int ndl) 375 { 376 int i; 377 378 for (i = nloops; i >= 0; i--) { 379 atomic_inc(&refcnt); 380 un_delay(udl, ndl); 381 atomic_dec(&refcnt); 382 } 383 } 384 385 static const struct ref_scale_ops refcnt_ops = { 386 .init = rcu_sync_scale_init, 387 .readsection = ref_refcnt_section, 388 .delaysection = ref_refcnt_delay_section, 389 .name = "refcnt" 390 }; 391 392 static void ref_percpuinc_section(const int nloops) 393 { 394 int i; 395 396 for (i = nloops; i >= 0; i--) { 397 this_cpu_inc(test_acqrel); 398 this_cpu_dec(test_acqrel); 399 } 400 } 401 402 static void ref_percpuinc_delay_section(const int nloops, const int udl, const int ndl) 403 { 404 int i; 405 406 for (i = nloops; i >= 0; i--) { 407 
this_cpu_inc(test_acqrel); 408 un_delay(udl, ndl); 409 this_cpu_dec(test_acqrel); 410 } 411 } 412 413 static const struct ref_scale_ops percpuinc_ops = { 414 .init = rcu_sync_scale_init, 415 .readsection = ref_percpuinc_section, 416 .delaysection = ref_percpuinc_delay_section, 417 .name = "percpuinc" 418 }; 419 420 // Note that this can lose counts in preemptible kernels. 421 static void ref_incpercpu_section(const int nloops) 422 { 423 int i; 424 425 for (i = nloops; i >= 0; i--) { 426 unsigned long *tap = this_cpu_ptr(&test_acqrel); 427 428 WRITE_ONCE(*tap, READ_ONCE(*tap) + 1); 429 WRITE_ONCE(*tap, READ_ONCE(*tap) - 1); 430 } 431 } 432 433 static void ref_incpercpu_delay_section(const int nloops, const int udl, const int ndl) 434 { 435 int i; 436 437 for (i = nloops; i >= 0; i--) { 438 unsigned long *tap = this_cpu_ptr(&test_acqrel); 439 440 WRITE_ONCE(*tap, READ_ONCE(*tap) + 1); 441 un_delay(udl, ndl); 442 WRITE_ONCE(*tap, READ_ONCE(*tap) - 1); 443 } 444 } 445 446 static const struct ref_scale_ops incpercpu_ops = { 447 .init = rcu_sync_scale_init, 448 .readsection = ref_incpercpu_section, 449 .delaysection = ref_incpercpu_delay_section, 450 .name = "incpercpu" 451 }; 452 453 static void ref_incpercpupreempt_section(const int nloops) 454 { 455 int i; 456 457 for (i = nloops; i >= 0; i--) { 458 unsigned long *tap; 459 460 preempt_disable(); 461 tap = this_cpu_ptr(&test_acqrel); 462 WRITE_ONCE(*tap, READ_ONCE(*tap) + 1); 463 WRITE_ONCE(*tap, READ_ONCE(*tap) - 1); 464 preempt_enable(); 465 } 466 } 467 468 static void ref_incpercpupreempt_delay_section(const int nloops, const int udl, const int ndl) 469 { 470 int i; 471 472 for (i = nloops; i >= 0; i--) { 473 unsigned long *tap; 474 475 preempt_disable(); 476 tap = this_cpu_ptr(&test_acqrel); 477 WRITE_ONCE(*tap, READ_ONCE(*tap) + 1); 478 un_delay(udl, ndl); 479 WRITE_ONCE(*tap, READ_ONCE(*tap) - 1); 480 preempt_enable(); 481 } 482 } 483 484 static const struct ref_scale_ops incpercpupreempt_ops = { 485 .init = 
rcu_sync_scale_init, 486 .readsection = ref_incpercpupreempt_section, 487 .delaysection = ref_incpercpupreempt_delay_section, 488 .name = "incpercpupreempt" 489 }; 490 491 static void ref_incpercpubh_section(const int nloops) 492 { 493 int i; 494 495 for (i = nloops; i >= 0; i--) { 496 unsigned long *tap; 497 498 local_bh_disable(); 499 tap = this_cpu_ptr(&test_acqrel); 500 WRITE_ONCE(*tap, READ_ONCE(*tap) + 1); 501 WRITE_ONCE(*tap, READ_ONCE(*tap) - 1); 502 local_bh_enable(); 503 } 504 } 505 506 static void ref_incpercpubh_delay_section(const int nloops, const int udl, const int ndl) 507 { 508 int i; 509 510 for (i = nloops; i >= 0; i--) { 511 unsigned long *tap; 512 513 local_bh_disable(); 514 tap = this_cpu_ptr(&test_acqrel); 515 WRITE_ONCE(*tap, READ_ONCE(*tap) + 1); 516 un_delay(udl, ndl); 517 WRITE_ONCE(*tap, READ_ONCE(*tap) - 1); 518 local_bh_enable(); 519 } 520 } 521 522 static const struct ref_scale_ops incpercpubh_ops = { 523 .init = rcu_sync_scale_init, 524 .readsection = ref_incpercpubh_section, 525 .delaysection = ref_incpercpubh_delay_section, 526 .enable_irqs = true, 527 .name = "incpercpubh" 528 }; 529 530 static void ref_incpercpuirqsave_section(const int nloops) 531 { 532 int i; 533 unsigned long flags; 534 535 for (i = nloops; i >= 0; i--) { 536 unsigned long *tap; 537 538 local_irq_save(flags); 539 tap = this_cpu_ptr(&test_acqrel); 540 WRITE_ONCE(*tap, READ_ONCE(*tap) + 1); 541 WRITE_ONCE(*tap, READ_ONCE(*tap) - 1); 542 local_irq_restore(flags); 543 } 544 } 545 546 static void ref_incpercpuirqsave_delay_section(const int nloops, const int udl, const int ndl) 547 { 548 int i; 549 unsigned long flags; 550 551 for (i = nloops; i >= 0; i--) { 552 unsigned long *tap; 553 554 local_irq_save(flags); 555 tap = this_cpu_ptr(&test_acqrel); 556 WRITE_ONCE(*tap, READ_ONCE(*tap) + 1); 557 un_delay(udl, ndl); 558 WRITE_ONCE(*tap, READ_ONCE(*tap) - 1); 559 local_irq_restore(flags); 560 } 561 } 562 563 static const struct ref_scale_ops incpercpuirqsave_ops = { 
564 .init = rcu_sync_scale_init, 565 .readsection = ref_incpercpuirqsave_section, 566 .delaysection = ref_incpercpuirqsave_delay_section, 567 .name = "incpercpuirqsave" 568 }; 569 570 // Definitions for rwlock 571 static rwlock_t test_rwlock; 572 573 static bool ref_rwlock_init(void) 574 { 575 rwlock_init(&test_rwlock); 576 return true; 577 } 578 579 static void ref_rwlock_section(const int nloops) 580 { 581 int i; 582 583 for (i = nloops; i >= 0; i--) { 584 read_lock(&test_rwlock); 585 read_unlock(&test_rwlock); 586 } 587 } 588 589 static void ref_rwlock_delay_section(const int nloops, const int udl, const int ndl) 590 { 591 int i; 592 593 for (i = nloops; i >= 0; i--) { 594 read_lock(&test_rwlock); 595 un_delay(udl, ndl); 596 read_unlock(&test_rwlock); 597 } 598 } 599 600 static const struct ref_scale_ops rwlock_ops = { 601 .init = ref_rwlock_init, 602 .readsection = ref_rwlock_section, 603 .delaysection = ref_rwlock_delay_section, 604 .name = "rwlock" 605 }; 606 607 // Definitions for rwsem 608 static struct rw_semaphore test_rwsem; 609 610 static bool ref_rwsem_init(void) 611 { 612 init_rwsem(&test_rwsem); 613 return true; 614 } 615 616 static void ref_rwsem_section(const int nloops) 617 { 618 int i; 619 620 for (i = nloops; i >= 0; i--) { 621 down_read(&test_rwsem); 622 up_read(&test_rwsem); 623 } 624 } 625 626 static void ref_rwsem_delay_section(const int nloops, const int udl, const int ndl) 627 { 628 int i; 629 630 for (i = nloops; i >= 0; i--) { 631 down_read(&test_rwsem); 632 un_delay(udl, ndl); 633 up_read(&test_rwsem); 634 } 635 } 636 637 static const struct ref_scale_ops rwsem_ops = { 638 .init = ref_rwsem_init, 639 .readsection = ref_rwsem_section, 640 .delaysection = ref_rwsem_delay_section, 641 .name = "rwsem" 642 }; 643 644 // Definitions for global spinlock 645 static DEFINE_RAW_SPINLOCK(test_lock); 646 647 static void ref_lock_section(const int nloops) 648 { 649 int i; 650 651 preempt_disable(); 652 for (i = nloops; i >= 0; i--) { 653 
raw_spin_lock(&test_lock); 654 raw_spin_unlock(&test_lock); 655 } 656 preempt_enable(); 657 } 658 659 static void ref_lock_delay_section(const int nloops, const int udl, const int ndl) 660 { 661 int i; 662 663 preempt_disable(); 664 for (i = nloops; i >= 0; i--) { 665 raw_spin_lock(&test_lock); 666 un_delay(udl, ndl); 667 raw_spin_unlock(&test_lock); 668 } 669 preempt_enable(); 670 } 671 672 static const struct ref_scale_ops lock_ops = { 673 .readsection = ref_lock_section, 674 .delaysection = ref_lock_delay_section, 675 .name = "lock" 676 }; 677 678 // Definitions for global irq-save spinlock 679 680 static void ref_lock_irq_section(const int nloops) 681 { 682 unsigned long flags; 683 int i; 684 685 preempt_disable(); 686 for (i = nloops; i >= 0; i--) { 687 raw_spin_lock_irqsave(&test_lock, flags); 688 raw_spin_unlock_irqrestore(&test_lock, flags); 689 } 690 preempt_enable(); 691 } 692 693 static void ref_lock_irq_delay_section(const int nloops, const int udl, const int ndl) 694 { 695 unsigned long flags; 696 int i; 697 698 preempt_disable(); 699 for (i = nloops; i >= 0; i--) { 700 raw_spin_lock_irqsave(&test_lock, flags); 701 un_delay(udl, ndl); 702 raw_spin_unlock_irqrestore(&test_lock, flags); 703 } 704 preempt_enable(); 705 } 706 707 static const struct ref_scale_ops lock_irq_ops = { 708 .readsection = ref_lock_irq_section, 709 .delaysection = ref_lock_irq_delay_section, 710 .name = "lock-irq" 711 }; 712 713 static void ref_acqrel_section(const int nloops) 714 { 715 unsigned long x; 716 int i; 717 718 preempt_disable(); 719 for (i = nloops; i >= 0; i--) { 720 x = smp_load_acquire(this_cpu_ptr(&test_acqrel)); 721 smp_store_release(this_cpu_ptr(&test_acqrel), x + 1); 722 } 723 preempt_enable(); 724 } 725 726 static void ref_acqrel_delay_section(const int nloops, const int udl, const int ndl) 727 { 728 unsigned long x; 729 int i; 730 731 preempt_disable(); 732 for (i = nloops; i >= 0; i--) { 733 x = smp_load_acquire(this_cpu_ptr(&test_acqrel)); 734 un_delay(udl, 
ndl); 735 smp_store_release(this_cpu_ptr(&test_acqrel), x + 1); 736 } 737 preempt_enable(); 738 } 739 740 static const struct ref_scale_ops acqrel_ops = { 741 .readsection = ref_acqrel_section, 742 .delaysection = ref_acqrel_delay_section, 743 .name = "acqrel" 744 }; 745 746 static volatile u64 stopopts; 747 748 static void ref_sched_clock_section(const int nloops) 749 { 750 u64 x = 0; 751 int i; 752 753 preempt_disable(); 754 for (i = nloops; i >= 0; i--) 755 x += sched_clock(); 756 preempt_enable(); 757 stopopts = x; 758 } 759 760 static void ref_sched_clock_delay_section(const int nloops, const int udl, const int ndl) 761 { 762 u64 x = 0; 763 int i; 764 765 preempt_disable(); 766 for (i = nloops; i >= 0; i--) { 767 x += sched_clock(); 768 un_delay(udl, ndl); 769 } 770 preempt_enable(); 771 stopopts = x; 772 } 773 774 static const struct ref_scale_ops sched_clock_ops = { 775 .readsection = ref_sched_clock_section, 776 .delaysection = ref_sched_clock_delay_section, 777 .name = "sched-clock" 778 }; 779 780 781 static void ref_clock_section(const int nloops) 782 { 783 u64 x = 0; 784 int i; 785 786 preempt_disable(); 787 for (i = nloops; i >= 0; i--) 788 x += ktime_get_real_fast_ns(); 789 preempt_enable(); 790 stopopts = x; 791 } 792 793 static void ref_clock_delay_section(const int nloops, const int udl, const int ndl) 794 { 795 u64 x = 0; 796 int i; 797 798 preempt_disable(); 799 for (i = nloops; i >= 0; i--) { 800 x += ktime_get_real_fast_ns(); 801 un_delay(udl, ndl); 802 } 803 preempt_enable(); 804 stopopts = x; 805 } 806 807 static const struct ref_scale_ops clock_ops = { 808 .readsection = ref_clock_section, 809 .delaysection = ref_clock_delay_section, 810 .name = "clock" 811 }; 812 813 static void ref_jiffies_section(const int nloops) 814 { 815 u64 x = 0; 816 int i; 817 818 preempt_disable(); 819 for (i = nloops; i >= 0; i--) 820 x += jiffies; 821 preempt_enable(); 822 stopopts = x; 823 } 824 825 static void ref_jiffies_delay_section(const int nloops, const 
int udl, const int ndl) 826 { 827 u64 x = 0; 828 int i; 829 830 preempt_disable(); 831 for (i = nloops; i >= 0; i--) { 832 x += jiffies; 833 un_delay(udl, ndl); 834 } 835 preempt_enable(); 836 stopopts = x; 837 } 838 839 static const struct ref_scale_ops jiffies_ops = { 840 .readsection = ref_jiffies_section, 841 .delaysection = ref_jiffies_delay_section, 842 .name = "jiffies" 843 }; 844 845 static void ref_preempt_section(const int nloops) 846 { 847 int i; 848 849 migrate_disable(); 850 for (i = nloops; i >= 0; i--) { 851 preempt_disable(); 852 preempt_enable(); 853 } 854 migrate_enable(); 855 } 856 857 static void ref_preempt_delay_section(const int nloops, const int udl, const int ndl) 858 { 859 int i; 860 861 migrate_disable(); 862 for (i = nloops; i >= 0; i--) { 863 preempt_disable(); 864 un_delay(udl, ndl); 865 preempt_enable(); 866 } 867 migrate_enable(); 868 } 869 870 static const struct ref_scale_ops preempt_ops = { 871 .readsection = ref_preempt_section, 872 .delaysection = ref_preempt_delay_section, 873 .name = "preempt" 874 }; 875 876 static void ref_bh_section(const int nloops) 877 { 878 int i; 879 880 preempt_disable(); 881 for (i = nloops; i >= 0; i--) { 882 local_bh_disable(); 883 local_bh_enable(); 884 } 885 preempt_enable(); 886 } 887 888 static void ref_bh_delay_section(const int nloops, const int udl, const int ndl) 889 { 890 int i; 891 892 preempt_disable(); 893 for (i = nloops; i >= 0; i--) { 894 local_bh_disable(); 895 un_delay(udl, ndl); 896 local_bh_enable(); 897 } 898 preempt_enable(); 899 } 900 901 static const struct ref_scale_ops bh_ops = { 902 .readsection = ref_bh_section, 903 .delaysection = ref_bh_delay_section, 904 .enable_irqs = true, 905 .name = "bh" 906 }; 907 908 static void ref_irq_section(const int nloops) 909 { 910 int i; 911 912 preempt_disable(); 913 for (i = nloops; i >= 0; i--) { 914 local_irq_disable(); 915 local_irq_enable(); 916 } 917 preempt_enable(); 918 } 919 920 static void ref_irq_delay_section(const int nloops, 
const int udl, const int ndl) 921 { 922 int i; 923 924 preempt_disable(); 925 for (i = nloops; i >= 0; i--) { 926 local_irq_disable(); 927 un_delay(udl, ndl); 928 local_irq_enable(); 929 } 930 preempt_enable(); 931 } 932 933 static const struct ref_scale_ops irq_ops = { 934 .readsection = ref_irq_section, 935 .delaysection = ref_irq_delay_section, 936 .name = "irq" 937 }; 938 939 static void ref_irqsave_section(const int nloops) 940 { 941 unsigned long flags; 942 int i; 943 944 preempt_disable(); 945 for (i = nloops; i >= 0; i--) { 946 local_irq_save(flags); 947 local_irq_restore(flags); 948 } 949 preempt_enable(); 950 } 951 952 static void ref_irqsave_delay_section(const int nloops, const int udl, const int ndl) 953 { 954 unsigned long flags; 955 int i; 956 957 preempt_disable(); 958 for (i = nloops; i >= 0; i--) { 959 local_irq_save(flags); 960 un_delay(udl, ndl); 961 local_irq_restore(flags); 962 } 963 preempt_enable(); 964 } 965 966 static const struct ref_scale_ops irqsave_ops = { 967 .readsection = ref_irqsave_section, 968 .delaysection = ref_irqsave_delay_section, 969 .name = "irqsave" 970 }; 971 972 //////////////////////////////////////////////////////////////////////// 973 // 974 // Methods leveraging SLAB_TYPESAFE_BY_RCU. 975 // 976 977 // Item to look up in a typesafe manner. Array of pointers to these. 978 struct refscale_typesafe { 979 atomic_t rts_refctr; // Used by all flavors 980 spinlock_t rts_lock; 981 seqlock_t rts_seqlock; 982 unsigned int a; 983 unsigned int b; 984 }; 985 986 static struct kmem_cache *typesafe_kmem_cachep; 987 static struct refscale_typesafe **rtsarray; 988 static long rtsarray_size; 989 static DEFINE_TORTURE_RANDOM_PERCPU(refscale_rand); 990 static bool (*rts_acquire)(struct refscale_typesafe *rtsp, unsigned int *start); 991 static bool (*rts_release)(struct refscale_typesafe *rtsp, unsigned int start); 992 993 // Conditionally acquire an explicit in-structure reference count. 
994 static bool typesafe_ref_acquire(struct refscale_typesafe *rtsp, unsigned int *start) 995 { 996 return atomic_inc_not_zero(&rtsp->rts_refctr); 997 } 998 999 // Unconditionally release an explicit in-structure reference count. 1000 static bool typesafe_ref_release(struct refscale_typesafe *rtsp, unsigned int start) 1001 { 1002 if (!atomic_dec_return(&rtsp->rts_refctr)) { 1003 WRITE_ONCE(rtsp->a, rtsp->a + 1); 1004 kmem_cache_free(typesafe_kmem_cachep, rtsp); 1005 } 1006 return true; 1007 } 1008 1009 // Unconditionally acquire an explicit in-structure spinlock. 1010 static bool typesafe_lock_acquire(struct refscale_typesafe *rtsp, unsigned int *start) 1011 { 1012 spin_lock(&rtsp->rts_lock); 1013 return true; 1014 } 1015 1016 // Unconditionally release an explicit in-structure spinlock. 1017 static bool typesafe_lock_release(struct refscale_typesafe *rtsp, unsigned int start) 1018 { 1019 spin_unlock(&rtsp->rts_lock); 1020 return true; 1021 } 1022 1023 // Unconditionally acquire an explicit in-structure sequence lock. 1024 static bool typesafe_seqlock_acquire(struct refscale_typesafe *rtsp, unsigned int *start) 1025 { 1026 *start = read_seqbegin(&rtsp->rts_seqlock); 1027 return true; 1028 } 1029 1030 // Conditionally release an explicit in-structure sequence lock. Return 1031 // true if this release was successful, that is, if no retry is required. 1032 static bool typesafe_seqlock_release(struct refscale_typesafe *rtsp, unsigned int start) 1033 { 1034 return !read_seqretry(&rtsp->rts_seqlock, start); 1035 } 1036 1037 // Do a read-side critical section with the specified delay in 1038 // microseconds and nanoseconds inserted so as to increase probability 1039 // of failure. 
1040 static void typesafe_delay_section(const int nloops, const int udl, const int ndl) 1041 { 1042 unsigned int a; 1043 unsigned int b; 1044 int i; 1045 long idx; 1046 struct refscale_typesafe *rtsp; 1047 unsigned int start; 1048 1049 for (i = nloops; i >= 0; i--) { 1050 preempt_disable(); 1051 idx = torture_random(this_cpu_ptr(&refscale_rand)) % rtsarray_size; 1052 preempt_enable(); 1053 retry: 1054 rcu_read_lock(); 1055 rtsp = rcu_dereference(rtsarray[idx]); 1056 a = READ_ONCE(rtsp->a); 1057 if (!rts_acquire(rtsp, &start)) { 1058 rcu_read_unlock(); 1059 goto retry; 1060 } 1061 if (a != READ_ONCE(rtsp->a)) { 1062 (void)rts_release(rtsp, start); 1063 rcu_read_unlock(); 1064 goto retry; 1065 } 1066 un_delay(udl, ndl); 1067 b = READ_ONCE(rtsp->a); 1068 // Remember, seqlock read-side release can fail. 1069 if (!rts_release(rtsp, start)) { 1070 rcu_read_unlock(); 1071 goto retry; 1072 } 1073 WARN_ONCE(a != b, "Re-read of ->a changed from %u to %u.\n", a, b); 1074 b = rtsp->b; 1075 rcu_read_unlock(); 1076 WARN_ON_ONCE(a * a != b); 1077 } 1078 } 1079 1080 // Because the acquisition and release methods are expensive, there 1081 // is no point in optimizing away the un_delay() function's two checks. 1082 // Thus simply define typesafe_read_section() as a simple wrapper around 1083 // typesafe_delay_section(). 1084 static void typesafe_read_section(const int nloops) 1085 { 1086 typesafe_delay_section(nloops, 0, 0); 1087 } 1088 1089 // Allocate and initialize one refscale_typesafe structure. 1090 static struct refscale_typesafe *typesafe_alloc_one(void) 1091 { 1092 struct refscale_typesafe *rtsp; 1093 1094 rtsp = kmem_cache_alloc(typesafe_kmem_cachep, GFP_KERNEL); 1095 if (!rtsp) 1096 return NULL; 1097 atomic_set(&rtsp->rts_refctr, 1); 1098 WRITE_ONCE(rtsp->a, rtsp->a + 1); 1099 WRITE_ONCE(rtsp->b, rtsp->a * rtsp->a); 1100 return rtsp; 1101 } 1102 1103 // Slab-allocator constructor for refscale_typesafe structures created 1104 // out of a new slab of system memory. 
1105 static void refscale_typesafe_ctor(void *rtsp_in) 1106 { 1107 struct refscale_typesafe *rtsp = rtsp_in; 1108 1109 spin_lock_init(&rtsp->rts_lock); 1110 seqlock_init(&rtsp->rts_seqlock); 1111 preempt_disable(); 1112 rtsp->a = torture_random(this_cpu_ptr(&refscale_rand)); 1113 preempt_enable(); 1114 } 1115 1116 static const struct ref_scale_ops typesafe_ref_ops; 1117 static const struct ref_scale_ops typesafe_lock_ops; 1118 static const struct ref_scale_ops typesafe_seqlock_ops; 1119 1120 // Initialize for a typesafe test. 1121 static bool typesafe_init(void) 1122 { 1123 long idx; 1124 long si = lookup_instances; 1125 1126 typesafe_kmem_cachep = kmem_cache_create("refscale_typesafe", 1127 sizeof(struct refscale_typesafe), sizeof(void *), 1128 SLAB_TYPESAFE_BY_RCU, refscale_typesafe_ctor); 1129 if (!typesafe_kmem_cachep) 1130 return false; 1131 if (si < 0) 1132 si = -si * nr_cpu_ids; 1133 else if (si == 0) 1134 si = nr_cpu_ids; 1135 rtsarray_size = si; 1136 rtsarray = kzalloc_objs(*rtsarray, si); 1137 if (!rtsarray) 1138 return false; 1139 for (idx = 0; idx < rtsarray_size; idx++) { 1140 rtsarray[idx] = typesafe_alloc_one(); 1141 if (!rtsarray[idx]) 1142 return false; 1143 } 1144 if (cur_ops == &typesafe_ref_ops) { 1145 rts_acquire = typesafe_ref_acquire; 1146 rts_release = typesafe_ref_release; 1147 } else if (cur_ops == &typesafe_lock_ops) { 1148 rts_acquire = typesafe_lock_acquire; 1149 rts_release = typesafe_lock_release; 1150 } else if (cur_ops == &typesafe_seqlock_ops) { 1151 rts_acquire = typesafe_seqlock_acquire; 1152 rts_release = typesafe_seqlock_release; 1153 } else { 1154 WARN_ON_ONCE(1); 1155 return false; 1156 } 1157 return true; 1158 } 1159 1160 // Clean up after a typesafe test. 
1161 static void typesafe_cleanup(void) 1162 { 1163 long idx; 1164 1165 if (rtsarray) { 1166 for (idx = 0; idx < rtsarray_size; idx++) 1167 kmem_cache_free(typesafe_kmem_cachep, rtsarray[idx]); 1168 kfree(rtsarray); 1169 rtsarray = NULL; 1170 rtsarray_size = 0; 1171 } 1172 kmem_cache_destroy(typesafe_kmem_cachep); 1173 typesafe_kmem_cachep = NULL; 1174 rts_acquire = NULL; 1175 rts_release = NULL; 1176 } 1177 1178 // The typesafe_init() function distinguishes these structures by address. 1179 static const struct ref_scale_ops typesafe_ref_ops = { 1180 .init = typesafe_init, 1181 .cleanup = typesafe_cleanup, 1182 .readsection = typesafe_read_section, 1183 .delaysection = typesafe_delay_section, 1184 .name = "typesafe_ref" 1185 }; 1186 1187 static const struct ref_scale_ops typesafe_lock_ops = { 1188 .init = typesafe_init, 1189 .cleanup = typesafe_cleanup, 1190 .readsection = typesafe_read_section, 1191 .delaysection = typesafe_delay_section, 1192 .name = "typesafe_lock" 1193 }; 1194 1195 static const struct ref_scale_ops typesafe_seqlock_ops = { 1196 .init = typesafe_init, 1197 .cleanup = typesafe_cleanup, 1198 .readsection = typesafe_read_section, 1199 .delaysection = typesafe_delay_section, 1200 .name = "typesafe_seqlock" 1201 }; 1202 1203 static void rcu_scale_one_reader(void) 1204 { 1205 if (readdelay <= 0) 1206 cur_ops->readsection(loops); 1207 else 1208 cur_ops->delaysection(loops, readdelay / 1000, readdelay % 1000); 1209 } 1210 1211 // Warm up cache, or, if needed run a series of rcu_scale_one_reader() 1212 // to allow multiple rcuscale guest OSes to collect mutually valid data. 1213 static void rcu_scale_warm_cool(void) 1214 { 1215 unsigned long jdone = jiffies + (guest_os_delay > 0 ? guest_os_delay * HZ : -1); 1216 1217 do { 1218 rcu_scale_one_reader(); 1219 cond_resched(); 1220 } while (time_before(jiffies, jdone)); 1221 } 1222 1223 // Reader kthread. Repeatedly does empty RCU read-side 1224 // critical section, minimizing update-side interference. 
static int
ref_scale_reader(void *arg)
{
	unsigned long flags;
	long me = (long)arg;	// Reader index, also selects this reader's CPU.
	struct reader_task *rt = &(reader_tasks[me]);
	u64 start;
	s64 duration;

	VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: task started", me);
	// Bind this reader to the CPU given by its index, wrapping as needed.
	WARN_ON_ONCE(set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)));
	set_user_nice(current, MAX_NICE);
	atomic_inc(&n_init);	// Check in so main_func() knows we are running.
	if (holdoff)
		schedule_timeout_interruptible(holdoff * HZ);
repeat:
	VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: waiting to start next experiment on cpu %d", me, raw_smp_processor_id());

	// Wait for signal that this reader can start.
	wait_event(rt->wq, (atomic_read(&nreaders_exp) && smp_load_acquire(&rt->start_reader)) ||
		   torture_must_stop());

	if (torture_must_stop())
		goto end;

	// Make sure that the CPU is affinitized appropriately during testing.
	WARN_ON_ONCE(raw_smp_processor_id() != me % nr_cpu_ids);

	WRITE_ONCE(rt->start_reader, 0);
	// Check in: the reader whose decrement takes n_started to zero does
	// an acquire read of the counter before proceeding.
	if (!atomic_dec_return(&n_started))
		while (atomic_read_acquire(&n_started))
			cpu_relax();

	VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: experiment %d started", me, exp_idx);


	// To reduce noise, do an initial cache-warming invocation, check
	// in, and then keep warming until everyone has checked in.
	rcu_scale_one_reader();
	if (!atomic_dec_return(&n_warmedup))
		while (atomic_read_acquire(&n_warmedup))
			rcu_scale_one_reader();
	// Also keep interrupts disabled when it is safe to do so, which
	// it is not for local_bh_enable().  This also has the effect of
	// preventing entries into slow path for rcu_read_unlock().
	if (!cur_ops->enable_irqs)
		local_irq_save(flags);
	// Time the measured loops with the NMI-safe monotonic fast clock.
	start = ktime_get_mono_fast_ns();

	rcu_scale_one_reader();

	duration = ktime_get_mono_fast_ns() - start;
	if (!cur_ops->enable_irqs)
		local_irq_restore(flags);

	// A negative duration indicates clock trouble; record zero instead.
	rt->last_duration_ns = WARN_ON_ONCE(duration < 0) ? 0 : duration;
	// To reduce runtime-skew noise, do maintain-load invocations until
	// everyone is done.
	if (!atomic_dec_return(&n_cooleddown))
		while (atomic_read_acquire(&n_cooleddown))
			rcu_scale_one_reader();

	// The last reader to check out wakes main_func() to collect results.
	if (atomic_dec_and_test(&nreaders_exp))
		wake_up(&main_wq);

	VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: experiment %d ended, (readers remaining=%d)",
			       me, exp_idx, atomic_read(&nreaders_exp));

	if (!torture_must_stop())
		goto repeat;
end:
	torture_kthread_stopping("ref_scale_reader");
	return 0;
}

// Zero every reader's recorded duration before the next experiment.
static void reset_readers(void)
{
	int i;
	struct reader_task *rt;

	for (i = 0; i < nreaders; i++) {
		rt = &(reader_tasks[i]);

		rt->last_duration_ns = 0;
	}
}

// Print the results of each reader and return the sum of all their durations.
static u64 process_durations(int n)
{
	int i;
	struct reader_task *rt;
	struct seq_buf s;
	char *buf;
	u64 sum = 0;

	// 800 bytes of output plus 64 bytes of slack for the entry that
	// crosses the flush threshold below.
	buf = kmalloc(800 + 64, GFP_KERNEL);
	if (!buf)
		return 0;	// Cannot report, so report a zero sum.
	seq_buf_init(&s, buf, 800 + 64);

	seq_buf_printf(&s, "Experiment #%d (Format: <THREAD-NUM>:<Total loop time in ns>)",
		       exp_idx);

	for (i = 0; i < n && !torture_must_stop(); i++) {
		rt = &(reader_tasks[i]);

		// Start a new output line every five entries.
		if (i % 5 == 0)
			seq_buf_putc(&s, '\n');

		// Flush to the console before the buffer can fill up.
		if (seq_buf_used(&s) >= 800) {
			pr_alert("%s", seq_buf_str(&s));
			seq_buf_clear(&s);
		}

		seq_buf_printf(&s, "%d: %llu\t", i, rt->last_duration_ns);

		sum += rt->last_duration_ns;
	}
	// Flush whatever remains.
	pr_alert("%s\n", seq_buf_str(&s));

	kfree(buf);
	return sum;
}

static void ref_scale_cleanup(void);

// The main_func is the main orchestrator, it performs a bunch of
// experiments.  For every experiment, it orders all the readers
// involved to start and waits for them to finish the experiment.  It
// then reads their timestamps and starts the next experiment.  Each
// experiment progresses from 1 concurrent reader to N of them at which
// point all the timestamps are printed.
static int main_func(void *arg)
{
	int exp, r;
	char buf1[64];		// Scratch space for one formatted result line.
	char *buf;		// Accumulates result lines for batched pr_alert().
	u64 *result_avg;	// Per-experiment average, in ns per loop times 1000.

	// Run on the CPU indexed just past the readers' CPUs (modulo wrap).
	set_cpus_allowed_ptr(current, cpumask_of(nreaders % nr_cpu_ids));
	set_user_nice(current, MAX_NICE);

	VERBOSE_SCALEOUT("main_func task started");
	result_avg = kcalloc(nruns, sizeof(*result_avg), GFP_KERNEL);
	buf = kzalloc(800 + 64, GFP_KERNEL);
	if (!result_avg || !buf) {
		SCALEOUT_ERRSTRING("out of memory");
		goto oom_exit;
	}
	if (holdoff)
		schedule_timeout_interruptible(holdoff * HZ);

	// Wait for all threads to start.
	atomic_inc(&n_init);
	while (atomic_read(&n_init) < nreaders + 1)
		schedule_timeout_uninterruptible(1);

	// Start exp readers up per experiment
	rcu_scale_warm_cool();
	for (exp = 0; exp < nruns && !torture_must_stop(); exp++) {
		if (torture_must_stop())
			goto end;

		// Arm the per-experiment check-in counters.
		reset_readers();
		atomic_set(&nreaders_exp, nreaders);
		atomic_set(&n_started, nreaders);
		atomic_set(&n_warmedup, nreaders);
		atomic_set(&n_cooleddown, nreaders);

		exp_idx = exp;

		// Release the readers: the store-release pairs with each
		// reader's smp_load_acquire() of ->start_reader.
		for (r = 0; r < nreaders; r++) {
			smp_store_release(&reader_tasks[r].start_reader, 1);
			wake_up(&reader_tasks[r].wq);
		}

		VERBOSE_SCALEOUT("main_func: experiment started, waiting for %d readers",
				 nreaders);

		// Sleep until the last reader decrements nreaders_exp to zero.
		wait_event(main_wq,
			   !atomic_read(&nreaders_exp) || torture_must_stop());

		VERBOSE_SCALEOUT("main_func: experiment ended");

		if (torture_must_stop())
			goto end;

		// Scale by 1000 to keep three decimal places; nreaders * loops
		// cannot overflow because ref_scale_init() clamps loops.
		result_avg[exp] = div_u64(1000 * process_durations(nreaders), nreaders * loops);
	}
	rcu_scale_warm_cool();

	// Print the average of all experiments
	SCALEOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n");

	pr_alert("Runs\tTime(ns)\n");
	for (exp = 0; exp < nruns; exp++) {
		u64 avg;
		u32 rem;

		// Split the scaled value back into whole and fractional parts.
		avg = div_u64_rem(result_avg[exp], 1000, &rem);
		sprintf(buf1, "%d\t%llu.%03u\n", exp + 1, avg, rem);
		strcat(buf, buf1);
		// Batch output to stay within the 800-byte buffer.
		if (strlen(buf) >= 800) {
			pr_alert("%s", buf);
			buf[0] = 0;
		}
	}

	pr_alert("%s", buf);

oom_exit:
	// This will shutdown everything including us.
	if (shutdown_secs) {
		main_task = NULL; // Avoid self-kill deadlock.
		ref_scale_cleanup();
		kernel_power_off();
	}

	// Wait for torture to stop us
	while (!torture_must_stop())
		schedule_timeout_uninterruptible(1);

end:
	torture_kthread_stopping("main_func");
	kfree(result_avg);	// kfree(NULL) is a no-op, so the OOM path is safe.
	kfree(buf);
	return 0;
}

// Print module parameters to the console, tagged with the test phase.
static void
ref_scale_print_module_parms(const struct ref_scale_ops *cur_ops, const char *tag)
{
	pr_alert("%s" SCALE_FLAG
		 "--- %s: verbose=%d verbose_batched=%d shutdown_secs=%d holdoff=%d lookup_instances=%ld loops=%d nreaders=%d nruns=%d readdelay=%d\n", scale_type, tag,
		 verbose, verbose_batched, shutdown_secs, holdoff, lookup_instances, loops, nreaders, nruns, readdelay);
}

// Stop the reader kthreads and the main task, then do any
// scale-type-specific cleanup.
static void
ref_scale_cleanup(void)
{
	int i;

	if (torture_cleanup_begin())
		return;

	// If cur_ops was never set, ref_scale_init() failed before starting
	// anything, so there is nothing to stop.
	if (!cur_ops) {
		torture_cleanup_end();
		return;
	}

	if (reader_tasks) {
		for (i = 0; i < nreaders; i++)
			torture_stop_kthread("ref_scale_reader",
					     reader_tasks[i].task);
	}
	kfree(reader_tasks);
	reader_tasks = NULL;

	torture_stop_kthread("main_task", main_task);

	// Do scale-type-specific cleanup operations.
	if (cur_ops->cleanup != NULL)
		cur_ops->cleanup();

	torture_cleanup_end();
}

static int __init
ref_scale_init(void)
{
	long i;
	int firsterr = 0;
	// All known scale types; matched by name against scale_type below.
	static const struct ref_scale_ops *scale_ops[] = {
		&rcu_ops, &srcu_ops, &srcu_fast_ops, &srcu_fast_updown_ops,
		RCU_TRACE_OPS RCU_TASKS_OPS
		&refcnt_ops, &percpuinc_ops, &incpercpu_ops, &incpercpupreempt_ops,
		&incpercpubh_ops, &incpercpuirqsave_ops,
		&rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops,
		&sched_clock_ops, &clock_ops, &jiffies_ops,
		&preempt_ops, &bh_ops, &irq_ops, &irqsave_ops,
		&typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops,
	};

	if (!torture_init_begin(scale_type, verbose))
		return -EBUSY;

	// Look up the requested scale type by name.
	for (i = 0; i < ARRAY_SIZE(scale_ops); i++) {
		cur_ops = scale_ops[i];
		if (strcmp(scale_type, cur_ops->name) == 0)
			break;
	}
	if (i == ARRAY_SIZE(scale_ops)) {
		pr_alert("rcu-scale: invalid scale type: \"%s\"\n", scale_type);
		pr_alert("rcu-scale types:");
		for (i = 0; i < ARRAY_SIZE(scale_ops); i++)
			pr_cont(" %s", scale_ops[i]->name);
		pr_cont("\n");
		firsterr = -EINVAL;
		cur_ops = NULL;	// Tell ref_scale_cleanup() nothing was started.
		goto unwind;
	}
	if (cur_ops->init)
		if (!cur_ops->init()) {
			firsterr = -EUCLEAN;
			goto unwind;
		}

	ref_scale_print_module_parms(cur_ops, "Start of test");

	// Shutdown task
	if (shutdown_secs) {
		firsterr = torture_shutdown_init(shutdown_secs, ref_scale_cleanup);
		if (torture_init_error(firsterr))
			goto unwind;
	}

	// Reader tasks (default to ~75% of online CPUs).
	if (nreaders < 0)
		nreaders = (num_online_cpus() >> 1) + (num_online_cpus() >> 2);
	// Sanitize nonsensical module parameters rather than failing outright.
	if (WARN_ONCE(loops <= 0, "%s: loops = %d, adjusted to 1\n", __func__, loops))
		loops = 1;
	if (WARN_ONCE(nreaders <= 0, "%s: nreaders = %d, adjusted to 1\n", __func__, nreaders))
		nreaders = 1;
	if (WARN_ONCE(nruns <= 0, "%s: nruns = %d, adjusted to 1\n", __func__, nruns))
		nruns = 1;
	// Keep nreaders * loops within int range; main_func() relies on this
	// when computing per-experiment averages.
	if (WARN_ONCE(loops > INT_MAX / nreaders,
		      "%s: nreaders * loops will overflow, adjusted loops to %d",
		      __func__, INT_MAX / nreaders))
		loops = INT_MAX / nreaders;
	reader_tasks = kzalloc_objs(reader_tasks[0], nreaders);
	if (!reader_tasks) {
		SCALEOUT_ERRSTRING("out of memory");
		firsterr = -ENOMEM;
		goto unwind;
	}

	VERBOSE_SCALEOUT("Starting %d reader threads", nreaders);

	for (i = 0; i < nreaders; i++) {
		init_waitqueue_head(&reader_tasks[i].wq);
		firsterr = torture_create_kthread(ref_scale_reader, (void *)i,
						  reader_tasks[i].task);
		if (torture_init_error(firsterr))
			goto unwind;
	}

	// Main Task
	init_waitqueue_head(&main_wq);
	firsterr = torture_create_kthread(main_func, NULL, main_task);
	if (torture_init_error(firsterr))
		goto unwind;

	torture_init_end();
	return 0;

unwind:
	torture_init_end();
	ref_scale_cleanup();
	// If a shutdown was requested, power off on failure; the WARN fires
	// when this is a built-in (non-module) configuration.
	if (shutdown_secs) {
		WARN_ON(!IS_MODULE(CONFIG_RCU_REF_SCALE_TEST));
		kernel_power_off();
	}
	return firsterr;
}

module_init(ref_scale_init);
module_exit(ref_scale_cleanup);