// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * SMP support for ppc.
 *
 * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
 * deal of code from the sparc and intel versions.
 *
 * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
 *
 * PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
 */

#undef DEBUG

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched/mm.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/topology.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/cache.h>
#include <linux/err.h>
#include <linux/device.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/topology.h>
#include <linux/profile.h>
#include <linux/processor.h>
#include <linux/random.h>
#include <linux/stackprotector.h>
#include <linux/pgtable.h>
#include <linux/clockchips.h>

#include <asm/ptrace.h>
#include <linux/atomic.h>
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/kvm_ppc.h>
#include <asm/dbell.h>
#include <asm/page.h>
#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/cputhreads.h>
#include <asm/cputable.h>
#include <asm/mpic.h>
#include <asm/vdso_datapage.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#endif
#include <asm/vdso.h>
#include <asm/debug.h>
#include <asm/kexec.h>
#include <asm/asm-prototypes.h>
#include <asm/cpu_has_feature.h>
#include <asm/ftrace.h>
#include <asm/kup.h>

#ifdef DEBUG
#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif

#ifdef CONFIG_HOTPLUG_CPU
/* State of each CPU during hotplug phases */
static DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif

struct task_struct *secondary_current;
bool has_big_cores;
bool coregroup_enabled;
bool thread_group_shares_l2;
bool thread_group_shares_l3;

DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
static DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map);

EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
EXPORT_SYMBOL_GPL(has_big_cores);

enum {
#ifdef CONFIG_SCHED_SMT
	smt_idx,
#endif
	cache_idx,
	mc_idx,
	die_idx,
};

#define MAX_THREAD_LIST_SIZE	8
#define THREAD_GROUP_SHARE_L1	1
#define THREAD_GROUP_SHARE_L2_L3 2
struct thread_groups {
	unsigned int property;
	unsigned int nr_groups;
	unsigned int threads_per_group;
	unsigned int thread_list[MAX_THREAD_LIST_SIZE];
};

/* Maximum number of properties that groups of threads within a core can share */
#define MAX_THREAD_GROUP_PROPERTIES 2

struct thread_groups_list {
	unsigned int nr_properties;
	struct thread_groups property_tgs[MAX_THREAD_GROUP_PROPERTIES];
};

static struct thread_groups_list tgl[NR_CPUS] __initdata;
/*
 * On big-core systems, thread_group_l1_cache_map for each CPU corresponds to
 * the set of its siblings that share the L1-cache.
 */
DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);

/*
 * On some big-core systems, thread_group_l2_cache_map for each CPU
 * corresponds to the set of its siblings within the core that share the
 * L2-cache.
 */
DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);

/*
 * On P10, thread_group_l3_cache_map for each CPU is equal to the
 * thread_group_l2_cache_map
 */
DEFINE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);

/* SMP operations for this machine */
struct smp_ops_t *smp_ops;

/* Can't be static due to PowerMac hackery */
volatile unsigned int cpu_callin_map[NR_CPUS];

int smt_enabled_at_boot = 1;

/*
 * Returns 1 if the specified cpu should be brought up during boot.
 * Used to inhibit booting threads if they've been disabled or
 * limited on the command line.
 */
int smp_generic_cpu_bootable(unsigned int nr)
{
	/* Special case - we inhibit secondary thread startup
	 * during boot if the user requests it.
	 */
	if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) {
		if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
			return 0;
		if (smt_enabled_at_boot
		    && cpu_thread_in_core(nr) >= smt_enabled_at_boot)
			return 0;
	}

	return 1;
}


#ifdef CONFIG_PPC64
int smp_generic_kick_cpu(int nr)
{
	if (nr < 0 || nr >= nr_cpu_ids)
		return -EINVAL;

	/*
	 * The processor is currently spinning, waiting for the
	 * cpu_start field to become non-zero. After we set cpu_start,
	 * the processor will continue on to secondary_start.
	 */
	if (!paca_ptrs[nr]->cpu_start) {
		paca_ptrs[nr]->cpu_start = 1;
		smp_mb();
		return 0;
	}

#ifdef CONFIG_HOTPLUG_CPU
	/*
	 * Ok it's not there, so it might be soft-unplugged, let's
	 * try to bring it back
	 */
	generic_set_cpu_up(nr);
	smp_wmb();
	smp_send_reschedule(nr);
#endif /* CONFIG_HOTPLUG_CPU */

	return 0;
}
#endif /* CONFIG_PPC64 */

static irqreturn_t call_function_action(int irq, void *data)
{
	generic_smp_call_function_interrupt();
	return IRQ_HANDLED;
}

static irqreturn_t reschedule_action(int irq, void *data)
{
	scheduler_ipi();
	return IRQ_HANDLED;
}

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
{
	timer_broadcast_interrupt();
	return IRQ_HANDLED;
}
#endif

#ifdef CONFIG_NMI_IPI
static irqreturn_t nmi_ipi_action(int irq, void *data)
{
	smp_handle_nmi_ipi(get_irq_regs());
	return IRQ_HANDLED;
}
#endif

static irq_handler_t smp_ipi_action[] = {
	[PPC_MSG_CALL_FUNCTION] = call_function_action,
	[PPC_MSG_RESCHEDULE] = reschedule_action,
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
	[PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
#endif
#ifdef CONFIG_NMI_IPI
	[PPC_MSG_NMI_IPI] = nmi_ipi_action,
#endif
};

/*
 * The NMI IPI is a fallback and not truly non-maskable. It is simpler
 * than going through the call function infrastructure, and strongly
 * serialized, so it is more appropriate for debugging.
 */
const char *smp_ipi_name[] = {
	[PPC_MSG_CALL_FUNCTION] = "ipi call function",
	[PPC_MSG_RESCHEDULE] = "ipi reschedule",
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
	[PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
#endif
#ifdef CONFIG_NMI_IPI
	[PPC_MSG_NMI_IPI] = "nmi ipi",
#endif
};

/* optional function to request ipi, for controllers with >= 4 ipis */
int smp_request_message_ipi(int virq, int msg)
{
	int err;

	if (msg < 0 || msg > PPC_MSG_NMI_IPI)
		return -EINVAL;
#ifndef CONFIG_NMI_IPI
	if (msg == PPC_MSG_NMI_IPI)
		return 1;
#endif

	err = request_irq(virq, smp_ipi_action[msg],
			  IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND,
			  smp_ipi_name[msg], NULL);
	WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n",
	     virq, smp_ipi_name[msg], err);

	return err;
}

#ifdef CONFIG_PPC_SMP_MUXED_IPI
struct cpu_messages {
	long messages;			/* current messages */
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);

void smp_muxed_ipi_set_message(int cpu, int msg)
{
	struct cpu_messages *info = &per_cpu(ipi_message, cpu);
	char *message = (char *)&info->messages;

	/*
	 * Order previous accesses before accesses in the IPI handler.
	 */
	smp_mb();
	message[msg] = 1;
}

void smp_muxed_ipi_message_pass(int cpu, int msg)
{
	smp_muxed_ipi_set_message(cpu, msg);

	/*
	 * cause_ipi functions are required to include a full barrier
	 * before doing whatever causes the IPI.
	 */
	smp_ops->cause_ipi(cpu);
}
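
/*
 * smp_muxed_ipi_set_message() above stores each message type as a single
 * byte of info->messages.  IPI_MESSAGE(A) below is the value that a store
 * of 1 to byte A produces in the containing long (for either endianness),
 * so smp_ipi_demux_relaxed() can test the word returned by xchg() for each
 * individual message.
 */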
#ifdef __BIG_ENDIAN__
#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
#else
#define IPI_MESSAGE(A) (1uL << (8 * (A)))
#endif

irqreturn_t smp_ipi_demux(void)
{
	mb();	/* order any irq clear */

	return smp_ipi_demux_relaxed();
}

/* sync-free variant. Callers should ensure synchronization */
irqreturn_t smp_ipi_demux_relaxed(void)
{
	struct cpu_messages *info;
	unsigned long all;

	info = this_cpu_ptr(&ipi_message);
	do {
		all = xchg(&info->messages, 0);
#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
		/*
		 * Must check for PPC_MSG_RM_HOST_ACTION messages
		 * before PPC_MSG_CALL_FUNCTION messages because when
		 * a VM is destroyed, we call kick_all_cpus_sync()
		 * to ensure that any pending PPC_MSG_RM_HOST_ACTION
		 * messages have completed before we free any VCPUs.
		 */
		if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
			kvmppc_xics_ipi_action();
#endif
		if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
			generic_smp_call_function_interrupt();
		if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
			scheduler_ipi();
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
		if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
			timer_broadcast_interrupt();
#endif
#ifdef CONFIG_NMI_IPI
		if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI))
			nmi_ipi_action(0, NULL);
#endif
	} while (info->messages);

	return IRQ_HANDLED;
}
#endif /* CONFIG_PPC_SMP_MUXED_IPI */

static inline void do_message_pass(int cpu, int msg)
{
	if (smp_ops->message_pass)
		smp_ops->message_pass(cpu, msg);
#ifdef CONFIG_PPC_SMP_MUXED_IPI
	else
		smp_muxed_ipi_message_pass(cpu, msg);
#endif
}

void smp_send_reschedule(int cpu)
{
	if (likely(smp_ops))
		do_message_pass(cpu, PPC_MSG_RESCHEDULE);
}
EXPORT_SYMBOL_GPL(smp_send_reschedule);

void arch_send_call_function_single_ipi(int cpu)
{
	do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}

void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
	unsigned int cpu;

	for_each_cpu(cpu, mask)
		do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}

#ifdef CONFIG_NMI_IPI

/*
 * "NMI IPI" system.
 *
 * NMI IPIs may not be recoverable, so should not be used as an ongoing part
 * of a running system. They can be used for crash, debug, halt/reboot, etc.
 *
 * The IPI call waits with interrupts disabled until all targets enter the
 * NMI handler, then returns. Subsequent IPIs can be issued before targets
 * have returned from their handlers, so there is no guarantee about
 * concurrency or re-entrancy.
 *
 * A new NMI can be issued before all targets exit the handler.
 *
 * The IPI call may time out without all targets entering the NMI handler.
 * In that case, there is some logic to recover (and ignore subsequent
 * NMI interrupts that may eventually be raised), but the platform interrupt
 * handler may not be able to distinguish this from other exception causes,
 * which may cause a crash.
 */
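
/*
 * __nmi_ipi_lock serializes access to the NMI IPI state below:
 * nmi_ipi_pending_mask, nmi_ipi_busy and nmi_ipi_function.
 */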
static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0);
static struct cpumask nmi_ipi_pending_mask;
static bool nmi_ipi_busy = false;
static void (*nmi_ipi_function)(struct pt_regs *) = NULL;

static void nmi_ipi_lock_start(unsigned long *flags)
{
	raw_local_irq_save(*flags);
	hard_irq_disable();
	while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
		raw_local_irq_restore(*flags);
		spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
		raw_local_irq_save(*flags);
		hard_irq_disable();
	}
}

static void nmi_ipi_lock(void)
{
	while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
		spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
}

static void nmi_ipi_unlock(void)
{
	smp_mb();
	WARN_ON(atomic_read(&__nmi_ipi_lock) != 1);
	atomic_set(&__nmi_ipi_lock, 0);
}

static void nmi_ipi_unlock_end(unsigned long *flags)
{
	nmi_ipi_unlock();
	raw_local_irq_restore(*flags);
}

/*
 * Platform NMI handler calls this to ack
 */
int smp_handle_nmi_ipi(struct pt_regs *regs)
{
	void (*fn)(struct pt_regs *) = NULL;
	unsigned long flags;
	int me = raw_smp_processor_id();
	int ret = 0;

	/*
	 * Unexpected NMIs are possible here because the interrupt may not
	 * be able to distinguish NMI IPIs from other types of NMIs, or
	 * because the caller may have timed out.
	 */
	nmi_ipi_lock_start(&flags);
	if (cpumask_test_cpu(me, &nmi_ipi_pending_mask)) {
		cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
		fn = READ_ONCE(nmi_ipi_function);
		WARN_ON_ONCE(!fn);
		ret = 1;
	}
	nmi_ipi_unlock_end(&flags);

	if (fn)
		fn(regs);

	return ret;
}

static void do_smp_send_nmi_ipi(int cpu, bool safe)
{
	if (!safe && smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu))
		return;

	if (cpu >= 0) {
		do_message_pass(cpu, PPC_MSG_NMI_IPI);
	} else {
		int c;

		for_each_online_cpu(c) {
			if (c == raw_smp_processor_id())
				continue;
			do_message_pass(c, PPC_MSG_NMI_IPI);
		}
	}
}

/*
 * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
 * - fn is the target callback function.
 * - delay_us > 0 is the delay before giving up waiting for targets to
 *   begin executing the handler, == 0 specifies indefinite delay.
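 * - safe, when true, skips the platform's cause_nmi_ipi() hook and falls
 *   back to the ordinary NMI IPI message (see do_smp_send_nmi_ipi() above).
 *
 * Returns 1 if all targets entered the handler in time, 0 on timeout or
 * when no smp_ops are registered.
 */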
static int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *),
			      u64 delay_us, bool safe)
{
	unsigned long flags;
	int me = raw_smp_processor_id();
	int ret = 1;

	BUG_ON(cpu == me);
	BUG_ON(cpu < 0 && cpu != NMI_IPI_ALL_OTHERS);

	if (unlikely(!smp_ops))
		return 0;

	nmi_ipi_lock_start(&flags);
	while (nmi_ipi_busy) {
		nmi_ipi_unlock_end(&flags);
		spin_until_cond(!nmi_ipi_busy);
		nmi_ipi_lock_start(&flags);
	}
	nmi_ipi_busy = true;
	nmi_ipi_function = fn;

	WARN_ON_ONCE(!cpumask_empty(&nmi_ipi_pending_mask));

	if (cpu < 0) {
		/* ALL_OTHERS */
		cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask);
		cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
	} else {
		cpumask_set_cpu(cpu, &nmi_ipi_pending_mask);
	}

	nmi_ipi_unlock();

	/* Interrupts remain hard disabled */

	do_smp_send_nmi_ipi(cpu, safe);

	nmi_ipi_lock();
	/* nmi_ipi_busy is set here, so unlock/lock is okay */
	while (!cpumask_empty(&nmi_ipi_pending_mask)) {
		nmi_ipi_unlock();
		udelay(1);
		nmi_ipi_lock();
		if (delay_us) {
			delay_us--;
			if (!delay_us)
				break;
		}
	}

	if (!cpumask_empty(&nmi_ipi_pending_mask)) {
		/* Timeout waiting for CPUs to call smp_handle_nmi_ipi */
		ret = 0;
		cpumask_clear(&nmi_ipi_pending_mask);
	}

	nmi_ipi_function = NULL;
	nmi_ipi_busy = false;

	nmi_ipi_unlock_end(&flags);

	return ret;
}

int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
{
	return __smp_send_nmi_ipi(cpu, fn, delay_us, false);
}

int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
{
	return __smp_send_nmi_ipi(cpu, fn, delay_us, true);
}
#endif /* CONFIG_NMI_IPI */

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void tick_broadcast(const struct cpumask *mask)
{
	unsigned int cpu;

	for_each_cpu(cpu, mask)
		do_message_pass(cpu, PPC_MSG_TICK_BROADCAST);
}
#endif

#ifdef CONFIG_DEBUGGER
static void debugger_ipi_callback(struct pt_regs *regs)
{
	debugger_ipi(regs);
}

void smp_send_debugger_break(void)
{
	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000);
}
#endif

#ifdef CONFIG_KEXEC_CORE
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
	int cpu;

	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);
	if (kdump_in_progress() && crash_wake_offline) {
		for_each_present_cpu(cpu) {
			if (cpu_online(cpu))
				continue;
			/*
			 * crash_ipi_callback will wait for
			 * all cpus, including offline CPUs.
			 * We don't care about nmi_ipi_function.
			 * Offline cpus will jump straight into
			 * crash_ipi_callback, so we can skip the
			 * entire NMI dance and waiting for
			 * cpus to clear pending mask, etc.
			 */
			do_smp_send_nmi_ipi(cpu, false);
		}
	}
}
#endif

#ifdef CONFIG_NMI_IPI
static void nmi_stop_this_cpu(struct pt_regs *regs)
{
	/*
	 * IRQs are already hard disabled by smp_handle_nmi_ipi().
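	 * Mark this CPU offline, as stop_this_cpu() does below, then spin
	 * forever.
	 */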
	set_cpu_online(smp_processor_id(), false);

	spin_begin();
	while (1)
		spin_cpu_relax();
}

void smp_send_stop(void)
{
	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, nmi_stop_this_cpu, 1000000);
}

#else /* CONFIG_NMI_IPI */

static void stop_this_cpu(void *dummy)
{
	hard_irq_disable();

	/*
	 * Offlining CPUs in stop_this_cpu can result in scheduler warnings
	 * (see commit de6e5d38417e), but printk_safe_flush_on_panic() wants
	 * to know other CPUs are offline before it breaks locks to flush
	 * printk buffers, in case we panic()ed while holding the lock.
	 */
	set_cpu_online(smp_processor_id(), false);

	spin_begin();
	while (1)
		spin_cpu_relax();
}

void smp_send_stop(void)
{
	static bool stopped = false;

	/*
	 * Prevent waiting on csd lock from a previous smp_send_stop.
	 * This is racy, but in general callers try to do the right
	 * thing and only fire off one smp_send_stop (e.g., see
	 * kernel/panic.c)
	 */
	if (stopped)
		return;

	stopped = true;

	smp_call_function(stop_this_cpu, NULL, 0);
}
#endif /* CONFIG_NMI_IPI */

struct task_struct *current_set[NR_CPUS];

static void smp_store_cpu_info(int id)
{
	per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
#ifdef CONFIG_PPC_FSL_BOOK3E
	per_cpu(next_tlbcam_idx, id)
		= (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
#endif
}

/*
 * Relationships between CPUs are maintained in a set of per-cpu cpumasks, so
 * rather than just passing around the cpumask we pass around a function that
 * returns that cpumask for the given CPU.
 */
static void set_cpus_related(int i, int j, struct cpumask *(*get_cpumask)(int))
{
	cpumask_set_cpu(i, get_cpumask(j));
	cpumask_set_cpu(j, get_cpumask(i));
}

#ifdef CONFIG_HOTPLUG_CPU
static void set_cpus_unrelated(int i, int j,
			       struct cpumask *(*get_cpumask)(int))
{
	cpumask_clear_cpu(i, get_cpumask(j));
	cpumask_clear_cpu(j, get_cpumask(i));
}
#endif

/*
 * Extends set_cpus_related. Instead of setting one CPU at a time in
 * dstmask, set srcmask in one shot. dstmask should be a superset of srcmask.
 */
static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int),
				struct cpumask *(*dstmask)(int))
{
	struct cpumask *mask;
	int k;

	mask = srcmask(j);
	for_each_cpu(k, srcmask(i))
		cpumask_or(dstmask(k), dstmask(k), mask);

	if (i == j)
		return;

	mask = srcmask(i);
	for_each_cpu(k, srcmask(j))
		cpumask_or(dstmask(k), dstmask(k), mask);
}

/*
 * parse_thread_groups: Parses the "ibm,thread-groups" device tree
 *                      property for the CPU device node @dn and stores
 *                      the parsed output in the thread_groups_list
 *                      structure @tglp.
 *
 * @dn: The device node of the CPU device.
 * @tglp: Pointer to a thread group list structure into which the parsed
 *        output of "ibm,thread-groups" is stored.
 *
 * ibm,thread-groups[0..N-1] array defines which group of threads in
 * the CPU-device node can be grouped together based on the property.
 *
 * This array can represent thread groupings for multiple properties.
 *
 * ibm,thread-groups[i + 0] tells us the property based on which the
 * threads are being grouped together. If this value is 1, it implies
 * that the threads in the same group share the L1 and translation cache.
 * If the value is 2, it implies that the threads in the same group share
 * the same L2 cache.
 *
 * ibm,thread-groups[i+1] tells us how many such thread groups exist for the
 * property ibm,thread-groups[i]
 *
 * ibm,thread-groups[i+2] tells us the number of threads in each such
 * group.
 * Suppose k = (ibm,thread-groups[i+1] * ibm,thread-groups[i+2]), then,
 *
 * ibm,thread-groups[i+3..i+k+2] is the list of threads identified by
 * "ibm,ppc-interrupt-server#s" arranged as per their membership in
 * the grouping.
 *
 * Example:
 * If "ibm,thread-groups" = [1,2,4,8,10,12,14,9,11,13,15,2,2,4,8,10,12,14,9,11,13,15]
 * This can be decomposed into two consecutive arrays:
 * a) [1,2,4,8,10,12,14,9,11,13,15]
 * b) [2,2,4,8,10,12,14,9,11,13,15]
 *
 * wherein,
 *
 * a) provides information of Property "1" being shared by "2" groups,
 *    each with "4" threads. The "ibm,ppc-interrupt-server#s" of
 *    the first group is {8,10,12,14} and the
 *    "ibm,ppc-interrupt-server#s" of the second group is
 *    {9,11,13,15}. Property "1" is indicative of the threads in the
 *    group sharing L1 cache, translation cache and Instruction Data
 *    flow.
 *
 * b) provides information of Property "2" being shared by "2" groups,
 *    each group with "4" threads. The "ibm,ppc-interrupt-server#s" of
 *    the first group is {8,10,12,14} and the
 *    "ibm,ppc-interrupt-server#s" of the second group is
 *    {9,11,13,15}. Property "2" indicates that the threads in each
 *    group share the L2-cache.
 *
 * Returns 0 on success, -EINVAL if the property does not exist,
 * -ENODATA if property does not have a value, and -EOVERFLOW if the
 * property data isn't large enough.
 */
static int parse_thread_groups(struct device_node *dn,
			       struct thread_groups_list *tglp)
{
	unsigned int property_idx = 0;
	u32 *thread_group_array;
	size_t total_threads;
	int ret = 0, count;
	u32 *thread_list;
	int i = 0;

	count = of_property_count_u32_elems(dn, "ibm,thread-groups");
	thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL);
	ret = of_property_read_u32_array(dn, "ibm,thread-groups",
					 thread_group_array, count);
	if (ret)
		goto out_free;

	while (i < count && property_idx < MAX_THREAD_GROUP_PROPERTIES) {
		int j;
		struct thread_groups *tg = &tglp->property_tgs[property_idx++];

		tg->property = thread_group_array[i];
		tg->nr_groups = thread_group_array[i + 1];
		tg->threads_per_group = thread_group_array[i + 2];
		total_threads = tg->nr_groups * tg->threads_per_group;

		thread_list = &thread_group_array[i + 3];

		for (j = 0; j < total_threads; j++)
			tg->thread_list[j] = thread_list[j];
		i = i + 3 + total_threads;
	}

	tglp->nr_properties = property_idx;

out_free:
	kfree(thread_group_array);
	return ret;
}

/*
 * get_cpu_thread_group_start : Searches the thread group in tg->thread_list
 *                              that @cpu belongs to.
 *
 * @cpu : The logical CPU whose thread group is being searched.
 * @tg : The thread-group structure of the CPU node which @cpu belongs
 *       to.
 *
 * Returns the index to tg->thread_list that points to the start
 * of the thread_group that @cpu belongs to.
 *
 * Returns -1 if cpu doesn't belong to any of the groups pointed to by
 * tg->thread_list.
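 *
 * For instance, with the Property "1" array from the example above
 * (nr_groups = 2, threads_per_group = 4, thread_list = {8,10,12,14,9,11,13,15}),
 * a CPU whose "ibm,ppc-interrupt-server#s" is 11 lies in the second group,
 * so the function returns index 4.
 */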
static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
{
	int hw_cpu_id = get_hard_smp_processor_id(cpu);
	int i, j;

	for (i = 0; i < tg->nr_groups; i++) {
		int group_start = i * tg->threads_per_group;

		for (j = 0; j < tg->threads_per_group; j++) {
			int idx = group_start + j;

			if (tg->thread_list[idx] == hw_cpu_id)
				return group_start;
		}
	}

	return -1;
}

static struct thread_groups *__init get_thread_groups(int cpu,
						      int group_property,
						      int *err)
{
	struct device_node *dn = of_get_cpu_node(cpu, NULL);
	struct thread_groups_list *cpu_tgl = &tgl[cpu];
	struct thread_groups *tg = NULL;
	int i;
	*err = 0;

	if (!dn) {
		*err = -ENODATA;
		return NULL;
	}

	if (!cpu_tgl->nr_properties) {
		*err = parse_thread_groups(dn, cpu_tgl);
		if (*err)
			goto out;
	}

	for (i = 0; i < cpu_tgl->nr_properties; i++) {
		if (cpu_tgl->property_tgs[i].property == group_property) {
			tg = &cpu_tgl->property_tgs[i];
			break;
		}
	}

	if (!tg)
		*err = -EINVAL;
out:
	of_node_put(dn);
	return tg;
}

static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg,
					int cpu, int cpu_group_start)
{
	int first_thread = cpu_first_thread_sibling(cpu);
	int i;

	zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));

	for (i = first_thread; i < first_thread + threads_per_core; i++) {
		int i_group_start = get_cpu_thread_group_start(i, tg);

		if (unlikely(i_group_start == -1)) {
			WARN_ON_ONCE(1);
			return -ENODATA;
		}

		if (i_group_start == cpu_group_start)
			cpumask_set_cpu(i, *mask);
	}

	return 0;
}

static int __init init_thread_group_cache_map(int cpu, int cache_property)
{
	int cpu_group_start = -1, err = 0;
	struct thread_groups *tg = NULL;
	cpumask_var_t *mask = NULL;

	if (cache_property != THREAD_GROUP_SHARE_L1 &&
	    cache_property != THREAD_GROUP_SHARE_L2_L3)
		return -EINVAL;

	tg = get_thread_groups(cpu, cache_property, &err);

	if (!tg)
		return err;

	cpu_group_start = get_cpu_thread_group_start(cpu, tg);

	if (unlikely(cpu_group_start == -1)) {
		WARN_ON_ONCE(1);
		return -ENODATA;
	}

	if (cache_property == THREAD_GROUP_SHARE_L1) {
		mask = &per_cpu(thread_group_l1_cache_map, cpu);
		update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
	} else if (cache_property == THREAD_GROUP_SHARE_L2_L3) {
		mask = &per_cpu(thread_group_l2_cache_map, cpu);
		update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
		mask = &per_cpu(thread_group_l3_cache_map, cpu);
		update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
	}

	return 0;
}

static bool shared_caches;

#ifdef CONFIG_SCHED_SMT
/* cpumask of CPUs with asymmetric SMT dependency */
static int powerpc_smt_flags(void)
{
	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;

	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
		flags |= SD_ASYM_PACKING;
	}
	return flags;
}
#endif

/*
 * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
 * This topology makes it *much* cheaper to migrate tasks between adjacent cores
 * since the migrated task remains cache hot.
 * We want to take advantage of this at the scheduler level so an extra
 * topology level is required.
 */
static int powerpc_shared_cache_flags(void)
{
	return SD_SHARE_PKG_RESOURCES;
}

/*
 * We can't just pass cpu_l2_cache_mask() directly because it returns a
 * non-const pointer and the compiler barfs on that.
 */
static const struct cpumask *shared_cache_mask(int cpu)
{
	return per_cpu(cpu_l2_cache_map, cpu);
}

#ifdef CONFIG_SCHED_SMT
static const struct cpumask *smallcore_smt_mask(int cpu)
{
	return cpu_smallcore_mask(cpu);
}
#endif

static struct cpumask *cpu_coregroup_mask(int cpu)
{
	return per_cpu(cpu_coregroup_map, cpu);
}

static bool has_coregroup_support(void)
{
	return coregroup_enabled;
}

static const struct cpumask *cpu_mc_mask(int cpu)
{
	return cpu_coregroup_mask(cpu);
}

static struct sched_domain_topology_level powerpc_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
#endif
	{ shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
	{ cpu_mc_mask, SD_INIT_NAME(MC) },
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

static int __init init_big_cores(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L1);

		if (err)
			return err;

		zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map, cpu),
					GFP_KERNEL,
					cpu_to_node(cpu));
	}

	has_big_cores = true;

	for_each_possible_cpu(cpu) {
		int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3);

		if (err)
			return err;
	}

	thread_group_shares_l2 = true;
	thread_group_shares_l3 = true;
	pr_debug("L2/L3 cache only shared by the threads in the small core\n");

	return 0;
}

void __init smp_prepare_cpus(unsigned int max_cpus)
{
	unsigned int cpu;

	DBG("smp_prepare_cpus\n");

	/*
	 * setup_cpu may need to be called on the boot cpu. We haven't
	 * spun any cpus up, but let's be paranoid.
	 */
	BUG_ON(boot_cpuid != smp_processor_id());

	/* Fixup boot cpu */
	smp_store_cpu_info(boot_cpuid);
	cpu_callin_map[boot_cpuid] = 1;

	for_each_possible_cpu(cpu) {
		zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
					GFP_KERNEL, cpu_to_node(cpu));
		zalloc_cpumask_var_node(&per_cpu(cpu_l2_cache_map, cpu),
					GFP_KERNEL, cpu_to_node(cpu));
		zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
					GFP_KERNEL, cpu_to_node(cpu));
		if (has_coregroup_support())
			zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map, cpu),
						GFP_KERNEL, cpu_to_node(cpu));

#ifdef CONFIG_NUMA
		/*
		 * numa_node_id() works after this.
		 */
		if (cpu_present(cpu)) {
			set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
			set_cpu_numa_mem(cpu,
				local_memory_node(numa_cpu_lookup_table[cpu]));
		}
#endif
	}

	/* Init the cpumasks so the boot CPU is related to itself */
	cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
	cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
	cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));

	if (has_coregroup_support())
		cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid));

	init_big_cores();
	if (has_big_cores) {
		cpumask_set_cpu(boot_cpuid,
				cpu_smallcore_mask(boot_cpuid));
	}

	if (cpu_to_chip_id(boot_cpuid) != -1) {
		int idx = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);

		/*
		 * All threads of a core belong to the same core, so
		 * chip_id_lookup_table will have one entry per core.
		 * Assumption: if boot_cpuid doesn't have a chip-id, then no
		 * other CPU will have a chip-id either.
		 */
		chip_id_lookup_table = kcalloc(idx, sizeof(int), GFP_KERNEL);
		if (chip_id_lookup_table)
			memset(chip_id_lookup_table, -1, sizeof(int) * idx);
	}

	if (smp_ops && smp_ops->probe)
		smp_ops->probe();
}

void smp_prepare_boot_cpu(void)
{
	BUG_ON(smp_processor_id() != boot_cpuid);
#ifdef CONFIG_PPC64
	paca_ptrs[boot_cpuid]->__current = current;
#endif
	set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
	current_set[boot_cpuid] = current;
}

#ifdef CONFIG_HOTPLUG_CPU

int generic_cpu_disable(void)
{
	unsigned int cpu = smp_processor_id();

	if (cpu == boot_cpuid)
		return -EBUSY;

	set_cpu_online(cpu, false);
#ifdef CONFIG_PPC64
	vdso_data->processorCount--;
#endif
	/* Update affinity of all IRQs previously aimed at this CPU */
	irq_migrate_all_off_this_cpu();

	/*
	 * Depending on the details of the interrupt controller, it's possible
	 * that one of the interrupts we just migrated away from this CPU is
	 * actually already pending on this CPU. If we leave it in that state
	 * the interrupt will never be EOI'ed, and will never fire again. So
	 * temporarily enable interrupts here, to allow any pending interrupt to
	 * be received (and EOI'ed), before we take this CPU offline.
	 */
	local_irq_enable();
	mdelay(1);
	local_irq_disable();

	return 0;
}

void generic_cpu_die(unsigned int cpu)
{
	int i;

	for (i = 0; i < 100; i++) {
		smp_rmb();
		if (is_cpu_dead(cpu))
			return;
		msleep(100);
	}
	printk(KERN_ERR "CPU%d didn't die...\n", cpu);
}

void generic_set_cpu_dead(unsigned int cpu)
{
	per_cpu(cpu_state, cpu) = CPU_DEAD;
}

/*
 * The cpu_state should be set to CPU_UP_PREPARE in kick_cpu(), otherwise
 * the cpu_state is always CPU_DEAD after calling generic_set_cpu_dead(),
 * which makes the delay in generic_cpu_die() not happen.
 */
void generic_set_cpu_up(unsigned int cpu)
{
	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
}

int generic_check_cpu_restart(unsigned int cpu)
{
	return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
}

int is_cpu_dead(unsigned int cpu)
{
	return per_cpu(cpu_state, cpu) == CPU_DEAD;
}

static bool secondaries_inhibited(void)
{
	return kvm_hv_mode_active();
}

#else /* HOTPLUG_CPU */

#define secondaries_inhibited()		0

#endif

static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
{
#ifdef CONFIG_PPC64
	paca_ptrs[cpu]->__current = idle;
	paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
				 THREAD_SIZE - STACK_FRAME_OVERHEAD;
#endif
	idle->cpu = cpu;
	secondary_current = current_set[cpu] = idle;
}

int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	int rc, c;

	/*
	 * Don't allow secondary threads to come online if inhibited
	 */
	if (threads_per_core > 1 && secondaries_inhibited() &&
	    cpu_thread_in_subcore(cpu))
		return -EBUSY;

	if (smp_ops == NULL ||
	    (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
		return -EINVAL;

	cpu_idle_thread_init(cpu, tidle);

	/*
	 * The platform might need to allocate resources prior to bringing
	 * up the CPU
	 */
	if (smp_ops->prepare_cpu) {
		rc = smp_ops->prepare_cpu(cpu);
		if (rc)
			return rc;
	}

	/* Make sure callin-map entry is 0 (it can be left over from a
	 * CPU hotplug).
	 */
	cpu_callin_map[cpu] = 0;

	/* The information for processor bringup must
	 * be written out to main store before we release
	 * the processor.
	 */
	smp_mb();

	/* wake up cpus */
	DBG("smp: kicking cpu %d\n", cpu);
	rc = smp_ops->kick_cpu(cpu);
	if (rc) {
		pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc);
		return rc;
	}

	/*
	 * wait to see if the cpu made a callin (is actually up).
	 * use this value that I found through experimentation.
	 * -- Cort
	 */
	if (system_state < SYSTEM_RUNNING)
		for (c = 50000; c && !cpu_callin_map[cpu]; c--)
			udelay(100);
#ifdef CONFIG_HOTPLUG_CPU
	else
		/*
		 * CPUs can take much longer to come up in the
		 * hotplug case. Wait five seconds.
		 */
		for (c = 5000; c && !cpu_callin_map[cpu]; c--)
			msleep(1);
#endif

	if (!cpu_callin_map[cpu]) {
		printk(KERN_ERR "Processor %u is stuck.\n", cpu);
		return -ENOENT;
	}

	DBG("Processor %u found.\n", cpu);

	if (smp_ops->give_timebase)
		smp_ops->give_timebase();

	/* Wait until cpu puts itself in the online & active maps */
	spin_until_cond(cpu_online(cpu));

	return 0;
}

/* Return the value of the reg property corresponding to the given
 * logical CPU.
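 *
 * Returns -1 if the CPU device node or its "reg" property cannot be found.
 */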
int cpu_to_core_id(int cpu)
{
	struct device_node *np;
	const __be32 *reg;
	int id = -1;

	np = of_get_cpu_node(cpu, NULL);
	if (!np)
		goto out;

	reg = of_get_property(np, "reg", NULL);
	if (!reg)
		goto out;

	id = be32_to_cpup(reg);
out:
	of_node_put(np);
	return id;
}
EXPORT_SYMBOL_GPL(cpu_to_core_id);

/* Helper routines for cpu to core mapping */
int cpu_core_index_of_thread(int cpu)
{
	return cpu >> threads_shift;
}
EXPORT_SYMBOL_GPL(cpu_core_index_of_thread);

int cpu_first_thread_of_core(int core)
{
	return core << threads_shift;
}
EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);

/* Must be called when no change can occur to cpu_present_mask,
 * i.e. during cpu online or offline.
 */
static struct device_node *cpu_to_l2cache(int cpu)
{
	struct device_node *np;
	struct device_node *cache;

	if (!cpu_present(cpu))
		return NULL;

	np = of_get_cpu_node(cpu, NULL);
	if (np == NULL)
		return NULL;

	cache = of_find_next_cache_node(np);

	of_node_put(np);

	return cache;
}

static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
{
	struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
	struct device_node *l2_cache, *np;
	int i;

	if (has_big_cores)
		submask_fn = cpu_smallcore_mask;

	/*
	 * If the threads in a thread-group share L2 cache, then the
	 * L2-mask can be obtained from thread_group_l2_cache_map.
	 */
	if (thread_group_shares_l2) {
		cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu));

		for_each_cpu(i, per_cpu(thread_group_l2_cache_map, cpu)) {
			if (cpu_online(i))
				set_cpus_related(i, cpu, cpu_l2_cache_mask);
		}

		/* Verify that L1-cache siblings are a subset of L2 cache-siblings */
		if (!cpumask_equal(submask_fn(cpu), cpu_l2_cache_mask(cpu)) &&
		    !cpumask_subset(submask_fn(cpu), cpu_l2_cache_mask(cpu))) {
			pr_warn_once("CPU %d : Inconsistent L1 and L2 cache siblings\n",
				     cpu);
		}

		return true;
	}

	l2_cache = cpu_to_l2cache(cpu);
	if (!l2_cache || !*mask) {
		/* Assume only core siblings share cache with this CPU */
		for_each_cpu(i, cpu_sibling_mask(cpu))
			set_cpus_related(cpu, i, cpu_l2_cache_mask);

		return false;
	}

	cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));

	/* Update l2-cache mask with all the CPUs that are part of submask */
	or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);

	/* Skip all CPUs already part of current CPU l2-cache mask */
	cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(cpu));

	for_each_cpu(i, *mask) {
		/*
		 * When updating the masks, the current CPU has not been marked
		 * online, but we still need to update the cache masks.
		 */
		np = cpu_to_l2cache(i);

		/* Skip all CPUs already part of current CPU l2-cache */
		if (np == l2_cache) {
			or_cpumasks_related(cpu, i, submask_fn, cpu_l2_cache_mask);
			cpumask_andnot(*mask, *mask, submask_fn(i));
		} else {
			cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(i));
		}

		of_node_put(np);
	}
	of_node_put(l2_cache);

	return true;
}

#ifdef CONFIG_HOTPLUG_CPU
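/*
 * Undo add_cpu_to_masks() for a CPU going offline: drop it from the
 * sibling, L2-cache, core, smallcore and coregroup masks of every CPU it
 * was related to, and unmap it from its NUMA node.
 */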
static void remove_cpu_from_masks(int cpu)
{
	struct cpumask *(*mask_fn)(int) = cpu_sibling_mask;
	int i;

	unmap_cpu_from_node(cpu);

	if (shared_caches)
		mask_fn = cpu_l2_cache_mask;

	for_each_cpu(i, mask_fn(cpu)) {
		set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
		set_cpus_unrelated(cpu, i, cpu_sibling_mask);
		if (has_big_cores)
			set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
	}

	for_each_cpu(i, cpu_core_mask(cpu))
		set_cpus_unrelated(cpu, i, cpu_core_mask);

	if (has_coregroup_support()) {
		for_each_cpu(i, cpu_coregroup_mask(cpu))
			set_cpus_unrelated(cpu, i, cpu_coregroup_mask);
	}
}
#endif

static inline void add_cpu_to_smallcore_masks(int cpu)
{
	int i;

	if (!has_big_cores)
		return;

	cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));

	for_each_cpu(i, per_cpu(thread_group_l1_cache_map, cpu)) {
		if (cpu_online(i))
			set_cpus_related(i, cpu, cpu_smallcore_mask);
	}
}

static void update_coregroup_mask(int cpu, cpumask_var_t *mask)
{
	struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
	int coregroup_id = cpu_to_coregroup_id(cpu);
	int i;

	if (shared_caches)
		submask_fn = cpu_l2_cache_mask;

	if (!*mask) {
		/* Assume only siblings are part of this CPU's coregroup */
		for_each_cpu(i, submask_fn(cpu))
			set_cpus_related(cpu, i, cpu_coregroup_mask);

		return;
	}

	cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));

	/* Update coregroup mask with all the CPUs that are part of submask */
	or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask);

	/* Skip all CPUs already part of coregroup mask */
	cpumask_andnot(*mask, *mask, cpu_coregroup_mask(cpu));

	for_each_cpu(i, *mask) {
		/* Skip all CPUs not part of this coregroup */
		if (coregroup_id == cpu_to_coregroup_id(i)) {
			or_cpumasks_related(cpu, i, submask_fn, cpu_coregroup_mask);
			cpumask_andnot(*mask, *mask, submask_fn(i));
		} else {
			cpumask_andnot(*mask, *mask, cpu_coregroup_mask(i));
		}
	}
}

static void add_cpu_to_masks(int cpu)
{
	struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
	int first_thread = cpu_first_thread_sibling(cpu);
	cpumask_var_t mask;
	int chip_id = -1;
	bool ret;
	int i;

	/*
	 * This CPU will not be in the online mask yet so we need to manually
	 * add it to its own thread sibling mask.
	 */
	map_cpu_to_node(cpu, cpu_to_node(cpu));
	cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
	cpumask_set_cpu(cpu, cpu_core_mask(cpu));

	for (i = first_thread; i < first_thread + threads_per_core; i++)
		if (cpu_online(i))
			set_cpus_related(i, cpu, cpu_sibling_mask);

	add_cpu_to_smallcore_masks(cpu);

	/* In CPU-hotplug path, hence use GFP_ATOMIC */
	ret = alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu));
	update_mask_by_l2(cpu, &mask);

	if (has_coregroup_support())
		update_coregroup_mask(cpu, &mask);

	if (chip_id_lookup_table && ret)
		chip_id = cpu_to_chip_id(cpu);

	if (shared_caches)
		submask_fn = cpu_l2_cache_mask;

	/* Update core_mask with all the CPUs that are part of submask */
	or_cpumasks_related(cpu, cpu, submask_fn, cpu_core_mask);

	/* Skip all CPUs already part of current CPU core mask */
	cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu));

	/* If chip_id is -1, limit the cpu_core_mask to within the DIE */
	if (chip_id == -1)
		cpumask_and(mask, mask, cpu_cpu_mask(cpu));

	for_each_cpu(i, mask) {
		if (chip_id == cpu_to_chip_id(i)) {
			or_cpumasks_related(cpu, i, submask_fn, cpu_core_mask);
			cpumask_andnot(mask, mask, submask_fn(i));
		} else {
			cpumask_andnot(mask, mask, cpu_core_mask(i));
		}
	}

	free_cpumask_var(mask);
}

/* Activate a secondary processor. */
void start_secondary(void *unused)
{
	unsigned int cpu = raw_smp_processor_id();

	/* PPC64 calls setup_kup() in early_setup_secondary() */
	if (IS_ENABLED(CONFIG_PPC32))
		setup_kup();

	mmgrab(&init_mm);
	current->active_mm = &init_mm;

	smp_store_cpu_info(cpu);
	set_dec(tb_ticks_per_jiffy);
	rcu_cpu_starting(cpu);
	cpu_callin_map[cpu] = 1;

	if (smp_ops->setup_cpu)
		smp_ops->setup_cpu(cpu);
	if (smp_ops->take_timebase)
		smp_ops->take_timebase();

	secondary_cpu_time_init();

#ifdef CONFIG_PPC64
	if (system_state == SYSTEM_RUNNING)
		vdso_data->processorCount++;

	vdso_getcpu_init();
#endif
	set_numa_node(numa_cpu_lookup_table[cpu]);
	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));

	/* Update topology CPU masks */
	add_cpu_to_masks(cpu);

	/*
	 * Check for any shared caches. Note that this must be done on a
	 * per-core basis because one core in the pair might be disabled.
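	 * Caches are considered shared when the L2 mask covers more CPUs
	 * than the (small-)core sibling mask.
	 */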
	if (!shared_caches) {
		struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
		struct cpumask *mask = cpu_l2_cache_mask(cpu);

		if (has_big_cores)
			sibling_mask = cpu_smallcore_mask;

		if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu)))
			shared_caches = true;
	}

	smp_wmb();
	notify_cpu_starting(cpu);
	set_cpu_online(cpu, true);

	boot_init_stack_canary();

	local_irq_enable();

	/* We can enable ftrace for secondary cpus now */
	this_cpu_enable_ftrace();

	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);

	BUG();
}

int setup_profiling_timer(unsigned int multiplier)
{
	return 0;
}

static void fixup_topology(void)
{
	int i;

#ifdef CONFIG_SCHED_SMT
	if (has_big_cores) {
		pr_info("Big cores detected but using small core scheduling\n");
		powerpc_topology[smt_idx].mask = smallcore_smt_mask;
	}
#endif

	if (!has_coregroup_support())
		powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask;

	/*
	 * Try to consolidate topology levels here instead of
	 * allowing the scheduler to degenerate them.
	 * - Don't consolidate if masks are different.
	 * - Don't consolidate if sd_flags exist and are different.
	 */
	for (i = 1; i <= die_idx; i++) {
		if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask)
			continue;

		if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags &&
		    powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags)
			continue;

		if (!powerpc_topology[i - 1].sd_flags)
			powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags;

		powerpc_topology[i].mask = powerpc_topology[i + 1].mask;
		powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags;
#ifdef CONFIG_SCHED_DEBUG
		powerpc_topology[i].name = powerpc_topology[i + 1].name;
#endif
	}
}

void __init smp_cpus_done(unsigned int max_cpus)
{
	/*
	 * We are running pinned to the boot CPU, see rest_init().
	 */
	if (smp_ops && smp_ops->setup_cpu)
		smp_ops->setup_cpu(boot_cpuid);

	if (smp_ops && smp_ops->bringup_done)
		smp_ops->bringup_done();

	dump_numa_cpu_topology();

	fixup_topology();
	set_sched_topology(powerpc_topology);
}

#ifdef CONFIG_HOTPLUG_CPU
int __cpu_disable(void)
{
	int cpu = smp_processor_id();
	int err;

	if (!smp_ops->cpu_disable)
		return -ENOSYS;

	this_cpu_disable_ftrace();

	err = smp_ops->cpu_disable();
	if (err)
		return err;

	/* Update sibling maps */
	remove_cpu_from_masks(cpu);

	return 0;
}

void __cpu_die(unsigned int cpu)
{
	if (smp_ops->cpu_die)
		smp_ops->cpu_die(cpu);
}

void arch_cpu_idle_dead(void)
{
	sched_preempt_enable_no_resched();

	/*
	 * Disable on the down path. This will be re-enabled by
	 * start_secondary() via start_secondary_resume() below
	 */
	this_cpu_disable_ftrace();

	if (smp_ops->cpu_offline_self)
		smp_ops->cpu_offline_self();

	/* If we return, we re-enter start_secondary */
	start_secondary_resume();
}

#endif