// SPDX-License-Identifier: GPL-2.0
/*
 *  SMP related functions
 *
 *    Copyright IBM Corp. 1999, 2012
 *    Author(s): Denis Joseph Barrow,
 *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
 *
 *  based on other smp stuff by
 *    (c) 1995 Alan Cox, CymruNET Ltd  <alan@cymru.net>
 *    (c) 1998 Ingo Molnar
 *
 * The code outside of smp.c uses logical cpu numbers, only smp.c does
 * the translation of logical to physical cpu ids. All new code that
 * operates on physical cpu numbers needs to go into smp.c.
 */

#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/cpufeature.h>
#include <linux/workqueue.h>
#include <linux/memblock.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/irqflags.h>
#include <linux/irq_work.h>
#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/task_stack.h>
#include <linux/crash_dump.h>
#include <linux/kprobes.h>
#include <asm/access-regs.h>
#include <asm/asm-offsets.h>
#include <asm/machine.h>
#include <asm/ctlreg.h>
#include <asm/pfault.h>
#include <asm/diag.h>
#include <asm/facility.h>
#include <asm/fpu.h>
#include <asm/ipl.h>
#include <asm/setup.h>
#include <asm/irq.h>
#include <asm/tlbflush.h>
#include <asm/vtimer.h>
#include <asm/abs_lowcore.h>
#include <asm/sclp.h>
#include <asm/debug.h>
#include <asm/os_info.h>
#include <asm/sigp.h>
#include <asm/idle.h>
#include <asm/nmi.h>
#include <asm/stacktrace.h>
#include <asm/topology.h>
#include <asm/vdso.h>
#include <asm/maccess.h>
#include "entry.h"

/* Inter-CPU request bits, stored in pcpu->ec_mask and delivered by IPI. */
enum {
	ec_schedule = 0,
	ec_call_function_single,
	ec_stop_cpu,
	ec_mcck_pending,
	ec_irq_work,
};

/* Configuration state of a cpu, as reported/changed via sysfs "configure". */
enum {
	CPU_STATE_STANDBY,
	CPU_STATE_CONFIGURED,
};

static u8 boot_core_type;
DEFINE_PER_CPU(struct pcpu, pcpu_devices);
/*
 * Pointer to the pcpu area of the boot CPU. This is required when a restart
 * interrupt is triggered on an offline CPU. For that case accessing percpu
 * data with the common primitives does not work, since the percpu offset is
 * stored in a non existent lowcore.
 */
static struct pcpu *ipl_pcpu;

unsigned int smp_cpu_mt_shift;
EXPORT_SYMBOL(smp_cpu_mt_shift);

unsigned int smp_cpu_mtid;
EXPORT_SYMBOL(smp_cpu_mtid);

#ifdef CONFIG_CRASH_DUMP
__vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
#endif

/* -1U means "no limit"; overridden by the nosmt= / smt= early parameters. */
static unsigned int smp_max_threads __initdata = -1U;
cpumask_t cpu_setup_mask;

static int __init early_nosmt(char *s)
{
	smp_max_threads = 1;
	return 0;
}
early_param("nosmt", early_nosmt);

static int __init early_smt(char *s)
{
	get_option(&s, &smp_max_threads);
	return 0;
}
early_param("smt", early_smt);

/*
 * The smp_cpu_state_mutex must be held when changing the state or polarization
 * member of a pcpu data structure within the pcpu_devices array.
 */
DEFINE_MUTEX(smp_cpu_state_mutex);

/*
 * Signal processor helper functions.
 */
static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm)
{
	int cc;

	/* Spin until the addressed CPU accepts or rejects the order. */
	while (1) {
		cc = __pcpu_sigp(addr, order, parm, NULL);
		if (cc != SIGP_CC_BUSY)
			return cc;
		cpu_relax();
	}
}

/* Retry a sigp order against a pcpu until the target is no longer busy. */
static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm)
{
	int cc, retry;

	for (retry = 0; ; retry++) {
		cc = __pcpu_sigp(pcpu->address, order, parm, NULL);
		if (cc != SIGP_CC_BUSY)
			break;
		/* Back off a little after a few busy responses. */
		if (retry >= 3)
			udelay(10);
	}
	return cc;
}

/* Return non-zero if the CPU is in stopped or check-stop state. */
static inline int pcpu_stopped(struct pcpu *pcpu)
{
	u32 status;

	if (__pcpu_sigp(pcpu->address, SIGP_SENSE,
			0, &status) != SIGP_CC_STATUS_STORED)
		return 0;
	return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
}

/* Return non-zero if the CPU is currently executing. */
static inline int pcpu_running(struct pcpu *pcpu)
{
	if (__pcpu_sigp(pcpu->address, SIGP_SENSE_RUNNING,
			0, NULL) != SIGP_CC_STATUS_STORED)
		return 1;
	/* Status stored condition code is equivalent to cpu not running. */
	return 0;
}

/*
 * Find struct pcpu by cpu address.
 */
static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
{
	int cpu;

	for_each_cpu(cpu, mask)
		if (per_cpu(pcpu_devices, cpu).address == address)
			return &per_cpu(pcpu_devices, cpu);
	return NULL;
}

/*
 * Post an ec_bit request to a CPU. An external call is used if the target
 * is running, otherwise an emergency signal is sent.
 */
static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
{
	int order;

	/* Bit already pending - the earlier IPI will deliver it. */
	if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
		return;
	order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
	pcpu->ec_clk = get_tod_clock_fast();
	pcpu_sigp_retry(pcpu, order, 0);
}

/* Allocate lowcore and stacks for a CPU that is about to be brought up. */
static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
{
	unsigned long async_stack, nodat_stack, mcck_stack;
	struct lowcore *lc;

	lc = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
	nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
	async_stack = stack_alloc();
	mcck_stack = stack_alloc();
	if (!lc || !nodat_stack || !async_stack || !mcck_stack)
		goto out;
	/* First 512 bytes are copied from the boot CPU, the rest is cleared. */
	memcpy(lc, get_lowcore(), 512);
	memset((char *) lc + 512, 0, sizeof(*lc) - 512);
	lc->async_stack = async_stack + STACK_INIT_OFFSET;
	lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET;
	lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;
	lc->cpu_nr = cpu;
	lc->spinlock_lockval = arch_spin_lockval(cpu);
	lc->spinlock_index = 0;
	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
	lc->preempt_count = PREEMPT_DISABLED;
	if (nmi_alloc_mcesa(&lc->mcesad))
		goto out;
	if (abs_lowcore_map(cpu, lc, true))
		goto out_mcesa;
	lowcore_ptr[cpu] = lc;
	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc));
	return 0;

out_mcesa:
	nmi_free_mcesa(&lc->mcesad);
out:
	stack_free(mcck_stack);
	stack_free(async_stack);
	free_pages(nodat_stack, THREAD_SIZE_ORDER);
	free_pages((unsigned long) lc, LC_ORDER);
	return -ENOMEM;
}

/* Counterpart of pcpu_alloc_lowcore(): release lowcore and stacks. */
static void pcpu_free_lowcore(struct pcpu *pcpu, int cpu)
{
	unsigned long async_stack, nodat_stack, mcck_stack;
	struct lowcore *lc;

	lc = lowcore_ptr[cpu];
	nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET;
	async_stack = lc->async_stack - STACK_INIT_OFFSET;
	mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET;
	/* Reset the prefix before the lowcore is freed. */
	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
	lowcore_ptr[cpu] = NULL;
	abs_lowcore_unmap(cpu);
	nmi_free_mcesa(&lc->mcesad);
	stack_free(async_stack);
	stack_free(mcck_stack);
	free_pages(nodat_stack, THREAD_SIZE_ORDER);
	free_pages((unsigned long) lc, LC_ORDER);
}

/* Initialize the lowcore of a secondary CPU before it is started. */
static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
{
	struct lowcore *lc, *abs_lc;

	lc = lowcore_ptr[cpu];
	cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
	cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
	lc->cpu_nr = cpu;
	lc->pcpu = (unsigned long)pcpu;
	lc->restart_flags = RESTART_FLAG_CTLREGS;
	lc->spinlock_lockval = arch_spin_lockval(cpu);
	lc->spinlock_index = 0;
	lc->percpu_offset = __per_cpu_offset[cpu];
	lc->kernel_asce = get_lowcore()->kernel_asce;
	lc->user_asce = s390_invalid_asce;
	lc->machine_flags = get_lowcore()->machine_flags;
	lc->user_timer = lc->system_timer =
		lc->steal_timer = lc->avg_steal_timer = 0;
	/* Control registers are inherited from the absolute lowcore. */
	abs_lc = get_abs_lowcore();
	memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area));
	put_abs_lowcore(abs_lc);
	lc->cregs_save_area[1] = lc->kernel_asce;
	lc->cregs_save_area[7] = lc->user_asce;
	save_access_regs((unsigned int *) lc->access_regs_save_area);
	arch_spin_lock_setup(cpu);
}

/* Attach the initial task (idle thread) to a CPU's lowcore. */
static void pcpu_attach_task(int cpu, struct task_struct *tsk)
{
	struct lowcore *lc;

	lc = lowcore_ptr[cpu];
	lc->kernel_stack = (unsigned long)task_stack_page(tsk) + STACK_INIT_OFFSET;
	lc->current_task = (unsigned long)tsk;
	lc->lpp = LPP_MAGIC;
	lc->current_pid = tsk->pid;
	lc->user_timer = tsk->thread.user_timer;
	lc->guest_timer = tsk->thread.guest_timer;
	lc->system_timer = tsk->thread.system_timer;
	lc->hardirq_timer = tsk->thread.hardirq_timer;
	lc->softirq_timer = tsk->thread.softirq_timer;
	lc->steal_timer = 0;
}

/* Start func on a (stopped) CPU by means of a restart interrupt. */
static void pcpu_start_fn(int cpu, void (*func)(void *), void *data)
{
	struct lowcore *lc;

	lc = lowcore_ptr[cpu];
	lc->restart_stack = lc->kernel_stack;
	lc->restart_fn = (unsigned long) func;
	lc->restart_data = (unsigned long) data;
	lc->restart_source = -1U;
	pcpu_sigp_retry(per_cpu_ptr(&pcpu_devices, cpu), SIGP_RESTART, 0);
}

typedef void (pcpu_delegate_fn)(void *);

/*
 * Call function via PSW restart on pcpu and stop the current cpu.
 */
static void __pcpu_delegate(pcpu_delegate_fn *func, void *data)
{
	func(data);	/* should not return */
}

static void pcpu_delegate(struct pcpu *pcpu, int cpu,
			  pcpu_delegate_fn *func,
			  void *data, unsigned long stack)
{
	struct lowcore *lc, *abs_lc;
	unsigned int source_cpu;

	lc = lowcore_ptr[cpu];
	source_cpu = stap();

	/* If the target is this CPU simply call func on the new stack. */
	if (pcpu->address == source_cpu) {
		call_on_stack(2, stack, void, __pcpu_delegate,
			      pcpu_delegate_fn *, func, void *, data);
	}
	/* Stop target cpu (if func returns this stops the current cpu). */
	pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
	pcpu_sigp_retry(pcpu, SIGP_CPU_RESET, 0);
	/* Restart func on the target cpu and stop the current cpu. */
	if (lc) {
		lc->restart_stack = stack;
		lc->restart_fn = (unsigned long)func;
		lc->restart_data = (unsigned long)data;
		lc->restart_source = source_cpu;
	} else {
		/* No lowcore mapped for the target - use the absolute one. */
		abs_lc = get_abs_lowcore();
		abs_lc->restart_stack = stack;
		abs_lc->restart_fn = (unsigned long)func;
		abs_lc->restart_data = (unsigned long)data;
		abs_lc->restart_source = source_cpu;
		put_abs_lowcore(abs_lc);
	}
	asm volatile(
		"0:	sigp	0,%0,%2	# sigp restart to target cpu\n"
		"	brc	2,0b	# busy, try again\n"
		"1:	sigp	0,%1,%3	# sigp stop to current cpu\n"
		"	brc	2,1b	# busy, try again\n"
		: : "d" (pcpu->address), "d" (source_cpu),
		    "K" (SIGP_RESTART), "K" (SIGP_STOP)
		: "0", "1", "cc");
	for (;;) ;
}

/*
 * Enable additional logical cpus for multi-threading.
 */
static int pcpu_set_smt(unsigned int mtid)
{
	int cc;

	if (smp_cpu_mtid == mtid)
		return 0;
	cc = __pcpu_sigp(0, SIGP_SET_MULTI_THREADING, mtid, NULL);
	if (cc == 0) {
		smp_cpu_mtid = mtid;
		smp_cpu_mt_shift = 0;
		while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift))
			smp_cpu_mt_shift++;
		/* The cpu address of the boot cpu changes with the mt shift. */
		per_cpu(pcpu_devices, 0).address = stap();
	}
	return cc;
}

/*
 * Call function on the ipl CPU.
 */
void smp_call_ipl_cpu(void (*func)(void *), void *data)
{
	struct lowcore *lc = lowcore_ptr[0];

	if (ipl_pcpu->address == stap())
		lc = get_lowcore();

	pcpu_delegate(ipl_pcpu, 0, func, data, lc->nodat_stack);
}

/* Translate a physical cpu address to a logical cpu number, or -1. */
int smp_find_processor_id(u16 address)
{
	int cpu;

	for_each_present_cpu(cpu)
		if (per_cpu(pcpu_devices, cpu).address == address)
			return cpu;
	return -1;
}

/* Request machine check handling on this CPU via its ec_mask. */
void schedule_mcck_handler(void)
{
	pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_mcck_pending);
}

bool notrace arch_vcpu_is_preempted(int cpu)
{
	if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu))
		return false;
	if (pcpu_running(per_cpu_ptr(&pcpu_devices, cpu)))
		return false;
	return true;
}
EXPORT_SYMBOL(arch_vcpu_is_preempted);

void notrace smp_yield_cpu(int cpu)
{
	if (!machine_has_diag9c())
		return;
	diag_stat_inc_norecursion(DIAG_STAT_X09C);
	asm volatile("diag %0,0,0x9c"
		     : : "d" (per_cpu(pcpu_devices, cpu).address));
}
EXPORT_SYMBOL_GPL(smp_yield_cpu);

/*
 * Send cpus emergency shutdown signal. This gives the cpus the
 * opportunity to complete outstanding interrupts.
 */
void notrace smp_emergency_stop(void)
{
	static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
	static cpumask_t cpumask;
	u64 end;
	int cpu;

	arch_spin_lock(&lock);
	cpumask_copy(&cpumask, cpu_online_mask);
	cpumask_clear_cpu(smp_processor_id(), &cpumask);

	/* Give the other CPUs a bounded amount of TOD time to stop. */
	end = get_tod_clock() + (1000000UL << 12);
	for_each_cpu(cpu, &cpumask) {
		struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
		set_bit(ec_stop_cpu, &pcpu->ec_mask);
		while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
				   0, NULL) == SIGP_CC_BUSY &&
		       get_tod_clock() < end)
			cpu_relax();
	}
	while (get_tod_clock() < end) {
		for_each_cpu(cpu, &cpumask)
			if (pcpu_stopped(per_cpu_ptr(&pcpu_devices, cpu)))
				cpumask_clear_cpu(cpu, &cpumask);
		if (cpumask_empty(&cpumask))
			break;
		cpu_relax();
	}
	arch_spin_unlock(&lock);
}
NOKPROBE_SYMBOL(smp_emergency_stop);

/*
 * Stop all cpus but the current one.
 */
void smp_send_stop(void)
{
	struct pcpu *pcpu;
	int cpu;

	/* Disable all interrupts/machine checks */
	__load_psw_mask(PSW_KERNEL_BITS);
	trace_hardirqs_off();

	debug_set_critical();

	if (oops_in_progress)
		smp_emergency_stop();

	/* stop all processors */
	for_each_online_cpu(cpu) {
		if (cpu == smp_processor_id())
			continue;
		pcpu = per_cpu_ptr(&pcpu_devices, cpu);
		pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
		while (!pcpu_stopped(pcpu))
			cpu_relax();
	}
}

/*
 * This is the main routine where commands issued by other
 * cpus are handled.
 */
static void smp_handle_ext_call(void)
{
	unsigned long bits;

	/* handle bit signal external calls */
	bits = this_cpu_xchg(pcpu_devices.ec_mask, 0);
	if (test_bit(ec_stop_cpu, &bits))
		smp_stop_cpu();
	if (test_bit(ec_schedule, &bits))
		scheduler_ipi();
	if (test_bit(ec_call_function_single, &bits))
		generic_smp_call_function_single_interrupt();
	if (test_bit(ec_mcck_pending, &bits))
		s390_handle_mcck();
	if (test_bit(ec_irq_work, &bits))
		irq_work_run();
}

static void do_ext_call_interrupt(struct ext_code ext_code,
				  unsigned int param32, unsigned long param64)
{
	/* 0x1202 is the external call interrupt, otherwise emergency signal. */
	inc_irq_stat(ext_code.code == 0x1202 ? IRQEXT_EXC : IRQEXT_EMS);
	smp_handle_ext_call();
}

void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
	int cpu;

	for_each_cpu(cpu, mask)
		pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single);
}

void arch_send_call_function_single_ipi(int cpu)
{
	pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single);
}

/*
 * this function sends a 'reschedule' IPI to another CPU.
 * it goes straight through and wastes no time serializing
 * anything. Worst case is that we lose a reschedule ...
 */
void arch_smp_send_reschedule(int cpu)
{
	pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_schedule);
}

#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
{
	pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_irq_work);
}
#endif

#ifdef CONFIG_CRASH_DUMP

/* Store register state of a remote CPU into its lowcore save areas. */
int smp_store_status(int cpu)
{
	struct lowcore *lc;
	struct pcpu *pcpu;
	unsigned long pa;

	pcpu = per_cpu_ptr(&pcpu_devices, cpu);
	lc = lowcore_ptr[cpu];
	pa = __pa(&lc->floating_pt_save_area);
	if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
			      pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
		return -EIO;
	if (!cpu_has_vx() && !cpu_has_gs())
		return 0;
	/* Additional status (vector/guarded storage) goes to the mcesa area. */
	pa = lc->mcesad & MCESA_ORIGIN_MASK;
	if (cpu_has_gs())
		pa |= lc->mcesad & MCESA_LC_MASK;
	if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
			      pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
		return -EIO;
	return 0;
}

/*
 * Collect CPU state of the previous, crashed system.
 * There are three cases:
 * 1) standard zfcp/nvme dump
 *    condition: OLDMEM_BASE == NULL && is_ipl_type_dump() == true
 *    The state for all CPUs except the boot CPU needs to be collected
 *    with sigp stop-and-store-status. The boot CPU state is located in
 *    the absolute lowcore of the memory stored in the HSA. The zcore code
 *    will copy the boot CPU state from the HSA.
 * 2) stand-alone kdump for SCSI/NVMe (zfcp/nvme dump with swapped memory)
 *    condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == true
 *    The state for all CPUs except the boot CPU needs to be collected
 *    with sigp stop-and-store-status. The firmware or the boot-loader
 *    stored the registers of the boot CPU in the absolute lowcore in the
 *    memory of the old system.
 * 3) kdump or stand-alone kdump for DASD
 *    condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == false
 *    The state for all CPUs except the boot CPU needs to be collected
 *    with sigp stop-and-store-status. The kexec code or the boot-loader
 *    stored the registers of the boot CPU in the memory of the old system.
 *
 * Note that the legacy kdump mode where the old kernel stored the CPU states
 * does no longer exist: setup_arch() explicitly deactivates the elfcorehdr=
 * kernel parameter. The is_kdump_kernel() implementation on s390 is independent
 * of the elfcorehdr= parameter.
 */
static bool dump_available(void)
{
	return oldmem_data.start || is_ipl_type_dump();
}

void __init smp_save_dump_ipl_cpu(void)
{
	struct save_area *sa;
	void *regs;

	if (!dump_available())
		return;
	sa = save_area_alloc(true);
	regs = memblock_alloc_or_panic(512, 8);
	copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
	save_area_add_regs(sa, regs);
	memblock_free(regs, 512);
	if (cpu_has_vx())
		save_area_add_vxrs(sa, boot_cpu_vector_save_area);
}

void __init smp_save_dump_secondary_cpus(void)
{
	int addr, boot_cpu_addr, max_cpu_addr;
	struct save_area *sa;
	void *page;

	if (!dump_available())
		return;
	/* Allocate a page as dumping area for the store status sigps */
	page = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
	if (!page)
		panic("ERROR: Failed to allocate %lx bytes below %lx\n",
		      PAGE_SIZE, 1UL << 31);

	/* Set multi-threading state to the previous system.
	 */
	pcpu_set_smt(sclp.mtid_prev);
	boot_cpu_addr = stap();
	max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev;
	for (addr = 0; addr <= max_cpu_addr; addr++) {
		if (addr == boot_cpu_addr)
			continue;
		if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) ==
		    SIGP_CC_NOT_OPERATIONAL)
			continue;
		sa = save_area_alloc(false);
		__pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page));
		save_area_add_regs(sa, page);
		if (cpu_has_vx()) {
			__pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(page));
			save_area_add_vxrs(sa, page);
		}
	}
	memblock_free(page, PAGE_SIZE);
	diag_amode31_ops.diag308_reset();
	pcpu_set_smt(0);
}
#endif /* CONFIG_CRASH_DUMP */

void smp_cpu_set_polarization(int cpu, int val)
{
	per_cpu(pcpu_devices, cpu).polarization = val;
}

int smp_cpu_get_polarization(int cpu)
{
	return per_cpu(pcpu_devices, cpu).polarization;
}

void smp_cpu_set_capacity(int cpu, unsigned long val)
{
	per_cpu(pcpu_devices, cpu).capacity = val;
}

unsigned long smp_cpu_get_capacity(int cpu)
{
	return per_cpu(pcpu_devices, cpu).capacity;
}

/* Set the capacity of all SMT threads belonging to the core of cpu. */
void smp_set_core_capacity(int cpu, unsigned long val)
{
	int i;

	cpu = smp_get_base_cpu(cpu);
	for (i = cpu; (i <= cpu + smp_cpu_mtid) && (i < nr_cpu_ids); i++)
		smp_cpu_set_capacity(i, val);
}

int smp_cpu_get_cpu_address(int cpu)
{
	return per_cpu(pcpu_devices, cpu).address;
}

static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
{
	static int use_sigp_detection;
	int address;

	/* If sclp detection fails once, fall back to sigp sense for good. */
	if (use_sigp_detection || sclp_get_core_info(info, early)) {
		use_sigp_detection = 1;
		for (address = 0;
		     address < (SCLP_MAX_CORES << smp_cpu_mt_shift);
		     address += (1U << smp_cpu_mt_shift)) {
			if (__pcpu_sigp_relax(address, SIGP_SENSE, 0) ==
			    SIGP_CC_NOT_OPERATIONAL)
				continue;
			info->core[info->configured].core_id =
				address >> smp_cpu_mt_shift;
			info->configured++;
		}
		info->combined = info->configured;
	}
}

/*
 * Assign logical CPU numbers to all SMT threads of a core and mark them
 * present. Returns the number of threads successfully added.
 */
static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
			bool configured, bool early)
{
	struct pcpu *pcpu;
	int cpu, nr, i;
	u16 address;

	nr = 0;
	if (sclp.has_core_type && core->type != boot_core_type)
		return nr;
	cpu = cpumask_first(avail);
	address = core->core_id << smp_cpu_mt_shift;
	for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
		if (pcpu_find_address(cpu_present_mask, address + i))
			continue;
		pcpu = per_cpu_ptr(&pcpu_devices, cpu);
		pcpu->address = address + i;
		if (configured)
			pcpu->state = CPU_STATE_CONFIGURED;
		else
			pcpu->state = CPU_STATE_STANDBY;
		smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
		smp_cpu_set_capacity(cpu, CPU_CAPACITY_HIGH);
		set_cpu_present(cpu, true);
		if (!early && arch_register_cpu(cpu))
			set_cpu_present(cpu, false);
		else
			nr++;
		cpumask_clear_cpu(cpu, avail);
		cpu = cpumask_next(cpu, avail);
	}
	return nr;
}

static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
{
	struct sclp_core_entry *core;
	static cpumask_t avail;
	bool configured;
	u16 core_id;
	int nr, i;

	cpus_read_lock();
	mutex_lock(&smp_cpu_state_mutex);
	nr = 0;
	cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
	/*
	 * Add IPL core first (which got logical CPU number 0) to make sure
	 * that all SMT threads get subsequent logical CPU numbers.
	 */
	if (early) {
		core_id = per_cpu(pcpu_devices, 0).address >> smp_cpu_mt_shift;
		for (i = 0; i < info->configured; i++) {
			core = &info->core[i];
			if (core->core_id == core_id) {
				nr += smp_add_core(core, &avail, true, early);
				break;
			}
		}
	}
	for (i = 0; i < info->combined; i++) {
		configured = i < info->configured;
		nr += smp_add_core(&info->core[i], &avail, configured, early);
	}
	mutex_unlock(&smp_cpu_state_mutex);
	cpus_read_unlock();
	return nr;
}

void __init smp_detect_cpus(void)
{
	unsigned int cpu, mtid, c_cpus, s_cpus;
	struct sclp_core_info *info;
	u16 address;

	/* Get CPU information */
	info = memblock_alloc_or_panic(sizeof(*info), 8);
	smp_get_core_info(info, 1);
	/* Find boot CPU type */
	if (sclp.has_core_type) {
		address = stap();
		for (cpu = 0; cpu < info->combined; cpu++)
			if (info->core[cpu].core_id == address) {
				/* The boot cpu dictates the cpu type. */
				boot_core_type = info->core[cpu].type;
				break;
			}
		if (cpu >= info->combined)
			panic("Could not find boot CPU type");
	}

	/* Set multi-threading state for the current system */
	mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp;
	mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
	pcpu_set_smt(mtid);

	/* Print number of CPUs */
	c_cpus = s_cpus = 0;
	for (cpu = 0; cpu < info->combined; cpu++) {
		if (sclp.has_core_type &&
		    info->core[cpu].type != boot_core_type)
			continue;
		if (cpu < info->configured)
			c_cpus += smp_cpu_mtid + 1;
		else
			s_cpus += smp_cpu_mtid + 1;
	}
	pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
	memblock_free(info, sizeof(*info));
}

/*
 * Activate a secondary processor.
 */
static void smp_start_secondary(void *cpuvoid)
{
	struct lowcore *lc = get_lowcore();
	int cpu = raw_smp_processor_id();

	lc->last_update_clock = get_tod_clock();
	lc->restart_stack = (unsigned long)restart_stack;
	lc->restart_fn = (unsigned long)do_restart;
	lc->restart_data = 0;
	lc->restart_source = -1U;
	lc->restart_flags = 0;
	restore_access_regs(lc->access_regs_save_area);
	cpu_init();
	rcutree_report_cpu_starting(cpu);
	init_cpu_timer();
	vtime_init();
	vdso_getcpu_init();
	pfault_init();
	cpumask_set_cpu(cpu, &cpu_setup_mask);
	update_cpu_masks();
	notify_cpu_starting(cpu);
	if (topology_cpu_dedicated(cpu))
		set_cpu_flag(CIF_DEDICATED_CPU);
	else
		clear_cpu_flag(CIF_DEDICATED_CPU);
	set_cpu_online(cpu, true);
	inc_irq_stat(CPU_RST);
	local_irq_enable();
	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

/* Upping and downing of CPUs */
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
	int rc;

	if (pcpu->state != CPU_STATE_CONFIGURED)
		return -EIO;
	if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) !=
	    SIGP_CC_ORDER_CODE_ACCEPTED)
		return -EIO;

	rc = pcpu_alloc_lowcore(pcpu, cpu);
	if (rc)
		return rc;
	/*
	 * Make sure global control register contents do not change
	 * until new CPU has initialized control registers.
	 */
	system_ctlreg_lock();
	pcpu_prepare_secondary(pcpu, cpu);
	pcpu_attach_task(cpu, tidle);
	pcpu_start_fn(cpu, smp_start_secondary, NULL);
	/* Wait until cpu puts itself in the online & active maps */
	while (!cpu_online(cpu))
		cpu_relax();
	system_ctlreg_unlock();
	return 0;
}

static unsigned int setup_possible_cpus __initdata;

static int __init _setup_possible_cpus(char *s)
{
	get_option(&s, &setup_possible_cpus);
	return 0;
}
early_param("possible_cpus", _setup_possible_cpus);

int __cpu_disable(void)
{
	struct ctlreg cregs[16];
	int cpu;

	/* Handle possible pending IPIs */
	smp_handle_ext_call();
	cpu = smp_processor_id();
	set_cpu_online(cpu, false);
	cpumask_clear_cpu(cpu, &cpu_setup_mask);
	update_cpu_masks();
	/* Disable pseudo page faults on this cpu. */
	pfault_fini();
	/* Disable interrupt sources via control register. */
	__local_ctl_store(0, 15, cregs);
	cregs[0].val &= ~0x0000ee70UL;	/* disable all external interrupts */
	cregs[6].val &= ~0xff000000UL;	/* disable all I/O interrupts */
	cregs[14].val &= ~0x1f000000UL;	/* disable most machine checks */
	__local_ctl_load(0, 15, cregs);
	clear_cpu_flag(CIF_NOHZ_DELAY);
	return 0;
}

void __cpu_die(unsigned int cpu)
{
	struct pcpu *pcpu;

	/* Wait until target cpu is down */
	pcpu = per_cpu_ptr(&pcpu_devices, cpu);
	while (!pcpu_stopped(pcpu))
		cpu_relax();
	pcpu_free_lowcore(pcpu, cpu);
	cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
	cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
	pcpu->flags = 0;
}

void __noreturn cpu_die(void)
{
	idle_task_exit();
	pcpu_sigp_retry(this_cpu_ptr(&pcpu_devices), SIGP_STOP, 0);
	for (;;) ;
}

void __init smp_fill_possible_mask(void)
{
	unsigned int possible, sclp_max, cpu;

	sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1;
	sclp_max = min(smp_max_threads, sclp_max);
	sclp_max = (sclp.max_cores * sclp_max) ?: nr_cpu_ids;
	possible = setup_possible_cpus ?: nr_cpu_ids;
	possible = min(possible, sclp_max);
	for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
		set_cpu_possible(cpu, true);
}

void __init smp_prepare_cpus(unsigned int max_cpus)
{
	/* request the 0x1201 emergency signal external interrupt */
	if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
		panic("Couldn't request external interrupt 0x1201");
	system_ctl_set_bit(0, 14);
	/* request the 0x1202 external call external interrupt */
	if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
		panic("Couldn't request external interrupt 0x1202");
	system_ctl_set_bit(0, 13);
	smp_rescan_cpus(true);
}

void __init smp_prepare_boot_cpu(void)
{
	struct lowcore *lc = get_lowcore();

	WARN_ON(!cpu_present(0) || !cpu_online(0));
	lc->percpu_offset = __per_cpu_offset[0];
	ipl_pcpu = per_cpu_ptr(&pcpu_devices, 0);
	ipl_pcpu->state = CPU_STATE_CONFIGURED;
	lc->pcpu = (unsigned long)ipl_pcpu;
	smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
	smp_cpu_set_capacity(0, CPU_CAPACITY_HIGH);
}

void __init smp_setup_processor_id(void)
{
	struct lowcore *lc = get_lowcore();

	lc->cpu_nr = 0;
	per_cpu(pcpu_devices, 0).address = stap();
	lc->spinlock_lockval = arch_spin_lockval(0);
	lc->spinlock_index = 0;
}

/*
 * the frequency of the profiling timer can be changed
 * by writing a multiplier value into /proc/profile.
 *
 * usually you want to run this on all CPUs ;)
 */
int setup_profiling_timer(unsigned int multiplier)
{
	return 0;
}

/* Show the configuration state (0=standby, 1=configured) of a cpu. */
static ssize_t cpu_configure_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sysfs_emit(buf, "%d\n", per_cpu(pcpu_devices, dev->id).state);
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}

/* (De)configure a whole core (all SMT threads) via sclp. */
static ssize_t cpu_configure_store(struct device *dev,
				   struct device_attribute *attr,
				   const char *buf, size_t count)
{
	struct pcpu *pcpu;
	int cpu, val, rc, i;
	char delim;

	if (sscanf(buf, "%d %c", &val, &delim) != 1)
		return -EINVAL;
	if (val != 0 && val != 1)
		return -EINVAL;
	cpus_read_lock();
	mutex_lock(&smp_cpu_state_mutex);
	rc = -EBUSY;
	/* disallow configuration changes of online cpus */
	cpu = dev->id;
	cpu = smp_get_base_cpu(cpu);
	for (i = 0; i <= smp_cpu_mtid; i++)
		if (cpu_online(cpu + i))
			goto out;
	pcpu = per_cpu_ptr(&pcpu_devices, cpu);
	rc = 0;
	switch (val) {
	case 0:
		if (pcpu->state != CPU_STATE_CONFIGURED)
			break;
		rc = sclp_core_deconfigure(pcpu->address >> smp_cpu_mt_shift);
		if (rc)
			break;
		for (i = 0; i <= smp_cpu_mtid; i++) {
			if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
				continue;
			per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_STANDBY;
			smp_cpu_set_polarization(cpu + i,
						 POLARIZATION_UNKNOWN);
		}
		topology_expect_change();
		break;
	case 1:
		if (pcpu->state != CPU_STATE_STANDBY)
			break;
		rc = sclp_core_configure(pcpu->address >> smp_cpu_mt_shift);
		if (rc)
			break;
		for (i = 0; i <= smp_cpu_mtid; i++) {
			if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
				continue;
			per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_CONFIGURED;
			smp_cpu_set_polarization(cpu + i,
						 POLARIZATION_UNKNOWN);
		}
		topology_expect_change();
		break;
	default:
		break;
	}
out:
	mutex_unlock(&smp_cpu_state_mutex);
	cpus_read_unlock();
	return rc ? rc : count;
}
static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);

static ssize_t show_cpu_address(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", per_cpu(pcpu_devices, dev->id).address);
}
static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);

static struct attribute *cpu_common_attrs[] = {
	&dev_attr_configure.attr,
	&dev_attr_address.attr,
	NULL,
};

static struct attribute_group cpu_common_attr_group = {
	.attrs = cpu_common_attrs,
};

static struct attribute *cpu_online_attrs[] = {
	&dev_attr_idle_count.attr,
	&dev_attr_idle_time_us.attr,
	NULL,
};

static struct attribute_group cpu_online_attr_group = {
	.attrs = cpu_online_attrs,
};

/* cpuhp online callback: add the online-only sysfs attributes. */
static int smp_cpu_online(unsigned int cpu)
{
	struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);

	return sysfs_create_group(&c->dev.kobj, &cpu_online_attr_group);
}

/* cpuhp pre-down callback: remove the online-only sysfs attributes. */
static int smp_cpu_pre_down(unsigned int cpu)
{
	struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);

	sysfs_remove_group(&c->dev.kobj, &cpu_online_attr_group);
	return 0;
}

/* Every cpu except the boot cpu (logical 0) is hotpluggable. */
bool arch_cpu_is_hotpluggable(int cpu)
{
	return !!cpu;
}

int arch_register_cpu(int cpu)
{
	struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
	int rc;

	c->hotpluggable = arch_cpu_is_hotpluggable(cpu);
	rc = register_cpu(c, cpu);
	if (rc)
		goto out;
	rc = sysfs_create_group(&c->dev.kobj, &cpu_common_attr_group);
	if (rc)
		goto out_cpu;
	rc = topology_cpu_init(c);
	if (rc)
		goto out_topology;
	return 0;

out_topology:
	sysfs_remove_group(&c->dev.kobj, &cpu_common_attr_group);
out_cpu:
	unregister_cpu(c);
out:
	return rc;
}

int __ref smp_rescan_cpus(bool early)
{
	struct sclp_core_info *info;
	int nr;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return -ENOMEM;
	smp_get_core_info(info, 0);
	nr = __smp_rescan_cpus(info, early);
	kfree(info);
	if (nr)
		topology_schedule_update();
	return 0;
}

static ssize_t __ref rescan_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf,
				  size_t count)
{
	int rc;

	rc = lock_device_hotplug_sysfs();
	if (rc)
		return rc;
	rc = smp_rescan_cpus(false);
	unlock_device_hotplug();
	return rc ? rc : count;
}
static DEVICE_ATTR_WO(rescan);

static int __init s390_smp_init(void)
{
	struct device *dev_root;
	int rc;

	dev_root = bus_get_dev_root(&cpu_subsys);
	if (dev_root) {
		rc = device_create_file(dev_root, &dev_attr_rescan);
		put_device(dev_root);
		if (rc)
			return rc;
	}
	rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online",
			       smp_cpu_online, smp_cpu_pre_down);
	/* CPUHP_AP_ONLINE_DYN returns a positive state number on success. */
	rc = rc <= 0 ? rc : 0;
	return rc;
}
subsys_initcall(s390_smp_init);