// SPDX-License-Identifier: GPL-2.0
/*
 *  SMP related functions
 *
 *    Copyright IBM Corp. 1999, 2012
 *    Author(s): Denis Joseph Barrow,
 *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
 *
 *  based on other smp stuff by
 *    (c) 1995 Alan Cox, CymruNET Ltd  <alan@cymru.net>
 *    (c) 1998 Ingo Molnar
 *
 * The code outside of smp.c uses logical cpu numbers, only smp.c does
 * the translation of logical to physical cpu ids. All new code that
 * operates on physical cpu numbers needs to go into smp.c.
 */

#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/workqueue.h>
#include <linux/memblock.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/irqflags.h>
#include <linux/irq_work.h>
#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/task_stack.h>
#include <linux/crash_dump.h>
#include <linux/kprobes.h>
#include <asm/access-regs.h>
#include <asm/asm-offsets.h>
#include <asm/ctlreg.h>
#include <asm/pfault.h>
#include <asm/diag.h>
#include <asm/facility.h>
#include <asm/fpu.h>
#include <asm/ipl.h>
#include <asm/setup.h>
#include <asm/irq.h>
#include <asm/tlbflush.h>
#include <asm/vtimer.h>
#include <asm/abs_lowcore.h>
#include <asm/sclp.h>
#include <asm/debug.h>
#include <asm/os_info.h>
#include <asm/sigp.h>
#include <asm/idle.h>
#include <asm/nmi.h>
#include <asm/stacktrace.h>
#include <asm/topology.h>
#include <asm/vdso.h>
#include <asm/maccess.h>
#include "entry.h"

/*
 * Bit numbers within struct pcpu.ec_mask. A sender sets the bit and signals
 * the target cpu; smp_handle_ext_call() on the target clears the whole mask
 * and dispatches on each bit that was set.
 */
enum {
	ec_schedule = 0,		/* handled by scheduler_ipi() */
	ec_call_function_single,	/* generic_smp_call_function_single_interrupt() */
	ec_stop_cpu,			/* handled by smp_stop_cpu() */
	ec_mcck_pending,		/* handled by s390_handle_mcck() */
	ec_irq_work,			/* handled by irq_work_run() */
};

/*
 * Values of struct pcpu.state. These are also the numbers shown by and
 * accepted through the per-cpu "configure" sysfs attribute.
 */
enum {
	CPU_STATE_STANDBY,
	CPU_STATE_CONFIGURED,
};

/*
 * Bookkeeping for one logical cpu. Instances live in pcpu_devices[] and are
 * indexed by logical cpu number; the address member holds the physical cpu
 * address used for SIGP orders.
 */
struct pcpu {
	unsigned long ec_mask;		/* bit mask for ec_xxx functions */
	unsigned long ec_clk;		/* sigp timestamp for ec_xxx */
	signed char state;		/* physical cpu state */
	signed char polarization;	/* physical polarization */
	u16 address;			/* physical cpu address */
};

/* Core type of the boot cpu; cores of a different type are not added. */
static u8 boot_core_type;
static struct pcpu pcpu_devices[NR_CPUS];

/* Number of bits by which a core id is shifted to form a cpu address. */
unsigned int smp_cpu_mt_shift;
EXPORT_SYMBOL(smp_cpu_mt_shift);

/* Highest thread id per core that has been enabled via pcpu_set_smt(). */
unsigned int smp_cpu_mtid;
EXPORT_SYMBOL(smp_cpu_mtid);

#ifdef CONFIG_CRASH_DUMP
__vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
#endif

/* Upper limit for threads per core; -1U means "no limit requested". */
static unsigned int smp_max_threads __initdata = -1U;
cpumask_t cpu_setup_mask;

/* "nosmt" kernel parameter: limit every core to a single thread. */
static int __init early_nosmt(char *s)
{
	smp_max_threads = 1;
	return 0;
}
early_param("nosmt", early_nosmt);

/*
 * "smt=<n>" kernel parameter: parse the thread limit into smp_max_threads.
 * NOTE(review): the parsed value is not range checked here.
 */
static int __init early_smt(char *s)
{
	get_option(&s, &smp_max_threads);
	return 0;
}
early_param("smt", early_smt);

/*
 * The smp_cpu_state_mutex must be held when changing the state or polarization
 * member of a pcpu data structure within the pcpu_devices array.
 */
DEFINE_MUTEX(smp_cpu_state_mutex);

/*
 * Signal processor helper functions.
*/
/*
 * Issue SIGP @order with @parm to the cpu at physical address @addr and
 * repeat, with cpu_relax() in between, for as long as the condition code
 * is SIGP_CC_BUSY. Returns the first non-busy condition code.
 */
static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm)
{
	int cc;

	while (1) {
		cc = __pcpu_sigp(addr, order, parm, NULL);
		if (cc != SIGP_CC_BUSY)
			return cc;
		cpu_relax();
	}
}

/*
 * Issue SIGP @order with @parm to @pcpu and retry while the condition code
 * is SIGP_CC_BUSY. The first retries are issued back to back; from the
 * fourth busy result on, each retry is preceded by a 10us delay. There is
 * no upper bound on the number of retries.
 * Returns the first non-busy condition code.
 */
static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm)
{
	int cc, retry;

	for (retry = 0; ; retry++) {
		cc = __pcpu_sigp(pcpu->address, order, parm, NULL);
		if (cc != SIGP_CC_BUSY)
			break;
		if (retry >= 3)
			udelay(10);
	}
	return cc;
}

/*
 * Returns 1 if SIGP SENSE stored a status for @pcpu that has the stopped
 * or the check-stop bit set. Returns 0 otherwise, including the case that
 * no status was stored.
 */
static inline int pcpu_stopped(struct pcpu *pcpu)
{
	u32 status;

	if (__pcpu_sigp(pcpu->address, SIGP_SENSE,
			0, &status) != SIGP_CC_STATUS_STORED)
		return 0;
	return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
}

/*
 * Returns 0 only if SIGP SENSE RUNNING answers with "status stored";
 * every other condition code is reported as running (1).
 */
static inline int pcpu_running(struct pcpu *pcpu)
{
	if (__pcpu_sigp(pcpu->address, SIGP_SENSE_RUNNING,
			0, NULL) != SIGP_CC_STATUS_STORED)
		return 1;
	/* Status stored condition code is equivalent to cpu not running. */
	return 0;
}

/*
 * Find struct pcpu by cpu address.
 * Only cpus contained in @mask are searched; returns NULL if none matches.
 */
static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
{
	int cpu;

	for_each_cpu(cpu, mask)
		if (pcpu_devices[cpu].address == address)
			return pcpu_devices + cpu;
	return NULL;
}

/*
 * Request function @ec_bit on @pcpu: set the bit in the target's ec_mask
 * and signal the target. If the bit was already set nothing is sent.
 * An external call is used when the target is reported as running, an
 * emergency signal otherwise. The time of the request is kept in ec_clk.
 */
static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
{
	int order;

	if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
		return;
	order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
	pcpu->ec_clk = get_tod_clock_fast();
	pcpu_sigp_retry(pcpu, order, 0);
}

/*
 * Allocate and initialize the lowcore plus the nodat, async and mcck stacks
 * for logical cpu @cpu, publish the lowcore in lowcore_ptr[] and set it as
 * prefix of the target cpu.
 * Returns 0 on success or -ENOMEM if any allocation or mapping failed.
 */
static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
{
	unsigned long async_stack, nodat_stack, mcck_stack;
	struct lowcore *lc;

	lc = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
	nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
	async_stack = stack_alloc();
	mcck_stack = stack_alloc();
	/*
	 * All four allocations are attempted before checking; the error path
	 * frees all of them (presumably the free helpers accept a zero value).
	 */
	if (!lc || !nodat_stack || !async_stack || !mcck_stack)
		goto out;
	/* Start from the first 512 bytes of the current lowcore, zero the rest. */
	memcpy(lc, get_lowcore(), 512);
	memset((char *) lc + 512, 0, sizeof(*lc) - 512);
	lc->async_stack = async_stack + STACK_INIT_OFFSET;
	lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET;
	lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;
	lc->cpu_nr = cpu;
	lc->spinlock_lockval = arch_spin_lockval(cpu);
	lc->spinlock_index = 0;
	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
	lc->preempt_count = PREEMPT_DISABLED;
	if (nmi_alloc_mcesa(&lc->mcesad))
		goto out;
	if (abs_lowcore_map(cpu, lc, true))
		goto out_mcesa;
	lowcore_ptr[cpu] = lc;
	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc));
	return 0;

out_mcesa:
	nmi_free_mcesa(&lc->mcesad);
out:
	stack_free(mcck_stack);
	stack_free(async_stack);
	free_pages(nodat_stack, THREAD_SIZE_ORDER);
	free_pages((unsigned long) lc, LC_ORDER);
	return -ENOMEM;
}

/*
 * Undo pcpu_alloc_lowcore() for @pcpu: reset the prefix of the cpu to 0,
 * unpublish the lowcore and free the lowcore and its stacks.
 */
static void pcpu_free_lowcore(struct pcpu *pcpu)
{
	unsigned long async_stack, nodat_stack, mcck_stack;
	struct lowcore *lc;
	int cpu;

	cpu = pcpu - pcpu_devices;
	lc = lowcore_ptr[cpu];
	/* The lowcore stores the initial stack pointers, not the bases. */
	nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET;
	async_stack = lc->async_stack - STACK_INIT_OFFSET;
	mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET;
	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
	lowcore_ptr[cpu] = NULL;
	abs_lowcore_unmap(cpu);
	nmi_free_mcesa(&lc->mcesad);
	stack_free(async_stack);
	stack_free(mcck_stack);
	free_pages(nodat_stack, THREAD_SIZE_ORDER);
	free_pages((unsigned long) lc, LC_ORDER);
}

/*
 * Fill the lowcore of logical cpu @cpu with the state it needs before it is
 * started: cpu number, spinlock value, per-cpu offset, address space
 * control elements, machine flags, control and access register save areas.
 * The cpu is also added to the cpumasks of init_mm.
 */
static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
{
	struct lowcore *lc, *abs_lc;

	lc = lowcore_ptr[cpu];
	cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
	cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
	lc->cpu_nr = cpu;
	lc->restart_flags = RESTART_FLAG_CTLREGS;
	lc->spinlock_lockval = arch_spin_lockval(cpu);
	lc->spinlock_index = 0;
	lc->percpu_offset = __per_cpu_offset[cpu];
	lc->kernel_asce = get_lowcore()->kernel_asce;
	lc->user_asce = s390_invalid_asce;
	lc->machine_flags = get_lowcore()->machine_flags;
	lc->user_timer = lc->system_timer =
		lc->steal_timer = lc->avg_steal_timer = 0;
	/* Take the control register contents from the absolute lowcore ... */
	abs_lc = get_abs_lowcore();
	memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area));
	put_abs_lowcore(abs_lc);
	/* ... but use the address space settings chosen above for cr1 and cr7. */
	lc->cregs_save_area[1] = lc->kernel_asce;
	lc->cregs_save_area[7] = lc->user_asce;
	save_access_regs((unsigned int *) lc->access_regs_save_area);
	arch_spin_lock_setup(cpu);
}

/*
 * Make @tsk the current task in the lowcore of @pcpu: kernel stack, task
 * pointer, pid and the accounting timers are taken from the task.
 */
static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
{
	struct lowcore *lc;
	int cpu;

	cpu = pcpu - pcpu_devices;
	lc = lowcore_ptr[cpu];
	lc->kernel_stack = (unsigned long)task_stack_page(tsk) + STACK_INIT_OFFSET;
	lc->current_task = (unsigned long)tsk;
	lc->lpp = LPP_MAGIC;
	lc->current_pid = tsk->pid;
	lc->user_timer = tsk->thread.user_timer;
	lc->guest_timer = tsk->thread.guest_timer;
	lc->system_timer = tsk->thread.system_timer;
	lc->hardirq_timer = tsk->thread.hardirq_timer;
	lc->softirq_timer = tsk->thread.softirq_timer;
	lc->steal_timer = 0;
}

/*
 * Store @func and @data as restart function in the lowcore of @pcpu, with
 * the kernel stack of the attached task as restart stack, and send
 * SIGP RESTART to the cpu.
 */
static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
{
	struct lowcore *lc;
	int cpu;

	cpu = pcpu - pcpu_devices;
	lc = lowcore_ptr[cpu];
	lc->restart_stack = lc->kernel_stack;
	lc->restart_fn = (unsigned long) func;
	lc->restart_data = (unsigned long) data;
	lc->restart_source = -1U;
	pcpu_sigp_retry(pcpu, SIGP_RESTART, 0);
}

typedef void (pcpu_delegate_fn)(void *);

/*
 * Call function via PSW restart on pcpu and stop the current cpu.
 */
static void __pcpu_delegate(pcpu_delegate_fn *func, void *data)
{
	func(data);	/* should not return */
}

/*
 * Run @func(@data) on @pcpu using @stack and never return.
 *
 * If @pcpu is the calling cpu, @func is called directly on @stack.
 * Otherwise the target is stopped and reset, the restart parameters are
 * written to the lowcore of the target (or to the absolute lowcore if no
 * lowcore is registered for it), the target is restarted and the calling
 * cpu stops itself.
 */
static void pcpu_delegate(struct pcpu *pcpu,
			  pcpu_delegate_fn *func,
			  void *data, unsigned long stack)
{
	struct lowcore *lc, *abs_lc;
	unsigned int source_cpu;

	lc = lowcore_ptr[pcpu - pcpu_devices];
	source_cpu = stap();

	if (pcpu->address == source_cpu) {
		call_on_stack(2, stack, void, __pcpu_delegate,
			      pcpu_delegate_fn *, func, void *, data);
	}
	/* Stop target cpu (if func returns this stops the current cpu). */
	pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
	pcpu_sigp_retry(pcpu, SIGP_CPU_RESET, 0);
	/* Restart func on the target cpu and stop the current cpu. */
	if (lc) {
		lc->restart_stack = stack;
		lc->restart_fn = (unsigned long)func;
		lc->restart_data = (unsigned long)data;
		lc->restart_source = source_cpu;
	} else {
		abs_lc = get_abs_lowcore();
		abs_lc->restart_stack = stack;
		abs_lc->restart_fn = (unsigned long)func;
		abs_lc->restart_data = (unsigned long)data;
		abs_lc->restart_source = source_cpu;
		put_abs_lowcore(abs_lc);
	}
	/* Both orders are repeated for as long as they report busy (cc 2). */
	asm volatile(
		"0: sigp 0,%0,%2 # sigp restart to target cpu\n"
		" brc 2,0b # busy, try again\n"
		"1: sigp 0,%1,%3 # sigp stop to current cpu\n"
		" brc 2,1b # busy, try again\n"
		: : "d" (pcpu->address), "d" (source_cpu),
		    "K" (SIGP_RESTART), "K" (SIGP_STOP)
		: "0", "1", "cc");
	for (;;) ;
}

/*
 * Enable additional logical cpus for multi-threading.
367 */ 368 static int pcpu_set_smt(unsigned int mtid) 369 { 370 int cc; 371 372 if (smp_cpu_mtid == mtid) 373 return 0; 374 cc = __pcpu_sigp(0, SIGP_SET_MULTI_THREADING, mtid, NULL); 375 if (cc == 0) { 376 smp_cpu_mtid = mtid; 377 smp_cpu_mt_shift = 0; 378 while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift)) 379 smp_cpu_mt_shift++; 380 pcpu_devices[0].address = stap(); 381 } 382 return cc; 383 } 384 385 /* 386 * Call function on an online CPU. 387 */ 388 void smp_call_online_cpu(void (*func)(void *), void *data) 389 { 390 struct pcpu *pcpu; 391 392 /* Use the current cpu if it is online. */ 393 pcpu = pcpu_find_address(cpu_online_mask, stap()); 394 if (!pcpu) 395 /* Use the first online cpu. */ 396 pcpu = pcpu_devices + cpumask_first(cpu_online_mask); 397 pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack); 398 } 399 400 /* 401 * Call function on the ipl CPU. 402 */ 403 void smp_call_ipl_cpu(void (*func)(void *), void *data) 404 { 405 struct lowcore *lc = lowcore_ptr[0]; 406 407 if (pcpu_devices[0].address == stap()) 408 lc = get_lowcore(); 409 410 pcpu_delegate(&pcpu_devices[0], func, data, 411 lc->nodat_stack); 412 } 413 414 int smp_find_processor_id(u16 address) 415 { 416 int cpu; 417 418 for_each_present_cpu(cpu) 419 if (pcpu_devices[cpu].address == address) 420 return cpu; 421 return -1; 422 } 423 424 void schedule_mcck_handler(void) 425 { 426 pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending); 427 } 428 429 bool notrace arch_vcpu_is_preempted(int cpu) 430 { 431 if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu)) 432 return false; 433 if (pcpu_running(pcpu_devices + cpu)) 434 return false; 435 return true; 436 } 437 EXPORT_SYMBOL(arch_vcpu_is_preempted); 438 439 void notrace smp_yield_cpu(int cpu) 440 { 441 if (!MACHINE_HAS_DIAG9C) 442 return; 443 diag_stat_inc_norecursion(DIAG_STAT_X09C); 444 asm volatile("diag %0,0,0x9c" 445 : : "d" (pcpu_devices[cpu].address)); 446 } 447 EXPORT_SYMBOL_GPL(smp_yield_cpu); 448 449 /* 450 * Send cpus emergency 
shutdown signal. This gives the cpus the 451 * opportunity to complete outstanding interrupts. 452 */ 453 void notrace smp_emergency_stop(void) 454 { 455 static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; 456 static cpumask_t cpumask; 457 u64 end; 458 int cpu; 459 460 arch_spin_lock(&lock); 461 cpumask_copy(&cpumask, cpu_online_mask); 462 cpumask_clear_cpu(smp_processor_id(), &cpumask); 463 464 end = get_tod_clock() + (1000000UL << 12); 465 for_each_cpu(cpu, &cpumask) { 466 struct pcpu *pcpu = pcpu_devices + cpu; 467 set_bit(ec_stop_cpu, &pcpu->ec_mask); 468 while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL, 469 0, NULL) == SIGP_CC_BUSY && 470 get_tod_clock() < end) 471 cpu_relax(); 472 } 473 while (get_tod_clock() < end) { 474 for_each_cpu(cpu, &cpumask) 475 if (pcpu_stopped(pcpu_devices + cpu)) 476 cpumask_clear_cpu(cpu, &cpumask); 477 if (cpumask_empty(&cpumask)) 478 break; 479 cpu_relax(); 480 } 481 arch_spin_unlock(&lock); 482 } 483 NOKPROBE_SYMBOL(smp_emergency_stop); 484 485 /* 486 * Stop all cpus but the current one. 487 */ 488 void smp_send_stop(void) 489 { 490 int cpu; 491 492 /* Disable all interrupts/machine checks */ 493 __load_psw_mask(PSW_KERNEL_BITS); 494 trace_hardirqs_off(); 495 496 debug_set_critical(); 497 498 if (oops_in_progress) 499 smp_emergency_stop(); 500 501 /* stop all processors */ 502 for_each_online_cpu(cpu) { 503 if (cpu == smp_processor_id()) 504 continue; 505 pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0); 506 while (!pcpu_stopped(pcpu_devices + cpu)) 507 cpu_relax(); 508 } 509 } 510 511 /* 512 * This is the main routine where commands issued by other 513 * cpus are handled. 
514 */ 515 static void smp_handle_ext_call(void) 516 { 517 unsigned long bits; 518 519 /* handle bit signal external calls */ 520 bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0); 521 if (test_bit(ec_stop_cpu, &bits)) 522 smp_stop_cpu(); 523 if (test_bit(ec_schedule, &bits)) 524 scheduler_ipi(); 525 if (test_bit(ec_call_function_single, &bits)) 526 generic_smp_call_function_single_interrupt(); 527 if (test_bit(ec_mcck_pending, &bits)) 528 s390_handle_mcck(); 529 if (test_bit(ec_irq_work, &bits)) 530 irq_work_run(); 531 } 532 533 static void do_ext_call_interrupt(struct ext_code ext_code, 534 unsigned int param32, unsigned long param64) 535 { 536 inc_irq_stat(ext_code.code == 0x1202 ? IRQEXT_EXC : IRQEXT_EMS); 537 smp_handle_ext_call(); 538 } 539 540 void arch_send_call_function_ipi_mask(const struct cpumask *mask) 541 { 542 int cpu; 543 544 for_each_cpu(cpu, mask) 545 pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); 546 } 547 548 void arch_send_call_function_single_ipi(int cpu) 549 { 550 pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); 551 } 552 553 /* 554 * this function sends a 'reschedule' IPI to another CPU. 555 * it goes straight through and wastes no time serializing 556 * anything. Worst case is that we lose a reschedule ... 
557 */ 558 void arch_smp_send_reschedule(int cpu) 559 { 560 pcpu_ec_call(pcpu_devices + cpu, ec_schedule); 561 } 562 563 #ifdef CONFIG_IRQ_WORK 564 void arch_irq_work_raise(void) 565 { 566 pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work); 567 } 568 #endif 569 570 #ifdef CONFIG_CRASH_DUMP 571 572 int smp_store_status(int cpu) 573 { 574 struct lowcore *lc; 575 struct pcpu *pcpu; 576 unsigned long pa; 577 578 pcpu = pcpu_devices + cpu; 579 lc = lowcore_ptr[cpu]; 580 pa = __pa(&lc->floating_pt_save_area); 581 if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, 582 pa) != SIGP_CC_ORDER_CODE_ACCEPTED) 583 return -EIO; 584 if (!cpu_has_vx() && !MACHINE_HAS_GS) 585 return 0; 586 pa = lc->mcesad & MCESA_ORIGIN_MASK; 587 if (MACHINE_HAS_GS) 588 pa |= lc->mcesad & MCESA_LC_MASK; 589 if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, 590 pa) != SIGP_CC_ORDER_CODE_ACCEPTED) 591 return -EIO; 592 return 0; 593 } 594 595 /* 596 * Collect CPU state of the previous, crashed system. 597 * There are four cases: 598 * 1) standard zfcp/nvme dump 599 * condition: OLDMEM_BASE == NULL && is_ipl_type_dump() == true 600 * The state for all CPUs except the boot CPU needs to be collected 601 * with sigp stop-and-store-status. The boot CPU state is located in 602 * the absolute lowcore of the memory stored in the HSA. The zcore code 603 * will copy the boot CPU state from the HSA. 604 * 2) stand-alone kdump for SCSI/NVMe (zfcp/nvme dump with swapped memory) 605 * condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == true 606 * The state for all CPUs except the boot CPU needs to be collected 607 * with sigp stop-and-store-status. The firmware or the boot-loader 608 * stored the registers of the boot CPU in the absolute lowcore in the 609 * memory of the old system. 
610 * 3) kdump and the old kernel did not store the CPU state, 611 * or stand-alone kdump for DASD 612 * condition: OLDMEM_BASE != NULL && !is_kdump_kernel() 613 * The state for all CPUs except the boot CPU needs to be collected 614 * with sigp stop-and-store-status. The kexec code or the boot-loader 615 * stored the registers of the boot CPU in the memory of the old system. 616 * 4) kdump and the old kernel stored the CPU state 617 * condition: OLDMEM_BASE != NULL && is_kdump_kernel() 618 * This case does not exist for s390 anymore, setup_arch explicitly 619 * deactivates the elfcorehdr= kernel parameter 620 */ 621 static bool dump_available(void) 622 { 623 return oldmem_data.start || is_ipl_type_dump(); 624 } 625 626 void __init smp_save_dump_ipl_cpu(void) 627 { 628 struct save_area *sa; 629 void *regs; 630 631 if (!dump_available()) 632 return; 633 sa = save_area_alloc(true); 634 regs = memblock_alloc(512, 8); 635 if (!sa || !regs) 636 panic("could not allocate memory for boot CPU save area\n"); 637 copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512); 638 save_area_add_regs(sa, regs); 639 memblock_free(regs, 512); 640 if (cpu_has_vx()) 641 save_area_add_vxrs(sa, boot_cpu_vector_save_area); 642 } 643 644 void __init smp_save_dump_secondary_cpus(void) 645 { 646 int addr, boot_cpu_addr, max_cpu_addr; 647 struct save_area *sa; 648 void *page; 649 650 if (!dump_available()) 651 return; 652 /* Allocate a page as dumping area for the store status sigps */ 653 page = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); 654 if (!page) 655 panic("ERROR: Failed to allocate %lx bytes below %lx\n", 656 PAGE_SIZE, 1UL << 31); 657 658 /* Set multi-threading state to the previous system. 
*/ 659 pcpu_set_smt(sclp.mtid_prev); 660 boot_cpu_addr = stap(); 661 max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev; 662 for (addr = 0; addr <= max_cpu_addr; addr++) { 663 if (addr == boot_cpu_addr) 664 continue; 665 if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) == 666 SIGP_CC_NOT_OPERATIONAL) 667 continue; 668 sa = save_area_alloc(false); 669 if (!sa) 670 panic("could not allocate memory for save area\n"); 671 __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page)); 672 save_area_add_regs(sa, page); 673 if (cpu_has_vx()) { 674 __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(page)); 675 save_area_add_vxrs(sa, page); 676 } 677 } 678 memblock_free(page, PAGE_SIZE); 679 diag_amode31_ops.diag308_reset(); 680 pcpu_set_smt(0); 681 } 682 #endif /* CONFIG_CRASH_DUMP */ 683 684 void smp_cpu_set_polarization(int cpu, int val) 685 { 686 pcpu_devices[cpu].polarization = val; 687 } 688 689 int smp_cpu_get_polarization(int cpu) 690 { 691 return pcpu_devices[cpu].polarization; 692 } 693 694 int smp_cpu_get_cpu_address(int cpu) 695 { 696 return pcpu_devices[cpu].address; 697 } 698 699 static void __ref smp_get_core_info(struct sclp_core_info *info, int early) 700 { 701 static int use_sigp_detection; 702 int address; 703 704 if (use_sigp_detection || sclp_get_core_info(info, early)) { 705 use_sigp_detection = 1; 706 for (address = 0; 707 address < (SCLP_MAX_CORES << smp_cpu_mt_shift); 708 address += (1U << smp_cpu_mt_shift)) { 709 if (__pcpu_sigp_relax(address, SIGP_SENSE, 0) == 710 SIGP_CC_NOT_OPERATIONAL) 711 continue; 712 info->core[info->configured].core_id = 713 address >> smp_cpu_mt_shift; 714 info->configured++; 715 } 716 info->combined = info->configured; 717 } 718 } 719 720 static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, 721 bool configured, bool early) 722 { 723 struct pcpu *pcpu; 724 int cpu, nr, i; 725 u16 address; 726 727 nr = 0; 728 if (sclp.has_core_type && core->type != boot_core_type) 729 return nr; 730 cpu = 
cpumask_first(avail); 731 address = core->core_id << smp_cpu_mt_shift; 732 for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) { 733 if (pcpu_find_address(cpu_present_mask, address + i)) 734 continue; 735 pcpu = pcpu_devices + cpu; 736 pcpu->address = address + i; 737 if (configured) 738 pcpu->state = CPU_STATE_CONFIGURED; 739 else 740 pcpu->state = CPU_STATE_STANDBY; 741 smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); 742 set_cpu_present(cpu, true); 743 if (!early && arch_register_cpu(cpu)) 744 set_cpu_present(cpu, false); 745 else 746 nr++; 747 cpumask_clear_cpu(cpu, avail); 748 cpu = cpumask_next(cpu, avail); 749 } 750 return nr; 751 } 752 753 static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) 754 { 755 struct sclp_core_entry *core; 756 static cpumask_t avail; 757 bool configured; 758 u16 core_id; 759 int nr, i; 760 761 cpus_read_lock(); 762 mutex_lock(&smp_cpu_state_mutex); 763 nr = 0; 764 cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); 765 /* 766 * Add IPL core first (which got logical CPU number 0) to make sure 767 * that all SMT threads get subsequent logical CPU numbers. 
768 */ 769 if (early) { 770 core_id = pcpu_devices[0].address >> smp_cpu_mt_shift; 771 for (i = 0; i < info->configured; i++) { 772 core = &info->core[i]; 773 if (core->core_id == core_id) { 774 nr += smp_add_core(core, &avail, true, early); 775 break; 776 } 777 } 778 } 779 for (i = 0; i < info->combined; i++) { 780 configured = i < info->configured; 781 nr += smp_add_core(&info->core[i], &avail, configured, early); 782 } 783 mutex_unlock(&smp_cpu_state_mutex); 784 cpus_read_unlock(); 785 return nr; 786 } 787 788 void __init smp_detect_cpus(void) 789 { 790 unsigned int cpu, mtid, c_cpus, s_cpus; 791 struct sclp_core_info *info; 792 u16 address; 793 794 /* Get CPU information */ 795 info = memblock_alloc(sizeof(*info), 8); 796 if (!info) 797 panic("%s: Failed to allocate %zu bytes align=0x%x\n", 798 __func__, sizeof(*info), 8); 799 smp_get_core_info(info, 1); 800 /* Find boot CPU type */ 801 if (sclp.has_core_type) { 802 address = stap(); 803 for (cpu = 0; cpu < info->combined; cpu++) 804 if (info->core[cpu].core_id == address) { 805 /* The boot cpu dictates the cpu type. */ 806 boot_core_type = info->core[cpu].type; 807 break; 808 } 809 if (cpu >= info->combined) 810 panic("Could not find boot CPU type"); 811 } 812 813 /* Set multi-threading state for the current system */ 814 mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp; 815 mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1; 816 pcpu_set_smt(mtid); 817 818 /* Print number of CPUs */ 819 c_cpus = s_cpus = 0; 820 for (cpu = 0; cpu < info->combined; cpu++) { 821 if (sclp.has_core_type && 822 info->core[cpu].type != boot_core_type) 823 continue; 824 if (cpu < info->configured) 825 c_cpus += smp_cpu_mtid + 1; 826 else 827 s_cpus += smp_cpu_mtid + 1; 828 } 829 pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus); 830 memblock_free(info, sizeof(*info)); 831 } 832 833 /* 834 * Activate a secondary processor. 
*/
/*
 * First function executed on a freshly restarted secondary cpu. It resets
 * the restart parameters in the lowcore to do_restart, initializes the
 * per-cpu subsystems, marks the cpu online, enables interrupts and enters
 * the idle loop via cpu_startup_entry(). @cpuvoid is unused.
 */
static void smp_start_secondary(void *cpuvoid)
{
	struct lowcore *lc = get_lowcore();
	int cpu = raw_smp_processor_id();

	lc->last_update_clock = get_tod_clock();
	lc->restart_stack = (unsigned long)restart_stack;
	lc->restart_fn = (unsigned long)do_restart;
	lc->restart_data = 0;
	lc->restart_source = -1U;
	lc->restart_flags = 0;
	restore_access_regs(lc->access_regs_save_area);
	cpu_init();
	rcutree_report_cpu_starting(cpu);
	init_cpu_timer();
	vtime_init();
	vdso_getcpu_init();
	pfault_init();
	cpumask_set_cpu(cpu, &cpu_setup_mask);
	update_cpu_masks();
	notify_cpu_starting(cpu);
	if (topology_cpu_dedicated(cpu))
		set_cpu_flag(CIF_DEDICATED_CPU);
	else
		clear_cpu_flag(CIF_DEDICATED_CPU);
	/* __cpu_up() on the initiating cpu waits for this. */
	set_cpu_online(cpu, true);
	inc_irq_stat(CPU_RST);
	local_irq_enable();
	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

/* Upping and downing of CPUs */
/*
 * Bring logical cpu @cpu online with @tidle as its idle task.
 * Returns -EIO if the cpu is not in configured state or does not accept
 * the initial cpu reset, the error of pcpu_alloc_lowcore() if the lowcore
 * cannot be set up, and 0 once the cpu has marked itself online.
 */
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	struct pcpu *pcpu = pcpu_devices + cpu;
	int rc;

	if (pcpu->state != CPU_STATE_CONFIGURED)
		return -EIO;
	if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) !=
	    SIGP_CC_ORDER_CODE_ACCEPTED)
		return -EIO;

	rc = pcpu_alloc_lowcore(pcpu, cpu);
	if (rc)
		return rc;
	/*
	 * Make sure global control register contents do not change
	 * until new CPU has initialized control registers.
	 */
	system_ctlreg_lock();
	pcpu_prepare_secondary(pcpu, cpu);
	pcpu_attach_task(pcpu, tidle);
	pcpu_start_fn(pcpu, smp_start_secondary, NULL);
	/* Wait until cpu puts itself in the online & active maps */
	while (!cpu_online(cpu))
		cpu_relax();
	system_ctlreg_unlock();
	return 0;
}

/* Value of the "possible_cpus=" kernel parameter; 0 if not given. */
static unsigned int setup_possible_cpus __initdata;

static int __init _setup_possible_cpus(char *s)
{
	get_option(&s, &setup_possible_cpus);
	return 0;
}
early_param("possible_cpus", _setup_possible_cpus);

/*
 * Take the calling cpu out of service: process pending requests, clear the
 * cpu from the online and setup masks, disable pseudo page faults and mask
 * interrupt sources in control registers 0, 6 and 14. Always returns 0.
 */
int __cpu_disable(void)
{
	struct ctlreg cregs[16];
	int cpu;

	/* Handle possible pending IPIs */
	smp_handle_ext_call();
	cpu = smp_processor_id();
	set_cpu_online(cpu, false);
	cpumask_clear_cpu(cpu, &cpu_setup_mask);
	update_cpu_masks();
	/* Disable pseudo page faults on this cpu. */
	pfault_fini();
	/* Disable interrupt sources via control register. */
	__local_ctl_store(0, 15, cregs);
	cregs[0].val &= ~0x0000ee70UL;	/* disable all external interrupts */
	cregs[6].val &= ~0xff000000UL;	/* disable all I/O interrupts */
	cregs[14].val &= ~0x1f000000UL;	/* disable most machine checks */
	__local_ctl_load(0, 15, cregs);
	clear_cpu_flag(CIF_NOHZ_DELAY);
	return 0;
}

/*
 * Called on a surviving cpu: wait until @cpu has stopped, then free its
 * lowcore and remove it from the cpumasks of init_mm.
 */
void __cpu_die(unsigned int cpu)
{
	struct pcpu *pcpu;

	/* Wait until target cpu is down */
	pcpu = pcpu_devices + cpu;
	while (!pcpu_stopped(pcpu))
		cpu_relax();
	pcpu_free_lowcore(pcpu);
	cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
	cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
}

/* Called on the dying cpu itself: leave the idle task and stop. */
void __noreturn cpu_die(void)
{
	idle_task_exit();
	pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
	for (;;) ;
}

/*
 * Mark cpus 0..n-1 as possible, where n is the smallest of nr_cpu_ids,
 * the "possible_cpus=" parameter (if given) and the number of cores
 * reported by sclp times the allowed number of threads per core (if that
 * product is not zero).
 */
void __init smp_fill_possible_mask(void)
{
	unsigned int possible, sclp_max, cpu;

	sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1;
	sclp_max = min(smp_max_threads, sclp_max);
	sclp_max = (sclp.max_cores * sclp_max) ?: nr_cpu_ids;
	possible = setup_possible_cpus ?: nr_cpu_ids;
	possible = min(possible, sclp_max);
	for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
		set_cpu_possible(cpu, true);
}

/*
 * Register the handlers for emergency signal and external call, enable
 * both via bits 14 and 13 of control register 0 and scan for cpus.
 * @max_cpus is unused.
 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
	if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
		panic("Couldn't request external interrupt 0x1201");
	system_ctl_set_bit(0, 14);
	if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
		panic("Couldn't request external interrupt 0x1202");
	system_ctl_set_bit(0, 13);
	smp_rescan_cpus(true);
}

/* Finish the setup of logical cpu 0, which is expected to be online. */
void __init smp_prepare_boot_cpu(void)
{
	struct pcpu *pcpu = pcpu_devices;

	WARN_ON(!cpu_present(0) || !cpu_online(0));
	pcpu->state = CPU_STATE_CONFIGURED;
	get_lowcore()->percpu_offset = __per_cpu_offset[0];
	smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
}

/*
 * Make the calling cpu logical cpu 0: record its physical address and
 * initialize cpu number and spinlock values in its lowcore.
 */
void __init smp_setup_processor_id(void)
{
	struct lowcore *lc = get_lowcore();

	pcpu_devices[0].address = stap();
	lc->cpu_nr = 0;
	lc->spinlock_lockval = arch_spin_lockval(0);
	lc->spinlock_index = 0;
}

/*
 * the frequency of the profiling timer can be changed
 * by writing a multiplier value into /proc/profile.
996 * 997 * usually you want to run this on all CPUs ;) 998 */ 999 int setup_profiling_timer(unsigned int multiplier) 1000 { 1001 return 0; 1002 } 1003 1004 static ssize_t cpu_configure_show(struct device *dev, 1005 struct device_attribute *attr, char *buf) 1006 { 1007 ssize_t count; 1008 1009 mutex_lock(&smp_cpu_state_mutex); 1010 count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state); 1011 mutex_unlock(&smp_cpu_state_mutex); 1012 return count; 1013 } 1014 1015 static ssize_t cpu_configure_store(struct device *dev, 1016 struct device_attribute *attr, 1017 const char *buf, size_t count) 1018 { 1019 struct pcpu *pcpu; 1020 int cpu, val, rc, i; 1021 char delim; 1022 1023 if (sscanf(buf, "%d %c", &val, &delim) != 1) 1024 return -EINVAL; 1025 if (val != 0 && val != 1) 1026 return -EINVAL; 1027 cpus_read_lock(); 1028 mutex_lock(&smp_cpu_state_mutex); 1029 rc = -EBUSY; 1030 /* disallow configuration changes of online cpus */ 1031 cpu = dev->id; 1032 cpu = smp_get_base_cpu(cpu); 1033 for (i = 0; i <= smp_cpu_mtid; i++) 1034 if (cpu_online(cpu + i)) 1035 goto out; 1036 pcpu = pcpu_devices + cpu; 1037 rc = 0; 1038 switch (val) { 1039 case 0: 1040 if (pcpu->state != CPU_STATE_CONFIGURED) 1041 break; 1042 rc = sclp_core_deconfigure(pcpu->address >> smp_cpu_mt_shift); 1043 if (rc) 1044 break; 1045 for (i = 0; i <= smp_cpu_mtid; i++) { 1046 if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) 1047 continue; 1048 pcpu[i].state = CPU_STATE_STANDBY; 1049 smp_cpu_set_polarization(cpu + i, 1050 POLARIZATION_UNKNOWN); 1051 } 1052 topology_expect_change(); 1053 break; 1054 case 1: 1055 if (pcpu->state != CPU_STATE_STANDBY) 1056 break; 1057 rc = sclp_core_configure(pcpu->address >> smp_cpu_mt_shift); 1058 if (rc) 1059 break; 1060 for (i = 0; i <= smp_cpu_mtid; i++) { 1061 if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) 1062 continue; 1063 pcpu[i].state = CPU_STATE_CONFIGURED; 1064 smp_cpu_set_polarization(cpu + i, 1065 POLARIZATION_UNKNOWN); 1066 } 1067 topology_expect_change(); 
1068 break; 1069 default: 1070 break; 1071 } 1072 out: 1073 mutex_unlock(&smp_cpu_state_mutex); 1074 cpus_read_unlock(); 1075 return rc ? rc : count; 1076 } 1077 static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); 1078 1079 static ssize_t show_cpu_address(struct device *dev, 1080 struct device_attribute *attr, char *buf) 1081 { 1082 return sprintf(buf, "%d\n", pcpu_devices[dev->id].address); 1083 } 1084 static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); 1085 1086 static struct attribute *cpu_common_attrs[] = { 1087 &dev_attr_configure.attr, 1088 &dev_attr_address.attr, 1089 NULL, 1090 }; 1091 1092 static struct attribute_group cpu_common_attr_group = { 1093 .attrs = cpu_common_attrs, 1094 }; 1095 1096 static struct attribute *cpu_online_attrs[] = { 1097 &dev_attr_idle_count.attr, 1098 &dev_attr_idle_time_us.attr, 1099 NULL, 1100 }; 1101 1102 static struct attribute_group cpu_online_attr_group = { 1103 .attrs = cpu_online_attrs, 1104 }; 1105 1106 static int smp_cpu_online(unsigned int cpu) 1107 { 1108 struct cpu *c = &per_cpu(cpu_devices, cpu); 1109 1110 return sysfs_create_group(&c->dev.kobj, &cpu_online_attr_group); 1111 } 1112 1113 static int smp_cpu_pre_down(unsigned int cpu) 1114 { 1115 struct cpu *c = &per_cpu(cpu_devices, cpu); 1116 1117 sysfs_remove_group(&c->dev.kobj, &cpu_online_attr_group); 1118 return 0; 1119 } 1120 1121 bool arch_cpu_is_hotpluggable(int cpu) 1122 { 1123 return !!cpu; 1124 } 1125 1126 int arch_register_cpu(int cpu) 1127 { 1128 struct cpu *c = &per_cpu(cpu_devices, cpu); 1129 int rc; 1130 1131 c->hotpluggable = arch_cpu_is_hotpluggable(cpu); 1132 rc = register_cpu(c, cpu); 1133 if (rc) 1134 goto out; 1135 rc = sysfs_create_group(&c->dev.kobj, &cpu_common_attr_group); 1136 if (rc) 1137 goto out_cpu; 1138 rc = topology_cpu_init(c); 1139 if (rc) 1140 goto out_topology; 1141 return 0; 1142 1143 out_topology: 1144 sysfs_remove_group(&c->dev.kobj, &cpu_common_attr_group); 1145 out_cpu: 1146 unregister_cpu(c); 
1147 out: 1148 return rc; 1149 } 1150 1151 int __ref smp_rescan_cpus(bool early) 1152 { 1153 struct sclp_core_info *info; 1154 int nr; 1155 1156 info = kzalloc(sizeof(*info), GFP_KERNEL); 1157 if (!info) 1158 return -ENOMEM; 1159 smp_get_core_info(info, 0); 1160 nr = __smp_rescan_cpus(info, early); 1161 kfree(info); 1162 if (nr) 1163 topology_schedule_update(); 1164 return 0; 1165 } 1166 1167 static ssize_t __ref rescan_store(struct device *dev, 1168 struct device_attribute *attr, 1169 const char *buf, 1170 size_t count) 1171 { 1172 int rc; 1173 1174 rc = lock_device_hotplug_sysfs(); 1175 if (rc) 1176 return rc; 1177 rc = smp_rescan_cpus(false); 1178 unlock_device_hotplug(); 1179 return rc ? rc : count; 1180 } 1181 static DEVICE_ATTR_WO(rescan); 1182 1183 static int __init s390_smp_init(void) 1184 { 1185 struct device *dev_root; 1186 int rc; 1187 1188 dev_root = bus_get_dev_root(&cpu_subsys); 1189 if (dev_root) { 1190 rc = device_create_file(dev_root, &dev_attr_rescan); 1191 put_device(dev_root); 1192 if (rc) 1193 return rc; 1194 } 1195 rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online", 1196 smp_cpu_online, smp_cpu_pre_down); 1197 rc = rc <= 0 ? rc : 0; 1198 return rc; 1199 } 1200 subsys_initcall(s390_smp_init); 1201