// SPDX-License-Identifier: GPL-2.0
/*
 * SMP related functions
 *
 * Copyright IBM Corp. 1999, 2012
 * Author(s): Denis Joseph Barrow,
 *	      Martin Schwidefsky <schwidefsky@de.ibm.com>,
 *	      Heiko Carstens <heiko.carstens@de.ibm.com>,
 *
 * based on other smp stuff by
 *   (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net>
 *   (c) 1998 Ingo Molnar
 *
 * The code outside of smp.c uses logical cpu numbers, only smp.c does
 * the translation of logical to physical cpu ids. All new code that
 * operates on physical cpu numbers needs to go into smp.c.
 */

#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/workqueue.h>
#include <linux/bootmem.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/kernel_stat.h>
#include <linux/kmemleak.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/irqflags.h>
#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/task_stack.h>
#include <linux/crash_dump.h>
#include <linux/memblock.h>
#include <linux/kprobes.h>
#include <asm/asm-offsets.h>
#include <asm/diag.h>
#include <asm/switch_to.h>
#include <asm/facility.h>
#include <asm/ipl.h>
#include <asm/setup.h>
#include <asm/irq.h>
#include <asm/tlbflush.h>
#include <asm/vtimer.h>
#include <asm/lowcore.h>
#include <asm/sclp.h>
#include <asm/vdso.h>
#include <asm/debug.h>
#include <asm/os_info.h>
#include <asm/sigp.h>
#include <asm/idle.h>
#include <asm/nmi.h>
#include "entry.h"

enum {
	ec_schedule = 0,
	ec_call_function_single,
	ec_stop_cpu,
};

enum {
	CPU_STATE_STANDBY,
	CPU_STATE_CONFIGURED,
};

static DEFINE_PER_CPU(struct cpu *, cpu_device);

struct pcpu {
	struct lowcore *lowcore;	/* lowcore page(s) for the cpu */
	unsigned long ec_mask;		/* bit mask for ec_xxx functions */
	unsigned long ec_clk;		/* sigp timestamp for ec_xxx */
	signed char state;		/* physical cpu state */
	signed char polarization;	/* physical polarization */
	u16 address;			/* physical cpu address */
};

static u8 boot_core_type;
static struct pcpu pcpu_devices[NR_CPUS];

unsigned int smp_cpu_mt_shift;
EXPORT_SYMBOL(smp_cpu_mt_shift);

unsigned int smp_cpu_mtid;
EXPORT_SYMBOL(smp_cpu_mtid);

#ifdef CONFIG_CRASH_DUMP
__vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
#endif

static unsigned int smp_max_threads __initdata = -1U;

static int __init early_nosmt(char *s)
{
	smp_max_threads = 1;
	return 0;
}
early_param("nosmt", early_nosmt);

static int __init early_smt(char *s)
{
	get_option(&s, &smp_max_threads);
	return 0;
}
early_param("smt", early_smt);

/*
 * The smp_cpu_state_mutex must be held when changing the state or polarization
 * member of a pcpu data structure within the pcpu_devices array.
 */
DEFINE_MUTEX(smp_cpu_state_mutex);

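/*
 * Illustrative note (not part of the original file): the logical-to-physical
 * translation mentioned in the header comment goes through pcpu_devices[].
 * A hedged sketch of the lookup the sigp helpers below perform, assuming a
 * logical cpu number that is already present:
 *
 *	u16 phys_addr = pcpu_devices[cpu].address;	// physical cpu address
 *	__pcpu_sigp(phys_addr, SIGP_SENSE, 0, NULL);	// order goes to hardware
 *
 * The "smt=" and "nosmt=" early parameters above only cap smp_max_threads;
 * the effective thread count is established later by pcpu_set_smt().
 */
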
/*
 * Signal processor helper functions.
 */
static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm)
{
	int cc;

	while (1) {
		cc = __pcpu_sigp(addr, order, parm, NULL);
		if (cc != SIGP_CC_BUSY)
			return cc;
		cpu_relax();
	}
}

static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm)
{
	int cc, retry;

	for (retry = 0; ; retry++) {
		cc = __pcpu_sigp(pcpu->address, order, parm, NULL);
		if (cc != SIGP_CC_BUSY)
			break;
		if (retry >= 3)
			udelay(10);
	}
	return cc;
}

static inline int pcpu_stopped(struct pcpu *pcpu)
{
	u32 uninitialized_var(status);

	if (__pcpu_sigp(pcpu->address, SIGP_SENSE,
			0, &status) != SIGP_CC_STATUS_STORED)
		return 0;
	return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
}

static inline int pcpu_running(struct pcpu *pcpu)
{
	if (__pcpu_sigp(pcpu->address, SIGP_SENSE_RUNNING,
			0, NULL) != SIGP_CC_STATUS_STORED)
		return 1;
	/* Status stored condition code is equivalent to cpu not running. */
	return 0;
}

/*
 * Find struct pcpu by cpu address.
 */
static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
{
	int cpu;

	for_each_cpu(cpu, mask)
		if (pcpu_devices[cpu].address == address)
			return pcpu_devices + cpu;
	return NULL;
}

static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
{
	int order;

	if (test_and_set_bit(ec_bit, &pcpu->ec_mask))
		return;
	order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL;
	pcpu->ec_clk = get_tod_clock_fast();
	pcpu_sigp_retry(pcpu, order, 0);
}

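/*
 * Illustrative note (not part of the original file): pcpu_ec_call() coalesces
 * requests through ec_mask, so a bit that is already set sends no second sigp.
 * A hedged usage sketch, mirroring what smp_send_reschedule() does further
 * down in this file:
 *
 *	pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
 *
 * A target that is currently executing gets SIGP_EXTERNAL_CALL (ext code
 * 0x1202), one that is not gets SIGP_EMERGENCY_SIGNAL (0x1201); both land in
 * do_ext_call_interrupt().
 */
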
#define ASYNC_FRAME_OFFSET (ASYNC_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
#define PANIC_FRAME_OFFSET (PAGE_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)

static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
{
	unsigned long async_stack, panic_stack;
	struct lowcore *lc;

	if (pcpu != &pcpu_devices[0]) {
		pcpu->lowcore =	(struct lowcore *)
			__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
		async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
		panic_stack = __get_free_page(GFP_KERNEL);
		if (!pcpu->lowcore || !panic_stack || !async_stack)
			goto out;
	} else {
		async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET;
		panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
	}
	lc = pcpu->lowcore;
	memcpy(lc, &S390_lowcore, 512);
	memset((char *) lc + 512, 0, sizeof(*lc) - 512);
	lc->async_stack = async_stack + ASYNC_FRAME_OFFSET;
	lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
	lc->cpu_nr = cpu;
	lc->spinlock_lockval = arch_spin_lockval(cpu);
	lc->spinlock_index = 0;
	if (nmi_alloc_per_cpu(lc))
		goto out;
	if (vdso_alloc_per_cpu(lc))
		goto out_mcesa;
	lowcore_ptr[cpu] = lc;
	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, (u32)(unsigned long) lc);
	return 0;

out_mcesa:
	nmi_free_per_cpu(lc);
out:
	if (pcpu != &pcpu_devices[0]) {
		free_page(panic_stack);
		free_pages(async_stack, ASYNC_ORDER);
		free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
	}
	return -ENOMEM;
}

#ifdef CONFIG_HOTPLUG_CPU

static void pcpu_free_lowcore(struct pcpu *pcpu)
{
	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
	lowcore_ptr[pcpu - pcpu_devices] = NULL;
	vdso_free_per_cpu(pcpu->lowcore);
	nmi_free_per_cpu(pcpu->lowcore);
	if (pcpu == &pcpu_devices[0])
		return;
	free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
	free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER);
	free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
}

#endif /* CONFIG_HOTPLUG_CPU */

static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
{
	struct lowcore *lc = pcpu->lowcore;

	cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
	cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
	lc->cpu_nr = cpu;
	lc->spinlock_lockval = arch_spin_lockval(cpu);
	lc->spinlock_index = 0;
	lc->percpu_offset = __per_cpu_offset[cpu];
	lc->kernel_asce = S390_lowcore.kernel_asce;
	lc->machine_flags = S390_lowcore.machine_flags;
	lc->user_timer = lc->system_timer = lc->steal_timer = 0;
	__ctl_store(lc->cregs_save_area, 0, 15);
	save_access_regs((unsigned int *) lc->access_regs_save_area);
	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
	       MAX_FACILITY_BIT/8);
	arch_spin_lock_setup(cpu);
}

static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
{
	struct lowcore *lc = pcpu->lowcore;

	lc->kernel_stack = (unsigned long) task_stack_page(tsk)
		+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
	lc->current_task = (unsigned long) tsk;
	lc->lpp = LPP_MAGIC;
	lc->current_pid = tsk->pid;
	lc->user_timer = tsk->thread.user_timer;
	lc->guest_timer = tsk->thread.guest_timer;
	lc->system_timer = tsk->thread.system_timer;
	lc->hardirq_timer = tsk->thread.hardirq_timer;
	lc->softirq_timer = tsk->thread.softirq_timer;
	lc->steal_timer = 0;
}

static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
{
	struct lowcore *lc = pcpu->lowcore;

	lc->restart_stack = lc->kernel_stack;
	lc->restart_fn = (unsigned long) func;
	lc->restart_data = (unsigned long) data;
	lc->restart_source = -1UL;
	pcpu_sigp_retry(pcpu, SIGP_RESTART, 0);
}

/*
 * Call function via PSW restart on pcpu and stop the current cpu.
 */
static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
			  void *data, unsigned long stack)
{
	struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
	unsigned long source_cpu = stap();

	__load_psw_mask(PSW_KERNEL_BITS);
	if (pcpu->address == source_cpu)
		func(data);	/* should not return */
	/* Stop target cpu (if func returns this stops the current cpu). */
	pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
	/* Restart func on the target cpu and stop the current cpu. */
	mem_assign_absolute(lc->restart_stack, stack);
	mem_assign_absolute(lc->restart_fn, (unsigned long) func);
	mem_assign_absolute(lc->restart_data, (unsigned long) data);
	mem_assign_absolute(lc->restart_source, source_cpu);
	asm volatile(
		"0:	sigp	0,%0,%2	# sigp restart to target cpu\n"
		"	brc	2,0b	# busy, try again\n"
		"1:	sigp	0,%1,%3	# sigp stop to current cpu\n"
		"	brc	2,1b	# busy, try again\n"
		: : "d" (pcpu->address), "d" (source_cpu),
		    "K" (SIGP_RESTART), "K" (SIGP_STOP)
		: "0", "1", "cc");
	for (;;) ;
}

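/*
 * Illustrative note (not part of the original file): pcpu_delegate() above
 * hands a function over to another cpu via the restart PSW. A hedged summary
 * of the sequence used when the target differs from the current cpu:
 *
 *	1. sigp stop the target cpu;
 *	2. patch restart_stack/fn/data/source in the target's lowcore via
 *	   mem_assign_absolute();
 *	3. sigp restart the target and sigp stop the current cpu.
 *
 * The trailing "for (;;) ;" only covers the window until the stop order for
 * the current cpu takes effect.
 */
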
/*
 * Enable additional logical cpus for multi-threading.
 */
static int pcpu_set_smt(unsigned int mtid)
{
	int cc;

	if (smp_cpu_mtid == mtid)
		return 0;
	cc = __pcpu_sigp(0, SIGP_SET_MULTI_THREADING, mtid, NULL);
	if (cc == 0) {
		smp_cpu_mtid = mtid;
		smp_cpu_mt_shift = 0;
		while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift))
			smp_cpu_mt_shift++;
		pcpu_devices[0].address = stap();
	}
	return cc;
}

/*
 * Call function on an online CPU.
 */
void smp_call_online_cpu(void (*func)(void *), void *data)
{
	struct pcpu *pcpu;

	/* Use the current cpu if it is online. */
	pcpu = pcpu_find_address(cpu_online_mask, stap());
	if (!pcpu)
		/* Use the first online cpu. */
		pcpu = pcpu_devices + cpumask_first(cpu_online_mask);
	pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack);
}

/*
 * Call function on the ipl CPU.
 */
void smp_call_ipl_cpu(void (*func)(void *), void *data)
{
	pcpu_delegate(&pcpu_devices[0], func, data,
		      pcpu_devices->lowcore->panic_stack -
		      PANIC_FRAME_OFFSET + PAGE_SIZE);
}

int smp_find_processor_id(u16 address)
{
	int cpu;

	for_each_present_cpu(cpu)
		if (pcpu_devices[cpu].address == address)
			return cpu;
	return -1;
}

bool arch_vcpu_is_preempted(int cpu)
{
	if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu))
		return false;
	if (pcpu_running(pcpu_devices + cpu))
		return false;
	return true;
}
EXPORT_SYMBOL(arch_vcpu_is_preempted);

void smp_yield_cpu(int cpu)
{
	if (MACHINE_HAS_DIAG9C) {
		diag_stat_inc_norecursion(DIAG_STAT_X09C);
		asm volatile("diag %0,0,0x9c"
			     : : "d" (pcpu_devices[cpu].address));
	} else if (MACHINE_HAS_DIAG44) {
		diag_stat_inc_norecursion(DIAG_STAT_X044);
		asm volatile("diag 0,0,0x44");
	}
}

/*
 * Send cpus emergency shutdown signal. This gives the cpus the
 * opportunity to complete outstanding interrupts.
 */
void notrace smp_emergency_stop(void)
{
	cpumask_t cpumask;
	u64 end;
	int cpu;

	cpumask_copy(&cpumask, cpu_online_mask);
	cpumask_clear_cpu(smp_processor_id(), &cpumask);

	end = get_tod_clock() + (1000000UL << 12);
	for_each_cpu(cpu, &cpumask) {
		struct pcpu *pcpu = pcpu_devices + cpu;
		set_bit(ec_stop_cpu, &pcpu->ec_mask);
		while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
				   0, NULL) == SIGP_CC_BUSY &&
		       get_tod_clock() < end)
			cpu_relax();
	}
	while (get_tod_clock() < end) {
		for_each_cpu(cpu, &cpumask)
			if (pcpu_stopped(pcpu_devices + cpu))
				cpumask_clear_cpu(cpu, &cpumask);
		if (cpumask_empty(&cpumask))
			break;
		cpu_relax();
	}
}
NOKPROBE_SYMBOL(smp_emergency_stop);

/*
 * Stop all cpus but the current one.
 */
void smp_send_stop(void)
{
	int cpu;

	/* Disable all interrupts/machine checks */
	__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
	trace_hardirqs_off();

	debug_set_critical();

	if (oops_in_progress)
		smp_emergency_stop();

	/* stop all processors */
	for_each_online_cpu(cpu) {
		if (cpu == smp_processor_id())
			continue;
		pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0);
		while (!pcpu_stopped(pcpu_devices + cpu))
			cpu_relax();
	}
}

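/*
 * Illustrative note (not part of the original file): the timeout used by
 * smp_emergency_stop() above is expressed in TOD clock units, where bit 51
 * corresponds to one microsecond, so one microsecond equals 1UL << 12 units:
 *
 *	end = get_tod_clock() + (1000000UL << 12);	// roughly one second
 *
 * smp_send_stop() only takes the emergency path when an oops is in progress;
 * otherwise it stops each cpu with sigp and busy-waits until pcpu_stopped()
 * reports the stopped or check-stopped state.
 */
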
/*
 * This is the main routine where commands issued by other
 * cpus are handled.
 */
static void smp_handle_ext_call(void)
{
	unsigned long bits;

	/* handle bit signal external calls */
	bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0);
	if (test_bit(ec_stop_cpu, &bits))
		smp_stop_cpu();
	if (test_bit(ec_schedule, &bits))
		scheduler_ipi();
	if (test_bit(ec_call_function_single, &bits))
		generic_smp_call_function_single_interrupt();
}

static void do_ext_call_interrupt(struct ext_code ext_code,
				  unsigned int param32, unsigned long param64)
{
	inc_irq_stat(ext_code.code == 0x1202 ? IRQEXT_EXC : IRQEXT_EMS);
	smp_handle_ext_call();
}

void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
	int cpu;

	for_each_cpu(cpu, mask)
		pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
}

void arch_send_call_function_single_ipi(int cpu)
{
	pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
}

/*
 * this function sends a 'reschedule' IPI to another CPU.
 * it goes straight through and wastes no time serializing
 * anything. Worst case is that we lose a reschedule ...
 */
void smp_send_reschedule(int cpu)
{
	pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
}

/*
 * parameter area for the set/clear control bit callbacks
 */
struct ec_creg_mask_parms {
	unsigned long orval;
	unsigned long andval;
	int cr;
};

/*
 * callback for setting/clearing control bits
 */
static void smp_ctl_bit_callback(void *info)
{
	struct ec_creg_mask_parms *pp = info;
	unsigned long cregs[16];

	__ctl_store(cregs, 0, 15);
	cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval;
	__ctl_load(cregs, 0, 15);
}

/*
 * Set a bit in a control register of all cpus
 */
void smp_ctl_set_bit(int cr, int bit)
{
	struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr };

	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
}
EXPORT_SYMBOL(smp_ctl_set_bit);

/*
 * Clear a bit in a control register of all cpus
 */
void smp_ctl_clear_bit(int cr, int bit)
{
	struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr };

	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
}
EXPORT_SYMBOL(smp_ctl_clear_bit);

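/*
 * Illustrative usage sketch (not part of the original file): the two helpers
 * above broadcast a control register update to every online cpu. A hedged
 * example with made-up register/bit numbers; which bit is touched depends
 * entirely on the caller:
 *
 *	smp_ctl_set_bit(0, 6);		// OR bit 6 into CR0 on all cpus
 *	smp_ctl_clear_bit(0, 6);	// AND it back out again
 *
 * The update is pushed with on_each_cpu() and waits for completion, so these
 * helpers must not be called with interrupts disabled.
 */
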
#ifdef CONFIG_CRASH_DUMP

int smp_store_status(int cpu)
{
	struct pcpu *pcpu = pcpu_devices + cpu;
	unsigned long pa;

	pa = __pa(&pcpu->lowcore->floating_pt_save_area);
	if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
			      pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
		return -EIO;
	if (!MACHINE_HAS_VX && !MACHINE_HAS_GS)
		return 0;
	pa = __pa(pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK);
	if (MACHINE_HAS_GS)
		pa |= pcpu->lowcore->mcesad & MCESA_LC_MASK;
	if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
			      pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
		return -EIO;
	return 0;
}

/*
 * Collect CPU state of the previous, crashed system.
 * There are four cases:
 * 1) standard zfcp dump
 *    condition: OLDMEM_BASE == NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
 *    The state for all CPUs except the boot CPU needs to be collected
 *    with sigp stop-and-store-status. The boot CPU state is located in
 *    the absolute lowcore of the memory stored in the HSA. The zcore code
 *    will copy the boot CPU state from the HSA.
 * 2) stand-alone kdump for SCSI (zfcp dump with swapped memory)
 *    condition: OLDMEM_BASE != NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
 *    The state for all CPUs except the boot CPU needs to be collected
 *    with sigp stop-and-store-status. The firmware or the boot-loader
 *    stored the registers of the boot CPU in the absolute lowcore in the
 *    memory of the old system.
 * 3) kdump and the old kernel did not store the CPU state,
 *    or stand-alone kdump for DASD
 *    condition: OLDMEM_BASE != NULL && !is_kdump_kernel()
 *    The state for all CPUs except the boot CPU needs to be collected
 *    with sigp stop-and-store-status. The kexec code or the boot-loader
 *    stored the registers of the boot CPU in the memory of the old system.
 * 4) kdump and the old kernel stored the CPU state
 *    condition: OLDMEM_BASE != NULL && is_kdump_kernel()
 *    This case does not exist for s390 anymore, setup_arch explicitly
 *    deactivates the elfcorehdr= kernel parameter
 */
static __init void smp_save_cpu_vxrs(struct save_area *sa, u16 addr,
				     bool is_boot_cpu, unsigned long page)
{
	__vector128 *vxrs = (__vector128 *) page;

	if (is_boot_cpu)
		vxrs = boot_cpu_vector_save_area;
	else
		__pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, page);
	save_area_add_vxrs(sa, vxrs);
}

static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr,
				     bool is_boot_cpu, unsigned long page)
{
	void *regs = (void *) page;

	if (is_boot_cpu)
		copy_oldmem_kernel(regs, (void *) __LC_FPREGS_SAVE_AREA, 512);
	else
		__pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, page);
	save_area_add_regs(sa, regs);
}

void __init smp_save_dump_cpus(void)
{
	int addr, boot_cpu_addr, max_cpu_addr;
	struct save_area *sa;
	unsigned long page;
	bool is_boot_cpu;

	if (!(OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP))
		/* No previous system present, normal boot. */
		return;
	/* Allocate a page as dumping area for the store status sigps */
	page = memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, 1UL << 31);
	/* Set multi-threading state to the previous system. */
	pcpu_set_smt(sclp.mtid_prev);
	boot_cpu_addr = stap();
	max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev;
	for (addr = 0; addr <= max_cpu_addr; addr++) {
		if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) ==
		    SIGP_CC_NOT_OPERATIONAL)
			continue;
		is_boot_cpu = (addr == boot_cpu_addr);
		/* Allocate save area */
		sa = save_area_alloc(is_boot_cpu);
		if (!sa)
			panic("could not allocate memory for save area\n");
		if (MACHINE_HAS_VX)
			/* Get the vector registers */
			smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page);
		/*
		 * For a zfcp dump OLDMEM_BASE == NULL and the registers
		 * of the boot CPU are stored in the HSA. To retrieve
		 * these registers an SCLP request is required which is
		 * done by drivers/s390/char/zcore.c:init_cpu_info()
		 */
		if (!is_boot_cpu || OLDMEM_BASE)
			/* Get the CPU registers */
			smp_save_cpu_regs(sa, addr, is_boot_cpu, page);
	}
	memblock_free(page, PAGE_SIZE);
	diag308_reset();
	pcpu_set_smt(0);
}
#endif /* CONFIG_CRASH_DUMP */

void smp_cpu_set_polarization(int cpu, int val)
{
	pcpu_devices[cpu].polarization = val;
}

int smp_cpu_get_polarization(int cpu)
{
	return pcpu_devices[cpu].polarization;
}

static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
{
	static int use_sigp_detection;
	int address;

	if (use_sigp_detection || sclp_get_core_info(info, early)) {
		use_sigp_detection = 1;
		for (address = 0;
		     address < (SCLP_MAX_CORES << smp_cpu_mt_shift);
		     address += (1U << smp_cpu_mt_shift)) {
			if (__pcpu_sigp_relax(address, SIGP_SENSE, 0) ==
			    SIGP_CC_NOT_OPERATIONAL)
				continue;
			info->core[info->configured].core_id =
				address >> smp_cpu_mt_shift;
			info->configured++;
		}
		info->combined = info->configured;
	}
}

static int smp_add_present_cpu(int cpu);

static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add)
{
	struct pcpu *pcpu;
	cpumask_t avail;
	int cpu, nr, i, j;
	u16 address;

	nr = 0;
	cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
	cpu = cpumask_first(&avail);
	for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
		if (sclp.has_core_type && info->core[i].type != boot_core_type)
			continue;
		address = info->core[i].core_id << smp_cpu_mt_shift;
		for (j = 0; j <= smp_cpu_mtid; j++) {
			if (pcpu_find_address(cpu_present_mask, address + j))
				continue;
			pcpu = pcpu_devices + cpu;
			pcpu->address = address + j;
			pcpu->state =
				(cpu >= info->configured*(smp_cpu_mtid + 1)) ?
				CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
			smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
			set_cpu_present(cpu, true);
			if (sysfs_add && smp_add_present_cpu(cpu) != 0)
				set_cpu_present(cpu, false);
			else
				nr++;
			cpu = cpumask_next(cpu, &avail);
			if (cpu >= nr_cpu_ids)
				break;
		}
	}
	return nr;
}

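/*
 * Illustrative note (not part of the original file): __smp_rescan_cpus() turns
 * an SCLP core_id into per-thread cpu addresses by shifting with
 * smp_cpu_mt_shift. A hedged worked example, assuming SMT-2, i.e.
 * smp_cpu_mtid == 1 and smp_cpu_mt_shift == 1:
 *
 *	core_id 5  ->  address = 5 << 1 = 10
 *	threads    ->  addresses 10 and 11 (j = 0 .. smp_cpu_mtid)
 *
 * Logical cpu numbers are handed out in cpumask order from the still unused
 * part of cpu_possible_mask.
 */
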
void __init smp_detect_cpus(void)
{
	unsigned int cpu, mtid, c_cpus, s_cpus;
	struct sclp_core_info *info;
	u16 address;

	/* Get CPU information */
	info = memblock_virt_alloc(sizeof(*info), 8);
	smp_get_core_info(info, 1);
	/* Find boot CPU type */
	if (sclp.has_core_type) {
		address = stap();
		for (cpu = 0; cpu < info->combined; cpu++)
			if (info->core[cpu].core_id == address) {
				/* The boot cpu dictates the cpu type. */
				boot_core_type = info->core[cpu].type;
				break;
			}
		if (cpu >= info->combined)
			panic("Could not find boot CPU type");
	}

	/* Set multi-threading state for the current system */
	mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp;
	mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1;
	pcpu_set_smt(mtid);

	/* Print number of CPUs */
	c_cpus = s_cpus = 0;
	for (cpu = 0; cpu < info->combined; cpu++) {
		if (sclp.has_core_type &&
		    info->core[cpu].type != boot_core_type)
			continue;
		if (cpu < info->configured)
			c_cpus += smp_cpu_mtid + 1;
		else
			s_cpus += smp_cpu_mtid + 1;
	}
	pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);

	/* Add CPUs present at boot */
	get_online_cpus();
	__smp_rescan_cpus(info, 0);
	put_online_cpus();
	memblock_free_early((unsigned long)info, sizeof(*info));
}

/*
 * Activate a secondary processor.
 */
static void smp_start_secondary(void *cpuvoid)
{
	int cpu = smp_processor_id();

	S390_lowcore.last_update_clock = get_tod_clock();
	S390_lowcore.restart_stack = (unsigned long) restart_stack;
	S390_lowcore.restart_fn = (unsigned long) do_restart;
	S390_lowcore.restart_data = 0;
	S390_lowcore.restart_source = -1UL;
	restore_access_regs(S390_lowcore.access_regs_save_area);
	__ctl_load(S390_lowcore.cregs_save_area, 0, 15);
	__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
	cpu_init();
	preempt_disable();
	init_cpu_timer();
	vtime_init();
	pfault_init();
	notify_cpu_starting(cpu);
	if (topology_cpu_dedicated(cpu))
		set_cpu_flag(CIF_DEDICATED_CPU);
	else
		clear_cpu_flag(CIF_DEDICATED_CPU);
	set_cpu_online(cpu, true);
	inc_irq_stat(CPU_RST);
	local_irq_enable();
	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

/* Upping and downing of CPUs */
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	struct pcpu *pcpu;
	int base, i, rc;

	pcpu = pcpu_devices + cpu;
	if (pcpu->state != CPU_STATE_CONFIGURED)
		return -EIO;
	base = smp_get_base_cpu(cpu);
	for (i = 0; i <= smp_cpu_mtid; i++) {
		if (base + i < nr_cpu_ids)
			if (cpu_online(base + i))
				break;
	}
	/*
	 * If this is the first CPU of the core to get online
	 * do an initial CPU reset.
	 */
	if (i > smp_cpu_mtid &&
	    pcpu_sigp_retry(pcpu_devices + base, SIGP_INITIAL_CPU_RESET, 0) !=
	    SIGP_CC_ORDER_CODE_ACCEPTED)
		return -EIO;

	rc = pcpu_alloc_lowcore(pcpu, cpu);
	if (rc)
		return rc;
	pcpu_prepare_secondary(pcpu, cpu);
	pcpu_attach_task(pcpu, tidle);
	pcpu_start_fn(pcpu, smp_start_secondary, NULL);
	/* Wait until cpu puts itself in the online & active maps */
	while (!cpu_online(cpu))
		cpu_relax();
	return 0;
}

static unsigned int setup_possible_cpus __initdata;

static int __init _setup_possible_cpus(char *s)
{
	get_option(&s, &setup_possible_cpus);
	return 0;
}
early_param("possible_cpus", _setup_possible_cpus);

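/*
 * Illustrative note (not part of the original file): a hedged kernel command
 * line example for the parameters handled here and at the top of this file;
 * the numbers are arbitrary:
 *
 *	possible_cpus=8 nosmt		at most 8 possible cpus, 1 thread/core
 *	smt=2				cap at 2 threads per core
 *
 * "possible_cpus" only limits cpu_possible_mask (see smp_fill_possible_mask()
 * below); whether a cpu actually comes up still depends on SCLP and sysfs
 * configuration.
 */
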
#ifdef CONFIG_HOTPLUG_CPU

int __cpu_disable(void)
{
	unsigned long cregs[16];

	/* Handle possible pending IPIs */
	smp_handle_ext_call();
	set_cpu_online(smp_processor_id(), false);
	/* Disable pseudo page faults on this cpu. */
	pfault_fini();
	/* Disable interrupt sources via control register. */
	__ctl_store(cregs, 0, 15);
	cregs[0] &= ~0x0000ee70UL;	/* disable all external interrupts */
	cregs[6] &= ~0xff000000UL;	/* disable all I/O interrupts */
	cregs[14] &= ~0x1f000000UL;	/* disable most machine checks */
	__ctl_load(cregs, 0, 15);
	clear_cpu_flag(CIF_NOHZ_DELAY);
	return 0;
}

void __cpu_die(unsigned int cpu)
{
	struct pcpu *pcpu;

	/* Wait until target cpu is down */
	pcpu = pcpu_devices + cpu;
	while (!pcpu_stopped(pcpu))
		cpu_relax();
	pcpu_free_lowcore(pcpu);
	cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
	cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
}

void __noreturn cpu_die(void)
{
	idle_task_exit();
	pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
	for (;;) ;
}

#endif /* CONFIG_HOTPLUG_CPU */

void __init smp_fill_possible_mask(void)
{
	unsigned int possible, sclp_max, cpu;

	sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1;
	sclp_max = min(smp_max_threads, sclp_max);
	sclp_max = (sclp.max_cores * sclp_max) ?: nr_cpu_ids;
	possible = setup_possible_cpus ?: nr_cpu_ids;
	possible = min(possible, sclp_max);
	for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++)
		set_cpu_possible(cpu, true);
}

void __init smp_prepare_cpus(unsigned int max_cpus)
{
	/* request the 0x1201 emergency signal external interrupt */
	if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
		panic("Couldn't request external interrupt 0x1201");
	/* request the 0x1202 external call external interrupt */
	if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
		panic("Couldn't request external interrupt 0x1202");
}

void __init smp_prepare_boot_cpu(void)
{
	struct pcpu *pcpu = pcpu_devices;

	WARN_ON(!cpu_present(0) || !cpu_online(0));
	pcpu->state = CPU_STATE_CONFIGURED;
	pcpu->lowcore = (struct lowcore *)(unsigned long) store_prefix();
	S390_lowcore.percpu_offset = __per_cpu_offset[0];
	smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
}

void __init smp_cpus_done(unsigned int max_cpus)
{
}

void __init smp_setup_processor_id(void)
{
	pcpu_devices[0].address = stap();
	S390_lowcore.cpu_nr = 0;
	S390_lowcore.spinlock_lockval = arch_spin_lockval(0);
	S390_lowcore.spinlock_index = 0;
}

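/*
 * Illustrative note (not part of the original file): a hedged worked example
 * for smp_fill_possible_mask() above, with made-up SCLP values:
 *
 *	sclp.mtid = 1, sclp.mtid_cp = 0, smp_max_threads = -1U
 *		-> sclp_max = max(1, 0) + 1 = 2 threads per core
 *	sclp.max_cores = 8
 *		-> sclp_max = 8 * 2 = 16
 *	no "possible_cpus=" given
 *		-> possible = min(nr_cpu_ids, 16), i.e. cpus 0-15 become possible
 */
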
/*
 * the frequency of the profiling timer can be changed
 * by writing a multiplier value into /proc/profile.
 *
 * usually you want to run this on all CPUs ;)
 */
int setup_profiling_timer(unsigned int multiplier)
{
	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
static ssize_t cpu_configure_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}

static ssize_t cpu_configure_store(struct device *dev,
				   struct device_attribute *attr,
				   const char *buf, size_t count)
{
	struct pcpu *pcpu;
	int cpu, val, rc, i;
	char delim;

	if (sscanf(buf, "%d %c", &val, &delim) != 1)
		return -EINVAL;
	if (val != 0 && val != 1)
		return -EINVAL;
	get_online_cpus();
	mutex_lock(&smp_cpu_state_mutex);
	rc = -EBUSY;
	/* disallow configuration changes of online cpus and cpu 0 */
	cpu = dev->id;
	cpu = smp_get_base_cpu(cpu);
	if (cpu == 0)
		goto out;
	for (i = 0; i <= smp_cpu_mtid; i++)
		if (cpu_online(cpu + i))
			goto out;
	pcpu = pcpu_devices + cpu;
	rc = 0;
	switch (val) {
	case 0:
		if (pcpu->state != CPU_STATE_CONFIGURED)
			break;
		rc = sclp_core_deconfigure(pcpu->address >> smp_cpu_mt_shift);
		if (rc)
			break;
		for (i = 0; i <= smp_cpu_mtid; i++) {
			if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
				continue;
			pcpu[i].state = CPU_STATE_STANDBY;
			smp_cpu_set_polarization(cpu + i,
						 POLARIZATION_UNKNOWN);
		}
		topology_expect_change();
		break;
	case 1:
		if (pcpu->state != CPU_STATE_STANDBY)
			break;
		rc = sclp_core_configure(pcpu->address >> smp_cpu_mt_shift);
		if (rc)
			break;
		for (i = 0; i <= smp_cpu_mtid; i++) {
			if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
				continue;
			pcpu[i].state = CPU_STATE_CONFIGURED;
			smp_cpu_set_polarization(cpu + i,
						 POLARIZATION_UNKNOWN);
		}
		topology_expect_change();
		break;
	default:
		break;
	}
out:
	mutex_unlock(&smp_cpu_state_mutex);
	put_online_cpus();
	return rc ? rc : count;
}

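/*
 * Illustrative usage sketch (not part of the original file): the "configure"
 * attribute defined below is exposed per cpu under sysfs. A hedged shell
 * example, assuming cpu 2 is offline and not cpu 0:
 *
 *	echo 0 > /sys/devices/system/cpu/cpu2/configure		put core into standby
 *	echo 1 > /sys/devices/system/cpu/cpu2/configure		configure it again
 *
 * The store handler works on whole cores: all threads of the core change
 * state together, and online cpus as well as cpu 0 are rejected with -EBUSY.
 */
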
static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
#endif /* CONFIG_HOTPLUG_CPU */

static ssize_t show_cpu_address(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);
}
static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);

static struct attribute *cpu_common_attrs[] = {
#ifdef CONFIG_HOTPLUG_CPU
	&dev_attr_configure.attr,
#endif
	&dev_attr_address.attr,
	NULL,
};

static struct attribute_group cpu_common_attr_group = {
	.attrs = cpu_common_attrs,
};

static struct attribute *cpu_online_attrs[] = {
	&dev_attr_idle_count.attr,
	&dev_attr_idle_time_us.attr,
	NULL,
};

static struct attribute_group cpu_online_attr_group = {
	.attrs = cpu_online_attrs,
};

static int smp_cpu_online(unsigned int cpu)
{
	struct device *s = &per_cpu(cpu_device, cpu)->dev;

	return sysfs_create_group(&s->kobj, &cpu_online_attr_group);
}
static int smp_cpu_pre_down(unsigned int cpu)
{
	struct device *s = &per_cpu(cpu_device, cpu)->dev;

	sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
	return 0;
}

static int smp_add_present_cpu(int cpu)
{
	struct device *s;
	struct cpu *c;
	int rc;

	c = kzalloc(sizeof(*c), GFP_KERNEL);
	if (!c)
		return -ENOMEM;
	per_cpu(cpu_device, cpu) = c;
	s = &c->dev;
	c->hotpluggable = 1;
	rc = register_cpu(c, cpu);
	if (rc)
		goto out;
	rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group);
	if (rc)
		goto out_cpu;
	rc = topology_cpu_init(c);
	if (rc)
		goto out_topology;
	return 0;

out_topology:
	sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
out_cpu:
#ifdef CONFIG_HOTPLUG_CPU
	unregister_cpu(c);
#endif
out:
	return rc;
}

#ifdef CONFIG_HOTPLUG_CPU

int __ref smp_rescan_cpus(void)
{
	struct sclp_core_info *info;
	int nr;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return -ENOMEM;
	smp_get_core_info(info, 0);
	get_online_cpus();
	mutex_lock(&smp_cpu_state_mutex);
	nr = __smp_rescan_cpus(info, 1);
	mutex_unlock(&smp_cpu_state_mutex);
	put_online_cpus();
	kfree(info);
	if (nr)
		topology_schedule_update();
	return 0;
}

static ssize_t __ref rescan_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf,
				  size_t count)
{
	int rc;

	rc = smp_rescan_cpus();
	return rc ? rc : count;
}
static DEVICE_ATTR(rescan, 0200, NULL, rescan_store);
#endif /* CONFIG_HOTPLUG_CPU */

static int __init s390_smp_init(void)
{
	int cpu, rc = 0;

#ifdef CONFIG_HOTPLUG_CPU
	rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
	if (rc)
		return rc;
#endif
	for_each_present_cpu(cpu) {
		rc = smp_add_present_cpu(cpu);
		if (rc)
			goto out;
	}

	rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online",
			       smp_cpu_online, smp_cpu_pre_down);
	rc = rc <= 0 ? rc : 0;
out:
	return rc;
}
subsys_initcall(s390_smp_init);
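
/*
 * Illustrative usage sketch (not part of the original file): the write-only
 * "rescan" attribute created above lives at the cpu subsystem level. A hedged
 * shell example:
 *
 *	echo 1 > /sys/devices/system/cpu/rescan
 *
 * Any write triggers smp_rescan_cpus(), which asks SCLP for the current core
 * list and adds newly found cpus as present (but still offline) devices.
 */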