xref: /linux/arch/s390/kernel/smp.c (revision e839ca528718e68cad32a307dc9aabf01ef3eb05)
1 /*
2  *  SMP related functions
3  *
4  *    Copyright IBM Corp. 1999,2012
5  *    Author(s): Denis Joseph Barrow,
6  *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
7  *		 Heiko Carstens <heiko.carstens@de.ibm.com>,
8  *
9  *  based on other smp stuff by
10  *    (c) 1995 Alan Cox, CymruNET Ltd  <alan@cymru.net>
11  *    (c) 1998 Ingo Molnar
12  *
13  * The code outside of smp.c uses logical cpu numbers, only smp.c does
14  * the translation of logical to physical cpu ids. All new code that
15  * operates on physical cpu numbers needs to go into smp.c.
16  */
17 
18 #define KMSG_COMPONENT "cpu"
19 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
20 
21 #include <linux/workqueue.h>
22 #include <linux/module.h>
23 #include <linux/init.h>
24 #include <linux/mm.h>
25 #include <linux/err.h>
26 #include <linux/spinlock.h>
27 #include <linux/kernel_stat.h>
28 #include <linux/delay.h>
29 #include <linux/interrupt.h>
30 #include <linux/irqflags.h>
31 #include <linux/cpu.h>
32 #include <linux/slab.h>
33 #include <linux/crash_dump.h>
34 #include <asm/asm-offsets.h>
35 #include <asm/ipl.h>
36 #include <asm/setup.h>
37 #include <asm/irq.h>
38 #include <asm/tlbflush.h>
39 #include <asm/timer.h>
40 #include <asm/lowcore.h>
41 #include <asm/sclp.h>
42 #include <asm/vdso.h>
43 #include <asm/debug.h>
44 #include <asm/os_info.h>
45 #include "entry.h"
46 
/*
 * Order codes handed to the sigp helpers below as their "order" argument.
 */
enum {
	sigp_sense = 1,
	sigp_external_call = 2,
	sigp_emergency_signal = 3,
	sigp_start = 4,
	sigp_stop = 5,
	sigp_restart = 6,
	sigp_stop_and_store_status = 9,
	sigp_initial_cpu_reset = 11,
	sigp_cpu_reset = 12,
	sigp_set_prefix = 13,
	sigp_store_status_at_address = 14,
	sigp_store_extended_status_at_address = 15,
	sigp_set_architecture = 18,
	sigp_conditional_emergency_signal = 19,
	sigp_sense_running = 21,
};
64 
/*
 * Condition codes returned by __pcpu_sigp() and the helpers built on it.
 * Only sigp_status_stored makes __pcpu_sigp() write back a status word.
 */
enum {
	sigp_order_code_accepted = 0,
	sigp_status_stored = 1,
	sigp_busy = 2,
	sigp_not_operational = 3,
};
71 
/*
 * Bit numbers within pcpu->ec_mask. A sender sets the bit before signalling
 * the target cpu; do_ext_call_interrupt() fetches and clears the whole mask
 * and dispatches on the bits that were set.
 */
enum {
	ec_schedule = 0,
	ec_call_function,
	ec_call_function_single,
	ec_stop_cpu,
};
78 
/*
 * Values of pcpu->state. Only cpus in CPU_STATE_CONFIGURED are accepted by
 * __cpu_up(); the value is also what the "configure" sysfs attribute prints.
 */
enum {
	CPU_STATE_STANDBY,
	CPU_STATE_CONFIGURED,
};
83 
/*
 * Per-cpu bookkeeping kept by this file. There is one entry per logical
 * cpu number in pcpu_devices[]; the entry ties the logical number to the
 * physical cpu address used for sigp.
 */
struct pcpu {
	struct cpu cpu;
	struct task_struct *idle;	/* idle process for the cpu */
	struct _lowcore *lowcore;	/* lowcore page(s) for the cpu */
	unsigned long async_stack;	/* async stack for the cpu */
	unsigned long panic_stack;	/* panic stack for the cpu */
	unsigned long ec_mask;		/* bit mask for ec_xxx functions */
	int state;			/* physical cpu state */
	u32 status;			/* last status received via sigp */
	u16 address;			/* physical cpu address */
};
95 
/* cpu type of the boot cpu; cpus of a different type are skipped on scan */
static u8 boot_cpu_type;
/* physical address of the boot cpu, recorded in smp_prepare_boot_cpu() */
static u16 boot_cpu_address;
/* per-cpu state, indexed by logical cpu number; entry 0 is the boot cpu */
static struct pcpu pcpu_devices[NR_CPUS];

/* held around reads and updates of pcpu->state from the sysfs/rescan paths */
DEFINE_MUTEX(smp_cpu_state_mutex);
101 
102 /*
103  * Signal processor helper functions.
104  */
/*
 * Issue one sigp instruction.
 *
 * @addr:   cpu address operand of the instruction
 * @order:  order code (see the sigp_* order constants)
 * @parm:   parameter; placed in register 1 before the instruction runs
 * @status: may be NULL; otherwise receives the content of register 1
 *          after the instruction, but only when the condition code is 1
 *
 * Returns the condition code, obtained via ipm and a right shift by 28.
 * Callers compare it against the sigp condition code constants.
 */
static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status)
{
	/* parm is bound to register 1, which sigp also uses for the status */
	register unsigned int reg1 asm ("1") = parm;
	int cc;

	asm volatile(
		"	sigp	%1,%2,0(%3)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc");
	/* cc 1 == sigp_status_stored: only then is reg1 handed back */
	if (status && cc == 1)
		*status = reg1;
	return cc;
}
119 
120 static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status)
121 {
122 	int cc;
123 
124 	while (1) {
125 		cc = __pcpu_sigp(addr, order, parm, status);
126 		if (cc != sigp_busy)
127 			return cc;
128 		cpu_relax();
129 	}
130 }
131 
132 static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm)
133 {
134 	int cc, retry;
135 
136 	for (retry = 0; ; retry++) {
137 		cc = __pcpu_sigp(pcpu->address, order, parm, &pcpu->status);
138 		if (cc != sigp_busy)
139 			break;
140 		if (retry >= 3)
141 			udelay(10);
142 	}
143 	return cc;
144 }
145 
146 static inline int pcpu_stopped(struct pcpu *pcpu)
147 {
148 	if (__pcpu_sigp(pcpu->address, sigp_sense,
149 			0, &pcpu->status) != sigp_status_stored)
150 		return 0;
151 	/* Check for stopped and check stop state */
152 	return !!(pcpu->status & 0x50);
153 }
154 
155 static inline int pcpu_running(struct pcpu *pcpu)
156 {
157 	if (__pcpu_sigp(pcpu->address, sigp_sense_running,
158 			0, &pcpu->status) != sigp_status_stored)
159 		return 1;
160 	/* Check for running status */
161 	return !(pcpu->status & 0x400);
162 }
163 
164 /*
165  * Find struct pcpu by cpu address.
166  */
167 static struct pcpu *pcpu_find_address(const struct cpumask *mask, int address)
168 {
169 	int cpu;
170 
171 	for_each_cpu(cpu, mask)
172 		if (pcpu_devices[cpu].address == address)
173 			return pcpu_devices + cpu;
174 	return NULL;
175 }
176 
177 static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
178 {
179 	int order;
180 
181 	set_bit(ec_bit, &pcpu->ec_mask);
182 	order = pcpu_running(pcpu) ?
183 		sigp_external_call : sigp_emergency_signal;
184 	pcpu_sigp_retry(pcpu, order, 0);
185 }
186 
187 static int __cpuinit pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
188 {
189 	struct _lowcore *lc;
190 
191 	if (pcpu != &pcpu_devices[0]) {
192 		pcpu->lowcore =	(struct _lowcore *)
193 			__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
194 		pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
195 		pcpu->panic_stack = __get_free_page(GFP_KERNEL);
196 		if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack)
197 			goto out;
198 	}
199 	lc = pcpu->lowcore;
200 	memcpy(lc, &S390_lowcore, 512);
201 	memset((char *) lc + 512, 0, sizeof(*lc) - 512);
202 	lc->async_stack = pcpu->async_stack + ASYNC_SIZE;
203 	lc->panic_stack = pcpu->panic_stack + PAGE_SIZE;
204 	lc->cpu_nr = cpu;
205 #ifndef CONFIG_64BIT
206 	if (MACHINE_HAS_IEEE) {
207 		lc->extended_save_area_addr = get_zeroed_page(GFP_KERNEL);
208 		if (!lc->extended_save_area_addr)
209 			goto out;
210 	}
211 #else
212 	if (vdso_alloc_per_cpu(lc))
213 		goto out;
214 #endif
215 	lowcore_ptr[cpu] = lc;
216 	pcpu_sigp_retry(pcpu, sigp_set_prefix, (u32)(unsigned long) lc);
217 	return 0;
218 out:
219 	if (pcpu != &pcpu_devices[0]) {
220 		free_page(pcpu->panic_stack);
221 		free_pages(pcpu->async_stack, ASYNC_ORDER);
222 		free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
223 	}
224 	return -ENOMEM;
225 }
226 
227 static void pcpu_free_lowcore(struct pcpu *pcpu)
228 {
229 	pcpu_sigp_retry(pcpu, sigp_set_prefix, 0);
230 	lowcore_ptr[pcpu - pcpu_devices] = NULL;
231 #ifndef CONFIG_64BIT
232 	if (MACHINE_HAS_IEEE) {
233 		struct _lowcore *lc = pcpu->lowcore;
234 
235 		free_page((unsigned long) lc->extended_save_area_addr);
236 		lc->extended_save_area_addr = 0;
237 	}
238 #else
239 	vdso_free_per_cpu(pcpu->lowcore);
240 #endif
241 	if (pcpu != &pcpu_devices[0]) {
242 		free_page(pcpu->panic_stack);
243 		free_pages(pcpu->async_stack, ASYNC_ORDER);
244 		free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
245 	}
246 }
247 
248 static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
249 {
250 	struct _lowcore *lc = pcpu->lowcore;
251 
252 	atomic_inc(&init_mm.context.attach_count);
253 	lc->cpu_nr = cpu;
254 	lc->percpu_offset = __per_cpu_offset[cpu];
255 	lc->kernel_asce = S390_lowcore.kernel_asce;
256 	lc->machine_flags = S390_lowcore.machine_flags;
257 	lc->ftrace_func = S390_lowcore.ftrace_func;
258 	lc->user_timer = lc->system_timer = lc->steal_timer = 0;
259 	__ctl_store(lc->cregs_save_area, 0, 15);
260 	save_access_regs((unsigned int *) lc->access_regs_save_area);
261 	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
262 	       MAX_FACILITY_BIT/8);
263 }
264 
265 static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
266 {
267 	struct _lowcore *lc = pcpu->lowcore;
268 	struct thread_info *ti = task_thread_info(tsk);
269 
270 	lc->kernel_stack = (unsigned long) task_stack_page(tsk) + THREAD_SIZE;
271 	lc->thread_info = (unsigned long) task_thread_info(tsk);
272 	lc->current_task = (unsigned long) tsk;
273 	lc->user_timer = ti->user_timer;
274 	lc->system_timer = ti->system_timer;
275 	lc->steal_timer = 0;
276 }
277 
278 static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
279 {
280 	struct _lowcore *lc = pcpu->lowcore;
281 
282 	lc->restart_stack = lc->kernel_stack;
283 	lc->restart_fn = (unsigned long) func;
284 	lc->restart_data = (unsigned long) data;
285 	lc->restart_source = -1UL;
286 	pcpu_sigp_retry(pcpu, sigp_restart, 0);
287 }
288 
/*
 * Call function via PSW restart on pcpu and stop the current cpu.
 *
 * @pcpu:  cpu that shall run the function
 * @func:  function to call; not expected to return
 * @data:  argument passed to @func
 * @stack: stack the function is started on when another cpu runs it
 *
 * If @pcpu is the cpu executing this code, @func is simply called here.
 * Otherwise the target is stopped, the restart parameters are written to
 * its lowcore, and the asm sequence restarts the target and then stops
 * the calling cpu. This function does not return.
 */
static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
			  void *data, unsigned long stack)
{
	struct _lowcore *lc = pcpu->lowcore;
	unsigned short this_cpu;

	__load_psw_mask(psw_kernel_bits);
	/* physical address of the cpu we are running on */
	this_cpu = stap();
	if (pcpu->address == this_cpu)
		func(data);	/* should not return */
	/* Stop target cpu (if func returns this stops the current cpu). */
	pcpu_sigp_retry(pcpu, sigp_stop, 0);
	/* Restart func on the target cpu and stop the current cpu. */
	lc->restart_stack = stack;
	lc->restart_fn = (unsigned long) func;
	lc->restart_data = (unsigned long) data;
	/* unlike pcpu_start_fn(), record which cpu requested the restart */
	lc->restart_source = (unsigned long) this_cpu;
	/* both orders are repeated while "brc 2" sees condition code busy */
	asm volatile(
		"0:	sigp	0,%0,6	# sigp restart to target cpu\n"
		"	brc	2,0b	# busy, try again\n"
		"1:	sigp	0,%1,5	# sigp stop to current cpu\n"
		"	brc	2,1b	# busy, try again\n"
		: : "d" (pcpu->address), "d" (this_cpu) : "0", "1", "cc");
	/* spin until the stop order takes effect */
	for (;;) ;
}
317 
318 /*
319  * Call function on an online CPU.
320  */
321 void smp_call_online_cpu(void (*func)(void *), void *data)
322 {
323 	struct pcpu *pcpu;
324 
325 	/* Use the current cpu if it is online. */
326 	pcpu = pcpu_find_address(cpu_online_mask, stap());
327 	if (!pcpu)
328 		/* Use the first online cpu. */
329 		pcpu = pcpu_devices + cpumask_first(cpu_online_mask);
330 	pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack);
331 }
332 
333 /*
334  * Call function on the ipl CPU.
335  */
336 void smp_call_ipl_cpu(void (*func)(void *), void *data)
337 {
338 	pcpu_delegate(&pcpu_devices[0], func, data,
339 		      pcpu_devices->panic_stack + PAGE_SIZE);
340 }
341 
342 int smp_find_processor_id(u16 address)
343 {
344 	int cpu;
345 
346 	for_each_present_cpu(cpu)
347 		if (pcpu_devices[cpu].address == address)
348 			return cpu;
349 	return -1;
350 }
351 
352 int smp_vcpu_scheduled(int cpu)
353 {
354 	return pcpu_running(pcpu_devices + cpu);
355 }
356 
357 void smp_yield(void)
358 {
359 	if (MACHINE_HAS_DIAG44)
360 		asm volatile("diag 0,0,0x44");
361 }
362 
363 void smp_yield_cpu(int cpu)
364 {
365 	if (MACHINE_HAS_DIAG9C)
366 		asm volatile("diag %0,0,0x9c"
367 			     : : "d" (pcpu_devices[cpu].address));
368 	else if (MACHINE_HAS_DIAG44)
369 		asm volatile("diag 0,0,0x44");
370 }
371 
372 /*
373  * Send cpus emergency shutdown signal. This gives the cpus the
374  * opportunity to complete outstanding interrupts.
375  */
376 void smp_emergency_stop(cpumask_t *cpumask)
377 {
378 	u64 end;
379 	int cpu;
380 
381 	end = get_clock() + (1000000UL << 12);
382 	for_each_cpu(cpu, cpumask) {
383 		struct pcpu *pcpu = pcpu_devices + cpu;
384 		set_bit(ec_stop_cpu, &pcpu->ec_mask);
385 		while (__pcpu_sigp(pcpu->address, sigp_emergency_signal,
386 				   0, NULL) == sigp_busy &&
387 		       get_clock() < end)
388 			cpu_relax();
389 	}
390 	while (get_clock() < end) {
391 		for_each_cpu(cpu, cpumask)
392 			if (pcpu_stopped(pcpu_devices + cpu))
393 				cpumask_clear_cpu(cpu, cpumask);
394 		if (cpumask_empty(cpumask))
395 			break;
396 		cpu_relax();
397 	}
398 }
399 
400 /*
401  * Stop all cpus but the current one.
402  */
403 void smp_send_stop(void)
404 {
405 	cpumask_t cpumask;
406 	int cpu;
407 
408 	/* Disable all interrupts/machine checks */
409 	__load_psw_mask(psw_kernel_bits | PSW_MASK_DAT);
410 	trace_hardirqs_off();
411 
412 	debug_set_critical();
413 	cpumask_copy(&cpumask, cpu_online_mask);
414 	cpumask_clear_cpu(smp_processor_id(), &cpumask);
415 
416 	if (oops_in_progress)
417 		smp_emergency_stop(&cpumask);
418 
419 	/* stop all processors */
420 	for_each_cpu(cpu, &cpumask) {
421 		struct pcpu *pcpu = pcpu_devices + cpu;
422 		pcpu_sigp_retry(pcpu, sigp_stop, 0);
423 		while (!pcpu_stopped(pcpu))
424 			cpu_relax();
425 	}
426 }
427 
428 /*
429  * Stop the current cpu.
430  */
431 void smp_stop_cpu(void)
432 {
433 	pcpu_sigp_retry(pcpu_devices + smp_processor_id(), sigp_stop, 0);
434 	for (;;) ;
435 }
436 
437 /*
438  * This is the main routine where commands issued by other
439  * cpus are handled.
440  */
441 static void do_ext_call_interrupt(struct ext_code ext_code,
442 				  unsigned int param32, unsigned long param64)
443 {
444 	unsigned long bits;
445 	int cpu;
446 
447 	cpu = smp_processor_id();
448 	if (ext_code.code == 0x1202)
449 		kstat_cpu(cpu).irqs[EXTINT_EXC]++;
450 	else
451 		kstat_cpu(cpu).irqs[EXTINT_EMS]++;
452 	/*
453 	 * handle bit signal external calls
454 	 */
455 	bits = xchg(&pcpu_devices[cpu].ec_mask, 0);
456 
457 	if (test_bit(ec_stop_cpu, &bits))
458 		smp_stop_cpu();
459 
460 	if (test_bit(ec_schedule, &bits))
461 		scheduler_ipi();
462 
463 	if (test_bit(ec_call_function, &bits))
464 		generic_smp_call_function_interrupt();
465 
466 	if (test_bit(ec_call_function_single, &bits))
467 		generic_smp_call_function_single_interrupt();
468 
469 }
470 
471 void arch_send_call_function_ipi_mask(const struct cpumask *mask)
472 {
473 	int cpu;
474 
475 	for_each_cpu(cpu, mask)
476 		pcpu_ec_call(pcpu_devices + cpu, ec_call_function);
477 }
478 
479 void arch_send_call_function_single_ipi(int cpu)
480 {
481 	pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
482 }
483 
484 #ifndef CONFIG_64BIT
/*
 * Per-cpu callback used by smp_ptlb_all(): flushes the tlb of the cpu it
 * runs on. @info is unused.
 */
static void smp_ptlb_callback(void *info)
{
	__tlb_flush_local();
}
492 
/*
 * Purge the tlb on all cpus by running the local flush callback on each
 * of them via on_each_cpu(). Only built when CONFIG_64BIT is not set.
 */
void smp_ptlb_all(void)
{
	on_each_cpu(smp_ptlb_callback, NULL, 1);
}
EXPORT_SYMBOL(smp_ptlb_all);
498 #endif /* ! CONFIG_64BIT */
499 
500 /*
501  * this function sends a 'reschedule' IPI to another CPU.
502  * it goes straight through and wastes no time serializing
503  * anything. Worst case is that we lose a reschedule ...
504  */
505 void smp_send_reschedule(int cpu)
506 {
507 	pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
508 }
509 
/*
 * parameter area for the set/clear control bit callbacks
 *
 * The callback computes  creg[cr] = (creg[cr] & andval) | orval.
 */
struct ec_creg_mask_parms {
	unsigned long orval;	/* bits to set */
	unsigned long andval;	/* bits to keep */
	int cr;			/* number of the control register to change */
};
518 
519 /*
520  * callback for setting/clearing control bits
521  */
522 static void smp_ctl_bit_callback(void *info)
523 {
524 	struct ec_creg_mask_parms *pp = info;
525 	unsigned long cregs[16];
526 
527 	__ctl_store(cregs, 0, 15);
528 	cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval;
529 	__ctl_load(cregs, 0, 15);
530 }
531 
532 /*
533  * Set a bit in a control register of all cpus
534  */
535 void smp_ctl_set_bit(int cr, int bit)
536 {
537 	struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr };
538 
539 	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
540 }
541 EXPORT_SYMBOL(smp_ctl_set_bit);
542 
543 /*
544  * Clear a bit in a control register of all cpus
545  */
546 void smp_ctl_clear_bit(int cr, int bit)
547 {
548 	struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr };
549 
550 	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
551 }
552 EXPORT_SYMBOL(smp_ctl_clear_bit);
553 
554 #if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP)
555 
/*
 * Register save areas collected by smp_get_save_area(), indexed by the
 * running count of configured cpus that smp_detect_cpus() passes in.
 */
struct save_area *zfcpdump_save_areas[NR_CPUS + 1];
EXPORT_SYMBOL_GPL(zfcpdump_save_areas);
558 
559 static void __init smp_get_save_area(int cpu, u16 address)
560 {
561 	void *lc = pcpu_devices[0].lowcore;
562 	struct save_area *save_area;
563 
564 	if (is_kdump_kernel())
565 		return;
566 	if (!OLDMEM_BASE && (address == boot_cpu_address ||
567 			     ipl_info.type != IPL_TYPE_FCP_DUMP))
568 		return;
569 	if (cpu >= NR_CPUS) {
570 		pr_warning("CPU %i exceeds the maximum %i and is excluded "
571 			   "from the dump\n", cpu, NR_CPUS - 1);
572 		return;
573 	}
574 	save_area = kmalloc(sizeof(struct save_area), GFP_KERNEL);
575 	if (!save_area)
576 		panic("could not allocate memory for save area\n");
577 	zfcpdump_save_areas[cpu] = save_area;
578 #ifdef CONFIG_CRASH_DUMP
579 	if (address == boot_cpu_address) {
580 		/* Copy the registers of the boot cpu. */
581 		copy_oldmem_page(1, (void *) save_area, sizeof(*save_area),
582 				 SAVE_AREA_BASE - PAGE_SIZE, 0);
583 		return;
584 	}
585 #endif
586 	/* Get the registers of a non-boot cpu. */
587 	__pcpu_sigp_relax(address, sigp_stop_and_store_status, 0, NULL);
588 	memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area));
589 }
590 
591 int smp_store_status(int cpu)
592 {
593 	struct pcpu *pcpu;
594 
595 	pcpu = pcpu_devices + cpu;
596 	if (__pcpu_sigp_relax(pcpu->address, sigp_stop_and_store_status,
597 			      0, NULL) != sigp_order_code_accepted)
598 		return -EIO;
599 	return 0;
600 }
601 
602 #else /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */
603 
/* No dump support configured: there are no save areas to collect. */
static inline void smp_get_save_area(int cpu, u16 address) { }
605 
606 #endif /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */
607 
608 static struct sclp_cpu_info *smp_get_cpu_info(void)
609 {
610 	static int use_sigp_detection;
611 	struct sclp_cpu_info *info;
612 	int address;
613 
614 	info = kzalloc(sizeof(*info), GFP_KERNEL);
615 	if (info && (use_sigp_detection || sclp_get_cpu_info(info))) {
616 		use_sigp_detection = 1;
617 		for (address = 0; address <= MAX_CPU_ADDRESS; address++) {
618 			if (__pcpu_sigp_relax(address, sigp_sense, 0, NULL) ==
619 			    sigp_not_operational)
620 				continue;
621 			info->cpu[info->configured].address = address;
622 			info->configured++;
623 		}
624 		info->combined = info->configured;
625 	}
626 	return info;
627 }
628 
629 static int __devinit smp_add_present_cpu(int cpu);
630 
631 static int __devinit __smp_rescan_cpus(struct sclp_cpu_info *info,
632 				       int sysfs_add)
633 {
634 	struct pcpu *pcpu;
635 	cpumask_t avail;
636 	int cpu, nr, i;
637 
638 	nr = 0;
639 	cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
640 	cpu = cpumask_first(&avail);
641 	for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
642 		if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
643 			continue;
644 		if (pcpu_find_address(cpu_present_mask, info->cpu[i].address))
645 			continue;
646 		pcpu = pcpu_devices + cpu;
647 		pcpu->address = info->cpu[i].address;
648 		pcpu->state = (cpu >= info->configured) ?
649 			CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
650 		cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
651 		set_cpu_present(cpu, true);
652 		if (sysfs_add && smp_add_present_cpu(cpu) != 0)
653 			set_cpu_present(cpu, false);
654 		else
655 			nr++;
656 		cpu = cpumask_next(cpu, &avail);
657 	}
658 	return nr;
659 }
660 
661 static void __init smp_detect_cpus(void)
662 {
663 	unsigned int cpu, c_cpus, s_cpus;
664 	struct sclp_cpu_info *info;
665 
666 	info = smp_get_cpu_info();
667 	if (!info)
668 		panic("smp_detect_cpus failed to allocate memory\n");
669 	if (info->has_cpu_type) {
670 		for (cpu = 0; cpu < info->combined; cpu++) {
671 			if (info->cpu[cpu].address != boot_cpu_address)
672 				continue;
673 			/* The boot cpu dictates the cpu type. */
674 			boot_cpu_type = info->cpu[cpu].type;
675 			break;
676 		}
677 	}
678 	c_cpus = s_cpus = 0;
679 	for (cpu = 0; cpu < info->combined; cpu++) {
680 		if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type)
681 			continue;
682 		if (cpu < info->configured) {
683 			smp_get_save_area(c_cpus, info->cpu[cpu].address);
684 			c_cpus++;
685 		} else
686 			s_cpus++;
687 	}
688 	pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
689 	get_online_cpus();
690 	__smp_rescan_cpus(info, 0);
691 	put_online_cpus();
692 	kfree(info);
693 }
694 
/*
 *	Activate a secondary processor.
 *
 * This is the function __cpu_up() hands to pcpu_start_fn(), so it is the
 * first code the newly restarted cpu runs. @cpuvoid is unused. The
 * function ends in cpu_idle() and does not return.
 */
static void __cpuinit smp_start_secondary(void *cpuvoid)
{
	S390_lowcore.last_update_clock = get_clock();
	/* re-arm the restart parameters for a later restart of this cpu */
	S390_lowcore.restart_stack = (unsigned long) restart_stack;
	S390_lowcore.restart_fn = (unsigned long) do_restart;
	S390_lowcore.restart_data = 0;
	S390_lowcore.restart_source = -1UL;
	/* load the register contents pcpu_prepare_secondary() saved for us */
	restore_access_regs(S390_lowcore.access_regs_save_area);
	__ctl_load(S390_lowcore.cregs_save_area, 0, 15);
	__load_psw_mask(psw_kernel_bits | PSW_MASK_DAT);
	cpu_init();
	preempt_disable();
	init_cpu_timer();
	init_cpu_vtimer();
	pfault_init();
	notify_cpu_starting(smp_processor_id());
	/* __cpu_up() on the starting cpu spins until this becomes visible */
	ipi_call_lock();
	set_cpu_online(smp_processor_id(), true);
	ipi_call_unlock();
	local_irq_enable();
	/* cpu_idle will call schedule for us */
	cpu_idle();
}
721 
/*
 * Request block used by __cpu_up() to have smp_fork_idle() create the
 * idle task from a workqueue.
 */
struct create_idle {
	struct work_struct work;	/* work item running smp_fork_idle() */
	struct task_struct *idle;	/* result of fork_idle(), may be ERR_PTR */
	struct completion done;		/* completed once idle has been set */
	int cpu;			/* logical cpu to fork the idle task for */
};
728 
729 static void __cpuinit smp_fork_idle(struct work_struct *work)
730 {
731 	struct create_idle *c_idle;
732 
733 	c_idle = container_of(work, struct create_idle, work);
734 	c_idle->idle = fork_idle(c_idle->cpu);
735 	complete(&c_idle->done);
736 }
737 
738 /* Upping and downing of CPUs */
739 int __cpuinit __cpu_up(unsigned int cpu)
740 {
741 	struct create_idle c_idle;
742 	struct pcpu *pcpu;
743 	int rc;
744 
745 	pcpu = pcpu_devices + cpu;
746 	if (pcpu->state != CPU_STATE_CONFIGURED)
747 		return -EIO;
748 	if (pcpu_sigp_retry(pcpu, sigp_initial_cpu_reset, 0) !=
749 	    sigp_order_code_accepted)
750 		return -EIO;
751 	if (!pcpu->idle) {
752 		c_idle.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done);
753 		INIT_WORK_ONSTACK(&c_idle.work, smp_fork_idle);
754 		c_idle.cpu = cpu;
755 		schedule_work(&c_idle.work);
756 		wait_for_completion(&c_idle.done);
757 		if (IS_ERR(c_idle.idle))
758 			return PTR_ERR(c_idle.idle);
759 		pcpu->idle = c_idle.idle;
760 	}
761 	init_idle(pcpu->idle, cpu);
762 	rc = pcpu_alloc_lowcore(pcpu, cpu);
763 	if (rc)
764 		return rc;
765 	pcpu_prepare_secondary(pcpu, cpu);
766 	pcpu_attach_task(pcpu, pcpu->idle);
767 	pcpu_start_fn(pcpu, smp_start_secondary, NULL);
768 	while (!cpu_online(cpu))
769 		cpu_relax();
770 	return 0;
771 }
772 
773 static int __init setup_possible_cpus(char *s)
774 {
775 	int max, cpu;
776 
777 	if (kstrtoint(s, 0, &max) < 0)
778 		return 0;
779 	init_cpu_possible(cpumask_of(0));
780 	for (cpu = 1; cpu < max && cpu < nr_cpu_ids; cpu++)
781 		set_cpu_possible(cpu, true);
782 	return 0;
783 }
784 early_param("possible_cpus", setup_possible_cpus);
785 
786 #ifdef CONFIG_HOTPLUG_CPU
787 
788 int __cpu_disable(void)
789 {
790 	unsigned long cregs[16];
791 
792 	set_cpu_online(smp_processor_id(), false);
793 	/* Disable pseudo page faults on this cpu. */
794 	pfault_fini();
795 	/* Disable interrupt sources via control register. */
796 	__ctl_store(cregs, 0, 15);
797 	cregs[0]  &= ~0x0000ee70UL;	/* disable all external interrupts */
798 	cregs[6]  &= ~0xff000000UL;	/* disable all I/O interrupts */
799 	cregs[14] &= ~0x1f000000UL;	/* disable most machine checks */
800 	__ctl_load(cregs, 0, 15);
801 	return 0;
802 }
803 
804 void __cpu_die(unsigned int cpu)
805 {
806 	struct pcpu *pcpu;
807 
808 	/* Wait until target cpu is down */
809 	pcpu = pcpu_devices + cpu;
810 	while (!pcpu_stopped(pcpu))
811 		cpu_relax();
812 	pcpu_free_lowcore(pcpu);
813 	atomic_dec(&init_mm.context.attach_count);
814 }
815 
816 void __noreturn cpu_die(void)
817 {
818 	idle_task_exit();
819 	pcpu_sigp_retry(pcpu_devices + smp_processor_id(), sigp_stop, 0);
820 	for (;;) ;
821 }
822 
823 #endif /* CONFIG_HOTPLUG_CPU */
824 
825 static void smp_call_os_info_init_fn(void)
826 {
827 	int (*init_fn)(void);
828 	unsigned long size;
829 
830 	init_fn = os_info_old_entry(OS_INFO_INIT_FN, &size);
831 	if (!init_fn)
832 		return;
833 	init_fn();
834 }
835 
836 void __init smp_prepare_cpus(unsigned int max_cpus)
837 {
838 	/* request the 0x1201 emergency signal external interrupt */
839 	if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0)
840 		panic("Couldn't request external interrupt 0x1201");
841 	/* request the 0x1202 external call external interrupt */
842 	if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0)
843 		panic("Couldn't request external interrupt 0x1202");
844 	smp_call_os_info_init_fn();
845 	smp_detect_cpus();
846 }
847 
848 void __init smp_prepare_boot_cpu(void)
849 {
850 	struct pcpu *pcpu = pcpu_devices;
851 
852 	boot_cpu_address = stap();
853 	pcpu->idle = current;
854 	pcpu->state = CPU_STATE_CONFIGURED;
855 	pcpu->address = boot_cpu_address;
856 	pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix();
857 	pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE;
858 	pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE;
859 	S390_lowcore.percpu_offset = __per_cpu_offset[0];
860 	cpu_set_polarization(0, POLARIZATION_UNKNOWN);
861 	set_cpu_present(0, true);
862 	set_cpu_online(0, true);
863 }
864 
/* Nothing left to do here; @max_cpus is ignored. */
void __init smp_cpus_done(unsigned int max_cpus)
{
}
868 
/* The cpu running the early boot code becomes logical cpu 0. */
void __init smp_setup_processor_id(void)
{
	S390_lowcore.cpu_nr = 0;
}
873 
/*
 * The frequency of the profiling timer can be changed by writing a
 * multiplier value into /proc/profile, which ends up here.
 *
 * This implementation does not act on @multiplier; it accepts any value
 * and reports success by returning 0.
 */
int setup_profiling_timer(unsigned int multiplier)
{
	(void) multiplier;
	return 0;
}
884 
885 #ifdef CONFIG_HOTPLUG_CPU
886 static ssize_t cpu_configure_show(struct device *dev,
887 				  struct device_attribute *attr, char *buf)
888 {
889 	ssize_t count;
890 
891 	mutex_lock(&smp_cpu_state_mutex);
892 	count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);
893 	mutex_unlock(&smp_cpu_state_mutex);
894 	return count;
895 }
896 
897 static ssize_t cpu_configure_store(struct device *dev,
898 				   struct device_attribute *attr,
899 				   const char *buf, size_t count)
900 {
901 	struct pcpu *pcpu;
902 	int cpu, val, rc;
903 	char delim;
904 
905 	if (sscanf(buf, "%d %c", &val, &delim) != 1)
906 		return -EINVAL;
907 	if (val != 0 && val != 1)
908 		return -EINVAL;
909 	get_online_cpus();
910 	mutex_lock(&smp_cpu_state_mutex);
911 	rc = -EBUSY;
912 	/* disallow configuration changes of online cpus and cpu 0 */
913 	cpu = dev->id;
914 	if (cpu_online(cpu) || cpu == 0)
915 		goto out;
916 	pcpu = pcpu_devices + cpu;
917 	rc = 0;
918 	switch (val) {
919 	case 0:
920 		if (pcpu->state != CPU_STATE_CONFIGURED)
921 			break;
922 		rc = sclp_cpu_deconfigure(pcpu->address);
923 		if (rc)
924 			break;
925 		pcpu->state = CPU_STATE_STANDBY;
926 		cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
927 		topology_expect_change();
928 		break;
929 	case 1:
930 		if (pcpu->state != CPU_STATE_STANDBY)
931 			break;
932 		rc = sclp_cpu_configure(pcpu->address);
933 		if (rc)
934 			break;
935 		pcpu->state = CPU_STATE_CONFIGURED;
936 		cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
937 		topology_expect_change();
938 		break;
939 	default:
940 		break;
941 	}
942 out:
943 	mutex_unlock(&smp_cpu_state_mutex);
944 	put_online_cpus();
945 	return rc ? rc : count;
946 }
947 static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
948 #endif /* CONFIG_HOTPLUG_CPU */
949 
950 static ssize_t show_cpu_address(struct device *dev,
951 				struct device_attribute *attr, char *buf)
952 {
953 	return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);
954 }
955 static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
956 
/*
 * Attributes created for every present cpu by smp_add_present_cpu().
 * "configure" only exists with cpu hotplug support.
 */
static struct attribute *cpu_common_attrs[] = {
#ifdef CONFIG_HOTPLUG_CPU
	&dev_attr_configure.attr,
#endif
	&dev_attr_address.attr,
	NULL,
};

static struct attribute_group cpu_common_attr_group = {
	.attrs = cpu_common_attrs,
};
968 
969 static ssize_t show_capability(struct device *dev,
970 				struct device_attribute *attr, char *buf)
971 {
972 	unsigned int capability;
973 	int rc;
974 
975 	rc = get_cpu_capability(&capability);
976 	if (rc)
977 		return rc;
978 	return sprintf(buf, "%u\n", capability);
979 }
980 static DEVICE_ATTR(capability, 0444, show_capability, NULL);
981 
982 static ssize_t show_idle_count(struct device *dev,
983 				struct device_attribute *attr, char *buf)
984 {
985 	struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
986 	unsigned long long idle_count;
987 	unsigned int sequence;
988 
989 	do {
990 		sequence = ACCESS_ONCE(idle->sequence);
991 		idle_count = ACCESS_ONCE(idle->idle_count);
992 		if (ACCESS_ONCE(idle->idle_enter))
993 			idle_count++;
994 	} while ((sequence & 1) || (idle->sequence != sequence));
995 	return sprintf(buf, "%llu\n", idle_count);
996 }
997 static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
998 
999 static ssize_t show_idle_time(struct device *dev,
1000 				struct device_attribute *attr, char *buf)
1001 {
1002 	struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
1003 	unsigned long long now, idle_time, idle_enter, idle_exit;
1004 	unsigned int sequence;
1005 
1006 	do {
1007 		now = get_clock();
1008 		sequence = ACCESS_ONCE(idle->sequence);
1009 		idle_time = ACCESS_ONCE(idle->idle_time);
1010 		idle_enter = ACCESS_ONCE(idle->idle_enter);
1011 		idle_exit = ACCESS_ONCE(idle->idle_exit);
1012 	} while ((sequence & 1) || (idle->sequence != sequence));
1013 	idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
1014 	return sprintf(buf, "%llu\n", idle_time >> 12);
1015 }
1016 static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
1017 
/*
 * Attributes that only exist while a cpu is online: created on
 * CPU_ONLINE and removed on CPU_DEAD by smp_cpu_notify(), and created by
 * smp_add_present_cpu() for cpus that are already online.
 */
static struct attribute *cpu_online_attrs[] = {
	&dev_attr_capability.attr,
	&dev_attr_idle_count.attr,
	&dev_attr_idle_time_us.attr,
	NULL,
};

static struct attribute_group cpu_online_attr_group = {
	.attrs = cpu_online_attrs,
};
1028 
1029 static int __cpuinit smp_cpu_notify(struct notifier_block *self,
1030 				    unsigned long action, void *hcpu)
1031 {
1032 	unsigned int cpu = (unsigned int)(long)hcpu;
1033 	struct cpu *c = &pcpu_devices[cpu].cpu;
1034 	struct device *s = &c->dev;
1035 	struct s390_idle_data *idle;
1036 	int err = 0;
1037 
1038 	switch (action) {
1039 	case CPU_ONLINE:
1040 	case CPU_ONLINE_FROZEN:
1041 		idle = &per_cpu(s390_idle, cpu);
1042 		memset(idle, 0, sizeof(struct s390_idle_data));
1043 		err = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
1044 		break;
1045 	case CPU_DEAD:
1046 	case CPU_DEAD_FROZEN:
1047 		sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
1048 		break;
1049 	}
1050 	return notifier_from_errno(err);
1051 }
1052 
/* Hotplug notifier block, registered in s390_smp_init(). */
static struct notifier_block __cpuinitdata smp_cpu_nb = {
	.notifier_call = smp_cpu_notify,
};
1056 
1057 static int __devinit smp_add_present_cpu(int cpu)
1058 {
1059 	struct cpu *c = &pcpu_devices[cpu].cpu;
1060 	struct device *s = &c->dev;
1061 	int rc;
1062 
1063 	c->hotpluggable = 1;
1064 	rc = register_cpu(c, cpu);
1065 	if (rc)
1066 		goto out;
1067 	rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group);
1068 	if (rc)
1069 		goto out_cpu;
1070 	if (cpu_online(cpu)) {
1071 		rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
1072 		if (rc)
1073 			goto out_online;
1074 	}
1075 	rc = topology_cpu_init(c);
1076 	if (rc)
1077 		goto out_topology;
1078 	return 0;
1079 
1080 out_topology:
1081 	if (cpu_online(cpu))
1082 		sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
1083 out_online:
1084 	sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
1085 out_cpu:
1086 #ifdef CONFIG_HOTPLUG_CPU
1087 	unregister_cpu(c);
1088 #endif
1089 out:
1090 	return rc;
1091 }
1092 
1093 #ifdef CONFIG_HOTPLUG_CPU
1094 
1095 int __ref smp_rescan_cpus(void)
1096 {
1097 	struct sclp_cpu_info *info;
1098 	int nr;
1099 
1100 	info = smp_get_cpu_info();
1101 	if (!info)
1102 		return -ENOMEM;
1103 	get_online_cpus();
1104 	mutex_lock(&smp_cpu_state_mutex);
1105 	nr = __smp_rescan_cpus(info, 1);
1106 	mutex_unlock(&smp_cpu_state_mutex);
1107 	put_online_cpus();
1108 	kfree(info);
1109 	if (nr)
1110 		topology_schedule_update();
1111 	return 0;
1112 }
1113 
1114 static ssize_t __ref rescan_store(struct device *dev,
1115 				  struct device_attribute *attr,
1116 				  const char *buf,
1117 				  size_t count)
1118 {
1119 	int rc;
1120 
1121 	rc = smp_rescan_cpus();
1122 	return rc ? rc : count;
1123 }
1124 static DEVICE_ATTR(rescan, 0200, NULL, rescan_store);
1125 #endif /* CONFIG_HOTPLUG_CPU */
1126 
1127 static int __init s390_smp_init(void)
1128 {
1129 	int cpu, rc;
1130 
1131 	register_cpu_notifier(&smp_cpu_nb);
1132 #ifdef CONFIG_HOTPLUG_CPU
1133 	rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
1134 	if (rc)
1135 		return rc;
1136 #endif
1137 	for_each_present_cpu(cpu) {
1138 		rc = smp_add_present_cpu(cpu);
1139 		if (rc)
1140 			return rc;
1141 	}
1142 	return 0;
1143 }
1144 subsys_initcall(s390_smp_init);
1145