xref: /linux/arch/s390/kernel/smp.c (revision 71ca97da9d027009d318d319cbacf54a72f666c1)
1 /*
2  *  SMP related functions
3  *
4  *    Copyright IBM Corp. 1999,2012
5  *    Author(s): Denis Joseph Barrow,
6  *		 Martin Schwidefsky <schwidefsky@de.ibm.com>,
7  *		 Heiko Carstens <heiko.carstens@de.ibm.com>,
8  *
9  *  based on other smp stuff by
10  *    (c) 1995 Alan Cox, CymruNET Ltd  <alan@cymru.net>
11  *    (c) 1998 Ingo Molnar
12  *
13  * The code outside of smp.c uses logical cpu numbers, only smp.c does
14  * the translation of logical to physical cpu ids. All new code that
15  * operates on physical cpu numbers needs to go into smp.c.
16  */
17 
18 #define KMSG_COMPONENT "cpu"
19 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
20 
21 #include <linux/workqueue.h>
22 #include <linux/module.h>
23 #include <linux/init.h>
24 #include <linux/mm.h>
25 #include <linux/err.h>
26 #include <linux/spinlock.h>
27 #include <linux/kernel_stat.h>
28 #include <linux/delay.h>
29 #include <linux/interrupt.h>
30 #include <linux/irqflags.h>
31 #include <linux/cpu.h>
32 #include <linux/slab.h>
33 #include <linux/crash_dump.h>
34 #include <asm/asm-offsets.h>
35 #include <asm/switch_to.h>
36 #include <asm/facility.h>
37 #include <asm/ipl.h>
38 #include <asm/setup.h>
39 #include <asm/irq.h>
40 #include <asm/tlbflush.h>
41 #include <asm/timer.h>
42 #include <asm/lowcore.h>
43 #include <asm/sclp.h>
44 #include <asm/vdso.h>
45 #include <asm/debug.h>
46 #include <asm/os_info.h>
47 #include "entry.h"
48 
/* Order codes passed as the 'order' argument of the __pcpu_sigp() helpers. */
enum {
	sigp_sense = 1,
	sigp_external_call = 2,
	sigp_emergency_signal = 3,
	sigp_start = 4,
	sigp_stop = 5,
	sigp_restart = 6,
	sigp_stop_and_store_status = 9,
	sigp_initial_cpu_reset = 11,
	sigp_cpu_reset = 12,
	sigp_set_prefix = 13,
	sigp_store_status_at_address = 14,
	sigp_store_extended_status_at_address = 15,
	sigp_set_architecture = 18,
	sigp_conditional_emergency_signal = 19,
	sigp_sense_running = 21,
};
66 
/*
 * Condition codes returned by __pcpu_sigp(). Only for sigp_status_stored
 * does __pcpu_sigp() write back a status word to its caller.
 */
enum {
	sigp_order_code_accepted = 0,
	sigp_status_stored = 1,
	sigp_busy = 2,
	sigp_not_operational = 3,
};
73 
/*
 * Bit numbers within pcpu->ec_mask. A sender sets a bit and signals the
 * target cpu; do_ext_call_interrupt() fetches and clears the mask and
 * dispatches on the bits that were set.
 */
enum {
	ec_schedule = 0,
	ec_call_function,
	ec_call_function_single,
	ec_stop_cpu,
};
80 
/*
 * Values stored in pcpu->state. __cpu_up() only starts cpus that are in
 * CPU_STATE_CONFIGURED; the "configure" sysfs attribute switches between
 * the two states.
 */
enum {
	CPU_STATE_STANDBY,
	CPU_STATE_CONFIGURED,
};
85 
/*
 * Per-cpu bookkeeping of this file. One instance exists per logical cpu
 * number in pcpu_devices[]; it ties the logical cpu to its physical cpu
 * address and to the memory allocated for it.
 */
struct pcpu {
	struct cpu cpu;
	struct _lowcore *lowcore;	/* lowcore page(s) for the cpu */
	unsigned long async_stack;	/* async stack for the cpu */
	unsigned long panic_stack;	/* panic stack for the cpu */
	unsigned long ec_mask;		/* bit mask for ec_xxx functions */
	int state;			/* physical cpu state */
	u32 status;			/* last status received via sigp */
	u16 address;			/* physical cpu address */
};
96 
/* cpu type of the boot cpu, determined in smp_detect_cpus() */
static u8 boot_cpu_type;
/* physical address of the boot cpu, read via stap() in smp_prepare_boot_cpu() */
static u16 boot_cpu_address;
/* per-cpu state, indexed by logical cpu number */
static struct pcpu pcpu_devices[NR_CPUS];

/* held around reads and updates of pcpu->state by the sysfs and rescan code */
DEFINE_MUTEX(smp_cpu_state_mutex);
102 
103 /*
104  * Signal processor helper functions.
105  */
/*
 * Issue a single SIGP instruction.
 *
 * @addr:   physical address of the target cpu
 * @order:  order code (sigp_xxx)
 * @parm:   parameter, passed to the instruction in register 1
 * @status: optional; receives the contents of register 1 after the
 *          instruction, but only when the condition code is 1
 *          (sigp_status_stored)
 *
 * Returns the condition code of the instruction (0..3).
 */
static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status)
{
	/* The parameter/status operand is pinned to general register 1. */
	register unsigned int reg1 asm ("1") = parm;
	int cc;

	asm volatile(
		"	sigp	%1,%2,0(%3)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc");
	if (status && cc == 1)
		*status = reg1;
	return cc;
}
120 
121 static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status)
122 {
123 	int cc;
124 
125 	while (1) {
126 		cc = __pcpu_sigp(addr, order, parm, status);
127 		if (cc != sigp_busy)
128 			return cc;
129 		cpu_relax();
130 	}
131 }
132 
133 static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm)
134 {
135 	int cc, retry;
136 
137 	for (retry = 0; ; retry++) {
138 		cc = __pcpu_sigp(pcpu->address, order, parm, &pcpu->status);
139 		if (cc != sigp_busy)
140 			break;
141 		if (retry >= 3)
142 			udelay(10);
143 	}
144 	return cc;
145 }
146 
147 static inline int pcpu_stopped(struct pcpu *pcpu)
148 {
149 	if (__pcpu_sigp(pcpu->address, sigp_sense,
150 			0, &pcpu->status) != sigp_status_stored)
151 		return 0;
152 	/* Check for stopped and check stop state */
153 	return !!(pcpu->status & 0x50);
154 }
155 
156 static inline int pcpu_running(struct pcpu *pcpu)
157 {
158 	if (__pcpu_sigp(pcpu->address, sigp_sense_running,
159 			0, &pcpu->status) != sigp_status_stored)
160 		return 1;
161 	/* Check for running status */
162 	return !(pcpu->status & 0x400);
163 }
164 
165 /*
166  * Find struct pcpu by cpu address.
167  */
168 static struct pcpu *pcpu_find_address(const struct cpumask *mask, int address)
169 {
170 	int cpu;
171 
172 	for_each_cpu(cpu, mask)
173 		if (pcpu_devices[cpu].address == address)
174 			return pcpu_devices + cpu;
175 	return NULL;
176 }
177 
178 static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
179 {
180 	int order;
181 
182 	set_bit(ec_bit, &pcpu->ec_mask);
183 	order = pcpu_running(pcpu) ?
184 		sigp_external_call : sigp_emergency_signal;
185 	pcpu_sigp_retry(pcpu, order, 0);
186 }
187 
188 static int __cpuinit pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
189 {
190 	struct _lowcore *lc;
191 
192 	if (pcpu != &pcpu_devices[0]) {
193 		pcpu->lowcore =	(struct _lowcore *)
194 			__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
195 		pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
196 		pcpu->panic_stack = __get_free_page(GFP_KERNEL);
197 		if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack)
198 			goto out;
199 	}
200 	lc = pcpu->lowcore;
201 	memcpy(lc, &S390_lowcore, 512);
202 	memset((char *) lc + 512, 0, sizeof(*lc) - 512);
203 	lc->async_stack = pcpu->async_stack + ASYNC_SIZE;
204 	lc->panic_stack = pcpu->panic_stack + PAGE_SIZE;
205 	lc->cpu_nr = cpu;
206 #ifndef CONFIG_64BIT
207 	if (MACHINE_HAS_IEEE) {
208 		lc->extended_save_area_addr = get_zeroed_page(GFP_KERNEL);
209 		if (!lc->extended_save_area_addr)
210 			goto out;
211 	}
212 #else
213 	if (vdso_alloc_per_cpu(lc))
214 		goto out;
215 #endif
216 	lowcore_ptr[cpu] = lc;
217 	pcpu_sigp_retry(pcpu, sigp_set_prefix, (u32)(unsigned long) lc);
218 	return 0;
219 out:
220 	if (pcpu != &pcpu_devices[0]) {
221 		free_page(pcpu->panic_stack);
222 		free_pages(pcpu->async_stack, ASYNC_ORDER);
223 		free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
224 	}
225 	return -ENOMEM;
226 }
227 
228 #ifdef CONFIG_HOTPLUG_CPU
229 
230 static void pcpu_free_lowcore(struct pcpu *pcpu)
231 {
232 	pcpu_sigp_retry(pcpu, sigp_set_prefix, 0);
233 	lowcore_ptr[pcpu - pcpu_devices] = NULL;
234 #ifndef CONFIG_64BIT
235 	if (MACHINE_HAS_IEEE) {
236 		struct _lowcore *lc = pcpu->lowcore;
237 
238 		free_page((unsigned long) lc->extended_save_area_addr);
239 		lc->extended_save_area_addr = 0;
240 	}
241 #else
242 	vdso_free_per_cpu(pcpu->lowcore);
243 #endif
244 	if (pcpu != &pcpu_devices[0]) {
245 		free_page(pcpu->panic_stack);
246 		free_pages(pcpu->async_stack, ASYNC_ORDER);
247 		free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
248 	}
249 }
250 
251 #endif /* CONFIG_HOTPLUG_CPU */
252 
253 static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
254 {
255 	struct _lowcore *lc = pcpu->lowcore;
256 
257 	atomic_inc(&init_mm.context.attach_count);
258 	lc->cpu_nr = cpu;
259 	lc->percpu_offset = __per_cpu_offset[cpu];
260 	lc->kernel_asce = S390_lowcore.kernel_asce;
261 	lc->machine_flags = S390_lowcore.machine_flags;
262 	lc->ftrace_func = S390_lowcore.ftrace_func;
263 	lc->user_timer = lc->system_timer = lc->steal_timer = 0;
264 	__ctl_store(lc->cregs_save_area, 0, 15);
265 	save_access_regs((unsigned int *) lc->access_regs_save_area);
266 	memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
267 	       MAX_FACILITY_BIT/8);
268 }
269 
270 static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
271 {
272 	struct _lowcore *lc = pcpu->lowcore;
273 	struct thread_info *ti = task_thread_info(tsk);
274 
275 	lc->kernel_stack = (unsigned long) task_stack_page(tsk) + THREAD_SIZE;
276 	lc->thread_info = (unsigned long) task_thread_info(tsk);
277 	lc->current_task = (unsigned long) tsk;
278 	lc->user_timer = ti->user_timer;
279 	lc->system_timer = ti->system_timer;
280 	lc->steal_timer = 0;
281 }
282 
283 static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
284 {
285 	struct _lowcore *lc = pcpu->lowcore;
286 
287 	lc->restart_stack = lc->kernel_stack;
288 	lc->restart_fn = (unsigned long) func;
289 	lc->restart_data = (unsigned long) data;
290 	lc->restart_source = -1UL;
291 	pcpu_sigp_retry(pcpu, sigp_restart, 0);
292 }
293 
/*
 * Call function via PSW restart on pcpu and stop the current cpu.
 *
 * @pcpu:  cpu that shall execute @func
 * @func:  function to call; it is expected not to return
 * @data:  argument handed to @func
 * @stack: value stored as restart stack in the target lowcore
 *
 * If @pcpu is the executing cpu, @func is simply called. Otherwise the
 * target is stopped, its restart fields are filled in, and the target is
 * restarted while the executing cpu stops itself. This function does not
 * return to its caller.
 */
static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
			  void *data, unsigned long stack)
{
	struct _lowcore *lc = pcpu->lowcore;
	unsigned short this_cpu;

	__load_psw_mask(psw_kernel_bits);
	/* Physical address of the executing cpu. */
	this_cpu = stap();
	if (pcpu->address == this_cpu)
		func(data);	/* should not return */
	/* Stop target cpu (if func returns this stops the current cpu). */
	pcpu_sigp_retry(pcpu, sigp_stop, 0);
	/* Restart func on the target cpu and stop the current cpu. */
	lc->restart_stack = stack;
	lc->restart_fn = (unsigned long) func;
	lc->restart_data = (unsigned long) data;
	lc->restart_source = (unsigned long) this_cpu;
	/* Orders 6 and 5 match sigp_restart and sigp_stop; cc 2 is sigp_busy. */
	asm volatile(
		"0:	sigp	0,%0,6	# sigp restart to target cpu\n"
		"	brc	2,0b	# busy, try again\n"
		"1:	sigp	0,%1,5	# sigp stop to current cpu\n"
		"	brc	2,1b	# busy, try again\n"
		: : "d" (pcpu->address), "d" (this_cpu) : "0", "1", "cc");
	for (;;) ;
}
322 
323 /*
324  * Call function on an online CPU.
325  */
326 void smp_call_online_cpu(void (*func)(void *), void *data)
327 {
328 	struct pcpu *pcpu;
329 
330 	/* Use the current cpu if it is online. */
331 	pcpu = pcpu_find_address(cpu_online_mask, stap());
332 	if (!pcpu)
333 		/* Use the first online cpu. */
334 		pcpu = pcpu_devices + cpumask_first(cpu_online_mask);
335 	pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack);
336 }
337 
338 /*
339  * Call function on the ipl CPU.
340  */
341 void smp_call_ipl_cpu(void (*func)(void *), void *data)
342 {
343 	pcpu_delegate(&pcpu_devices[0], func, data,
344 		      pcpu_devices->panic_stack + PAGE_SIZE);
345 }
346 
347 int smp_find_processor_id(u16 address)
348 {
349 	int cpu;
350 
351 	for_each_present_cpu(cpu)
352 		if (pcpu_devices[cpu].address == address)
353 			return cpu;
354 	return -1;
355 }
356 
357 int smp_vcpu_scheduled(int cpu)
358 {
359 	return pcpu_running(pcpu_devices + cpu);
360 }
361 
362 void smp_yield(void)
363 {
364 	if (MACHINE_HAS_DIAG44)
365 		asm volatile("diag 0,0,0x44");
366 }
367 
368 void smp_yield_cpu(int cpu)
369 {
370 	if (MACHINE_HAS_DIAG9C)
371 		asm volatile("diag %0,0,0x9c"
372 			     : : "d" (pcpu_devices[cpu].address));
373 	else if (MACHINE_HAS_DIAG44)
374 		asm volatile("diag 0,0,0x44");
375 }
376 
377 /*
378  * Send cpus emergency shutdown signal. This gives the cpus the
379  * opportunity to complete outstanding interrupts.
380  */
381 void smp_emergency_stop(cpumask_t *cpumask)
382 {
383 	u64 end;
384 	int cpu;
385 
386 	end = get_clock() + (1000000UL << 12);
387 	for_each_cpu(cpu, cpumask) {
388 		struct pcpu *pcpu = pcpu_devices + cpu;
389 		set_bit(ec_stop_cpu, &pcpu->ec_mask);
390 		while (__pcpu_sigp(pcpu->address, sigp_emergency_signal,
391 				   0, NULL) == sigp_busy &&
392 		       get_clock() < end)
393 			cpu_relax();
394 	}
395 	while (get_clock() < end) {
396 		for_each_cpu(cpu, cpumask)
397 			if (pcpu_stopped(pcpu_devices + cpu))
398 				cpumask_clear_cpu(cpu, cpumask);
399 		if (cpumask_empty(cpumask))
400 			break;
401 		cpu_relax();
402 	}
403 }
404 
405 /*
406  * Stop all cpus but the current one.
407  */
408 void smp_send_stop(void)
409 {
410 	cpumask_t cpumask;
411 	int cpu;
412 
413 	/* Disable all interrupts/machine checks */
414 	__load_psw_mask(psw_kernel_bits | PSW_MASK_DAT);
415 	trace_hardirqs_off();
416 
417 	debug_set_critical();
418 	cpumask_copy(&cpumask, cpu_online_mask);
419 	cpumask_clear_cpu(smp_processor_id(), &cpumask);
420 
421 	if (oops_in_progress)
422 		smp_emergency_stop(&cpumask);
423 
424 	/* stop all processors */
425 	for_each_cpu(cpu, &cpumask) {
426 		struct pcpu *pcpu = pcpu_devices + cpu;
427 		pcpu_sigp_retry(pcpu, sigp_stop, 0);
428 		while (!pcpu_stopped(pcpu))
429 			cpu_relax();
430 	}
431 }
432 
433 /*
434  * Stop the current cpu.
435  */
436 void smp_stop_cpu(void)
437 {
438 	pcpu_sigp_retry(pcpu_devices + smp_processor_id(), sigp_stop, 0);
439 	for (;;) ;
440 }
441 
442 /*
443  * This is the main routine where commands issued by other
444  * cpus are handled.
445  */
446 static void do_ext_call_interrupt(struct ext_code ext_code,
447 				  unsigned int param32, unsigned long param64)
448 {
449 	unsigned long bits;
450 	int cpu;
451 
452 	cpu = smp_processor_id();
453 	if (ext_code.code == 0x1202)
454 		kstat_cpu(cpu).irqs[EXTINT_EXC]++;
455 	else
456 		kstat_cpu(cpu).irqs[EXTINT_EMS]++;
457 	/*
458 	 * handle bit signal external calls
459 	 */
460 	bits = xchg(&pcpu_devices[cpu].ec_mask, 0);
461 
462 	if (test_bit(ec_stop_cpu, &bits))
463 		smp_stop_cpu();
464 
465 	if (test_bit(ec_schedule, &bits))
466 		scheduler_ipi();
467 
468 	if (test_bit(ec_call_function, &bits))
469 		generic_smp_call_function_interrupt();
470 
471 	if (test_bit(ec_call_function_single, &bits))
472 		generic_smp_call_function_single_interrupt();
473 
474 }
475 
476 void arch_send_call_function_ipi_mask(const struct cpumask *mask)
477 {
478 	int cpu;
479 
480 	for_each_cpu(cpu, mask)
481 		pcpu_ec_call(pcpu_devices + cpu, ec_call_function);
482 }
483 
484 void arch_send_call_function_single_ipi(int cpu)
485 {
486 	pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
487 }
488 
489 #ifndef CONFIG_64BIT
/*
 * Callback run on each cpu by smp_ptlb_all(): flush the TLB of the
 * executing cpu. @info is unused.
 */
static void smp_ptlb_callback(void *info)
{
	__tlb_flush_local();
}
497 
/*
 * Run smp_ptlb_callback() on every cpu via on_each_cpu(); the last
 * argument (1) is the 'wait' flag of on_each_cpu().
 */
void smp_ptlb_all(void)
{
	on_each_cpu(smp_ptlb_callback, NULL, 1);
}
EXPORT_SYMBOL(smp_ptlb_all);
503 #endif /* ! CONFIG_64BIT */
504 
505 /*
506  * this function sends a 'reschedule' IPI to another CPU.
507  * it goes straight through and wastes no time serializing
508  * anything. Worst case is that we lose a reschedule ...
509  */
510 void smp_send_reschedule(int cpu)
511 {
512 	pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
513 }
514 
/*
 * parameter area for the set/clear control bit callbacks
 *
 * smp_ctl_bit_callback() computes
 *	new = (old & andval) | orval
 * for control register number cr.
 */
struct ec_creg_mask_parms {
	unsigned long orval;	/* bits to set */
	unsigned long andval;	/* bits to keep */
	int cr;			/* control register number, index into cregs[16] */
};
523 
524 /*
525  * callback for setting/clearing control bits
526  */
527 static void smp_ctl_bit_callback(void *info)
528 {
529 	struct ec_creg_mask_parms *pp = info;
530 	unsigned long cregs[16];
531 
532 	__ctl_store(cregs, 0, 15);
533 	cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval;
534 	__ctl_load(cregs, 0, 15);
535 }
536 
537 /*
538  * Set a bit in a control register of all cpus
539  */
540 void smp_ctl_set_bit(int cr, int bit)
541 {
542 	struct ec_creg_mask_parms parms = { 1UL << bit, -1UL, cr };
543 
544 	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
545 }
546 EXPORT_SYMBOL(smp_ctl_set_bit);
547 
548 /*
549  * Clear a bit in a control register of all cpus
550  */
551 void smp_ctl_clear_bit(int cr, int bit)
552 {
553 	struct ec_creg_mask_parms parms = { 0, ~(1UL << bit), cr };
554 
555 	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
556 }
557 EXPORT_SYMBOL(smp_ctl_clear_bit);
558 
559 #if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP)
560 
/*
 * Register save areas collected by smp_get_save_area(). The index is the
 * running count of configured cpus used by smp_detect_cpus().
 */
struct save_area *zfcpdump_save_areas[NR_CPUS + 1];
EXPORT_SYMBOL_GPL(zfcpdump_save_areas);
563 
564 static void __init smp_get_save_area(int cpu, u16 address)
565 {
566 	void *lc = pcpu_devices[0].lowcore;
567 	struct save_area *save_area;
568 
569 	if (is_kdump_kernel())
570 		return;
571 	if (!OLDMEM_BASE && (address == boot_cpu_address ||
572 			     ipl_info.type != IPL_TYPE_FCP_DUMP))
573 		return;
574 	if (cpu >= NR_CPUS) {
575 		pr_warning("CPU %i exceeds the maximum %i and is excluded "
576 			   "from the dump\n", cpu, NR_CPUS - 1);
577 		return;
578 	}
579 	save_area = kmalloc(sizeof(struct save_area), GFP_KERNEL);
580 	if (!save_area)
581 		panic("could not allocate memory for save area\n");
582 	zfcpdump_save_areas[cpu] = save_area;
583 #ifdef CONFIG_CRASH_DUMP
584 	if (address == boot_cpu_address) {
585 		/* Copy the registers of the boot cpu. */
586 		copy_oldmem_page(1, (void *) save_area, sizeof(*save_area),
587 				 SAVE_AREA_BASE - PAGE_SIZE, 0);
588 		return;
589 	}
590 #endif
591 	/* Get the registers of a non-boot cpu. */
592 	__pcpu_sigp_relax(address, sigp_stop_and_store_status, 0, NULL);
593 	memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area));
594 }
595 
596 int smp_store_status(int cpu)
597 {
598 	struct pcpu *pcpu;
599 
600 	pcpu = pcpu_devices + cpu;
601 	if (__pcpu_sigp_relax(pcpu->address, sigp_stop_and_store_status,
602 			      0, NULL) != sigp_order_code_accepted)
603 		return -EIO;
604 	return 0;
605 }
606 
607 #else /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */
608 
/* No-op variant used when neither CONFIG_ZFCPDUMP nor CONFIG_CRASH_DUMP is set. */
static inline void smp_get_save_area(int cpu, u16 address) { }
610 
611 #endif /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */
612 
613 static struct sclp_cpu_info *smp_get_cpu_info(void)
614 {
615 	static int use_sigp_detection;
616 	struct sclp_cpu_info *info;
617 	int address;
618 
619 	info = kzalloc(sizeof(*info), GFP_KERNEL);
620 	if (info && (use_sigp_detection || sclp_get_cpu_info(info))) {
621 		use_sigp_detection = 1;
622 		for (address = 0; address <= MAX_CPU_ADDRESS; address++) {
623 			if (__pcpu_sigp_relax(address, sigp_sense, 0, NULL) ==
624 			    sigp_not_operational)
625 				continue;
626 			info->cpu[info->configured].address = address;
627 			info->configured++;
628 		}
629 		info->combined = info->configured;
630 	}
631 	return info;
632 }
633 
634 static int __devinit smp_add_present_cpu(int cpu);
635 
636 static int __devinit __smp_rescan_cpus(struct sclp_cpu_info *info,
637 				       int sysfs_add)
638 {
639 	struct pcpu *pcpu;
640 	cpumask_t avail;
641 	int cpu, nr, i;
642 
643 	nr = 0;
644 	cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
645 	cpu = cpumask_first(&avail);
646 	for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
647 		if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type)
648 			continue;
649 		if (pcpu_find_address(cpu_present_mask, info->cpu[i].address))
650 			continue;
651 		pcpu = pcpu_devices + cpu;
652 		pcpu->address = info->cpu[i].address;
653 		pcpu->state = (cpu >= info->configured) ?
654 			CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
655 		cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
656 		set_cpu_present(cpu, true);
657 		if (sysfs_add && smp_add_present_cpu(cpu) != 0)
658 			set_cpu_present(cpu, false);
659 		else
660 			nr++;
661 		cpu = cpumask_next(cpu, &avail);
662 	}
663 	return nr;
664 }
665 
666 static void __init smp_detect_cpus(void)
667 {
668 	unsigned int cpu, c_cpus, s_cpus;
669 	struct sclp_cpu_info *info;
670 
671 	info = smp_get_cpu_info();
672 	if (!info)
673 		panic("smp_detect_cpus failed to allocate memory\n");
674 	if (info->has_cpu_type) {
675 		for (cpu = 0; cpu < info->combined; cpu++) {
676 			if (info->cpu[cpu].address != boot_cpu_address)
677 				continue;
678 			/* The boot cpu dictates the cpu type. */
679 			boot_cpu_type = info->cpu[cpu].type;
680 			break;
681 		}
682 	}
683 	c_cpus = s_cpus = 0;
684 	for (cpu = 0; cpu < info->combined; cpu++) {
685 		if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type)
686 			continue;
687 		if (cpu < info->configured) {
688 			smp_get_save_area(c_cpus, info->cpu[cpu].address);
689 			c_cpus++;
690 		} else
691 			s_cpus++;
692 	}
693 	pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
694 	get_online_cpus();
695 	__smp_rescan_cpus(info, 0);
696 	put_online_cpus();
697 	kfree(info);
698 }
699 
/*
 *	Activate a secondary processor.
 *
 * Entry function handed to pcpu_start_fn() by __cpu_up(); it therefore
 * runs on the cpu being brought up. @cpuvoid is unused. The function ends
 * in cpu_idle().
 */
static void __cpuinit smp_start_secondary(void *cpuvoid)
{
	S390_lowcore.last_update_clock = get_clock();
	/* Point the restart fields at do_restart on restart_stack. */
	S390_lowcore.restart_stack = (unsigned long) restart_stack;
	S390_lowcore.restart_fn = (unsigned long) do_restart;
	S390_lowcore.restart_data = 0;
	S390_lowcore.restart_source = -1UL;
	/* Load the register contents saved by pcpu_prepare_secondary(). */
	restore_access_regs(S390_lowcore.access_regs_save_area);
	__ctl_load(S390_lowcore.cregs_save_area, 0, 15);
	__load_psw_mask(psw_kernel_bits | PSW_MASK_DAT);
	cpu_init();
	preempt_disable();
	init_cpu_timer();
	init_cpu_vtimer();
	pfault_init();
	notify_cpu_starting(smp_processor_id());
	/* __cpu_up() polls cpu_online() and returns once this bit is set. */
	ipi_call_lock();
	set_cpu_online(smp_processor_id(), true);
	ipi_call_unlock();
	local_irq_enable();
	/* cpu_idle will call schedule for us */
	cpu_idle();
}
726 
727 /* Upping and downing of CPUs */
728 int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
729 {
730 	struct pcpu *pcpu;
731 	int rc;
732 
733 	pcpu = pcpu_devices + cpu;
734 	if (pcpu->state != CPU_STATE_CONFIGURED)
735 		return -EIO;
736 	if (pcpu_sigp_retry(pcpu, sigp_initial_cpu_reset, 0) !=
737 	    sigp_order_code_accepted)
738 		return -EIO;
739 
740 	rc = pcpu_alloc_lowcore(pcpu, cpu);
741 	if (rc)
742 		return rc;
743 	pcpu_prepare_secondary(pcpu, cpu);
744 	pcpu_attach_task(pcpu, tidle);
745 	pcpu_start_fn(pcpu, smp_start_secondary, NULL);
746 	while (!cpu_online(cpu))
747 		cpu_relax();
748 	return 0;
749 }
750 
751 static int __init setup_possible_cpus(char *s)
752 {
753 	int max, cpu;
754 
755 	if (kstrtoint(s, 0, &max) < 0)
756 		return 0;
757 	init_cpu_possible(cpumask_of(0));
758 	for (cpu = 1; cpu < max && cpu < nr_cpu_ids; cpu++)
759 		set_cpu_possible(cpu, true);
760 	return 0;
761 }
762 early_param("possible_cpus", setup_possible_cpus);
763 
764 #ifdef CONFIG_HOTPLUG_CPU
765 
766 int __cpu_disable(void)
767 {
768 	unsigned long cregs[16];
769 
770 	set_cpu_online(smp_processor_id(), false);
771 	/* Disable pseudo page faults on this cpu. */
772 	pfault_fini();
773 	/* Disable interrupt sources via control register. */
774 	__ctl_store(cregs, 0, 15);
775 	cregs[0]  &= ~0x0000ee70UL;	/* disable all external interrupts */
776 	cregs[6]  &= ~0xff000000UL;	/* disable all I/O interrupts */
777 	cregs[14] &= ~0x1f000000UL;	/* disable most machine checks */
778 	__ctl_load(cregs, 0, 15);
779 	return 0;
780 }
781 
782 void __cpu_die(unsigned int cpu)
783 {
784 	struct pcpu *pcpu;
785 
786 	/* Wait until target cpu is down */
787 	pcpu = pcpu_devices + cpu;
788 	while (!pcpu_stopped(pcpu))
789 		cpu_relax();
790 	pcpu_free_lowcore(pcpu);
791 	atomic_dec(&init_mm.context.attach_count);
792 }
793 
794 void __noreturn cpu_die(void)
795 {
796 	idle_task_exit();
797 	pcpu_sigp_retry(pcpu_devices + smp_processor_id(), sigp_stop, 0);
798 	for (;;) ;
799 }
800 
801 #endif /* CONFIG_HOTPLUG_CPU */
802 
803 static void smp_call_os_info_init_fn(void)
804 {
805 	int (*init_fn)(void);
806 	unsigned long size;
807 
808 	init_fn = os_info_old_entry(OS_INFO_INIT_FN, &size);
809 	if (!init_fn)
810 		return;
811 	init_fn();
812 }
813 
814 void __init smp_prepare_cpus(unsigned int max_cpus)
815 {
816 	/* request the 0x1201 emergency signal external interrupt */
817 	if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0)
818 		panic("Couldn't request external interrupt 0x1201");
819 	/* request the 0x1202 external call external interrupt */
820 	if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0)
821 		panic("Couldn't request external interrupt 0x1202");
822 	smp_call_os_info_init_fn();
823 	smp_detect_cpus();
824 }
825 
826 void __init smp_prepare_boot_cpu(void)
827 {
828 	struct pcpu *pcpu = pcpu_devices;
829 
830 	boot_cpu_address = stap();
831 	pcpu->state = CPU_STATE_CONFIGURED;
832 	pcpu->address = boot_cpu_address;
833 	pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix();
834 	pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE;
835 	pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE;
836 	S390_lowcore.percpu_offset = __per_cpu_offset[0];
837 	cpu_set_polarization(0, POLARIZATION_UNKNOWN);
838 	set_cpu_present(0, true);
839 	set_cpu_online(0, true);
840 }
841 
/* Intentionally empty; @max_cpus is unused. */
void __init smp_cpus_done(unsigned int max_cpus)
{
}
845 
/* Record logical cpu number 0 in the lowcore of the executing cpu. */
void __init smp_setup_processor_id(void)
{
	S390_lowcore.cpu_nr = 0;
}
850 
/*
 * the frequency of the profiling timer can be changed
 * by writing a multiplier value into /proc/profile.
 *
 * usually you want to run this on all CPUs ;)
 *
 * This implementation ignores @multiplier and always returns 0.
 */
int setup_profiling_timer(unsigned int multiplier)
{
	(void) multiplier;

	return 0;
}
861 
862 #ifdef CONFIG_HOTPLUG_CPU
863 static ssize_t cpu_configure_show(struct device *dev,
864 				  struct device_attribute *attr, char *buf)
865 {
866 	ssize_t count;
867 
868 	mutex_lock(&smp_cpu_state_mutex);
869 	count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);
870 	mutex_unlock(&smp_cpu_state_mutex);
871 	return count;
872 }
873 
874 static ssize_t cpu_configure_store(struct device *dev,
875 				   struct device_attribute *attr,
876 				   const char *buf, size_t count)
877 {
878 	struct pcpu *pcpu;
879 	int cpu, val, rc;
880 	char delim;
881 
882 	if (sscanf(buf, "%d %c", &val, &delim) != 1)
883 		return -EINVAL;
884 	if (val != 0 && val != 1)
885 		return -EINVAL;
886 	get_online_cpus();
887 	mutex_lock(&smp_cpu_state_mutex);
888 	rc = -EBUSY;
889 	/* disallow configuration changes of online cpus and cpu 0 */
890 	cpu = dev->id;
891 	if (cpu_online(cpu) || cpu == 0)
892 		goto out;
893 	pcpu = pcpu_devices + cpu;
894 	rc = 0;
895 	switch (val) {
896 	case 0:
897 		if (pcpu->state != CPU_STATE_CONFIGURED)
898 			break;
899 		rc = sclp_cpu_deconfigure(pcpu->address);
900 		if (rc)
901 			break;
902 		pcpu->state = CPU_STATE_STANDBY;
903 		cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
904 		topology_expect_change();
905 		break;
906 	case 1:
907 		if (pcpu->state != CPU_STATE_STANDBY)
908 			break;
909 		rc = sclp_cpu_configure(pcpu->address);
910 		if (rc)
911 			break;
912 		pcpu->state = CPU_STATE_CONFIGURED;
913 		cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
914 		topology_expect_change();
915 		break;
916 	default:
917 		break;
918 	}
919 out:
920 	mutex_unlock(&smp_cpu_state_mutex);
921 	put_online_cpus();
922 	return rc ? rc : count;
923 }
924 static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
925 #endif /* CONFIG_HOTPLUG_CPU */
926 
927 static ssize_t show_cpu_address(struct device *dev,
928 				struct device_attribute *attr, char *buf)
929 {
930 	return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);
931 }
932 static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
933 
/*
 * Attributes created for every registered cpu by smp_add_present_cpu(),
 * whether the cpu is online or not.
 */
static struct attribute *cpu_common_attrs[] = {
#ifdef CONFIG_HOTPLUG_CPU
	&dev_attr_configure.attr,
#endif
	&dev_attr_address.attr,
	NULL,
};

static struct attribute_group cpu_common_attr_group = {
	.attrs = cpu_common_attrs,
};
945 
946 static ssize_t show_capability(struct device *dev,
947 				struct device_attribute *attr, char *buf)
948 {
949 	unsigned int capability;
950 	int rc;
951 
952 	rc = get_cpu_capability(&capability);
953 	if (rc)
954 		return rc;
955 	return sprintf(buf, "%u\n", capability);
956 }
957 static DEVICE_ATTR(capability, 0444, show_capability, NULL);
958 
/*
 * sysfs show handler of the "idle_count" attribute: print the idle count
 * of the cpu, plus one if idle_enter is currently non-zero.
 */
static ssize_t show_idle_count(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
	unsigned long long idle_count;
	unsigned int sequence;

	/*
	 * Lockless read: repeat while the sequence counter is odd or has
	 * changed during the read.
	 * NOTE(review): presumably the writer bumps idle->sequence before and
	 * after each update - confirm against the code that updates s390_idle.
	 */
	do {
		sequence = ACCESS_ONCE(idle->sequence);
		idle_count = ACCESS_ONCE(idle->idle_count);
		if (ACCESS_ONCE(idle->idle_enter))
			idle_count++;
	} while ((sequence & 1) || (idle->sequence != sequence));
	return sprintf(buf, "%llu\n", idle_count);
}
static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
975 
/*
 * sysfs show handler of the "idle_time_us" attribute: print the
 * accumulated idle time of the cpu, shifted right by 12 bits.
 */
static ssize_t show_idle_time(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
	unsigned long long now, idle_time, idle_enter, idle_exit;
	unsigned int sequence;

	/*
	 * Lockless read: repeat while the sequence counter is odd or has
	 * changed during the read.
	 */
	do {
		now = get_clock();
		sequence = ACCESS_ONCE(idle->sequence);
		idle_time = ACCESS_ONCE(idle->idle_time);
		idle_enter = ACCESS_ONCE(idle->idle_enter);
		idle_exit = ACCESS_ONCE(idle->idle_exit);
	} while ((sequence & 1) || (idle->sequence != sequence));
	/*
	 * If idle_enter is set, add the span since then; it ends at idle_exit
	 * if that is set, otherwise at 'now'.
	 */
	idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
	return sprintf(buf, "%llu\n", idle_time >> 12);
}
static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
994 
/*
 * Attributes that exist only while a cpu is online: created on CPU_ONLINE
 * and removed on CPU_DEAD by smp_cpu_notify(), and created by
 * smp_add_present_cpu() for cpus that are already online.
 */
static struct attribute *cpu_online_attrs[] = {
	&dev_attr_capability.attr,
	&dev_attr_idle_count.attr,
	&dev_attr_idle_time_us.attr,
	NULL,
};

static struct attribute_group cpu_online_attr_group = {
	.attrs = cpu_online_attrs,
};
1005 
1006 static int __cpuinit smp_cpu_notify(struct notifier_block *self,
1007 				    unsigned long action, void *hcpu)
1008 {
1009 	unsigned int cpu = (unsigned int)(long)hcpu;
1010 	struct cpu *c = &pcpu_devices[cpu].cpu;
1011 	struct device *s = &c->dev;
1012 	struct s390_idle_data *idle;
1013 	int err = 0;
1014 
1015 	switch (action) {
1016 	case CPU_ONLINE:
1017 	case CPU_ONLINE_FROZEN:
1018 		idle = &per_cpu(s390_idle, cpu);
1019 		memset(idle, 0, sizeof(struct s390_idle_data));
1020 		err = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
1021 		break;
1022 	case CPU_DEAD:
1023 	case CPU_DEAD_FROZEN:
1024 		sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
1025 		break;
1026 	}
1027 	return notifier_from_errno(err);
1028 }
1029 
/* Notifier block registered by s390_smp_init(). */
static struct notifier_block __cpuinitdata smp_cpu_nb = {
	.notifier_call = smp_cpu_notify,
};
1033 
1034 static int __devinit smp_add_present_cpu(int cpu)
1035 {
1036 	struct cpu *c = &pcpu_devices[cpu].cpu;
1037 	struct device *s = &c->dev;
1038 	int rc;
1039 
1040 	c->hotpluggable = 1;
1041 	rc = register_cpu(c, cpu);
1042 	if (rc)
1043 		goto out;
1044 	rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group);
1045 	if (rc)
1046 		goto out_cpu;
1047 	if (cpu_online(cpu)) {
1048 		rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
1049 		if (rc)
1050 			goto out_online;
1051 	}
1052 	rc = topology_cpu_init(c);
1053 	if (rc)
1054 		goto out_topology;
1055 	return 0;
1056 
1057 out_topology:
1058 	if (cpu_online(cpu))
1059 		sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
1060 out_online:
1061 	sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
1062 out_cpu:
1063 #ifdef CONFIG_HOTPLUG_CPU
1064 	unregister_cpu(c);
1065 #endif
1066 out:
1067 	return rc;
1068 }
1069 
1070 #ifdef CONFIG_HOTPLUG_CPU
1071 
1072 int __ref smp_rescan_cpus(void)
1073 {
1074 	struct sclp_cpu_info *info;
1075 	int nr;
1076 
1077 	info = smp_get_cpu_info();
1078 	if (!info)
1079 		return -ENOMEM;
1080 	get_online_cpus();
1081 	mutex_lock(&smp_cpu_state_mutex);
1082 	nr = __smp_rescan_cpus(info, 1);
1083 	mutex_unlock(&smp_cpu_state_mutex);
1084 	put_online_cpus();
1085 	kfree(info);
1086 	if (nr)
1087 		topology_schedule_update();
1088 	return 0;
1089 }
1090 
1091 static ssize_t __ref rescan_store(struct device *dev,
1092 				  struct device_attribute *attr,
1093 				  const char *buf,
1094 				  size_t count)
1095 {
1096 	int rc;
1097 
1098 	rc = smp_rescan_cpus();
1099 	return rc ? rc : count;
1100 }
1101 static DEVICE_ATTR(rescan, 0200, NULL, rescan_store);
1102 #endif /* CONFIG_HOTPLUG_CPU */
1103 
1104 static int __init s390_smp_init(void)
1105 {
1106 	int cpu, rc;
1107 
1108 	register_cpu_notifier(&smp_cpu_nb);
1109 #ifdef CONFIG_HOTPLUG_CPU
1110 	rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
1111 	if (rc)
1112 		return rc;
1113 #endif
1114 	for_each_present_cpu(cpu) {
1115 		rc = smp_add_present_cpu(cpu);
1116 		if (rc)
1117 			return rc;
1118 	}
1119 	return 0;
1120 }
1121 subsys_initcall(s390_smp_init);
1122