xref: /linux/arch/powerpc/platforms/pseries/hotplug-cpu.c (revision cdd30ebb1b9f36159d66f088b61aee264e649d7a)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * pseries CPU Hotplug infrastructure.
4  *
5  * Split out from arch/powerpc/platforms/pseries/setup.c
6  *  arch/powerpc/kernel/rtas.c, and arch/powerpc/platforms/pseries/smp.c
7  *
8  * Peter Bergner, IBM	March 2001.
9  * Copyright (C) 2001 IBM.
10  * Dave Engebretsen, Peter Bergner, and
11  * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
12  * Plus various changes from other IBM teams...
13  *
14  * Copyright (C) 2006 Michael Ellerman, IBM Corporation
15  */
16 
17 #define pr_fmt(fmt)     "pseries-hotplug-cpu: " fmt
18 
19 #include <linux/kernel.h>
20 #include <linux/interrupt.h>
21 #include <linux/delay.h>
22 #include <linux/sched.h>	/* for idle_task_exit */
23 #include <linux/sched/hotplug.h>
24 #include <linux/cpu.h>
25 #include <linux/of.h>
26 #include <linux/slab.h>
27 #include <asm/prom.h>
28 #include <asm/rtas.h>
29 #include <asm/firmware.h>
30 #include <asm/machdep.h>
31 #include <asm/vdso_datapage.h>
32 #include <asm/xics.h>
33 #include <asm/xive.h>
34 #include <asm/plpar_wrappers.h>
35 #include <asm/topology.h>
36 #include <asm/systemcfg.h>
37 
38 #include "pseries.h"
39 
40 /* RTAS stop-self token; the call is made unlocked because it never returns */
41 static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;
42 
43 /*
44  * Record the CPU ids used on each node.
45  * Protected by cpu_add_remove_lock.
46  */
47 static cpumask_var_t node_recorded_ids_map[MAX_NUMNODES];
48 
49 static void rtas_stop_self(void)
50 {
51 	static struct rtas_args args;
52 
53 	local_irq_disable();
54 
55 	BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE);
56 
57 	rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL);
58 
59 	panic("Alas, I survived.\n");
60 }
61 
/*
 * Executed by the CPU that is going offline: leave the idle task, tear down
 * the interrupt controller state for this CPU, unregister the SLB shadow
 * buffer and VPA for its hardware id, then stop through RTAS.
 */
static void pseries_cpu_offline_self(void)
{
	unsigned int hw_id = hard_smp_processor_id();

	local_irq_disable();
	idle_task_exit();

	if (!xive_enabled())
		xics_teardown_cpu();
	else
		xive_teardown_cpu();

	unregister_slb_shadow(hw_id);
	unregister_vpa(hw_id);
	rtas_stop_self();

	/* rtas_stop_self() is not expected to come back. */
	BUG();
	for (;;)
		;
}
81 
82 static int pseries_cpu_disable(void)
83 {
84 	int cpu = smp_processor_id();
85 
86 	set_cpu_online(cpu, false);
87 #ifdef CONFIG_PPC64_PROC_SYSTEMCFG
88 	systemcfg->processorCount--;
89 #endif
90 
91 	/*fix boot_cpuid here*/
92 	if (cpu == boot_cpuid)
93 		boot_cpuid = cpumask_any(cpu_online_mask);
94 
95 	/* FIXME: abstract this to not be platform specific later on */
96 	if (xive_enabled())
97 		xive_smp_disable_cpu();
98 	else
99 		xics_migrate_irqs_away();
100 
101 	cleanup_cpu_mmu_context();
102 
103 	return 0;
104 }
105 
106 /*
107  * pseries_cpu_die: Wait for the cpu to die.
108  * @cpu: logical processor id of the CPU whose death we're awaiting.
109  *
110  * This function is called from the context of the thread which is performing
111  * the cpu-offline. Here we wait for long enough to allow the cpu in question
112  * to self-destroy so that the cpu-offline thread can send the CPU_DEAD
113  * notifications.
114  *
115  * OTOH, pseries_cpu_offline_self() is called by the @cpu when it wants to
116  * self-destruct.
117  */
118 static void pseries_cpu_die(unsigned int cpu)
119 {
120 	int cpu_status = 1;
121 	unsigned int pcpu = get_hard_smp_processor_id(cpu);
122 	unsigned long timeout = jiffies + msecs_to_jiffies(120000);
123 
124 	while (true) {
125 		cpu_status = smp_query_cpu_stopped(pcpu);
126 		if (cpu_status == QCSS_STOPPED ||
127 		    cpu_status == QCSS_HARDWARE_ERROR)
128 			break;
129 
130 		if (time_after(jiffies, timeout)) {
131 			pr_warn("CPU %i (hwid %i) didn't die after 120 seconds\n",
132 				cpu, pcpu);
133 			timeout = jiffies + msecs_to_jiffies(120000);
134 		}
135 
136 		cond_resched();
137 	}
138 
139 	if (cpu_status == QCSS_HARDWARE_ERROR) {
140 		pr_warn("CPU %i (hwid %i) reported error while dying\n",
141 			cpu, pcpu);
142 	}
143 
144 	paca_ptrs[cpu]->cpu_start = 0;
145 }
146 
147 /**
148  * find_cpu_id_range - found a linear ranger of @nthreads free CPU ids.
149  * @nthreads : the number of threads (cpu ids)
150  * @assigned_node : the node it belongs to or NUMA_NO_NODE if free ids from any
151  *                  node can be peek.
152  * @cpu_mask: the returned CPU mask.
153  *
154  * Returns 0 on success.
155  */
156 static int find_cpu_id_range(unsigned int nthreads, int assigned_node,
157 			     cpumask_var_t *cpu_mask)
158 {
159 	cpumask_var_t candidate_mask;
160 	unsigned int cpu, node;
161 	int rc = -ENOSPC;
162 
163 	if (!zalloc_cpumask_var(&candidate_mask, GFP_KERNEL))
164 		return -ENOMEM;
165 
166 	cpumask_clear(*cpu_mask);
167 	for (cpu = 0; cpu < nthreads; cpu++)
168 		cpumask_set_cpu(cpu, *cpu_mask);
169 
170 	BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask));
171 
172 	/* Get a bitmap of unoccupied slots. */
173 	cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask);
174 
175 	if (assigned_node != NUMA_NO_NODE) {
176 		/*
177 		 * Remove free ids previously assigned on the other nodes. We
178 		 * can walk only online nodes because once a node became online
179 		 * it is not turned offlined back.
180 		 */
181 		for_each_online_node(node) {
182 			if (node == assigned_node)
183 				continue;
184 			cpumask_andnot(candidate_mask, candidate_mask,
185 				       node_recorded_ids_map[node]);
186 		}
187 	}
188 
189 	if (cpumask_empty(candidate_mask))
190 		goto out;
191 
192 	while (!cpumask_empty(*cpu_mask)) {
193 		if (cpumask_subset(*cpu_mask, candidate_mask))
194 			/* Found a range where we can insert the new cpu(s) */
195 			break;
196 		cpumask_shift_left(*cpu_mask, *cpu_mask, nthreads);
197 	}
198 
199 	if (!cpumask_empty(*cpu_mask))
200 		rc = 0;
201 
202 out:
203 	free_cpumask_var(candidate_mask);
204 	return rc;
205 }
206 
207 /*
208  * Update cpu_present_mask and paca(s) for a new cpu node.  The wrinkle
209  * here is that a cpu device node may represent multiple logical cpus
210  * in the SMT case.  We must honor the assumption in other code that
211  * the logical ids for sibling SMT threads x and y are adjacent, such
212  * that x^1 == y and y^1 == x.
213  */
214 static int pseries_add_processor(struct device_node *np)
215 {
216 	int len, nthreads, node, cpu, assigned_node;
217 	int rc = 0;
218 	cpumask_var_t cpu_mask;
219 	const __be32 *intserv;
220 
221 	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
222 	if (!intserv)
223 		return 0;
224 
225 	nthreads = len / sizeof(u32);
226 
227 	if (!alloc_cpumask_var(&cpu_mask, GFP_KERNEL))
228 		return -ENOMEM;
229 
230 	/*
231 	 * Fetch from the DT nodes read by dlpar_configure_connector() the NUMA
232 	 * node id the added CPU belongs to.
233 	 */
234 	node = of_node_to_nid(np);
235 	if (node < 0 || !node_possible(node))
236 		node = first_online_node;
237 
238 	BUG_ON(node == NUMA_NO_NODE);
239 	assigned_node = node;
240 
241 	cpu_maps_update_begin();
242 
243 	rc = find_cpu_id_range(nthreads, node, &cpu_mask);
244 	if (rc && nr_node_ids > 1) {
245 		/*
246 		 * Try again, considering the free CPU ids from the other node.
247 		 */
248 		node = NUMA_NO_NODE;
249 		rc = find_cpu_id_range(nthreads, NUMA_NO_NODE, &cpu_mask);
250 	}
251 
252 	if (rc) {
253 		pr_err("Cannot add cpu %pOF; this system configuration"
254 		       " supports %d logical cpus.\n", np, num_possible_cpus());
255 		goto out;
256 	}
257 
258 	for_each_cpu(cpu, cpu_mask) {
259 		BUG_ON(cpu_present(cpu));
260 		set_cpu_present(cpu, true);
261 		set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++));
262 	}
263 
264 	/* Record the newly used CPU ids for the associate node. */
265 	cpumask_or(node_recorded_ids_map[assigned_node],
266 		   node_recorded_ids_map[assigned_node], cpu_mask);
267 
268 	/*
269 	 * If node is set to NUMA_NO_NODE, CPU ids have be reused from
270 	 * another node, remove them from its mask.
271 	 */
272 	if (node == NUMA_NO_NODE) {
273 		cpu = cpumask_first(cpu_mask);
274 		pr_warn("Reusing free CPU ids %d-%d from another node\n",
275 			cpu, cpu + nthreads - 1);
276 		for_each_online_node(node) {
277 			if (node == assigned_node)
278 				continue;
279 			cpumask_andnot(node_recorded_ids_map[node],
280 				       node_recorded_ids_map[node],
281 				       cpu_mask);
282 		}
283 	}
284 
285 out:
286 	cpu_maps_update_done();
287 	free_cpumask_var(cpu_mask);
288 	return rc;
289 }
290 
291 /*
292  * Update the present map for a cpu node which is going away, and set
293  * the hard id in the paca(s) to -1 to be consistent with boot time
294  * convention for non-present cpus.
295  */
296 static void pseries_remove_processor(struct device_node *np)
297 {
298 	unsigned int cpu;
299 	int len, nthreads, i;
300 	const __be32 *intserv;
301 	u32 thread;
302 
303 	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
304 	if (!intserv)
305 		return;
306 
307 	nthreads = len / sizeof(u32);
308 
309 	cpu_maps_update_begin();
310 	for (i = 0; i < nthreads; i++) {
311 		thread = be32_to_cpu(intserv[i]);
312 		for_each_present_cpu(cpu) {
313 			if (get_hard_smp_processor_id(cpu) != thread)
314 				continue;
315 			BUG_ON(cpu_online(cpu));
316 			set_cpu_present(cpu, false);
317 			set_hard_smp_processor_id(cpu, -1);
318 			update_numa_cpu_lookup_table(cpu, -1);
319 			break;
320 		}
321 		if (cpu >= nr_cpu_ids)
322 			printk(KERN_WARNING "Could not find cpu to remove "
323 			       "with physical id 0x%x\n", thread);
324 	}
325 	cpu_maps_update_done();
326 }
327 
328 static int dlpar_offline_cpu(struct device_node *dn)
329 {
330 	int rc = 0;
331 	unsigned int cpu;
332 	int len, nthreads, i;
333 	const __be32 *intserv;
334 	u32 thread;
335 
336 	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
337 	if (!intserv)
338 		return -EINVAL;
339 
340 	nthreads = len / sizeof(u32);
341 
342 	cpu_maps_update_begin();
343 	for (i = 0; i < nthreads; i++) {
344 		thread = be32_to_cpu(intserv[i]);
345 		for_each_present_cpu(cpu) {
346 			if (get_hard_smp_processor_id(cpu) != thread)
347 				continue;
348 
349 			if (!cpu_online(cpu))
350 				break;
351 
352 			/*
353 			 * device_offline() will return -EBUSY (via cpu_down()) if there
354 			 * is only one CPU left. Check it here to fail earlier and with a
355 			 * more informative error message, while also retaining the
356 			 * cpu_add_remove_lock to be sure that no CPUs are being
357 			 * online/offlined during this check.
358 			 */
359 			if (num_online_cpus() == 1) {
360 				pr_warn("Unable to remove last online CPU %pOFn\n", dn);
361 				rc = -EBUSY;
362 				goto out_unlock;
363 			}
364 
365 			cpu_maps_update_done();
366 			rc = device_offline(get_cpu_device(cpu));
367 			if (rc)
368 				goto out;
369 			cpu_maps_update_begin();
370 			break;
371 		}
372 		if (cpu == num_possible_cpus()) {
373 			pr_warn("Could not find cpu to offline with physical id 0x%x\n",
374 				thread);
375 		}
376 	}
377 out_unlock:
378 	cpu_maps_update_done();
379 
380 out:
381 	return rc;
382 }
383 
384 static int dlpar_online_cpu(struct device_node *dn)
385 {
386 	int rc = 0;
387 	unsigned int cpu;
388 	int len, nthreads, i;
389 	const __be32 *intserv;
390 	u32 thread;
391 
392 	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
393 	if (!intserv)
394 		return -EINVAL;
395 
396 	nthreads = len / sizeof(u32);
397 
398 	cpu_maps_update_begin();
399 	for (i = 0; i < nthreads; i++) {
400 		thread = be32_to_cpu(intserv[i]);
401 		for_each_present_cpu(cpu) {
402 			if (get_hard_smp_processor_id(cpu) != thread)
403 				continue;
404 
405 			if (!topology_is_primary_thread(cpu)) {
406 				if (cpu_smt_control != CPU_SMT_ENABLED)
407 					break;
408 				if (!topology_smt_thread_allowed(cpu))
409 					break;
410 			}
411 
412 			cpu_maps_update_done();
413 			find_and_update_cpu_nid(cpu);
414 			rc = device_online(get_cpu_device(cpu));
415 			if (rc) {
416 				dlpar_offline_cpu(dn);
417 				goto out;
418 			}
419 			cpu_maps_update_begin();
420 
421 			break;
422 		}
423 		if (cpu == num_possible_cpus())
424 			printk(KERN_WARNING "Could not find cpu to online "
425 			       "with physical id 0x%x\n", thread);
426 	}
427 	cpu_maps_update_done();
428 
429 out:
430 	return rc;
431 
432 }
433 
434 static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
435 {
436 	struct device_node *child = NULL;
437 	u32 my_drc_index;
438 	bool found;
439 	int rc;
440 
441 	/* Assume cpu doesn't exist */
442 	found = false;
443 
444 	for_each_child_of_node(parent, child) {
445 		rc = of_property_read_u32(child, "ibm,my-drc-index",
446 					  &my_drc_index);
447 		if (rc)
448 			continue;
449 
450 		if (my_drc_index == drc_index) {
451 			of_node_put(child);
452 			found = true;
453 			break;
454 		}
455 	}
456 
457 	return found;
458 }
459 
460 static bool drc_info_valid_index(struct device_node *parent, u32 drc_index)
461 {
462 	struct property *info;
463 	struct of_drc_info drc;
464 	const __be32 *value;
465 	u32 index;
466 	int count, i, j;
467 
468 	info = of_find_property(parent, "ibm,drc-info", NULL);
469 	if (!info)
470 		return false;
471 
472 	value = of_prop_next_u32(info, NULL, &count);
473 
474 	/* First value of ibm,drc-info is number of drc-info records */
475 	if (value)
476 		value++;
477 	else
478 		return false;
479 
480 	for (i = 0; i < count; i++) {
481 		if (of_read_drc_info_cell(&info, &value, &drc))
482 			return false;
483 
484 		if (strncmp(drc.drc_type, "CPU", 3))
485 			break;
486 
487 		if (drc_index > drc.last_drc_index)
488 			continue;
489 
490 		index = drc.drc_index_start;
491 		for (j = 0; j < drc.num_sequential_elems; j++) {
492 			if (drc_index == index)
493 				return true;
494 
495 			index += drc.sequential_inc;
496 		}
497 	}
498 
499 	return false;
500 }
501 
502 static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
503 {
504 	bool found = false;
505 	int rc, index;
506 
507 	if (of_property_present(parent, "ibm,drc-info"))
508 		return drc_info_valid_index(parent, drc_index);
509 
510 	/* Note that the format of the ibm,drc-indexes array is
511 	 * the number of entries in the array followed by the array
512 	 * of drc values so we start looking at index = 1.
513 	 */
514 	index = 1;
515 	while (!found) {
516 		u32 drc;
517 
518 		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
519 						index++, &drc);
520 
521 		if (rc)
522 			break;
523 
524 		if (drc == drc_index)
525 			found = true;
526 	}
527 
528 	return found;
529 }
530 
531 static int pseries_cpuhp_attach_nodes(struct device_node *dn)
532 {
533 	struct of_changeset cs;
534 	int ret;
535 
536 	/*
537 	 * This device node is unattached but may have siblings; open-code the
538 	 * traversal.
539 	 */
540 	for (of_changeset_init(&cs); dn != NULL; dn = dn->sibling) {
541 		ret = of_changeset_attach_node(&cs, dn);
542 		if (ret)
543 			goto out;
544 	}
545 
546 	ret = of_changeset_apply(&cs);
547 out:
548 	of_changeset_destroy(&cs);
549 	return ret;
550 }
551 
552 static ssize_t dlpar_cpu_add(u32 drc_index)
553 {
554 	struct device_node *dn, *parent;
555 	int rc, saved_rc;
556 
557 	pr_debug("Attempting to add CPU, drc index: %x\n", drc_index);
558 
559 	parent = of_find_node_by_path("/cpus");
560 	if (!parent) {
561 		pr_warn("Failed to find CPU root node \"/cpus\"\n");
562 		return -ENODEV;
563 	}
564 
565 	if (dlpar_cpu_exists(parent, drc_index)) {
566 		of_node_put(parent);
567 		pr_warn("CPU with drc index %x already exists\n", drc_index);
568 		return -EINVAL;
569 	}
570 
571 	if (!valid_cpu_drc_index(parent, drc_index)) {
572 		of_node_put(parent);
573 		pr_warn("Cannot find CPU (drc index %x) to add.\n", drc_index);
574 		return -EINVAL;
575 	}
576 
577 	rc = dlpar_acquire_drc(drc_index);
578 	if (rc) {
579 		pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
580 			rc, drc_index);
581 		of_node_put(parent);
582 		return -EINVAL;
583 	}
584 
585 	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
586 	if (!dn) {
587 		pr_warn("Failed call to configure-connector, drc index: %x\n",
588 			drc_index);
589 		dlpar_release_drc(drc_index);
590 		of_node_put(parent);
591 		return -EINVAL;
592 	}
593 
594 	rc = pseries_cpuhp_attach_nodes(dn);
595 
596 	/* Regardless we are done with parent now */
597 	of_node_put(parent);
598 
599 	if (rc) {
600 		saved_rc = rc;
601 		pr_warn("Failed to attach node %pOFn, rc: %d, drc index: %x\n",
602 			dn, rc, drc_index);
603 
604 		rc = dlpar_release_drc(drc_index);
605 		if (!rc)
606 			dlpar_free_cc_nodes(dn);
607 
608 		return saved_rc;
609 	}
610 
611 	update_numa_distance(dn);
612 
613 	rc = dlpar_online_cpu(dn);
614 	if (rc) {
615 		saved_rc = rc;
616 		pr_warn("Failed to online cpu %pOFn, rc: %d, drc index: %x\n",
617 			dn, rc, drc_index);
618 
619 		rc = dlpar_detach_node(dn);
620 		if (!rc)
621 			dlpar_release_drc(drc_index);
622 
623 		return saved_rc;
624 	}
625 
626 	pr_debug("Successfully added CPU %pOFn, drc index: %x\n", dn,
627 		 drc_index);
628 	return rc;
629 }
630 
631 static unsigned int pseries_cpuhp_cache_use_count(const struct device_node *cachedn)
632 {
633 	unsigned int use_count = 0;
634 	struct device_node *dn, *tn;
635 
636 	WARN_ON(!of_node_is_type(cachedn, "cache"));
637 
638 	for_each_of_cpu_node(dn) {
639 		tn = of_find_next_cache_node(dn);
640 		of_node_put(tn);
641 		if (tn == cachedn)
642 			use_count++;
643 	}
644 
645 	for_each_node_by_type(dn, "cache") {
646 		tn = of_find_next_cache_node(dn);
647 		of_node_put(tn);
648 		if (tn == cachedn)
649 			use_count++;
650 	}
651 
652 	return use_count;
653 }
654 
655 static int pseries_cpuhp_detach_nodes(struct device_node *cpudn)
656 {
657 	struct device_node *dn;
658 	struct of_changeset cs;
659 	int ret = 0;
660 
661 	of_changeset_init(&cs);
662 	ret = of_changeset_detach_node(&cs, cpudn);
663 	if (ret)
664 		goto out;
665 
666 	dn = cpudn;
667 	while ((dn = of_find_next_cache_node(dn))) {
668 		if (pseries_cpuhp_cache_use_count(dn) > 1) {
669 			of_node_put(dn);
670 			break;
671 		}
672 
673 		ret = of_changeset_detach_node(&cs, dn);
674 		of_node_put(dn);
675 		if (ret)
676 			goto out;
677 	}
678 
679 	ret = of_changeset_apply(&cs);
680 out:
681 	of_changeset_destroy(&cs);
682 	return ret;
683 }
684 
685 static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
686 {
687 	int rc;
688 
689 	pr_debug("Attempting to remove CPU %pOFn, drc index: %x\n",
690 		 dn, drc_index);
691 
692 	rc = dlpar_offline_cpu(dn);
693 	if (rc) {
694 		pr_warn("Failed to offline CPU %pOFn, rc: %d\n", dn, rc);
695 		return -EINVAL;
696 	}
697 
698 	rc = dlpar_release_drc(drc_index);
699 	if (rc) {
700 		pr_warn("Failed to release drc (%x) for CPU %pOFn, rc: %d\n",
701 			drc_index, dn, rc);
702 		dlpar_online_cpu(dn);
703 		return rc;
704 	}
705 
706 	rc = pseries_cpuhp_detach_nodes(dn);
707 	if (rc) {
708 		int saved_rc = rc;
709 
710 		pr_warn("Failed to detach CPU %pOFn, rc: %d", dn, rc);
711 
712 		rc = dlpar_acquire_drc(drc_index);
713 		if (!rc)
714 			dlpar_online_cpu(dn);
715 
716 		return saved_rc;
717 	}
718 
719 	pr_debug("Successfully removed CPU, drc index: %x\n", drc_index);
720 	return 0;
721 }
722 
723 static struct device_node *cpu_drc_index_to_dn(u32 drc_index)
724 {
725 	struct device_node *dn;
726 	u32 my_index;
727 	int rc;
728 
729 	for_each_node_by_type(dn, "cpu") {
730 		rc = of_property_read_u32(dn, "ibm,my-drc-index", &my_index);
731 		if (rc)
732 			continue;
733 
734 		if (my_index == drc_index)
735 			break;
736 	}
737 
738 	return dn;
739 }
740 
741 static int dlpar_cpu_remove_by_index(u32 drc_index)
742 {
743 	struct device_node *dn;
744 	int rc;
745 
746 	dn = cpu_drc_index_to_dn(drc_index);
747 	if (!dn) {
748 		pr_warn("Cannot find CPU (drc index %x) to remove\n",
749 			drc_index);
750 		return -ENODEV;
751 	}
752 
753 	rc = dlpar_cpu_remove(dn, drc_index);
754 	of_node_put(dn);
755 	return rc;
756 }
757 
758 int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
759 {
760 	u32 drc_index;
761 	int rc;
762 
763 	drc_index = be32_to_cpu(hp_elog->_drc_u.drc_index);
764 
765 	lock_device_hotplug();
766 
767 	switch (hp_elog->action) {
768 	case PSERIES_HP_ELOG_ACTION_REMOVE:
769 		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
770 			rc = dlpar_cpu_remove_by_index(drc_index);
771 			/*
772 			 * Setting the isolation state of an UNISOLATED/CONFIGURED
773 			 * device to UNISOLATE is a no-op, but the hypervisor can
774 			 * use it as a hint that the CPU removal failed.
775 			 */
776 			if (rc)
777 				dlpar_unisolate_drc(drc_index);
778 		}
779 		else
780 			rc = -EINVAL;
781 		break;
782 	case PSERIES_HP_ELOG_ACTION_ADD:
783 		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
784 			rc = dlpar_cpu_add(drc_index);
785 		else
786 			rc = -EINVAL;
787 		break;
788 	default:
789 		pr_err("Invalid action (%d) specified\n", hp_elog->action);
790 		rc = -EINVAL;
791 		break;
792 	}
793 
794 	unlock_device_hotplug();
795 	return rc;
796 }
797 
798 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
799 
800 static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
801 {
802 	u32 drc_index;
803 	int rc;
804 
805 	rc = kstrtou32(buf, 0, &drc_index);
806 	if (rc)
807 		return -EINVAL;
808 
809 	rc = dlpar_cpu_add(drc_index);
810 
811 	return rc ? rc : count;
812 }
813 
814 static ssize_t dlpar_cpu_release(const char *buf, size_t count)
815 {
816 	struct device_node *dn;
817 	u32 drc_index;
818 	int rc;
819 
820 	dn = of_find_node_by_path(buf);
821 	if (!dn)
822 		return -EINVAL;
823 
824 	rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index);
825 	if (rc) {
826 		of_node_put(dn);
827 		return -EINVAL;
828 	}
829 
830 	rc = dlpar_cpu_remove(dn, drc_index);
831 	of_node_put(dn);
832 
833 	return rc ? rc : count;
834 }
835 
836 #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
837 
838 static int pseries_smp_notifier(struct notifier_block *nb,
839 				unsigned long action, void *data)
840 {
841 	struct of_reconfig_data *rd = data;
842 	int err = 0;
843 
844 	switch (action) {
845 	case OF_RECONFIG_ATTACH_NODE:
846 		err = pseries_add_processor(rd->dn);
847 		break;
848 	case OF_RECONFIG_DETACH_NODE:
849 		pseries_remove_processor(rd->dn);
850 		break;
851 	}
852 	return notifier_from_errno(err);
853 }
854 
855 static struct notifier_block pseries_smp_nb = {
856 	.notifier_call = pseries_smp_notifier,
857 };
858 
859 void __init pseries_cpu_hotplug_init(void)
860 {
861 	int qcss_tok;
862 
863 	rtas_stop_self_token = rtas_function_token(RTAS_FN_STOP_SELF);
864 	qcss_tok = rtas_function_token(RTAS_FN_QUERY_CPU_STOPPED_STATE);
865 
866 	if (rtas_stop_self_token == RTAS_UNKNOWN_SERVICE ||
867 			qcss_tok == RTAS_UNKNOWN_SERVICE) {
868 		printk(KERN_INFO "CPU Hotplug not supported by firmware "
869 				"- disabling.\n");
870 		return;
871 	}
872 
873 	smp_ops->cpu_offline_self = pseries_cpu_offline_self;
874 	smp_ops->cpu_disable = pseries_cpu_disable;
875 	smp_ops->cpu_die = pseries_cpu_die;
876 }
877 
878 static int __init pseries_dlpar_init(void)
879 {
880 	unsigned int node;
881 
882 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
883 	ppc_md.cpu_probe = dlpar_cpu_probe;
884 	ppc_md.cpu_release = dlpar_cpu_release;
885 #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
886 
887 	/* Processors can be added/removed only on LPAR */
888 	if (firmware_has_feature(FW_FEATURE_LPAR)) {
889 		for_each_node(node) {
890 			if (!alloc_cpumask_var_node(&node_recorded_ids_map[node],
891 						    GFP_KERNEL, node))
892 				return -ENOMEM;
893 
894 			/* Record ids of CPU added at boot time */
895 			cpumask_copy(node_recorded_ids_map[node],
896 				     cpumask_of_node(node));
897 		}
898 
899 		of_reconfig_notifier_register(&pseries_smp_nb);
900 	}
901 
902 	return 0;
903 }
904 machine_arch_initcall(pseries, pseries_dlpar_init);
905