xref: /linux/arch/powerpc/platforms/pseries/hotplug-cpu.c (revision 6beeaf48db6c548fcfc2ad32739d33af2fef3a5b)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * pseries CPU Hotplug infrastructure.
4  *
5  * Split out from arch/powerpc/platforms/pseries/setup.c
6  *  arch/powerpc/kernel/rtas.c, and arch/powerpc/platforms/pseries/smp.c
7  *
8  * Peter Bergner, IBM	March 2001.
9  * Copyright (C) 2001 IBM.
10  * Dave Engebretsen, Peter Bergner, and
11  * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
12  * Plus various changes from other IBM teams...
13  *
14  * Copyright (C) 2006 Michael Ellerman, IBM Corporation
15  */
16 
17 #define pr_fmt(fmt)     "pseries-hotplug-cpu: " fmt
18 
19 #include <linux/kernel.h>
20 #include <linux/interrupt.h>
21 #include <linux/delay.h>
22 #include <linux/sched.h>	/* for idle_task_exit */
23 #include <linux/sched/hotplug.h>
24 #include <linux/cpu.h>
25 #include <linux/of.h>
26 #include <linux/slab.h>
27 #include <asm/prom.h>
28 #include <asm/rtas.h>
29 #include <asm/firmware.h>
30 #include <asm/machdep.h>
31 #include <asm/vdso_datapage.h>
32 #include <asm/xics.h>
33 #include <asm/xive.h>
34 #include <asm/plpar_wrappers.h>
35 #include <asm/topology.h>
36 
37 #include "pseries.h"
38 
/* RTAS "stop-self" token; rtas_stop_self() can't take the RTAS spinlock, because the call never returns */
40 static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;
41 
42 /*
 * Record the CPU ids used on each node.
44  * Protected by cpu_add_remove_lock.
45  */
46 static cpumask_var_t node_recorded_ids_map[MAX_NUMNODES];
47 
48 static void rtas_stop_self(void)
49 {
50 	static struct rtas_args args;
51 
52 	local_irq_disable();
53 
54 	BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE);
55 
56 	rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL);
57 
58 	panic("Alas, I survived.\n");
59 }
60 
/*
 * Called on the CPU that is going away: tear down its interrupt controller
 * state (XIVE or XICS), unregister its SLB shadow buffer and finally stop
 * it through rtas_stop_self(). Not expected to return.
 */
static void pseries_cpu_offline_self(void)
{
	unsigned int hw_id = hard_smp_processor_id();

	local_irq_disable();
	idle_task_exit();

	if (!xive_enabled())
		xics_teardown_cpu();
	else
		xive_teardown_cpu();

	unregister_slb_shadow(hw_id);
	rtas_stop_self();

	/* rtas_stop_self() should not come back; trap and spin if it does. */
	BUG();
	while (1)
		;
}
79 
80 static int pseries_cpu_disable(void)
81 {
82 	int cpu = smp_processor_id();
83 
84 	set_cpu_online(cpu, false);
85 	vdso_data->processorCount--;
86 
87 	/*fix boot_cpuid here*/
88 	if (cpu == boot_cpuid)
89 		boot_cpuid = cpumask_any(cpu_online_mask);
90 
91 	/* FIXME: abstract this to not be platform specific later on */
92 	if (xive_enabled())
93 		xive_smp_disable_cpu();
94 	else
95 		xics_migrate_irqs_away();
96 
97 	cleanup_cpu_mmu_context();
98 
99 	return 0;
100 }
101 
102 /*
103  * pseries_cpu_die: Wait for the cpu to die.
104  * @cpu: logical processor id of the CPU whose death we're awaiting.
105  *
106  * This function is called from the context of the thread which is performing
107  * the cpu-offline. Here we wait for long enough to allow the cpu in question
108  * to self-destroy so that the cpu-offline thread can send the CPU_DEAD
109  * notifications.
110  *
111  * OTOH, pseries_cpu_offline_self() is called by the @cpu when it wants to
112  * self-destruct.
113  */
114 static void pseries_cpu_die(unsigned int cpu)
115 {
116 	int cpu_status = 1;
117 	unsigned int pcpu = get_hard_smp_processor_id(cpu);
118 	unsigned long timeout = jiffies + msecs_to_jiffies(120000);
119 
120 	while (true) {
121 		cpu_status = smp_query_cpu_stopped(pcpu);
122 		if (cpu_status == QCSS_STOPPED ||
123 		    cpu_status == QCSS_HARDWARE_ERROR)
124 			break;
125 
126 		if (time_after(jiffies, timeout)) {
127 			pr_warn("CPU %i (hwid %i) didn't die after 120 seconds\n",
128 				cpu, pcpu);
129 			timeout = jiffies + msecs_to_jiffies(120000);
130 		}
131 
132 		cond_resched();
133 	}
134 
135 	if (cpu_status == QCSS_HARDWARE_ERROR) {
136 		pr_warn("CPU %i (hwid %i) reported error while dying\n",
137 			cpu, pcpu);
138 	}
139 
140 	/* Isolation and deallocation are definitely done by
141 	 * drslot_chrp_cpu.  If they were not they would be
142 	 * done here.  Change isolate state to Isolate and
143 	 * change allocation-state to Unusable.
144 	 */
145 	paca_ptrs[cpu]->cpu_start = 0;
146 }
147 
148 /**
149  * find_cpu_id_range - found a linear ranger of @nthreads free CPU ids.
150  * @nthreads : the number of threads (cpu ids)
151  * @assigned_node : the node it belongs to or NUMA_NO_NODE if free ids from any
152  *                  node can be peek.
153  * @cpu_mask: the returned CPU mask.
154  *
155  * Returns 0 on success.
156  */
157 static int find_cpu_id_range(unsigned int nthreads, int assigned_node,
158 			     cpumask_var_t *cpu_mask)
159 {
160 	cpumask_var_t candidate_mask;
161 	unsigned int cpu, node;
162 	int rc = -ENOSPC;
163 
164 	if (!zalloc_cpumask_var(&candidate_mask, GFP_KERNEL))
165 		return -ENOMEM;
166 
167 	cpumask_clear(*cpu_mask);
168 	for (cpu = 0; cpu < nthreads; cpu++)
169 		cpumask_set_cpu(cpu, *cpu_mask);
170 
171 	BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask));
172 
173 	/* Get a bitmap of unoccupied slots. */
174 	cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask);
175 
176 	if (assigned_node != NUMA_NO_NODE) {
177 		/*
178 		 * Remove free ids previously assigned on the other nodes. We
179 		 * can walk only online nodes because once a node became online
180 		 * it is not turned offlined back.
181 		 */
182 		for_each_online_node(node) {
183 			if (node == assigned_node)
184 				continue;
185 			cpumask_andnot(candidate_mask, candidate_mask,
186 				       node_recorded_ids_map[node]);
187 		}
188 	}
189 
190 	if (cpumask_empty(candidate_mask))
191 		goto out;
192 
193 	while (!cpumask_empty(*cpu_mask)) {
194 		if (cpumask_subset(*cpu_mask, candidate_mask))
195 			/* Found a range where we can insert the new cpu(s) */
196 			break;
197 		cpumask_shift_left(*cpu_mask, *cpu_mask, nthreads);
198 	}
199 
200 	if (!cpumask_empty(*cpu_mask))
201 		rc = 0;
202 
203 out:
204 	free_cpumask_var(candidate_mask);
205 	return rc;
206 }
207 
208 /*
209  * Update cpu_present_mask and paca(s) for a new cpu node.  The wrinkle
210  * here is that a cpu device node may represent multiple logical cpus
211  * in the SMT case.  We must honor the assumption in other code that
212  * the logical ids for sibling SMT threads x and y are adjacent, such
213  * that x^1 == y and y^1 == x.
214  */
215 static int pseries_add_processor(struct device_node *np)
216 {
217 	int len, nthreads, node, cpu, assigned_node;
218 	int rc = 0;
219 	cpumask_var_t cpu_mask;
220 	const __be32 *intserv;
221 
222 	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
223 	if (!intserv)
224 		return 0;
225 
226 	nthreads = len / sizeof(u32);
227 
228 	if (!alloc_cpumask_var(&cpu_mask, GFP_KERNEL))
229 		return -ENOMEM;
230 
231 	/*
232 	 * Fetch from the DT nodes read by dlpar_configure_connector() the NUMA
233 	 * node id the added CPU belongs to.
234 	 */
235 	node = of_node_to_nid(np);
236 	if (node < 0 || !node_possible(node))
237 		node = first_online_node;
238 
239 	BUG_ON(node == NUMA_NO_NODE);
240 	assigned_node = node;
241 
242 	cpu_maps_update_begin();
243 
244 	rc = find_cpu_id_range(nthreads, node, &cpu_mask);
245 	if (rc && nr_node_ids > 1) {
246 		/*
247 		 * Try again, considering the free CPU ids from the other node.
248 		 */
249 		node = NUMA_NO_NODE;
250 		rc = find_cpu_id_range(nthreads, NUMA_NO_NODE, &cpu_mask);
251 	}
252 
253 	if (rc) {
254 		pr_err("Cannot add cpu %pOF; this system configuration"
255 		       " supports %d logical cpus.\n", np, num_possible_cpus());
256 		goto out;
257 	}
258 
259 	for_each_cpu(cpu, cpu_mask) {
260 		BUG_ON(cpu_present(cpu));
261 		set_cpu_present(cpu, true);
262 		set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++));
263 	}
264 
265 	/* Record the newly used CPU ids for the associate node. */
266 	cpumask_or(node_recorded_ids_map[assigned_node],
267 		   node_recorded_ids_map[assigned_node], cpu_mask);
268 
269 	/*
270 	 * If node is set to NUMA_NO_NODE, CPU ids have be reused from
271 	 * another node, remove them from its mask.
272 	 */
273 	if (node == NUMA_NO_NODE) {
274 		cpu = cpumask_first(cpu_mask);
275 		pr_warn("Reusing free CPU ids %d-%d from another node\n",
276 			cpu, cpu + nthreads - 1);
277 		for_each_online_node(node) {
278 			if (node == assigned_node)
279 				continue;
280 			cpumask_andnot(node_recorded_ids_map[node],
281 				       node_recorded_ids_map[node],
282 				       cpu_mask);
283 		}
284 	}
285 
286 out:
287 	cpu_maps_update_done();
288 	free_cpumask_var(cpu_mask);
289 	return rc;
290 }
291 
292 /*
293  * Update the present map for a cpu node which is going away, and set
294  * the hard id in the paca(s) to -1 to be consistent with boot time
295  * convention for non-present cpus.
296  */
297 static void pseries_remove_processor(struct device_node *np)
298 {
299 	unsigned int cpu;
300 	int len, nthreads, i;
301 	const __be32 *intserv;
302 	u32 thread;
303 
304 	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
305 	if (!intserv)
306 		return;
307 
308 	nthreads = len / sizeof(u32);
309 
310 	cpu_maps_update_begin();
311 	for (i = 0; i < nthreads; i++) {
312 		thread = be32_to_cpu(intserv[i]);
313 		for_each_present_cpu(cpu) {
314 			if (get_hard_smp_processor_id(cpu) != thread)
315 				continue;
316 			BUG_ON(cpu_online(cpu));
317 			set_cpu_present(cpu, false);
318 			set_hard_smp_processor_id(cpu, -1);
319 			update_numa_cpu_lookup_table(cpu, -1);
320 			break;
321 		}
322 		if (cpu >= nr_cpu_ids)
323 			printk(KERN_WARNING "Could not find cpu to remove "
324 			       "with physical id 0x%x\n", thread);
325 	}
326 	cpu_maps_update_done();
327 }
328 
329 static int dlpar_offline_cpu(struct device_node *dn)
330 {
331 	int rc = 0;
332 	unsigned int cpu;
333 	int len, nthreads, i;
334 	const __be32 *intserv;
335 	u32 thread;
336 
337 	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
338 	if (!intserv)
339 		return -EINVAL;
340 
341 	nthreads = len / sizeof(u32);
342 
343 	cpu_maps_update_begin();
344 	for (i = 0; i < nthreads; i++) {
345 		thread = be32_to_cpu(intserv[i]);
346 		for_each_present_cpu(cpu) {
347 			if (get_hard_smp_processor_id(cpu) != thread)
348 				continue;
349 
350 			if (!cpu_online(cpu))
351 				break;
352 
353 			/*
354 			 * device_offline() will return -EBUSY (via cpu_down()) if there
355 			 * is only one CPU left. Check it here to fail earlier and with a
356 			 * more informative error message, while also retaining the
357 			 * cpu_add_remove_lock to be sure that no CPUs are being
358 			 * online/offlined during this check.
359 			 */
360 			if (num_online_cpus() == 1) {
361 				pr_warn("Unable to remove last online CPU %pOFn\n", dn);
362 				rc = -EBUSY;
363 				goto out_unlock;
364 			}
365 
366 			cpu_maps_update_done();
367 			rc = device_offline(get_cpu_device(cpu));
368 			if (rc)
369 				goto out;
370 			cpu_maps_update_begin();
371 			break;
372 		}
373 		if (cpu == num_possible_cpus()) {
374 			pr_warn("Could not find cpu to offline with physical id 0x%x\n",
375 				thread);
376 		}
377 	}
378 out_unlock:
379 	cpu_maps_update_done();
380 
381 out:
382 	return rc;
383 }
384 
385 static int dlpar_online_cpu(struct device_node *dn)
386 {
387 	int rc = 0;
388 	unsigned int cpu;
389 	int len, nthreads, i;
390 	const __be32 *intserv;
391 	u32 thread;
392 
393 	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
394 	if (!intserv)
395 		return -EINVAL;
396 
397 	nthreads = len / sizeof(u32);
398 
399 	cpu_maps_update_begin();
400 	for (i = 0; i < nthreads; i++) {
401 		thread = be32_to_cpu(intserv[i]);
402 		for_each_present_cpu(cpu) {
403 			if (get_hard_smp_processor_id(cpu) != thread)
404 				continue;
405 			cpu_maps_update_done();
406 			find_and_online_cpu_nid(cpu);
407 			rc = device_online(get_cpu_device(cpu));
408 			if (rc) {
409 				dlpar_offline_cpu(dn);
410 				goto out;
411 			}
412 			cpu_maps_update_begin();
413 
414 			break;
415 		}
416 		if (cpu == num_possible_cpus())
417 			printk(KERN_WARNING "Could not find cpu to online "
418 			       "with physical id 0x%x\n", thread);
419 	}
420 	cpu_maps_update_done();
421 
422 out:
423 	return rc;
424 
425 }
426 
427 static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
428 {
429 	struct device_node *child = NULL;
430 	u32 my_drc_index;
431 	bool found;
432 	int rc;
433 
434 	/* Assume cpu doesn't exist */
435 	found = false;
436 
437 	for_each_child_of_node(parent, child) {
438 		rc = of_property_read_u32(child, "ibm,my-drc-index",
439 					  &my_drc_index);
440 		if (rc)
441 			continue;
442 
443 		if (my_drc_index == drc_index) {
444 			of_node_put(child);
445 			found = true;
446 			break;
447 		}
448 	}
449 
450 	return found;
451 }
452 
453 static bool drc_info_valid_index(struct device_node *parent, u32 drc_index)
454 {
455 	struct property *info;
456 	struct of_drc_info drc;
457 	const __be32 *value;
458 	u32 index;
459 	int count, i, j;
460 
461 	info = of_find_property(parent, "ibm,drc-info", NULL);
462 	if (!info)
463 		return false;
464 
465 	value = of_prop_next_u32(info, NULL, &count);
466 
467 	/* First value of ibm,drc-info is number of drc-info records */
468 	if (value)
469 		value++;
470 	else
471 		return false;
472 
473 	for (i = 0; i < count; i++) {
474 		if (of_read_drc_info_cell(&info, &value, &drc))
475 			return false;
476 
477 		if (strncmp(drc.drc_type, "CPU", 3))
478 			break;
479 
480 		if (drc_index > drc.last_drc_index)
481 			continue;
482 
483 		index = drc.drc_index_start;
484 		for (j = 0; j < drc.num_sequential_elems; j++) {
485 			if (drc_index == index)
486 				return true;
487 
488 			index += drc.sequential_inc;
489 		}
490 	}
491 
492 	return false;
493 }
494 
495 static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
496 {
497 	bool found = false;
498 	int rc, index;
499 
500 	if (of_find_property(parent, "ibm,drc-info", NULL))
501 		return drc_info_valid_index(parent, drc_index);
502 
503 	/* Note that the format of the ibm,drc-indexes array is
504 	 * the number of entries in the array followed by the array
505 	 * of drc values so we start looking at index = 1.
506 	 */
507 	index = 1;
508 	while (!found) {
509 		u32 drc;
510 
511 		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
512 						index++, &drc);
513 
514 		if (rc)
515 			break;
516 
517 		if (drc == drc_index)
518 			found = true;
519 	}
520 
521 	return found;
522 }
523 
524 static ssize_t dlpar_cpu_add(u32 drc_index)
525 {
526 	struct device_node *dn, *parent;
527 	int rc, saved_rc;
528 
529 	pr_debug("Attempting to add CPU, drc index: %x\n", drc_index);
530 
531 	parent = of_find_node_by_path("/cpus");
532 	if (!parent) {
533 		pr_warn("Failed to find CPU root node \"/cpus\"\n");
534 		return -ENODEV;
535 	}
536 
537 	if (dlpar_cpu_exists(parent, drc_index)) {
538 		of_node_put(parent);
539 		pr_warn("CPU with drc index %x already exists\n", drc_index);
540 		return -EINVAL;
541 	}
542 
543 	if (!valid_cpu_drc_index(parent, drc_index)) {
544 		of_node_put(parent);
545 		pr_warn("Cannot find CPU (drc index %x) to add.\n", drc_index);
546 		return -EINVAL;
547 	}
548 
549 	rc = dlpar_acquire_drc(drc_index);
550 	if (rc) {
551 		pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
552 			rc, drc_index);
553 		of_node_put(parent);
554 		return -EINVAL;
555 	}
556 
557 	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
558 	if (!dn) {
559 		pr_warn("Failed call to configure-connector, drc index: %x\n",
560 			drc_index);
561 		dlpar_release_drc(drc_index);
562 		of_node_put(parent);
563 		return -EINVAL;
564 	}
565 
566 	rc = dlpar_attach_node(dn, parent);
567 
568 	/* Regardless we are done with parent now */
569 	of_node_put(parent);
570 
571 	if (rc) {
572 		saved_rc = rc;
573 		pr_warn("Failed to attach node %pOFn, rc: %d, drc index: %x\n",
574 			dn, rc, drc_index);
575 
576 		rc = dlpar_release_drc(drc_index);
577 		if (!rc)
578 			dlpar_free_cc_nodes(dn);
579 
580 		return saved_rc;
581 	}
582 
583 	update_numa_distance(dn);
584 
585 	rc = dlpar_online_cpu(dn);
586 	if (rc) {
587 		saved_rc = rc;
588 		pr_warn("Failed to online cpu %pOFn, rc: %d, drc index: %x\n",
589 			dn, rc, drc_index);
590 
591 		rc = dlpar_detach_node(dn);
592 		if (!rc)
593 			dlpar_release_drc(drc_index);
594 
595 		return saved_rc;
596 	}
597 
598 	pr_debug("Successfully added CPU %pOFn, drc index: %x\n", dn,
599 		 drc_index);
600 	return rc;
601 }
602 
603 static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
604 {
605 	int rc;
606 
607 	pr_debug("Attempting to remove CPU %pOFn, drc index: %x\n",
608 		 dn, drc_index);
609 
610 	rc = dlpar_offline_cpu(dn);
611 	if (rc) {
612 		pr_warn("Failed to offline CPU %pOFn, rc: %d\n", dn, rc);
613 		return -EINVAL;
614 	}
615 
616 	rc = dlpar_release_drc(drc_index);
617 	if (rc) {
618 		pr_warn("Failed to release drc (%x) for CPU %pOFn, rc: %d\n",
619 			drc_index, dn, rc);
620 		dlpar_online_cpu(dn);
621 		return rc;
622 	}
623 
624 	rc = dlpar_detach_node(dn);
625 	if (rc) {
626 		int saved_rc = rc;
627 
628 		pr_warn("Failed to detach CPU %pOFn, rc: %d", dn, rc);
629 
630 		rc = dlpar_acquire_drc(drc_index);
631 		if (!rc)
632 			dlpar_online_cpu(dn);
633 
634 		return saved_rc;
635 	}
636 
637 	pr_debug("Successfully removed CPU, drc index: %x\n", drc_index);
638 	return 0;
639 }
640 
641 static struct device_node *cpu_drc_index_to_dn(u32 drc_index)
642 {
643 	struct device_node *dn;
644 	u32 my_index;
645 	int rc;
646 
647 	for_each_node_by_type(dn, "cpu") {
648 		rc = of_property_read_u32(dn, "ibm,my-drc-index", &my_index);
649 		if (rc)
650 			continue;
651 
652 		if (my_index == drc_index)
653 			break;
654 	}
655 
656 	return dn;
657 }
658 
659 static int dlpar_cpu_remove_by_index(u32 drc_index)
660 {
661 	struct device_node *dn;
662 	int rc;
663 
664 	dn = cpu_drc_index_to_dn(drc_index);
665 	if (!dn) {
666 		pr_warn("Cannot find CPU (drc index %x) to remove\n",
667 			drc_index);
668 		return -ENODEV;
669 	}
670 
671 	rc = dlpar_cpu_remove(dn, drc_index);
672 	of_node_put(dn);
673 	return rc;
674 }
675 
676 static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove)
677 {
678 	struct device_node *dn;
679 	int cpus_found = 0;
680 	int rc;
681 
682 	/* We want to find cpus_to_remove + 1 CPUs to ensure we do not
683 	 * remove the last CPU.
684 	 */
685 	for_each_node_by_type(dn, "cpu") {
686 		cpus_found++;
687 
688 		if (cpus_found > cpus_to_remove) {
689 			of_node_put(dn);
690 			break;
691 		}
692 
693 		/* Note that cpus_found is always 1 ahead of the index
694 		 * into the cpu_drcs array, so we use cpus_found - 1
695 		 */
696 		rc = of_property_read_u32(dn, "ibm,my-drc-index",
697 					  &cpu_drcs[cpus_found - 1]);
698 		if (rc) {
699 			pr_warn("Error occurred getting drc-index for %pOFn\n",
700 				dn);
701 			of_node_put(dn);
702 			return -1;
703 		}
704 	}
705 
706 	if (cpus_found < cpus_to_remove) {
707 		pr_warn("Failed to find enough CPUs (%d of %d) to remove\n",
708 			cpus_found, cpus_to_remove);
709 	} else if (cpus_found == cpus_to_remove) {
710 		pr_warn("Cannot remove all CPUs\n");
711 	}
712 
713 	return cpus_found;
714 }
715 
716 static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
717 {
718 	u32 *cpu_drcs;
719 	int cpus_found;
720 	int cpus_removed = 0;
721 	int i, rc;
722 
723 	pr_debug("Attempting to hot-remove %d CPUs\n", cpus_to_remove);
724 
725 	cpu_drcs = kcalloc(cpus_to_remove, sizeof(*cpu_drcs), GFP_KERNEL);
726 	if (!cpu_drcs)
727 		return -EINVAL;
728 
729 	cpus_found = find_dlpar_cpus_to_remove(cpu_drcs, cpus_to_remove);
730 	if (cpus_found <= cpus_to_remove) {
731 		kfree(cpu_drcs);
732 		return -EINVAL;
733 	}
734 
735 	for (i = 0; i < cpus_to_remove; i++) {
736 		rc = dlpar_cpu_remove_by_index(cpu_drcs[i]);
737 		if (rc)
738 			break;
739 
740 		cpus_removed++;
741 	}
742 
743 	if (cpus_removed != cpus_to_remove) {
744 		pr_warn("CPU hot-remove failed, adding back removed CPUs\n");
745 
746 		for (i = 0; i < cpus_removed; i++)
747 			dlpar_cpu_add(cpu_drcs[i]);
748 
749 		rc = -EINVAL;
750 	} else {
751 		rc = 0;
752 	}
753 
754 	kfree(cpu_drcs);
755 	return rc;
756 }
757 
758 static int find_drc_info_cpus_to_add(struct device_node *cpus,
759 				     struct property *info,
760 				     u32 *cpu_drcs, u32 cpus_to_add)
761 {
762 	struct of_drc_info drc;
763 	const __be32 *value;
764 	u32 count, drc_index;
765 	int cpus_found = 0;
766 	int i, j;
767 
768 	if (!info)
769 		return -1;
770 
771 	value = of_prop_next_u32(info, NULL, &count);
772 	if (value)
773 		value++;
774 
775 	for (i = 0; i < count; i++) {
776 		of_read_drc_info_cell(&info, &value, &drc);
777 		if (strncmp(drc.drc_type, "CPU", 3))
778 			break;
779 
780 		drc_index = drc.drc_index_start;
781 		for (j = 0; j < drc.num_sequential_elems; j++) {
782 			if (dlpar_cpu_exists(cpus, drc_index))
783 				continue;
784 
785 			cpu_drcs[cpus_found++] = drc_index;
786 
787 			if (cpus_found == cpus_to_add)
788 				return cpus_found;
789 
790 			drc_index += drc.sequential_inc;
791 		}
792 	}
793 
794 	return cpus_found;
795 }
796 
797 static int find_drc_index_cpus_to_add(struct device_node *cpus,
798 				      u32 *cpu_drcs, u32 cpus_to_add)
799 {
800 	int cpus_found = 0;
801 	int index, rc;
802 	u32 drc_index;
803 
804 	/* Search the ibm,drc-indexes array for possible CPU drcs to
805 	 * add. Note that the format of the ibm,drc-indexes array is
806 	 * the number of entries in the array followed by the array
807 	 * of drc values so we start looking at index = 1.
808 	 */
809 	index = 1;
810 	while (cpus_found < cpus_to_add) {
811 		rc = of_property_read_u32_index(cpus, "ibm,drc-indexes",
812 						index++, &drc_index);
813 
814 		if (rc)
815 			break;
816 
817 		if (dlpar_cpu_exists(cpus, drc_index))
818 			continue;
819 
820 		cpu_drcs[cpus_found++] = drc_index;
821 	}
822 
823 	return cpus_found;
824 }
825 
826 static int dlpar_cpu_add_by_count(u32 cpus_to_add)
827 {
828 	struct device_node *parent;
829 	struct property *info;
830 	u32 *cpu_drcs;
831 	int cpus_added = 0;
832 	int cpus_found;
833 	int i, rc;
834 
835 	pr_debug("Attempting to hot-add %d CPUs\n", cpus_to_add);
836 
837 	cpu_drcs = kcalloc(cpus_to_add, sizeof(*cpu_drcs), GFP_KERNEL);
838 	if (!cpu_drcs)
839 		return -EINVAL;
840 
841 	parent = of_find_node_by_path("/cpus");
842 	if (!parent) {
843 		pr_warn("Could not find CPU root node in device tree\n");
844 		kfree(cpu_drcs);
845 		return -1;
846 	}
847 
848 	info = of_find_property(parent, "ibm,drc-info", NULL);
849 	if (info)
850 		cpus_found = find_drc_info_cpus_to_add(parent, info, cpu_drcs, cpus_to_add);
851 	else
852 		cpus_found = find_drc_index_cpus_to_add(parent, cpu_drcs, cpus_to_add);
853 
854 	of_node_put(parent);
855 
856 	if (cpus_found < cpus_to_add) {
857 		pr_warn("Failed to find enough CPUs (%d of %d) to add\n",
858 			cpus_found, cpus_to_add);
859 		kfree(cpu_drcs);
860 		return -EINVAL;
861 	}
862 
863 	for (i = 0; i < cpus_to_add; i++) {
864 		rc = dlpar_cpu_add(cpu_drcs[i]);
865 		if (rc)
866 			break;
867 
868 		cpus_added++;
869 	}
870 
871 	if (cpus_added < cpus_to_add) {
872 		pr_warn("CPU hot-add failed, removing any added CPUs\n");
873 
874 		for (i = 0; i < cpus_added; i++)
875 			dlpar_cpu_remove_by_index(cpu_drcs[i]);
876 
877 		rc = -EINVAL;
878 	} else {
879 		rc = 0;
880 	}
881 
882 	kfree(cpu_drcs);
883 	return rc;
884 }
885 
886 int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
887 {
888 	u32 count, drc_index;
889 	int rc;
890 
891 	count = hp_elog->_drc_u.drc_count;
892 	drc_index = hp_elog->_drc_u.drc_index;
893 
894 	lock_device_hotplug();
895 
896 	switch (hp_elog->action) {
897 	case PSERIES_HP_ELOG_ACTION_REMOVE:
898 		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
899 			rc = dlpar_cpu_remove_by_count(count);
900 		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) {
901 			rc = dlpar_cpu_remove_by_index(drc_index);
902 			/*
903 			 * Setting the isolation state of an UNISOLATED/CONFIGURED
904 			 * device to UNISOLATE is a no-op, but the hypervisor can
905 			 * use it as a hint that the CPU removal failed.
906 			 */
907 			if (rc)
908 				dlpar_unisolate_drc(drc_index);
909 		}
910 		else
911 			rc = -EINVAL;
912 		break;
913 	case PSERIES_HP_ELOG_ACTION_ADD:
914 		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
915 			rc = dlpar_cpu_add_by_count(count);
916 		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
917 			rc = dlpar_cpu_add(drc_index);
918 		else
919 			rc = -EINVAL;
920 		break;
921 	default:
922 		pr_err("Invalid action (%d) specified\n", hp_elog->action);
923 		rc = -EINVAL;
924 		break;
925 	}
926 
927 	unlock_device_hotplug();
928 	return rc;
929 }
930 
931 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
932 
933 static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
934 {
935 	u32 drc_index;
936 	int rc;
937 
938 	rc = kstrtou32(buf, 0, &drc_index);
939 	if (rc)
940 		return -EINVAL;
941 
942 	rc = dlpar_cpu_add(drc_index);
943 
944 	return rc ? rc : count;
945 }
946 
947 static ssize_t dlpar_cpu_release(const char *buf, size_t count)
948 {
949 	struct device_node *dn;
950 	u32 drc_index;
951 	int rc;
952 
953 	dn = of_find_node_by_path(buf);
954 	if (!dn)
955 		return -EINVAL;
956 
957 	rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index);
958 	if (rc) {
959 		of_node_put(dn);
960 		return -EINVAL;
961 	}
962 
963 	rc = dlpar_cpu_remove(dn, drc_index);
964 	of_node_put(dn);
965 
966 	return rc ? rc : count;
967 }
968 
969 #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
970 
971 static int pseries_smp_notifier(struct notifier_block *nb,
972 				unsigned long action, void *data)
973 {
974 	struct of_reconfig_data *rd = data;
975 	int err = 0;
976 
977 	switch (action) {
978 	case OF_RECONFIG_ATTACH_NODE:
979 		err = pseries_add_processor(rd->dn);
980 		break;
981 	case OF_RECONFIG_DETACH_NODE:
982 		pseries_remove_processor(rd->dn);
983 		break;
984 	}
985 	return notifier_from_errno(err);
986 }
987 
/* Notifier block routing OF reconfig events to pseries_smp_notifier(). */
static struct notifier_block pseries_smp_nb = {
	.notifier_call = pseries_smp_notifier,
};
991 
992 static int __init pseries_cpu_hotplug_init(void)
993 {
994 	int qcss_tok;
995 	unsigned int node;
996 
997 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
998 	ppc_md.cpu_probe = dlpar_cpu_probe;
999 	ppc_md.cpu_release = dlpar_cpu_release;
1000 #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
1001 
1002 	rtas_stop_self_token = rtas_token("stop-self");
1003 	qcss_tok = rtas_token("query-cpu-stopped-state");
1004 
1005 	if (rtas_stop_self_token == RTAS_UNKNOWN_SERVICE ||
1006 			qcss_tok == RTAS_UNKNOWN_SERVICE) {
1007 		printk(KERN_INFO "CPU Hotplug not supported by firmware "
1008 				"- disabling.\n");
1009 		return 0;
1010 	}
1011 
1012 	smp_ops->cpu_offline_self = pseries_cpu_offline_self;
1013 	smp_ops->cpu_disable = pseries_cpu_disable;
1014 	smp_ops->cpu_die = pseries_cpu_die;
1015 
1016 	/* Processors can be added/removed only on LPAR */
1017 	if (firmware_has_feature(FW_FEATURE_LPAR)) {
1018 		for_each_node(node) {
1019 			alloc_bootmem_cpumask_var(&node_recorded_ids_map[node]);
1020 
1021 			/* Record ids of CPU added at boot time */
1022 			cpumask_or(node_recorded_ids_map[node],
1023 				   node_recorded_ids_map[node],
1024 				   cpumask_of_node(node));
1025 		}
1026 
1027 		of_reconfig_notifier_register(&pseries_smp_nb);
1028 	}
1029 
1030 	return 0;
1031 }
1032 machine_arch_initcall(pseries, pseries_cpu_hotplug_init);
1033