xref: /illumos-gate/usr/src/uts/sun4v/io/dr_cpu.c (revision cbab2b2687744cbfdc12fae90f8088127a0b266c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v CPU DR Module
31  */
32 
33 #include <sys/modctl.h>
34 #include <sys/processor.h>
35 #include <sys/cpuvar.h>
36 #include <sys/sunddi.h>
37 #include <sys/sunndi.h>
38 #include <sys/note.h>
39 #include <sys/sysevent/dr.h>
40 #include <sys/hypervisor_api.h>
41 #include <sys/mach_descrip.h>
42 #include <sys/mdesc.h>
43 #include <sys/ds.h>
44 #include <sys/dr_util.h>
45 #include <sys/dr_cpu.h>
46 #include <sys/promif.h>
47 #include <sys/machsystm.h>
48 
49 
/*
 * Module description: this is a miscellaneous kernel module, so it is
 * described with a modlmisc that points at mod_miscops.
 */
static struct modlmisc modlmisc = {
	&mod_miscops,
	"sun4v CPU DR %I%"
};

/*
 * Linkage structure handed to mod_install(), mod_info() and
 * mod_remove() by _init(), _info() and _fini() respectively.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlmisc,
	NULL
};
60 
/*
 * Global DS Handle
 *
 * Recorded by dr_cpu_reg_handler(), reset to DS_INVALID_HDL by
 * dr_cpu_unreg_handler(), and used by dr_cpu_data_handler() as the
 * destination when a response is sent with ds_cap_send().
 */
static ds_svc_hdl_t ds_handle;

/*
 * Supported DS Capability Versions
 *
 * A single entry, { 1, 0 } (presumably major 1, minor 0 - confirm
 * against the ds_ver_t definition). DR_CPU_NVERS is the number of
 * entries in the array.
 */
static ds_ver_t		dr_cpu_vers[] = { { 1, 0 } };
#define	DR_CPU_NVERS	(sizeof (dr_cpu_vers) / sizeof (dr_cpu_vers[0]))

/*
 * DS Capability Description
 *
 * Passed to ds_cap_init() by dr_cpu_init() and to ds_cap_fini() by
 * dr_cpu_fini().
 */
static ds_capability_t dr_cpu_cap = {
	DR_CPU_DS_ID,		/* svc_id */
	dr_cpu_vers,		/* vers */
	DR_CPU_NVERS		/* nvers */
};
80 
/*
 * DS Callbacks
 *
 * The registration handler records the service handle, the
 * unregistration handler invalidates it, and the data handler
 * processes one incoming request message.
 */
static void dr_cpu_reg_handler(ds_cb_arg_t, ds_ver_t *, ds_svc_hdl_t);
static void dr_cpu_unreg_handler(ds_cb_arg_t arg);
static void dr_cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);

/*
 * DS Client Ops Vector
 *
 * Passed to ds_cap_init() together with dr_cpu_cap. No callback
 * argument is supplied (cb_arg is NULL).
 */
static ds_clnt_ops_t dr_cpu_ops = {
	dr_cpu_reg_handler,	/* ds_reg_cb */
	dr_cpu_unreg_handler,	/* ds_unreg_cb */
	dr_cpu_data_handler,	/* ds_data_cb */
	NULL			/* cb_arg */
};
97 
/*
 * Internal Functions
 */

/* DS capability registration and teardown */
static int dr_cpu_init(void);
static int dr_cpu_fini(void);

/* per-request handlers; each builds the response message for a request */
static int dr_cpu_list_configure(dr_cpu_hdr_t *, dr_cpu_hdr_t **, int *);
static int dr_cpu_list_unconfigure(dr_cpu_hdr_t *, dr_cpu_hdr_t **, int *);
static int dr_cpu_list_status(dr_cpu_hdr_t *, dr_cpu_hdr_t **, int *);

/* single-CPU operations; return a DR_CPU_RES_* code */
static int dr_cpu_unconfigure(processorid_t, int *status, boolean_t force);
static int dr_cpu_configure(processorid_t, int *status);
static int dr_cpu_status(processorid_t, int *status);

/* creation and removal of the device tree node for a CPU */
static int dr_cpu_probe(processorid_t newcpuid);
static int dr_cpu_deprobe(processorid_t cpuid);

/* lookup of a CPU in the device tree and in the machine description */
static dev_info_t *dr_cpu_find_node(processorid_t cpuid);
static mde_cookie_t dr_cpu_find_node_md(processorid_t, md_t *, mde_cookie_t *);
117 
118 
119 int
120 _init(void)
121 {
122 	int	status;
123 
124 	/* check that CPU DR is enabled */
125 	if (dr_is_disabled(DR_TYPE_CPU)) {
126 		cmn_err(CE_CONT, "!CPU DR is disabled\n");
127 		return (-1);
128 	}
129 
130 	if ((status = dr_cpu_init()) != 0) {
131 		cmn_err(CE_NOTE, "CPU DR initialization failed");
132 		return (status);
133 	}
134 
135 	if ((status = mod_install(&modlinkage)) != 0) {
136 		(void) dr_cpu_fini();
137 	}
138 
139 	return (status);
140 }
141 
142 int
143 _info(struct modinfo *modinfop)
144 {
145 	return (mod_info(&modlinkage, modinfop));
146 }
147 
148 int dr_cpu_allow_unload;
149 
150 int
151 _fini(void)
152 {
153 	int	status;
154 
155 	if (dr_cpu_allow_unload == 0)
156 		return (EBUSY);
157 
158 	if ((status = mod_remove(&modlinkage)) == 0) {
159 		(void) dr_cpu_fini();
160 	}
161 
162 	return (status);
163 }
164 
165 static int
166 dr_cpu_init(void)
167 {
168 	int	rv;
169 
170 	if ((rv = ds_cap_init(&dr_cpu_cap, &dr_cpu_ops)) != 0) {
171 		cmn_err(CE_NOTE, "ds_cap_init failed: %d", rv);
172 		return (-1);
173 	}
174 
175 	return (0);
176 }
177 
178 static int
179 dr_cpu_fini(void)
180 {
181 	int	rv;
182 
183 	if ((rv = ds_cap_fini(&dr_cpu_cap)) != 0) {
184 		cmn_err(CE_NOTE, "ds_cap_fini failed: %d", rv);
185 		return (-1);
186 	}
187 
188 	return (0);
189 }
190 
/*
 * DS registration callback. Remembers the service handle in the
 * global ds_handle so that responses can later be sent on it.
 *
 *	arg	callback argument (only logged)
 *	ver	version reported by DS (only logged)
 *	hdl	service handle to record
 */
static void
dr_cpu_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl)
{
	DR_DBG_CPU("reg_handler: arg=0x%p, ver=%d.%d, hdl=0x%lx\n", arg,
	    ver->major, ver->minor, hdl);

	ds_handle = hdl;
}
199 
/*
 * DS unregistration callback. Marks the global ds_handle as invalid
 * by resetting it to DS_INVALID_HDL.
 *
 *	arg	callback argument (only logged)
 */
static void
dr_cpu_unreg_handler(ds_cb_arg_t arg)
{
	DR_DBG_CPU("unreg_handler: arg=0x%p\n", arg);

	ds_handle = DS_INVALID_HDL;
}
207 
208 static void
209 dr_cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
210 {
211 	_NOTE(ARGUNUSED(arg))
212 
213 	dr_cpu_hdr_t	*req = buf;
214 	dr_cpu_hdr_t	err_resp;
215 	dr_cpu_hdr_t	*resp = &err_resp;
216 	int		resp_len = 0;
217 	int		rv;
218 
219 	/*
220 	 * Sanity check the message
221 	 */
222 	if (buflen < sizeof (dr_cpu_hdr_t)) {
223 		DR_DBG_CPU("incoming message short: expected at least %ld "
224 		    "bytes, received %ld\n", sizeof (dr_cpu_hdr_t), buflen);
225 		goto done;
226 	}
227 
228 	if (req == NULL) {
229 		DR_DBG_CPU("empty message: expected at least %ld bytes\n",
230 		    sizeof (dr_cpu_hdr_t));
231 		goto done;
232 	}
233 
234 	DR_DBG_CPU("incoming request:\n");
235 	DR_DBG_DUMP_MSG(buf, buflen);
236 
237 	if (req->num_records > NCPU) {
238 		DR_DBG_CPU("CPU list too long: %d when %d is the maximum\n",
239 		    req->num_records, NCPU);
240 		goto done;
241 	}
242 
243 	if (req->num_records == 0) {
244 		DR_DBG_CPU("No CPU specified for operation\n");
245 		goto done;
246 	}
247 
248 	/*
249 	 * Process the command
250 	 */
251 	switch (req->msg_type) {
252 	case DR_CPU_CONFIGURE:
253 		if ((rv = dr_cpu_list_configure(req, &resp, &resp_len)) != 0)
254 			DR_DBG_CPU("dr_cpu_list_configure failed (%d)\n", rv);
255 		break;
256 
257 	case DR_CPU_UNCONFIGURE:
258 	case DR_CPU_FORCE_UNCONFIG:
259 		if ((rv = dr_cpu_list_unconfigure(req, &resp, &resp_len)) != 0)
260 			DR_DBG_CPU("dr_cpu_list_unconfigure failed (%d)\n", rv);
261 		break;
262 
263 	case DR_CPU_STATUS:
264 		if ((rv = dr_cpu_list_status(req, &resp, &resp_len)) != 0)
265 			DR_DBG_CPU("dr_cpu_list_status failed (%d)\n", rv);
266 		break;
267 
268 	default:
269 		cmn_err(CE_NOTE, "unsupported DR operation (%d)",
270 		    req->msg_type);
271 		break;
272 	}
273 
274 done:
275 	/* check if an error occurred */
276 	if (resp == &err_resp) {
277 		resp->req_num = (req) ? req->req_num : 0;
278 		resp->msg_type = DR_CPU_ERROR;
279 		resp->num_records = 0;
280 		resp_len = sizeof (dr_cpu_hdr_t);
281 	}
282 
283 	/* send back the response */
284 	if (ds_cap_send(ds_handle, resp, resp_len) != 0) {
285 		DR_DBG_CPU("ds_send failed\n");
286 	}
287 
288 	/* free any allocated memory */
289 	if (resp != &err_resp) {
290 		kmem_free(resp, resp_len);
291 	}
292 }
293 
294 /*
295  * Do not modify result buffer or length on error.
296  */
297 static int
298 dr_cpu_list_configure(dr_cpu_hdr_t *req, dr_cpu_hdr_t **resp, int *resp_len)
299 {
300 	int		idx;
301 	int		result;
302 	int		status;
303 	int		rlen;
304 	uint32_t	*cpuids;
305 	dr_cpu_hdr_t	*rp;
306 	dr_cpu_stat_t	*stat;
307 
308 	/* the incoming array of cpuids to configure */
309 	cpuids = (uint32_t *)((caddr_t)req + sizeof (dr_cpu_hdr_t));
310 
311 	/* allocate a response message */
312 	rlen = sizeof (dr_cpu_hdr_t);
313 	rlen += req->num_records * sizeof (dr_cpu_stat_t);
314 	rp = kmem_zalloc(rlen, KM_SLEEP);
315 
316 	/* fill in the known data */
317 	rp->req_num = req->req_num;
318 	rp->msg_type = DR_CPU_OK;
319 	rp->num_records = req->num_records;
320 
321 	/* stat array for the response */
322 	stat = (dr_cpu_stat_t *)((caddr_t)rp + sizeof (dr_cpu_hdr_t));
323 
324 	/* configure each of the CPUs */
325 	for (idx = 0; idx < req->num_records; idx++) {
326 
327 		result = dr_cpu_configure(cpuids[idx], &status);
328 
329 		/* save off results of the configure */
330 		stat[idx].cpuid = cpuids[idx];
331 		stat[idx].result = result;
332 		stat[idx].status = status;
333 	}
334 
335 	*resp = rp;
336 	*resp_len = rlen;
337 
338 	dr_generate_event(DR_TYPE_CPU, SE_HINT_INSERT);
339 
340 	return (0);
341 }
342 
/*
 * Pre-check for a non-forced unconfigure: mark every requested CPU
 * that has a thread of a non-system process bound to it as blocked.
 *
 *	cpuids	array of cpuids from the request
 *	ncpus	number of entries in cpuids and stat
 *	stat	response records; the entry for a blocked CPU gets its
 *		cpuid, result DR_CPU_RES_BLOCKED and status
 *		DR_CPU_STAT_CONFIGURED. Other entries are left untouched.
 *
 * cpu_lock is held across the whole scan. For each requested CPU,
 * pidlock is held while the active process list is walked, and each
 * process's p_lock is held while its thread list is examined.
 */
static void
dr_cpu_check_cpus(uint32_t *cpuids, int ncpus, dr_cpu_stat_t *stat)
{
	int		idx;
	kthread_t	*tp;
	proc_t		*pp;

	DR_DBG_CPU("dr_cpu_check_cpus...\n");

	mutex_enter(&cpu_lock);

	/* process each cpu that is part of the request */
	for (idx = 0; idx < ncpus; idx++) {

		/* no CPU structure for this cpuid, so nothing can be bound */
		if (cpu_get(cpuids[idx]) == NULL)
			continue;

		mutex_enter(&pidlock);

		/*
		 * Walk the active processes, checking if each
		 * thread belonging to the process is bound.
		 */
		for (pp = practive; pp != NULL; pp = pp->p_next) {
			mutex_enter(&pp->p_lock);
			tp = pp->p_tlist;

			/* skip processes without threads and SSYS processes */
			if (tp == NULL || (pp->p_flag & SSYS)) {
				mutex_exit(&pp->p_lock);
				continue;
			}

			/*
			 * The thread list is circular. Note that the
			 * 'continue' below jumps to the loop condition,
			 * which is what advances to the next thread.
			 */
			do {
				if (tp->t_bind_cpu != cpuids[idx])
					continue;

				DR_DBG_CPU("thread(s) bound to cpu %d\n",
				    cpuids[idx]);

				stat[idx].cpuid = cpuids[idx];
				stat[idx].result = DR_CPU_RES_BLOCKED;
				stat[idx].status = DR_CPU_STAT_CONFIGURED;
				/*
				 * Only leaves the thread loop; the
				 * remaining processes are still walked.
				 */
				break;

			} while ((tp = tp->t_forw) != pp->p_tlist);
			mutex_exit(&pp->p_lock);
		}

		mutex_exit(&pidlock);
	}

	mutex_exit(&cpu_lock);
}
396 
397 /*
398  * Do not modify result buffer or length on error.
399  */
400 static int
401 dr_cpu_list_unconfigure(dr_cpu_hdr_t *req, dr_cpu_hdr_t **resp, int *resp_len)
402 {
403 	int		idx;
404 	int		result;
405 	int		status;
406 	int		rlen;
407 	uint32_t	*cpuids;
408 	dr_cpu_hdr_t	*rp;
409 	dr_cpu_stat_t	*stat;
410 	boolean_t	force;
411 
412 	/* the incoming array of cpuids to configure */
413 	cpuids = (uint32_t *)((caddr_t)req + sizeof (dr_cpu_hdr_t));
414 
415 	/* check if this is a forced unconfigured */
416 	force = (req->msg_type == DR_CPU_FORCE_UNCONFIG) ? B_TRUE : B_FALSE;
417 
418 	/* allocate a response message */
419 	rlen = sizeof (dr_cpu_hdr_t);
420 	rlen += req->num_records * sizeof (dr_cpu_stat_t);
421 	rp = kmem_zalloc(rlen, KM_SLEEP);
422 
423 	/* fill in the known data */
424 	rp->req_num = req->req_num;
425 	rp->msg_type = DR_CPU_OK;
426 	rp->num_records = req->num_records;
427 
428 	/* stat array for the response */
429 	stat = (dr_cpu_stat_t *)((caddr_t)rp + sizeof (dr_cpu_hdr_t));
430 
431 	/*
432 	 * If the operation is not a forced unconfigure,
433 	 * perform secondary checks for things that would
434 	 * prevent an operation.
435 	 */
436 	if (!force)
437 		dr_cpu_check_cpus(cpuids, req->num_records, stat);
438 
439 	/* unconfigure each of the CPUs */
440 	for (idx = 0; idx < req->num_records; idx++) {
441 
442 		/* skip this cpu if it is already marked as blocked */
443 		if (stat[idx].result == DR_CPU_RES_BLOCKED)
444 			continue;
445 
446 		result = dr_cpu_unconfigure(cpuids[idx], &status, force);
447 
448 		/* save off results of the unconfigure */
449 		stat[idx].cpuid = cpuids[idx];
450 		stat[idx].result = result;
451 		stat[idx].status = status;
452 	}
453 
454 	*resp = rp;
455 	*resp_len = rlen;
456 
457 	dr_generate_event(DR_TYPE_CPU, SE_HINT_REMOVE);
458 
459 	return (0);
460 }
461 
462 /*
463  * Do not modify result buffer or length on error.
464  */
465 static int
466 dr_cpu_list_status(dr_cpu_hdr_t *req, dr_cpu_hdr_t **resp, int *resp_len)
467 {
468 	int		idx;
469 	int		result;
470 	int		status;
471 	int		rlen;
472 	uint32_t	*cpuids;
473 	dr_cpu_hdr_t	*rp;
474 	dr_cpu_stat_t	*stat;
475 	md_t		*mdp = NULL;
476 	int		num_nodes;
477 	int		listsz;
478 	mde_cookie_t	*listp = NULL;
479 	mde_cookie_t	cpunode;
480 	boolean_t	walk_md = B_FALSE;
481 
482 	/* the incoming array of cpuids to configure */
483 	cpuids = (uint32_t *)((caddr_t)req + sizeof (dr_cpu_hdr_t));
484 
485 	/* allocate a response message */
486 	rlen = sizeof (dr_cpu_hdr_t);
487 	rlen += req->num_records * sizeof (dr_cpu_stat_t);
488 	rp = kmem_zalloc(rlen, KM_SLEEP);
489 
490 	/* fill in the known data */
491 	rp->req_num = req->req_num;
492 	rp->msg_type = DR_CPU_STATUS;
493 	rp->num_records = req->num_records;
494 
495 	/* stat array for the response */
496 	stat = (dr_cpu_stat_t *)((caddr_t)rp + sizeof (dr_cpu_hdr_t));
497 
498 	/* get the status for each of the CPUs */
499 	for (idx = 0; idx < req->num_records; idx++) {
500 
501 		result = dr_cpu_status(cpuids[idx], &status);
502 
503 		if (result == DR_CPU_RES_FAILURE)
504 			walk_md = B_TRUE;
505 
506 		/* save off results of the status */
507 		stat[idx].cpuid = cpuids[idx];
508 		stat[idx].result = result;
509 		stat[idx].status = status;
510 	}
511 
512 	if (walk_md == B_FALSE)
513 		goto done;
514 
515 	/*
516 	 * At least one of the cpus did not have a CPU
517 	 * structure. So, consult the MD to determine if
518 	 * they are present.
519 	 */
520 
521 	if ((mdp = md_get_handle()) == NULL) {
522 		DR_DBG_CPU("unable to initialize MD\n");
523 		goto done;
524 	}
525 
526 	num_nodes = md_node_count(mdp);
527 	ASSERT(num_nodes > 0);
528 
529 	listsz = num_nodes * sizeof (mde_cookie_t);
530 	listp = kmem_zalloc(listsz, KM_SLEEP);
531 
532 	for (idx = 0; idx < req->num_records; idx++) {
533 
534 		if (stat[idx].result != DR_CPU_RES_FAILURE)
535 			continue;
536 
537 		/* check the MD for the current cpuid */
538 		cpunode = dr_cpu_find_node_md(stat[idx].cpuid, mdp, listp);
539 
540 		stat[idx].result = DR_CPU_RES_OK;
541 
542 		if (cpunode == MDE_INVAL_ELEM_COOKIE) {
543 			stat[idx].status = DR_CPU_STAT_NOT_PRESENT;
544 		} else {
545 			stat[idx].status = DR_CPU_STAT_UNCONFIGURED;
546 		}
547 	}
548 
549 	kmem_free(listp, listsz);
550 
551 	(void) md_fini_handle(mdp);
552 
553 done:
554 	*resp = rp;
555 	*resp_len = rlen;
556 
557 	return (0);
558 }
559 
560 static int
561 dr_cpu_configure(processorid_t cpuid, int *status)
562 {
563 	struct cpu	*cp;
564 	int		rv = 0;
565 
566 	DR_DBG_CPU("dr_cpu_configure...\n");
567 
568 	/*
569 	 * Build device tree node for the CPU
570 	 */
571 	if ((rv = dr_cpu_probe(cpuid)) != 0) {
572 		DR_DBG_CPU("failed to probe CPU %d (%d)\n", cpuid, rv);
573 		if (rv == EINVAL) {
574 			*status = DR_CPU_STAT_NOT_PRESENT;
575 			return (DR_CPU_RES_NOT_IN_MD);
576 		}
577 		*status = DR_CPU_STAT_UNCONFIGURED;
578 		return (DR_CPU_RES_FAILURE);
579 	}
580 
581 	mutex_enter(&cpu_lock);
582 
583 	/*
584 	 * Configure the CPU
585 	 */
586 	if ((cp = cpu_get(cpuid)) == NULL) {
587 
588 		if ((rv = cpu_configure(cpuid)) != 0) {
589 			DR_DBG_CPU("failed to configure CPU %d (%d)\n",
590 			    cpuid, rv);
591 			rv = DR_CPU_RES_FAILURE;
592 			*status = DR_CPU_STAT_UNCONFIGURED;
593 			goto done;
594 		}
595 
596 		DR_DBG_CPU("CPU %d configured\n", cpuid);
597 
598 		/* CPU struct should exist now */
599 		cp = cpu_get(cpuid);
600 	}
601 
602 	ASSERT(cp);
603 
604 	/*
605 	 * Power on the CPU. In sun4v, this brings the stopped
606 	 * CPU into the guest from the Hypervisor.
607 	 */
608 	if (cpu_is_poweredoff(cp)) {
609 
610 		if ((rv = cpu_poweron(cp)) != 0) {
611 			DR_DBG_CPU("failed to power on CPU %d (%d)\n",
612 			    cpuid, rv);
613 			rv = DR_CPU_RES_FAILURE;
614 			*status = DR_CPU_STAT_UNCONFIGURED;
615 			goto done;
616 		}
617 
618 		DR_DBG_CPU("CPU %d powered on\n", cpuid);
619 	}
620 
621 	/*
622 	 * Online the CPU
623 	 */
624 	if (cpu_is_offline(cp)) {
625 
626 		if ((rv = cpu_online(cp)) != 0) {
627 			DR_DBG_CPU("failed to online CPU %d (%d)\n",
628 			    cpuid, rv);
629 			rv = DR_CPU_RES_FAILURE;
630 			/* offline is still configured */
631 			*status = DR_CPU_STAT_CONFIGURED;
632 			goto done;
633 		}
634 
635 		DR_DBG_CPU("CPU %d online\n", cpuid);
636 	}
637 
638 	rv = DR_CPU_RES_OK;
639 	*status = DR_CPU_STAT_CONFIGURED;
640 
641 done:
642 	mutex_exit(&cpu_lock);
643 
644 	return (rv);
645 }
646 
647 static int
648 dr_cpu_unconfigure(processorid_t cpuid, int *status, boolean_t force)
649 {
650 	struct cpu	*cp;
651 	int		rv = 0;
652 	int		cpu_flags;
653 
654 	DR_DBG_CPU("dr_cpu_unconfigure%s...\n", (force) ? " (force)" : "");
655 
656 	mutex_enter(&cpu_lock);
657 
658 	cp = cpu_get(cpuid);
659 
660 	if (cp == NULL) {
661 
662 		/*
663 		 * The OS CPU structures are already torn down,
664 		 * Attempt to deprobe the CPU to make sure the
665 		 * device tree is up to date.
666 		 */
667 		if (dr_cpu_deprobe(cpuid) != 0) {
668 			DR_DBG_CPU("failed to deprobe CPU %d\n", cpuid);
669 			rv = DR_CPU_RES_FAILURE;
670 			*status = DR_CPU_STAT_UNCONFIGURED;
671 			goto done;
672 		}
673 
674 		goto done;
675 	}
676 
677 	ASSERT(cp->cpu_id == cpuid);
678 
679 	/*
680 	 * Offline the CPU
681 	 */
682 	if (cpu_is_active(cp)) {
683 
684 		/* set the force flag correctly */
685 		cpu_flags = (force) ? CPU_FORCED : 0;
686 
687 		if ((rv = cpu_offline(cp, cpu_flags)) != 0) {
688 			DR_DBG_CPU("failed to offline CPU %d (%d)\n",
689 			    cpuid, rv);
690 
691 			rv = DR_CPU_RES_FAILURE;
692 			*status = DR_CPU_STAT_CONFIGURED;
693 			goto done;
694 		}
695 
696 		DR_DBG_CPU("CPU %d offline\n", cpuid);
697 	}
698 
699 	/*
700 	 * Power off the CPU. In sun4v, this puts the running
701 	 * CPU into the stopped state in the Hypervisor.
702 	 */
703 	if (!cpu_is_poweredoff(cp)) {
704 
705 		if ((rv = cpu_poweroff(cp)) != 0) {
706 			DR_DBG_CPU("failed to power off CPU %d (%d)\n",
707 			    cpuid, rv);
708 			rv = DR_CPU_RES_FAILURE;
709 			*status = DR_CPU_STAT_CONFIGURED;
710 			goto done;
711 		}
712 
713 		DR_DBG_CPU("CPU %d powered off\n", cpuid);
714 	}
715 
716 	/*
717 	 * Unconfigure the CPU
718 	 */
719 	if ((rv = cpu_unconfigure(cpuid)) != 0) {
720 		DR_DBG_CPU("failed to unconfigure CPU %d (%d)\n", cpuid, rv);
721 		rv = DR_CPU_RES_FAILURE;
722 		*status = DR_CPU_STAT_UNCONFIGURED;
723 		goto done;
724 	}
725 
726 	DR_DBG_CPU("CPU %d unconfigured\n", cpuid);
727 
728 	/*
729 	 * Tear down device tree.
730 	 */
731 	if ((rv = dr_cpu_deprobe(cpuid)) != 0) {
732 		DR_DBG_CPU("failed to deprobe CPU %d (%d)\n", cpuid, rv);
733 		rv = DR_CPU_RES_FAILURE;
734 		*status = DR_CPU_STAT_UNCONFIGURED;
735 		goto done;
736 	}
737 
738 	rv = DR_CPU_RES_OK;
739 	*status = DR_CPU_STAT_UNCONFIGURED;
740 
741 done:
742 	mutex_exit(&cpu_lock);
743 
744 	return (rv);
745 }
746 
747 /*
748  * Determine the state of a CPU. If the CPU structure is not present,
749  * it does not attempt to determine whether or not the CPU is in the
750  * MD. It is more efficient to do this at the higher level for all
751  * CPUs since it may not even be necessary to search the MD if all
752  * the CPUs are accounted for. Returns DR_CPU_RES_OK if the CPU
753  * structure is present, and DR_CPU_RES_FAILURE otherwise as a signal
754  * that an MD walk is necessary.
755  */
756 static int
757 dr_cpu_status(processorid_t cpuid, int *status)
758 {
759 	int		rv;
760 	struct cpu	*cp;
761 
762 	DR_DBG_CPU("dr_cpu_status...\n");
763 
764 	mutex_enter(&cpu_lock);
765 
766 	if ((cp = cpu_get(cpuid)) == NULL) {
767 		/* need to check if cpu is in the MD */
768 		rv = DR_CPU_RES_FAILURE;
769 		goto done;
770 	}
771 
772 	if (cpu_is_poweredoff(cp)) {
773 		/*
774 		 * The CPU is powered off, so it is considered
775 		 * unconfigured from the service entity point of
776 		 * view. The CPU is not available to the system
777 		 * and intervention by the service entity would
778 		 * be required to change that.
779 		 */
780 		*status = DR_CPU_STAT_UNCONFIGURED;
781 	} else {
782 		/*
783 		 * The CPU is powered on, so it is considered
784 		 * configured from the service entity point of
785 		 * view. It is available for use by the system
786 		 * and service entities are not concerned about
787 		 * the operational status (offline, online, etc.)
788 		 * of the CPU in terms of DR.
789 		 */
790 		*status = DR_CPU_STAT_CONFIGURED;
791 	}
792 
793 	rv = DR_CPU_RES_OK;
794 
795 done:
796 	mutex_exit(&cpu_lock);
797 
798 	return (rv);
799 }
800 
/*
 * Argument passed through e_ddi_branch_create() to new_cpu_node().
 * dr_cpu_probe() fills in mdp and cpunode; new_cpu_node() stores the
 * node it populated in dip.
 */
typedef struct {
	md_t		*mdp;		/* MD handle to read properties from */
	mde_cookie_t	cpunode;	/* MD node describing the CPU */
	dev_info_t	*dip;		/* out: the populated device node */
} cb_arg_t;

/* maximum number of 'compatible' strings new_cpu_node() will copy */
#define	STR_ARR_LEN	5
808 
809 static int
810 new_cpu_node(dev_info_t *new_node, void *arg, uint_t flags)
811 {
812 	_NOTE(ARGUNUSED(flags))
813 
814 	char		*compat;
815 	uint64_t	freq;
816 	uint64_t	cpuid = 0;
817 	int		regbuf[4];
818 	int		len = 0;
819 	cb_arg_t	*cba;
820 	char		*str_arr[STR_ARR_LEN];
821 	char		*curr;
822 	int		idx = 0;
823 
824 	DR_DBG_CPU("new_cpu_node...\n");
825 
826 	cba = (cb_arg_t *)arg;
827 
828 	/*
829 	 * Add 'name' property
830 	 */
831 	if (ndi_prop_update_string(DDI_DEV_T_NONE, new_node,
832 	    "name", "cpu") != DDI_SUCCESS) {
833 		DR_DBG_CPU("new_cpu_node: failed to create 'name' property\n");
834 		return (DDI_WALK_ERROR);
835 	}
836 
837 	/*
838 	 * Add 'compatible' property
839 	 */
840 	if (md_get_prop_data(cba->mdp, cba->cpunode, "compatible",
841 	    (uint8_t **)(&compat), &len)) {
842 		DR_DBG_CPU("new_cpu_node: failed to read 'compatible' property "
843 		    "from MD\n");
844 		return (DDI_WALK_ERROR);
845 	}
846 
847 	DR_DBG_CPU("'compatible' len is %d\n", len);
848 
849 	/* parse the MD string array */
850 	curr = compat;
851 	while (curr < (compat + len)) {
852 
853 		DR_DBG_CPU("adding '%s' to 'compatible' property\n", curr);
854 
855 		str_arr[idx++] = curr;
856 		curr += strlen(curr) + 1;
857 
858 		if (idx == STR_ARR_LEN) {
859 			DR_DBG_CPU("exceeded str_arr len (%d)\n", STR_ARR_LEN);
860 			break;
861 		}
862 	}
863 
864 	if (ndi_prop_update_string_array(DDI_DEV_T_NONE, new_node,
865 	    "compatible", str_arr, idx) != DDI_SUCCESS) {
866 		DR_DBG_CPU("new_cpu_node: failed to create 'compatible' "
867 		    "property\n");
868 		return (DDI_WALK_ERROR);
869 	}
870 
871 	/*
872 	 * Add 'device_type' property
873 	 */
874 	if (ndi_prop_update_string(DDI_DEV_T_NONE, new_node,
875 	    "device_type", "cpu") != DDI_SUCCESS) {
876 		DR_DBG_CPU("new_cpu_node: failed to create 'device_type' "
877 		    "property\n");
878 		return (DDI_WALK_ERROR);
879 	}
880 
881 	/*
882 	 * Add 'clock-frequency' property
883 	 */
884 	if (md_get_prop_val(cba->mdp, cba->cpunode, "clock-frequency", &freq)) {
885 		DR_DBG_CPU("new_cpu_node: failed to read 'clock-frequency' "
886 		    "property from MD\n");
887 		return (DDI_WALK_ERROR);
888 	}
889 
890 	if (ndi_prop_update_int(DDI_DEV_T_NONE, new_node,
891 	    "clock-frequency", freq) != DDI_SUCCESS) {
892 		DR_DBG_CPU("new_cpu_node: failed to create 'clock-frequency' "
893 		    "property\n");
894 		return (DDI_WALK_ERROR);
895 	}
896 
897 	/*
898 	 * Add 'reg' (cpuid) property
899 	 */
900 	if (md_get_prop_val(cba->mdp, cba->cpunode, "id", &cpuid)) {
901 		DR_DBG_CPU("new_cpu_node: failed to read 'id' property "
902 		    "from MD\n");
903 		return (DDI_WALK_ERROR);
904 	}
905 
906 	DR_DBG_CPU("new cpuid=0x%lx\n", cpuid);
907 
908 	bzero(regbuf, 4 * sizeof (int));
909 	regbuf[0] = 0xc0000000 | cpuid;
910 
911 	if (ndi_prop_update_int_array(DDI_DEV_T_NONE, new_node,
912 	    "reg", regbuf, 4) != DDI_SUCCESS) {
913 		DR_DBG_CPU("new_cpu_node: failed to create 'reg' property\n");
914 		return (DDI_WALK_ERROR);
915 	}
916 
917 	cba->dip = new_node;
918 
919 	return (DDI_WALK_TERMINATE);
920 }
921 
922 static int
923 dr_cpu_probe(processorid_t cpuid)
924 {
925 	dev_info_t	*pdip;
926 	dev_info_t	*dip;
927 	devi_branch_t	br;
928 	md_t		*mdp = NULL;
929 	int		num_nodes;
930 	int		rv = 0;
931 	int		listsz;
932 	mde_cookie_t	*listp = NULL;
933 	cb_arg_t	cba;
934 	mde_cookie_t	cpunode;
935 
936 	if ((dip = dr_cpu_find_node(cpuid)) != NULL) {
937 		/* nothing to do */
938 		e_ddi_branch_rele(dip);
939 		return (0);
940 	}
941 
942 	if ((mdp = md_get_handle()) == NULL) {
943 		DR_DBG_CPU("unable to initialize machine description\n");
944 		return (-1);
945 	}
946 
947 	num_nodes = md_node_count(mdp);
948 	ASSERT(num_nodes > 0);
949 
950 	listsz = num_nodes * sizeof (mde_cookie_t);
951 	listp = kmem_zalloc(listsz, KM_SLEEP);
952 
953 	cpunode = dr_cpu_find_node_md(cpuid, mdp, listp);
954 
955 	if (cpunode == MDE_INVAL_ELEM_COOKIE) {
956 		rv = EINVAL;
957 		goto done;
958 	}
959 
960 	/* pass in MD cookie for CPU */
961 	cba.mdp = mdp;
962 	cba.cpunode = cpunode;
963 
964 	br.arg = (void *)&cba;
965 	br.type = DEVI_BRANCH_SID;
966 	br.create.sid_branch_create = new_cpu_node;
967 	br.devi_branch_callback = NULL;
968 	pdip = ddi_root_node();
969 
970 	if ((rv = e_ddi_branch_create(pdip, &br, NULL, 0))) {
971 		DR_DBG_CPU("e_ddi_branch_create failed: %d\n", rv);
972 		rv = -1;
973 		goto done;
974 	}
975 
976 	DR_DBG_CPU("CPU %d probed\n", cpuid);
977 
978 	rv = 0;
979 
980 done:
981 	if (listp)
982 		kmem_free(listp, listsz);
983 
984 	if (mdp)
985 		(void) md_fini_handle(mdp);
986 
987 	return (rv);
988 }
989 
990 static int
991 dr_cpu_deprobe(processorid_t cpuid)
992 {
993 	dev_info_t	*fdip = NULL;
994 	dev_info_t	*dip;
995 
996 	if ((dip = dr_cpu_find_node(cpuid)) == NULL) {
997 		DR_DBG_CPU("cpuid %d already deprobed\n", cpuid);
998 		return (0);
999 	}
1000 
1001 	ASSERT(e_ddi_branch_held(dip));
1002 
1003 	if (e_ddi_branch_destroy(dip, &fdip, 0)) {
1004 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1005 
1006 		/*
1007 		 * If non-NULL, fdip is held and must be released.
1008 		 */
1009 		if (fdip != NULL) {
1010 			(void) ddi_pathname(fdip, path);
1011 			ddi_release_devi(fdip);
1012 		} else {
1013 			(void) ddi_pathname(dip, path);
1014 		}
1015 		cmn_err(CE_NOTE, "node removal failed: %s (%p)",
1016 		    path, (fdip) ? (void *)fdip : (void *)dip);
1017 
1018 		kmem_free(path, MAXPATHLEN);
1019 
1020 		return (-1);
1021 	}
1022 
1023 	DR_DBG_CPU("CPU %d deprobed\n", cpuid);
1024 
1025 	return (0);
1026 }
1027 
/*
 * Search state shared between dr_cpu_find_node() and its tree walk
 * callback, dr_cpu_check_node().
 */
typedef struct {
	processorid_t	cpuid;	/* in: the cpuid being looked for */
	dev_info_t	*dip;	/* out: matching node, or NULL if none */
} dr_search_arg_t;
1032 
1033 static int
1034 dr_cpu_check_node(dev_info_t *dip, void *arg)
1035 {
1036 	char 		*name;
1037 	processorid_t	cpuid;
1038 	dr_search_arg_t	*sarg = (dr_search_arg_t *)arg;
1039 
1040 	if (dip == ddi_root_node()) {
1041 		return (DDI_WALK_CONTINUE);
1042 	}
1043 
1044 	name = ddi_node_name(dip);
1045 
1046 	if (strcmp(name, "cpu") != 0) {
1047 		return (DDI_WALK_PRUNECHILD);
1048 	}
1049 
1050 	cpuid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1051 	    "reg", -1);
1052 
1053 	cpuid = PROM_CFGHDL_TO_CPUID(cpuid);
1054 
1055 	DR_DBG_CPU("found cpuid=0x%x, looking for 0x%x\n", cpuid, sarg->cpuid);
1056 
1057 	if (cpuid == sarg->cpuid) {
1058 		DR_DBG_CPU("matching node\n");
1059 
1060 		/* matching node must be returned held */
1061 		if (!e_ddi_branch_held(dip))
1062 			e_ddi_branch_hold(dip);
1063 
1064 		sarg->dip = dip;
1065 		return (DDI_WALK_TERMINATE);
1066 	}
1067 
1068 	return (DDI_WALK_CONTINUE);
1069 }
1070 
1071 /*
1072  * Walk the device tree to find the dip corresponding to the cpuid
1073  * passed in. If present, the dip is returned held. The caller must
1074  * release the hold on the dip once it is no longer required. If no
1075  * matching node if found, NULL is returned.
1076  */
1077 static dev_info_t *
1078 dr_cpu_find_node(processorid_t cpuid)
1079 {
1080 	dr_search_arg_t	arg;
1081 
1082 	DR_DBG_CPU("dr_cpu_find_node...\n");
1083 
1084 	arg.cpuid = cpuid;
1085 	arg.dip = NULL;
1086 
1087 	ddi_walk_devs(ddi_root_node(), dr_cpu_check_node, &arg);
1088 
1089 	ASSERT((arg.dip == NULL) || (e_ddi_branch_held(arg.dip)));
1090 
1091 	return ((arg.dip) ? arg.dip : NULL);
1092 }
1093 
1094 /*
1095  * Look up a particular cpuid in the MD. Returns the mde_cookie_t
1096  * representing that CPU if present, and MDE_INVAL_ELEM_COOKIE
1097  * otherwise. It is assumed the scratch array has already been
1098  * allocated so that it can accommodate the worst case scenario,
1099  * every node in the MD.
1100  */
1101 static mde_cookie_t
1102 dr_cpu_find_node_md(processorid_t cpuid, md_t *mdp, mde_cookie_t *listp)
1103 {
1104 	int		idx;
1105 	int		nnodes;
1106 	mde_cookie_t	rootnode;
1107 	uint64_t	cpuid_prop;
1108 	mde_cookie_t	result = MDE_INVAL_ELEM_COOKIE;
1109 
1110 	rootnode = md_root_node(mdp);
1111 	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
1112 
1113 	/*
1114 	 * Scan the DAG for all the CPU nodes
1115 	 */
1116 	nnodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "cpu"),
1117 	    md_find_name(mdp, "fwd"), listp);
1118 
1119 	if (nnodes < 0) {
1120 		DR_DBG_CPU("Scan for CPUs failed\n");
1121 		return (result);
1122 	}
1123 
1124 	DR_DBG_CPU("dr_cpu_find_node_md: found %d CPUs in the MD\n", nnodes);
1125 
1126 	/*
1127 	 * Find the CPU of interest
1128 	 */
1129 	for (idx = 0; idx < nnodes; idx++) {
1130 
1131 		if (md_get_prop_val(mdp, listp[idx], "id", &cpuid_prop)) {
1132 			DR_DBG_CPU("Missing 'id' property for CPU node %d\n",
1133 			    idx);
1134 			break;
1135 		}
1136 
1137 		if (cpuid_prop == cpuid) {
1138 			/* found a match */
1139 			DR_DBG_CPU("dr_cpu_find_node_md: found CPU %d "
1140 			    "in MD\n", cpuid);
1141 			result = listp[idx];
1142 			break;
1143 		}
1144 	}
1145 
1146 	if (result == MDE_INVAL_ELEM_COOKIE) {
1147 		DR_DBG_CPU("CPU %d not in MD\n", cpuid);
1148 	}
1149 
1150 	return (result);
1151 }
1152