xref: /titanic_44/usr/src/uts/sun4v/io/dr_cpu.c (revision 1d7f3fadeebf3754e3f042d91e7a4439755dc598)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * sun4v CPU DR Module
29  */
30 
31 #include <sys/modctl.h>
32 #include <sys/processor.h>
33 #include <sys/cpuvar.h>
34 #include <sys/cpupart.h>
35 #include <sys/sunddi.h>
36 #include <sys/sunndi.h>
37 #include <sys/note.h>
38 #include <sys/sysevent/dr.h>
39 #include <sys/hypervisor_api.h>
40 #include <sys/mach_descrip.h>
41 #include <sys/mdesc.h>
42 #include <sys/ds.h>
43 #include <sys/drctl.h>
44 #include <sys/dr_util.h>
45 #include <sys/dr_cpu.h>
46 #include <sys/promif.h>
47 #include <sys/machsystm.h>
48 
49 
50 static struct modlmisc modlmisc = {
51 	&mod_miscops,
52 	"sun4v CPU DR"
53 };
54 
55 static struct modlinkage modlinkage = {
56 	MODREV_1,
57 	(void *)&modlmisc,
58 	NULL
59 };
60 
61 typedef int (*fn_t)(processorid_t, int *, boolean_t);
62 
63 /*
64  * Global DS Handle
65  */
66 static ds_svc_hdl_t ds_handle;
67 
68 /*
69  * Supported DS Capability Versions
70  */
71 static ds_ver_t		dr_cpu_vers[] = { { 1, 0 } };
72 #define	DR_CPU_NVERS	(sizeof (dr_cpu_vers) / sizeof (dr_cpu_vers[0]))
73 
74 /*
75  * DS Capability Description
76  */
77 static ds_capability_t dr_cpu_cap = {
78 	DR_CPU_DS_ID,		/* svc_id */
79 	dr_cpu_vers,		/* vers */
80 	DR_CPU_NVERS		/* nvers */
81 };
82 
83 /*
84  * DS Callbacks
85  */
86 static void dr_cpu_reg_handler(ds_cb_arg_t, ds_ver_t *, ds_svc_hdl_t);
87 static void dr_cpu_unreg_handler(ds_cb_arg_t arg);
88 static void dr_cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);
89 
90 /*
91  * DS Client Ops Vector
92  */
93 static ds_clnt_ops_t dr_cpu_ops = {
94 	dr_cpu_reg_handler,	/* ds_reg_cb */
95 	dr_cpu_unreg_handler,	/* ds_unreg_cb */
96 	dr_cpu_data_handler,	/* ds_data_cb */
97 	NULL			/* cb_arg */
98 };
99 
100 /*
101  * Operation Results
102  *
103  * Used internally to gather results while an operation on a
104  * list of CPUs is in progress. In particular, it is used to
105  * keep track of which CPUs have already failed so that they are
106  * not processed further, and the manner in which they failed.
107  */
108 typedef struct {
109 	uint32_t	cpuid;
110 	uint32_t	result;
111 	uint32_t	status;
112 	char		*string;
113 } dr_cpu_res_t;
114 
/* size of the on-stack buffers used to format error strings */
#define	DR_CPU_MAX_ERR_LEN	64
116 
117 /*
118  * Internal Functions
119  */
120 static int dr_cpu_init(void);
121 static int dr_cpu_fini(void);
122 
123 static int dr_cpu_list_wrk(dr_cpu_hdr_t *, dr_cpu_hdr_t **, int *);
124 static int dr_cpu_list_status(dr_cpu_hdr_t *, dr_cpu_hdr_t **, int *);
125 
126 static int dr_cpu_unconfigure(processorid_t, int *status, boolean_t force);
127 static int dr_cpu_configure(processorid_t, int *status, boolean_t force);
128 static int dr_cpu_status(processorid_t, int *status);
129 
130 static void dr_cpu_check_cpus(dr_cpu_hdr_t *req, dr_cpu_res_t *res);
131 static void dr_cpu_check_psrset(uint32_t *cpuids, dr_cpu_res_t *res, int nres);
132 static int dr_cpu_check_bound_thr(cpu_t *cp, dr_cpu_res_t *res);
133 
134 static dr_cpu_res_t *dr_cpu_res_array_init(dr_cpu_hdr_t *, drctl_rsrc_t *, int);
135 static void dr_cpu_res_array_fini(dr_cpu_res_t *res, int nres);
136 static size_t dr_cpu_pack_response(dr_cpu_hdr_t *req, dr_cpu_res_t *res,
137     dr_cpu_hdr_t **respp);
138 
139 static int dr_cpu_probe(processorid_t newcpuid);
140 static int dr_cpu_deprobe(processorid_t cpuid);
141 
142 static dev_info_t *dr_cpu_find_node(processorid_t cpuid);
143 static mde_cookie_t dr_cpu_find_node_md(processorid_t, md_t *, mde_cookie_t *);
144 
145 int
146 _init(void)
147 {
148 	int	status;
149 
150 	/* check that CPU DR is enabled */
151 	if (dr_is_disabled(DR_TYPE_CPU)) {
152 		cmn_err(CE_CONT, "!CPU DR is disabled\n");
153 		return (-1);
154 	}
155 
156 	if ((status = dr_cpu_init()) != 0) {
157 		cmn_err(CE_NOTE, "CPU DR initialization failed");
158 		return (status);
159 	}
160 
161 	if ((status = mod_install(&modlinkage)) != 0) {
162 		(void) dr_cpu_fini();
163 	}
164 
165 	return (status);
166 }
167 
168 int
169 _info(struct modinfo *modinfop)
170 {
171 	return (mod_info(&modlinkage, modinfop));
172 }
173 
/*
 * Unload gate: while this is zero (its initial value), _fini()
 * refuses to unload the module and returns EBUSY.
 */
int dr_cpu_allow_unload;
175 
176 int
177 _fini(void)
178 {
179 	int	status;
180 
181 	if (dr_cpu_allow_unload == 0)
182 		return (EBUSY);
183 
184 	if ((status = mod_remove(&modlinkage)) == 0) {
185 		(void) dr_cpu_fini();
186 	}
187 
188 	return (status);
189 }
190 
191 static int
192 dr_cpu_init(void)
193 {
194 	int	rv;
195 
196 	if ((rv = ds_cap_init(&dr_cpu_cap, &dr_cpu_ops)) != 0) {
197 		cmn_err(CE_NOTE, "ds_cap_init failed: %d", rv);
198 		return (-1);
199 	}
200 
201 	return (0);
202 }
203 
204 static int
205 dr_cpu_fini(void)
206 {
207 	int	rv;
208 
209 	if ((rv = ds_cap_fini(&dr_cpu_cap)) != 0) {
210 		cmn_err(CE_NOTE, "ds_cap_fini failed: %d", rv);
211 		return (-1);
212 	}
213 
214 	return (0);
215 }
216 
217 static void
218 dr_cpu_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl)
219 {
220 	DR_DBG_CPU("reg_handler: arg=0x%p, ver=%d.%d, hdl=0x%lx\n", arg,
221 	    ver->major, ver->minor, hdl);
222 
223 	ds_handle = hdl;
224 }
225 
226 static void
227 dr_cpu_unreg_handler(ds_cb_arg_t arg)
228 {
229 	DR_DBG_CPU("unreg_handler: arg=0x%p\n", arg);
230 
231 	ds_handle = DS_INVALID_HDL;
232 }
233 
234 static void
235 dr_cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
236 {
237 	_NOTE(ARGUNUSED(arg))
238 
239 	dr_cpu_hdr_t	*req = buf;
240 	dr_cpu_hdr_t	err_resp;
241 	dr_cpu_hdr_t	*resp = &err_resp;
242 	int		resp_len = 0;
243 	int		rv;
244 
245 	/*
246 	 * Sanity check the message
247 	 */
248 	if (buflen < sizeof (dr_cpu_hdr_t)) {
249 		DR_DBG_CPU("incoming message short: expected at least %ld "
250 		    "bytes, received %ld\n", sizeof (dr_cpu_hdr_t), buflen);
251 		goto done;
252 	}
253 
254 	if (req == NULL) {
255 		DR_DBG_CPU("empty message: expected at least %ld bytes\n",
256 		    sizeof (dr_cpu_hdr_t));
257 		goto done;
258 	}
259 
260 	DR_DBG_CPU("incoming request:\n");
261 	DR_DBG_DUMP_MSG(buf, buflen);
262 
263 	if (req->num_records > NCPU) {
264 		DR_DBG_CPU("CPU list too long: %d when %d is the maximum\n",
265 		    req->num_records, NCPU);
266 		goto done;
267 	}
268 
269 	if (req->num_records == 0) {
270 		DR_DBG_CPU("No CPU specified for operation\n");
271 		goto done;
272 	}
273 
274 	/*
275 	 * Process the command
276 	 */
277 	switch (req->msg_type) {
278 	case DR_CPU_CONFIGURE:
279 	case DR_CPU_UNCONFIGURE:
280 	case DR_CPU_FORCE_UNCONFIG:
281 		if ((rv = dr_cpu_list_wrk(req, &resp, &resp_len)) != 0) {
282 			DR_DBG_CPU("%s%s failed (%d)\n",
283 			    (req->msg_type == DR_CPU_CONFIGURE) ?
284 			    "CPU configure" : "CPU unconfigure",
285 			    (req->msg_type == DR_CPU_FORCE_UNCONFIG) ?
286 			    " (forced)" : "", rv);
287 		}
288 		break;
289 
290 	case DR_CPU_STATUS:
291 		if ((rv = dr_cpu_list_status(req, &resp, &resp_len)) != 0)
292 			DR_DBG_CPU("CPU status failed (%d)\n", rv);
293 		break;
294 
295 	default:
296 		cmn_err(CE_NOTE, "unsupported DR operation (%d)",
297 		    req->msg_type);
298 		break;
299 	}
300 
301 done:
302 	/* check if an error occurred */
303 	if (resp == &err_resp) {
304 		resp->req_num = (req) ? req->req_num : 0;
305 		resp->msg_type = DR_CPU_ERROR;
306 		resp->num_records = 0;
307 		resp_len = sizeof (dr_cpu_hdr_t);
308 	}
309 
310 	DR_DBG_CPU("outgoing response:\n");
311 	DR_DBG_DUMP_MSG(resp, resp_len);
312 
313 	/* send back the response */
314 	if (ds_cap_send(ds_handle, resp, resp_len) != 0) {
315 		DR_DBG_CPU("ds_send failed\n");
316 	}
317 
318 	/* free any allocated memory */
319 	if (resp != &err_resp) {
320 		kmem_free(resp, resp_len);
321 	}
322 }
323 
324 /*
325  * Common routine to config or unconfig multiple cpus.  The unconfig
326  * case checks with the OS to see if the removal of cpus will be
327  * permitted, but can be overridden by the "force" version of the
328  * command.  Otherwise, the logic for both cases is identical.
329  *
330  * Note: Do not modify result buffer or length on error.
331  */
332 static int
333 dr_cpu_list_wrk(dr_cpu_hdr_t *req, dr_cpu_hdr_t **resp, int *resp_len)
334 {
335 	int		rv;
336 	int		idx;
337 	int		count;
338 	fn_t		dr_fn;
339 	int		se_hint;
340 	boolean_t	force = B_FALSE;
341 	uint32_t	*req_cpus;
342 	dr_cpu_res_t	*res;
343 	int		drctl_cmd;
344 	int		drctl_flags = 0;
345 	drctl_rsrc_t	*drctl_req;
346 	size_t		drctl_req_len;
347 	drctl_rsrc_t	*drctl_res;
348 	size_t		drctl_res_len = 0;
349 	drctl_cookie_t	drctl_res_ck;
350 
351 	static const char me[] = "dr_cpu_list_wrk";
352 
353 	ASSERT((req != NULL) && (req->num_records != 0));
354 
355 	count = req->num_records;
356 
357 	/*
358 	 * Extract all information that is specific
359 	 * to the various types of operations.
360 	 */
361 	switch (req->msg_type) {
362 	case DR_CPU_CONFIGURE:
363 		dr_fn = dr_cpu_configure;
364 		drctl_cmd = DRCTL_CPU_CONFIG_REQUEST;
365 		se_hint = SE_HINT_INSERT;
366 		break;
367 	case DR_CPU_FORCE_UNCONFIG:
368 		drctl_flags = DRCTL_FLAG_FORCE;
369 		force = B_TRUE;
370 		_NOTE(FALLTHROUGH)
371 	case DR_CPU_UNCONFIGURE:
372 		dr_fn = dr_cpu_unconfigure;
373 		drctl_cmd = DRCTL_CPU_UNCONFIG_REQUEST;
374 		se_hint = SE_HINT_REMOVE;
375 		break;
376 	default:
377 		/* Programming error if we reach this. */
378 		cmn_err(CE_NOTE, "%s: bad msg_type %d\n", me, req->msg_type);
379 		ASSERT(0);
380 		return (-1);
381 	}
382 
383 	/* the incoming array of cpuids to operate on */
384 	req_cpus = DR_CPU_CMD_CPUIDS(req);
385 
386 	/* allocate drctl request msg based on incoming resource count */
387 	drctl_req_len = sizeof (drctl_rsrc_t) * count;
388 	drctl_req = kmem_zalloc(drctl_req_len, KM_SLEEP);
389 
390 	/* copy the cpuids for the drctl call from the incoming request msg */
391 	for (idx = 0; idx < count; idx++)
392 		drctl_req[idx].res_cpu_id = req_cpus[idx];
393 
394 	if ((rv = drctl_config_init(drctl_cmd, drctl_flags, drctl_req,
395 	    count, &drctl_res, &drctl_res_len, &drctl_res_ck)) != 0) {
396 		DR_DBG_CPU("%s: drctl_config_init returned: %d\n", me, rv);
397 		kmem_free(drctl_req, drctl_req_len);
398 		return (-1);
399 	}
400 
401 	ASSERT((drctl_res != NULL) && (drctl_res_len != 0));
402 
403 	/* create the result scratch array */
404 	res = dr_cpu_res_array_init(req, drctl_res, count);
405 
406 	/*
407 	 * For unconfigure, check if there are any conditions
408 	 * that will cause the operation to fail. These are
409 	 * performed before the actual unconfigure attempt so
410 	 * that a meaningful error message can be generated.
411 	 */
412 	if (req->msg_type != DR_CPU_CONFIGURE)
413 		dr_cpu_check_cpus(req, res);
414 
415 	/* perform the specified operation on each of the CPUs */
416 	for (idx = 0; idx < count; idx++) {
417 		int result;
418 		int status;
419 
420 		/*
421 		 * If no action will be taken against the current
422 		 * CPU, update the drctl resource information to
423 		 * ensure that it gets recovered properly during
424 		 * the drctl fini() call.
425 		 */
426 		if (res[idx].result != DR_CPU_RES_OK) {
427 			drctl_req[idx].status = DRCTL_STATUS_CONFIG_FAILURE;
428 			continue;
429 		}
430 
431 		/* call the function to perform the actual operation */
432 		result = (*dr_fn)(req_cpus[idx], &status, force);
433 
434 		/* save off results of the operation */
435 		res[idx].result = result;
436 		res[idx].status = status;
437 
438 		/* save result for drctl fini() reusing init() msg memory */
439 		drctl_req[idx].status = (result != DR_CPU_RES_OK) ?
440 		    DRCTL_STATUS_CONFIG_FAILURE : DRCTL_STATUS_CONFIG_SUCCESS;
441 
442 		DR_DBG_CPU("%s: cpuid %d status %d result %d off '%s'\n",
443 		    me, req_cpus[idx], drctl_req[idx].status, result,
444 		    (res[idx].string) ? res[idx].string : "");
445 	}
446 
447 	if ((rv = drctl_config_fini(&drctl_res_ck, drctl_req, count)) != 0)
448 		DR_DBG_CPU("%s: drctl_config_fini returned: %d\n", me, rv);
449 
450 	/*
451 	 * Operation completed without any fatal errors.
452 	 * Pack the response for transmission.
453 	 */
454 	*resp_len = dr_cpu_pack_response(req, res, resp);
455 
456 	/* notify interested parties about the operation */
457 	dr_generate_event(DR_TYPE_CPU, se_hint);
458 
459 	/*
460 	 * Deallocate any scratch memory.
461 	 */
462 	kmem_free(drctl_res, drctl_res_len);
463 	kmem_free(drctl_req, drctl_req_len);
464 
465 	dr_cpu_res_array_fini(res, count);
466 
467 	return (0);
468 }
469 
470 /*
471  * Allocate and initialize a result array based on the initial
472  * drctl operation. A valid result array is always returned.
473  */
474 static dr_cpu_res_t *
475 dr_cpu_res_array_init(dr_cpu_hdr_t *req, drctl_rsrc_t *rsrc, int nrsrc)
476 {
477 	int		idx;
478 	dr_cpu_res_t	*res;
479 	char		*err_str;
480 	size_t		err_len;
481 
482 	/* allocate zero filled buffer to initialize fields */
483 	res = kmem_zalloc(nrsrc * sizeof (dr_cpu_res_t), KM_SLEEP);
484 
485 	/*
486 	 * Fill in the result information for each resource.
487 	 */
488 	for (idx = 0; idx < nrsrc; idx++) {
489 		res[idx].cpuid = rsrc[idx].res_cpu_id;
490 		res[idx].result = DR_CPU_RES_OK;
491 
492 		if (rsrc[idx].status == DRCTL_STATUS_ALLOW)
493 			continue;
494 
495 		/*
496 		 * Update the state information for this CPU.
497 		 */
498 		res[idx].result = DR_CPU_RES_BLOCKED;
499 		res[idx].status = (req->msg_type == DR_CPU_CONFIGURE) ?
500 		    DR_CPU_STAT_UNCONFIGURED : DR_CPU_STAT_CONFIGURED;
501 
502 		/*
503 		 * If an error string exists, copy it out of the
504 		 * message buffer. This eliminates any dependency
505 		 * on the memory allocated for the message buffer
506 		 * itself.
507 		 */
508 		if (rsrc[idx].offset != NULL) {
509 			err_str = (char *)rsrc + rsrc[idx].offset;
510 			err_len = strlen(err_str) + 1;
511 
512 			res[idx].string = kmem_alloc(err_len, KM_SLEEP);
513 			bcopy(err_str, res[idx].string, err_len);
514 		}
515 	}
516 
517 	return (res);
518 }
519 
520 static void
521 dr_cpu_res_array_fini(dr_cpu_res_t *res, int nres)
522 {
523 	int	idx;
524 	size_t	str_len;
525 
526 	for (idx = 0; idx < nres; idx++) {
527 		/* deallocate the error string if present */
528 		if (res[idx].string) {
529 			str_len = strlen(res[idx].string) + 1;
530 			kmem_free(res[idx].string, str_len);
531 		}
532 	}
533 
534 	/* deallocate the result array itself */
535 	kmem_free(res, sizeof (dr_cpu_res_t) * nres);
536 }
537 
538 /*
539  * Allocate and pack a response message for transmission based
540  * on the specified result array. A valid response message and
541  * valid size information is always returned.
542  */
543 static size_t
544 dr_cpu_pack_response(dr_cpu_hdr_t *req, dr_cpu_res_t *res, dr_cpu_hdr_t **respp)
545 {
546 	int		idx;
547 	dr_cpu_hdr_t	*resp;
548 	dr_cpu_stat_t	*resp_stat;
549 	size_t		resp_len;
550 	uint32_t	curr_off;
551 	caddr_t		curr_str;
552 	size_t		str_len;
553 	size_t		stat_len;
554 	int		nstat = req->num_records;
555 
556 	/*
557 	 * Calculate the size of the response message
558 	 * and allocate an appropriately sized buffer.
559 	 */
560 	resp_len = 0;
561 
562 	/* add the header size */
563 	resp_len += sizeof (dr_cpu_hdr_t);
564 
565 	/* add the stat array size */
566 	stat_len = sizeof (dr_cpu_stat_t) * nstat;
567 	resp_len += stat_len;
568 
569 	/* add the size of any error strings */
570 	for (idx = 0; idx < nstat; idx++) {
571 		if (res[idx].string != NULL) {
572 			resp_len += strlen(res[idx].string) + 1;
573 		}
574 	}
575 
576 	/* allocate the message buffer */
577 	resp = kmem_zalloc(resp_len, KM_SLEEP);
578 
579 	/*
580 	 * Fill in the header information.
581 	 */
582 	resp->req_num = req->req_num;
583 	resp->msg_type = DR_CPU_OK;
584 	resp->num_records = nstat;
585 
586 	/*
587 	 * Fill in the stat information.
588 	 */
589 	resp_stat = DR_CPU_RESP_STATS(resp);
590 
591 	/* string offsets start immediately after stat array */
592 	curr_off = sizeof (dr_cpu_hdr_t) + stat_len;
593 	curr_str = (char *)resp_stat + stat_len;
594 
595 	for (idx = 0; idx < nstat; idx++) {
596 		resp_stat[idx].cpuid = res[idx].cpuid;
597 		resp_stat[idx].result = res[idx].result;
598 		resp_stat[idx].status = res[idx].status;
599 
600 		if (res[idx].string != NULL) {
601 			/* copy over the error string */
602 			str_len = strlen(res[idx].string) + 1;
603 			bcopy(res[idx].string, curr_str, str_len);
604 			resp_stat[idx].string_off = curr_off;
605 
606 			curr_off += str_len;
607 			curr_str += str_len;
608 		}
609 	}
610 
611 	/* buffer should be exactly filled */
612 	ASSERT(curr_off == resp_len);
613 
614 	*respp = resp;
615 	return (resp_len);
616 }
617 
618 /*
619  * Check for conditions that will prevent a CPU from being offlined.
620  * This provides the opportunity to generate useful information to
621  * help diagnose the failure rather than letting the offline attempt
622  * fail in a more generic way.
623  */
624 static void
625 dr_cpu_check_cpus(dr_cpu_hdr_t *req, dr_cpu_res_t *res)
626 {
627 	int		idx;
628 	cpu_t		*cp;
629 	uint32_t	*cpuids;
630 
631 	ASSERT((req->msg_type == DR_CPU_UNCONFIGURE) ||
632 	    (req->msg_type == DR_CPU_FORCE_UNCONFIG));
633 
634 	DR_DBG_CPU("dr_cpu_check_cpus...\n");
635 
636 	/* array of cpuids start just after the header */
637 	cpuids = DR_CPU_CMD_CPUIDS(req);
638 
639 	mutex_enter(&cpu_lock);
640 
641 	/*
642 	 * Always check processor set membership first. The
643 	 * last CPU in a processor set will fail to offline
644 	 * even if the operation if forced, so any failures
645 	 * should always be reported.
646 	 */
647 	dr_cpu_check_psrset(cpuids, res, req->num_records);
648 
649 	/* process each cpu that is part of the request */
650 	for (idx = 0; idx < req->num_records; idx++) {
651 
652 		/* nothing to check if the CPU has already failed */
653 		if (res[idx].result != DR_CPU_RES_OK)
654 			continue;
655 
656 		if ((cp = cpu_get(cpuids[idx])) == NULL)
657 			continue;
658 
659 		/*
660 		 * Only check if there are bound threads if the
661 		 * operation is not a forced unconfigure. In a
662 		 * forced request, threads are automatically
663 		 * unbound before they are offlined.
664 		 */
665 		if (req->msg_type == DR_CPU_UNCONFIGURE) {
666 			/*
667 			 * The return value is only interesting if other
668 			 * checks are added to this loop and a decision
669 			 * is needed on whether to continue checking.
670 			 */
671 			(void) dr_cpu_check_bound_thr(cp, &res[idx]);
672 		}
673 	}
674 
675 	mutex_exit(&cpu_lock);
676 }
677 
678 /*
679  * Examine the processor set configuration for the specified
680  * CPUs and see if the unconfigure operation would result in
681  * trying to remove the last CPU in any processor set.
682  */
683 static void
684 dr_cpu_check_psrset(uint32_t *cpuids, dr_cpu_res_t *res, int nres)
685 {
686 	int		cpu_idx;
687 	int		set_idx;
688 	cpu_t		*cp;
689 	cpupart_t	*cpp;
690 	char		err_str[DR_CPU_MAX_ERR_LEN];
691 	size_t		err_len;
692 	struct {
693 		cpupart_t	*cpp;
694 		int		ncpus;
695 	} *psrset;
696 
697 	ASSERT(MUTEX_HELD(&cpu_lock));
698 
699 	/*
700 	 * Allocate a scratch array to count the CPUs in
701 	 * the various processor sets. A CPU always belongs
702 	 * to exactly one processor set, so by definition,
703 	 * the scratch array never needs to be larger than
704 	 * the number of CPUs.
705 	 */
706 	psrset = kmem_zalloc(sizeof (*psrset) * nres, KM_SLEEP);
707 
708 	for (cpu_idx = 0; cpu_idx < nres; cpu_idx++) {
709 
710 		/* skip any CPUs that have already failed */
711 		if (res[cpu_idx].result != DR_CPU_RES_OK)
712 			continue;
713 
714 		if ((cp = cpu_get(cpuids[cpu_idx])) == NULL)
715 			continue;
716 
717 		cpp = cp->cpu_part;
718 
719 		/* lookup the set this CPU belongs to */
720 		for (set_idx = 0; set_idx < nres; set_idx++) {
721 
722 			/* matching set found */
723 			if (cpp == psrset[set_idx].cpp)
724 				break;
725 
726 			/* set not found, start a new entry */
727 			if (psrset[set_idx].cpp == NULL) {
728 				psrset[set_idx].cpp = cpp;
729 				psrset[set_idx].ncpus = cpp->cp_ncpus;
730 				break;
731 			}
732 		}
733 
734 		ASSERT(set_idx != nres);
735 
736 		/*
737 		 * Remove the current CPU from the set total but only
738 		 * generate an error for the last CPU. The correct CPU
739 		 * will get the error because the unconfigure attempts
740 		 * will occur in the same order in which the CPUs are
741 		 * examined in this loop.
742 		 */
743 		if (--psrset[set_idx].ncpus == 0) {
744 			/*
745 			 * Fill in the various pieces of information
746 			 * to report that the operation will fail.
747 			 */
748 			res[cpu_idx].result = DR_CPU_RES_BLOCKED;
749 			res[cpu_idx].status = DR_CPU_STAT_CONFIGURED;
750 
751 			(void) snprintf(err_str, DR_CPU_MAX_ERR_LEN,
752 			    "last online cpu in processor set %d", cpp->cp_id);
753 
754 			err_len = strlen(err_str) + 1;
755 
756 			res[cpu_idx].string = kmem_alloc(err_len, KM_SLEEP);
757 			bcopy(err_str, res[cpu_idx].string, err_len);
758 
759 			DR_DBG_CPU("cpu %d: %s\n", cpuids[cpu_idx], err_str);
760 		}
761 	}
762 
763 	kmem_free(psrset, sizeof (*psrset) * nres);
764 }
765 
766 /*
767  * Check if any threads are bound to the specified CPU. If the
768  * condition is true, DR_CPU_RES_BLOCKED is returned and an error
769  * string is generated and placed in the specified result structure.
770  * Otherwise, DR_CPU_RES_OK is returned.
771  */
772 static int
773 dr_cpu_check_bound_thr(cpu_t *cp, dr_cpu_res_t *res)
774 {
775 	int		nbound;
776 	proc_t		*pp;
777 	kthread_t	*tp;
778 	char		err_str[DR_CPU_MAX_ERR_LEN];
779 	size_t		err_len;
780 
781 	/*
782 	 * Error string allocation makes an assumption
783 	 * that no blocking condition has been identified.
784 	 */
785 	ASSERT(res->result == DR_CPU_RES_OK);
786 	ASSERT(res->string == NULL);
787 
788 	ASSERT(MUTEX_HELD(&cpu_lock));
789 
790 	mutex_enter(&pidlock);
791 
792 	nbound = 0;
793 
794 	/*
795 	 * Walk the active processes, checking if each
796 	 * thread belonging to the process is bound.
797 	 */
798 	for (pp = practive; (pp != NULL) && (nbound <= 1); pp = pp->p_next) {
799 		mutex_enter(&pp->p_lock);
800 
801 		tp = pp->p_tlist;
802 
803 		if ((tp == NULL) || (pp->p_flag & SSYS)) {
804 			mutex_exit(&pp->p_lock);
805 			continue;
806 		}
807 
808 		do {
809 			if (tp->t_bind_cpu != cp->cpu_id)
810 				continue;
811 
812 			/*
813 			 * Update the running total of bound
814 			 * threads. Continue the search until
815 			 * it can be determined if more than
816 			 * one thread is bound to the CPU.
817 			 */
818 			if (++nbound > 1)
819 				break;
820 
821 		} while ((tp = tp->t_forw) != pp->p_tlist);
822 
823 		mutex_exit(&pp->p_lock);
824 	}
825 
826 	mutex_exit(&pidlock);
827 
828 	if (nbound) {
829 		/*
830 		 * Threads are bound to the CPU. Fill in
831 		 * various pieces of information to report
832 		 * that the operation will fail.
833 		 */
834 		res->result = DR_CPU_RES_BLOCKED;
835 		res->status = DR_CPU_STAT_CONFIGURED;
836 
837 		(void) snprintf(err_str, DR_CPU_MAX_ERR_LEN, "cpu has bound "
838 		    "thread%s", (nbound > 1) ? "s" : "");
839 
840 		err_len = strlen(err_str) + 1;
841 
842 		res->string = kmem_alloc(err_len, KM_SLEEP);
843 		bcopy(err_str, res->string, err_len);
844 
845 		DR_DBG_CPU("cpu %d: %s\n", cp->cpu_id, err_str);
846 	}
847 
848 	return (res->result);
849 }
850 
851 /*
852  * Do not modify result buffer or length on error.
853  */
854 static int
855 dr_cpu_list_status(dr_cpu_hdr_t *req, dr_cpu_hdr_t **resp, int *resp_len)
856 {
857 	int		idx;
858 	int		result;
859 	int		status;
860 	int		rlen;
861 	uint32_t	*cpuids;
862 	dr_cpu_hdr_t	*rp;
863 	dr_cpu_stat_t	*stat;
864 	md_t		*mdp = NULL;
865 	int		num_nodes;
866 	int		listsz;
867 	mde_cookie_t	*listp = NULL;
868 	mde_cookie_t	cpunode;
869 	boolean_t	walk_md = B_FALSE;
870 
871 	/* the incoming array of cpuids to configure */
872 	cpuids = DR_CPU_CMD_CPUIDS(req);
873 
874 	/* allocate a response message */
875 	rlen = sizeof (dr_cpu_hdr_t);
876 	rlen += req->num_records * sizeof (dr_cpu_stat_t);
877 	rp = kmem_zalloc(rlen, KM_SLEEP);
878 
879 	/* fill in the known data */
880 	rp->req_num = req->req_num;
881 	rp->msg_type = DR_CPU_STATUS;
882 	rp->num_records = req->num_records;
883 
884 	/* stat array for the response */
885 	stat = DR_CPU_RESP_STATS(rp);
886 
887 	/* get the status for each of the CPUs */
888 	for (idx = 0; idx < req->num_records; idx++) {
889 
890 		result = dr_cpu_status(cpuids[idx], &status);
891 
892 		if (result == DR_CPU_RES_FAILURE)
893 			walk_md = B_TRUE;
894 
895 		/* save off results of the status */
896 		stat[idx].cpuid = cpuids[idx];
897 		stat[idx].result = result;
898 		stat[idx].status = status;
899 	}
900 
901 	if (walk_md == B_FALSE)
902 		goto done;
903 
904 	/*
905 	 * At least one of the cpus did not have a CPU
906 	 * structure. So, consult the MD to determine if
907 	 * they are present.
908 	 */
909 
910 	if ((mdp = md_get_handle()) == NULL) {
911 		DR_DBG_CPU("unable to initialize MD\n");
912 		goto done;
913 	}
914 
915 	num_nodes = md_node_count(mdp);
916 	ASSERT(num_nodes > 0);
917 
918 	listsz = num_nodes * sizeof (mde_cookie_t);
919 	listp = kmem_zalloc(listsz, KM_SLEEP);
920 
921 	for (idx = 0; idx < req->num_records; idx++) {
922 
923 		if (stat[idx].result != DR_CPU_RES_FAILURE)
924 			continue;
925 
926 		/* check the MD for the current cpuid */
927 		cpunode = dr_cpu_find_node_md(stat[idx].cpuid, mdp, listp);
928 
929 		stat[idx].result = DR_CPU_RES_OK;
930 
931 		if (cpunode == MDE_INVAL_ELEM_COOKIE) {
932 			stat[idx].status = DR_CPU_STAT_NOT_PRESENT;
933 		} else {
934 			stat[idx].status = DR_CPU_STAT_UNCONFIGURED;
935 		}
936 	}
937 
938 	kmem_free(listp, listsz);
939 
940 	(void) md_fini_handle(mdp);
941 
942 done:
943 	*resp = rp;
944 	*resp_len = rlen;
945 
946 	return (0);
947 }
948 
949 static int
950 dr_cpu_configure(processorid_t cpuid, int *status, boolean_t force)
951 {
952 	 _NOTE(ARGUNUSED(force))
953 	struct cpu	*cp;
954 	int		rv = 0;
955 
956 	DR_DBG_CPU("dr_cpu_configure...\n");
957 
958 	/*
959 	 * Build device tree node for the CPU
960 	 */
961 	if ((rv = dr_cpu_probe(cpuid)) != 0) {
962 		DR_DBG_CPU("failed to probe CPU %d (%d)\n", cpuid, rv);
963 		if (rv == EINVAL) {
964 			*status = DR_CPU_STAT_NOT_PRESENT;
965 			return (DR_CPU_RES_NOT_IN_MD);
966 		}
967 		*status = DR_CPU_STAT_UNCONFIGURED;
968 		return (DR_CPU_RES_FAILURE);
969 	}
970 
971 	mutex_enter(&cpu_lock);
972 
973 	/*
974 	 * Configure the CPU
975 	 */
976 	if ((cp = cpu_get(cpuid)) == NULL) {
977 
978 		if ((rv = cpu_configure(cpuid)) != 0) {
979 			DR_DBG_CPU("failed to configure CPU %d (%d)\n",
980 			    cpuid, rv);
981 			rv = DR_CPU_RES_FAILURE;
982 			*status = DR_CPU_STAT_UNCONFIGURED;
983 			goto done;
984 		}
985 
986 		DR_DBG_CPU("CPU %d configured\n", cpuid);
987 
988 		/* CPU struct should exist now */
989 		cp = cpu_get(cpuid);
990 	}
991 
992 	ASSERT(cp);
993 
994 	/*
995 	 * Power on the CPU. In sun4v, this brings the stopped
996 	 * CPU into the guest from the Hypervisor.
997 	 */
998 	if (cpu_is_poweredoff(cp)) {
999 
1000 		if ((rv = cpu_poweron(cp)) != 0) {
1001 			DR_DBG_CPU("failed to power on CPU %d (%d)\n",
1002 			    cpuid, rv);
1003 			rv = DR_CPU_RES_FAILURE;
1004 			*status = DR_CPU_STAT_UNCONFIGURED;
1005 			goto done;
1006 		}
1007 
1008 		DR_DBG_CPU("CPU %d powered on\n", cpuid);
1009 	}
1010 
1011 	/*
1012 	 * Online the CPU
1013 	 */
1014 	if (cpu_is_offline(cp)) {
1015 
1016 		if ((rv = cpu_online(cp)) != 0) {
1017 			DR_DBG_CPU("failed to online CPU %d (%d)\n",
1018 			    cpuid, rv);
1019 			rv = DR_CPU_RES_FAILURE;
1020 			/* offline is still configured */
1021 			*status = DR_CPU_STAT_CONFIGURED;
1022 			goto done;
1023 		}
1024 
1025 		DR_DBG_CPU("CPU %d online\n", cpuid);
1026 	}
1027 
1028 	rv = DR_CPU_RES_OK;
1029 	*status = DR_CPU_STAT_CONFIGURED;
1030 
1031 done:
1032 	mutex_exit(&cpu_lock);
1033 
1034 	return (rv);
1035 }
1036 
1037 static int
1038 dr_cpu_unconfigure(processorid_t cpuid, int *status, boolean_t force)
1039 {
1040 	struct cpu	*cp;
1041 	int		rv = 0;
1042 	int		cpu_flags;
1043 
1044 	DR_DBG_CPU("dr_cpu_unconfigure%s...\n", (force) ? " (force)" : "");
1045 
1046 	mutex_enter(&cpu_lock);
1047 
1048 	cp = cpu_get(cpuid);
1049 
1050 	if (cp == NULL) {
1051 
1052 		/*
1053 		 * The OS CPU structures are already torn down,
1054 		 * Attempt to deprobe the CPU to make sure the
1055 		 * device tree is up to date.
1056 		 */
1057 		if (dr_cpu_deprobe(cpuid) != 0) {
1058 			DR_DBG_CPU("failed to deprobe CPU %d\n", cpuid);
1059 			rv = DR_CPU_RES_FAILURE;
1060 			*status = DR_CPU_STAT_UNCONFIGURED;
1061 			goto done;
1062 		}
1063 
1064 		goto done;
1065 	}
1066 
1067 	ASSERT(cp->cpu_id == cpuid);
1068 
1069 	/*
1070 	 * Offline the CPU
1071 	 */
1072 	if (cpu_is_active(cp)) {
1073 
1074 		/* set the force flag correctly */
1075 		cpu_flags = (force) ? CPU_FORCED : 0;
1076 
1077 		if ((rv = cpu_offline(cp, cpu_flags)) != 0) {
1078 			DR_DBG_CPU("failed to offline CPU %d (%d)\n",
1079 			    cpuid, rv);
1080 
1081 			rv = DR_CPU_RES_FAILURE;
1082 			*status = DR_CPU_STAT_CONFIGURED;
1083 			goto done;
1084 		}
1085 
1086 		DR_DBG_CPU("CPU %d offline\n", cpuid);
1087 	}
1088 
1089 	/*
1090 	 * Power off the CPU. In sun4v, this puts the running
1091 	 * CPU into the stopped state in the Hypervisor.
1092 	 */
1093 	if (!cpu_is_poweredoff(cp)) {
1094 
1095 		if ((rv = cpu_poweroff(cp)) != 0) {
1096 			DR_DBG_CPU("failed to power off CPU %d (%d)\n",
1097 			    cpuid, rv);
1098 			rv = DR_CPU_RES_FAILURE;
1099 			*status = DR_CPU_STAT_CONFIGURED;
1100 			goto done;
1101 		}
1102 
1103 		DR_DBG_CPU("CPU %d powered off\n", cpuid);
1104 	}
1105 
1106 	/*
1107 	 * Unconfigure the CPU
1108 	 */
1109 	if ((rv = cpu_unconfigure(cpuid)) != 0) {
1110 		DR_DBG_CPU("failed to unconfigure CPU %d (%d)\n", cpuid, rv);
1111 		rv = DR_CPU_RES_FAILURE;
1112 		*status = DR_CPU_STAT_UNCONFIGURED;
1113 		goto done;
1114 	}
1115 
1116 	DR_DBG_CPU("CPU %d unconfigured\n", cpuid);
1117 
1118 	/*
1119 	 * Tear down device tree.
1120 	 */
1121 	if ((rv = dr_cpu_deprobe(cpuid)) != 0) {
1122 		DR_DBG_CPU("failed to deprobe CPU %d (%d)\n", cpuid, rv);
1123 		rv = DR_CPU_RES_FAILURE;
1124 		*status = DR_CPU_STAT_UNCONFIGURED;
1125 		goto done;
1126 	}
1127 
1128 	rv = DR_CPU_RES_OK;
1129 	*status = DR_CPU_STAT_UNCONFIGURED;
1130 
1131 done:
1132 	mutex_exit(&cpu_lock);
1133 
1134 	return (rv);
1135 }
1136 
1137 /*
1138  * Determine the state of a CPU. If the CPU structure is not present,
1139  * it does not attempt to determine whether or not the CPU is in the
1140  * MD. It is more efficient to do this at the higher level for all
1141  * CPUs since it may not even be necessary to search the MD if all
1142  * the CPUs are accounted for. Returns DR_CPU_RES_OK if the CPU
1143  * structure is present, and DR_CPU_RES_FAILURE otherwise as a signal
1144  * that an MD walk is necessary.
1145  */
1146 static int
1147 dr_cpu_status(processorid_t cpuid, int *status)
1148 {
1149 	int		rv;
1150 	struct cpu	*cp;
1151 
1152 	DR_DBG_CPU("dr_cpu_status...\n");
1153 
1154 	mutex_enter(&cpu_lock);
1155 
1156 	if ((cp = cpu_get(cpuid)) == NULL) {
1157 		/* need to check if cpu is in the MD */
1158 		rv = DR_CPU_RES_FAILURE;
1159 		goto done;
1160 	}
1161 
1162 	if (cpu_is_poweredoff(cp)) {
1163 		/*
1164 		 * The CPU is powered off, so it is considered
1165 		 * unconfigured from the service entity point of
1166 		 * view. The CPU is not available to the system
1167 		 * and intervention by the service entity would
1168 		 * be required to change that.
1169 		 */
1170 		*status = DR_CPU_STAT_UNCONFIGURED;
1171 	} else {
1172 		/*
1173 		 * The CPU is powered on, so it is considered
1174 		 * configured from the service entity point of
1175 		 * view. It is available for use by the system
1176 		 * and service entities are not concerned about
1177 		 * the operational status (offline, online, etc.)
1178 		 * of the CPU in terms of DR.
1179 		 */
1180 		*status = DR_CPU_STAT_CONFIGURED;
1181 	}
1182 
1183 	rv = DR_CPU_RES_OK;
1184 
1185 done:
1186 	mutex_exit(&cpu_lock);
1187 
1188 	return (rv);
1189 }
1190 
1191 typedef struct {
1192 	md_t		*mdp;
1193 	mde_cookie_t	cpunode;
1194 	dev_info_t	*dip;
1195 } cb_arg_t;
1196 
1197 #define	STR_ARR_LEN	5
1198 
1199 static int
1200 new_cpu_node(dev_info_t *new_node, void *arg, uint_t flags)
1201 {
1202 	_NOTE(ARGUNUSED(flags))
1203 
1204 	char		*compat;
1205 	uint64_t	freq;
1206 	uint64_t	cpuid = 0;
1207 	int		regbuf[4];
1208 	int		len = 0;
1209 	cb_arg_t	*cba;
1210 	char		*str_arr[STR_ARR_LEN];
1211 	char		*curr;
1212 	int		idx = 0;
1213 
1214 	DR_DBG_CPU("new_cpu_node...\n");
1215 
1216 	cba = (cb_arg_t *)arg;
1217 
1218 	/*
1219 	 * Add 'name' property
1220 	 */
1221 	if (ndi_prop_update_string(DDI_DEV_T_NONE, new_node,
1222 	    "name", "cpu") != DDI_SUCCESS) {
1223 		DR_DBG_CPU("new_cpu_node: failed to create 'name' property\n");
1224 		return (DDI_WALK_ERROR);
1225 	}
1226 
1227 	/*
1228 	 * Add 'compatible' property
1229 	 */
1230 	if (md_get_prop_data(cba->mdp, cba->cpunode, "compatible",
1231 	    (uint8_t **)(&compat), &len)) {
1232 		DR_DBG_CPU("new_cpu_node: failed to read 'compatible' property "
1233 		    "from MD\n");
1234 		return (DDI_WALK_ERROR);
1235 	}
1236 
1237 	DR_DBG_CPU("'compatible' len is %d\n", len);
1238 
1239 	/* parse the MD string array */
1240 	curr = compat;
1241 	while (curr < (compat + len)) {
1242 
1243 		DR_DBG_CPU("adding '%s' to 'compatible' property\n", curr);
1244 
1245 		str_arr[idx++] = curr;
1246 		curr += strlen(curr) + 1;
1247 
1248 		if (idx == STR_ARR_LEN) {
1249 			DR_DBG_CPU("exceeded str_arr len (%d)\n", STR_ARR_LEN);
1250 			break;
1251 		}
1252 	}
1253 
1254 	if (ndi_prop_update_string_array(DDI_DEV_T_NONE, new_node,
1255 	    "compatible", str_arr, idx) != DDI_SUCCESS) {
1256 		DR_DBG_CPU("new_cpu_node: failed to create 'compatible' "
1257 		    "property\n");
1258 		return (DDI_WALK_ERROR);
1259 	}
1260 
1261 	/*
1262 	 * Add 'device_type' property
1263 	 */
1264 	if (ndi_prop_update_string(DDI_DEV_T_NONE, new_node,
1265 	    "device_type", "cpu") != DDI_SUCCESS) {
1266 		DR_DBG_CPU("new_cpu_node: failed to create 'device_type' "
1267 		    "property\n");
1268 		return (DDI_WALK_ERROR);
1269 	}
1270 
1271 	/*
1272 	 * Add 'clock-frequency' property
1273 	 */
1274 	if (md_get_prop_val(cba->mdp, cba->cpunode, "clock-frequency", &freq)) {
1275 		DR_DBG_CPU("new_cpu_node: failed to read 'clock-frequency' "
1276 		    "property from MD\n");
1277 		return (DDI_WALK_ERROR);
1278 	}
1279 
1280 	if (ndi_prop_update_int(DDI_DEV_T_NONE, new_node,
1281 	    "clock-frequency", freq) != DDI_SUCCESS) {
1282 		DR_DBG_CPU("new_cpu_node: failed to create 'clock-frequency' "
1283 		    "property\n");
1284 		return (DDI_WALK_ERROR);
1285 	}
1286 
1287 	/*
1288 	 * Add 'reg' (cpuid) property
1289 	 */
1290 	if (md_get_prop_val(cba->mdp, cba->cpunode, "id", &cpuid)) {
1291 		DR_DBG_CPU("new_cpu_node: failed to read 'id' property "
1292 		    "from MD\n");
1293 		return (DDI_WALK_ERROR);
1294 	}
1295 
1296 	DR_DBG_CPU("new cpuid=0x%lx\n", cpuid);
1297 
1298 	bzero(regbuf, 4 * sizeof (int));
1299 	regbuf[0] = 0xc0000000 | cpuid;
1300 
1301 	if (ndi_prop_update_int_array(DDI_DEV_T_NONE, new_node,
1302 	    "reg", regbuf, 4) != DDI_SUCCESS) {
1303 		DR_DBG_CPU("new_cpu_node: failed to create 'reg' property\n");
1304 		return (DDI_WALK_ERROR);
1305 	}
1306 
1307 	cba->dip = new_node;
1308 
1309 	return (DDI_WALK_TERMINATE);
1310 }
1311 
1312 static int
1313 dr_cpu_probe(processorid_t cpuid)
1314 {
1315 	dev_info_t	*pdip;
1316 	dev_info_t	*dip;
1317 	devi_branch_t	br;
1318 	md_t		*mdp = NULL;
1319 	int		num_nodes;
1320 	int		rv = 0;
1321 	int		listsz;
1322 	mde_cookie_t	*listp = NULL;
1323 	cb_arg_t	cba;
1324 	mde_cookie_t	cpunode;
1325 
1326 	if ((dip = dr_cpu_find_node(cpuid)) != NULL) {
1327 		/* nothing to do */
1328 		e_ddi_branch_rele(dip);
1329 		return (0);
1330 	}
1331 
1332 	if ((mdp = md_get_handle()) == NULL) {
1333 		DR_DBG_CPU("unable to initialize machine description\n");
1334 		return (-1);
1335 	}
1336 
1337 	num_nodes = md_node_count(mdp);
1338 	ASSERT(num_nodes > 0);
1339 
1340 	listsz = num_nodes * sizeof (mde_cookie_t);
1341 	listp = kmem_zalloc(listsz, KM_SLEEP);
1342 
1343 	cpunode = dr_cpu_find_node_md(cpuid, mdp, listp);
1344 
1345 	if (cpunode == MDE_INVAL_ELEM_COOKIE) {
1346 		rv = EINVAL;
1347 		goto done;
1348 	}
1349 
1350 	/* pass in MD cookie for CPU */
1351 	cba.mdp = mdp;
1352 	cba.cpunode = cpunode;
1353 
1354 	br.arg = (void *)&cba;
1355 	br.type = DEVI_BRANCH_SID;
1356 	br.create.sid_branch_create = new_cpu_node;
1357 	br.devi_branch_callback = NULL;
1358 	pdip = ddi_root_node();
1359 
1360 	if ((rv = e_ddi_branch_create(pdip, &br, NULL, 0))) {
1361 		DR_DBG_CPU("e_ddi_branch_create failed: %d\n", rv);
1362 		rv = -1;
1363 		goto done;
1364 	}
1365 
1366 	DR_DBG_CPU("CPU %d probed\n", cpuid);
1367 
1368 	rv = 0;
1369 
1370 done:
1371 	if (listp)
1372 		kmem_free(listp, listsz);
1373 
1374 	if (mdp)
1375 		(void) md_fini_handle(mdp);
1376 
1377 	return (rv);
1378 }
1379 
1380 static int
1381 dr_cpu_deprobe(processorid_t cpuid)
1382 {
1383 	dev_info_t	*fdip = NULL;
1384 	dev_info_t	*dip;
1385 
1386 	if ((dip = dr_cpu_find_node(cpuid)) == NULL) {
1387 		DR_DBG_CPU("cpuid %d already deprobed\n", cpuid);
1388 		return (0);
1389 	}
1390 
1391 	ASSERT(e_ddi_branch_held(dip));
1392 
1393 	if (e_ddi_branch_destroy(dip, &fdip, 0)) {
1394 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1395 
1396 		/*
1397 		 * If non-NULL, fdip is held and must be released.
1398 		 */
1399 		if (fdip != NULL) {
1400 			(void) ddi_pathname(fdip, path);
1401 			ddi_release_devi(fdip);
1402 		} else {
1403 			(void) ddi_pathname(dip, path);
1404 		}
1405 		cmn_err(CE_NOTE, "node removal failed: %s (%p)",
1406 		    path, (fdip) ? (void *)fdip : (void *)dip);
1407 
1408 		kmem_free(path, MAXPATHLEN);
1409 
1410 		return (-1);
1411 	}
1412 
1413 	DR_DBG_CPU("CPU %d deprobed\n", cpuid);
1414 
1415 	return (0);
1416 }
1417 
1418 typedef struct {
1419 	processorid_t	cpuid;
1420 	dev_info_t	*dip;
1421 } dr_search_arg_t;
1422 
1423 static int
1424 dr_cpu_check_node(dev_info_t *dip, void *arg)
1425 {
1426 	char 		*name;
1427 	processorid_t	cpuid;
1428 	dr_search_arg_t	*sarg = (dr_search_arg_t *)arg;
1429 
1430 	if (dip == ddi_root_node()) {
1431 		return (DDI_WALK_CONTINUE);
1432 	}
1433 
1434 	name = ddi_node_name(dip);
1435 
1436 	if (strcmp(name, "cpu") != 0) {
1437 		return (DDI_WALK_PRUNECHILD);
1438 	}
1439 
1440 	cpuid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1441 	    "reg", -1);
1442 
1443 	cpuid = PROM_CFGHDL_TO_CPUID(cpuid);
1444 
1445 	DR_DBG_CPU("found cpuid=0x%x, looking for 0x%x\n", cpuid, sarg->cpuid);
1446 
1447 	if (cpuid == sarg->cpuid) {
1448 		DR_DBG_CPU("matching node\n");
1449 
1450 		/* matching node must be returned held */
1451 		if (!e_ddi_branch_held(dip))
1452 			e_ddi_branch_hold(dip);
1453 
1454 		sarg->dip = dip;
1455 		return (DDI_WALK_TERMINATE);
1456 	}
1457 
1458 	return (DDI_WALK_CONTINUE);
1459 }
1460 
1461 /*
1462  * Walk the device tree to find the dip corresponding to the cpuid
1463  * passed in. If present, the dip is returned held. The caller must
1464  * release the hold on the dip once it is no longer required. If no
1465  * matching node if found, NULL is returned.
1466  */
1467 static dev_info_t *
1468 dr_cpu_find_node(processorid_t cpuid)
1469 {
1470 	dr_search_arg_t	arg;
1471 
1472 	DR_DBG_CPU("dr_cpu_find_node...\n");
1473 
1474 	arg.cpuid = cpuid;
1475 	arg.dip = NULL;
1476 
1477 	ddi_walk_devs(ddi_root_node(), dr_cpu_check_node, &arg);
1478 
1479 	ASSERT((arg.dip == NULL) || (e_ddi_branch_held(arg.dip)));
1480 
1481 	return ((arg.dip) ? arg.dip : NULL);
1482 }
1483 
1484 /*
1485  * Look up a particular cpuid in the MD. Returns the mde_cookie_t
1486  * representing that CPU if present, and MDE_INVAL_ELEM_COOKIE
1487  * otherwise. It is assumed the scratch array has already been
1488  * allocated so that it can accommodate the worst case scenario,
1489  * every node in the MD.
1490  */
1491 static mde_cookie_t
1492 dr_cpu_find_node_md(processorid_t cpuid, md_t *mdp, mde_cookie_t *listp)
1493 {
1494 	int		idx;
1495 	int		nnodes;
1496 	mde_cookie_t	rootnode;
1497 	uint64_t	cpuid_prop;
1498 	mde_cookie_t	result = MDE_INVAL_ELEM_COOKIE;
1499 
1500 	rootnode = md_root_node(mdp);
1501 	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
1502 
1503 	/*
1504 	 * Scan the DAG for all the CPU nodes
1505 	 */
1506 	nnodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "cpu"),
1507 	    md_find_name(mdp, "fwd"), listp);
1508 
1509 	if (nnodes < 0) {
1510 		DR_DBG_CPU("Scan for CPUs failed\n");
1511 		return (result);
1512 	}
1513 
1514 	DR_DBG_CPU("dr_cpu_find_node_md: found %d CPUs in the MD\n", nnodes);
1515 
1516 	/*
1517 	 * Find the CPU of interest
1518 	 */
1519 	for (idx = 0; idx < nnodes; idx++) {
1520 
1521 		if (md_get_prop_val(mdp, listp[idx], "id", &cpuid_prop)) {
1522 			DR_DBG_CPU("Missing 'id' property for CPU node %d\n",
1523 			    idx);
1524 			break;
1525 		}
1526 
1527 		if (cpuid_prop == cpuid) {
1528 			/* found a match */
1529 			DR_DBG_CPU("dr_cpu_find_node_md: found CPU %d "
1530 			    "in MD\n", cpuid);
1531 			result = listp[idx];
1532 			break;
1533 		}
1534 	}
1535 
1536 	if (result == MDE_INVAL_ELEM_COOKIE) {
1537 		DR_DBG_CPU("CPU %d not in MD\n", cpuid);
1538 	}
1539 
1540 	return (result);
1541 }
1542