xref: /titanic_52/usr/src/uts/sun4v/io/dr_cpu.c (revision c1c61f44e88f4c8c155272ee56d868043146096a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v CPU DR Module
31  */
32 
33 #include <sys/modctl.h>
34 #include <sys/processor.h>
35 #include <sys/cpuvar.h>
36 #include <sys/cpupart.h>
37 #include <sys/sunddi.h>
38 #include <sys/sunndi.h>
39 #include <sys/note.h>
40 #include <sys/sysevent/dr.h>
41 #include <sys/hypervisor_api.h>
42 #include <sys/mach_descrip.h>
43 #include <sys/mdesc.h>
44 #include <sys/ds.h>
45 #include <sys/drctl.h>
46 #include <sys/dr_util.h>
47 #include <sys/dr_cpu.h>
48 #include <sys/promif.h>
49 #include <sys/machsystm.h>
50 
51 
/*
 * Module linkage information. This is a "misc" module: it is described
 * to the module framework through mod_miscops and a single linkage
 * entry, which _init(), _info() and _fini() hand to mod_install(),
 * mod_info() and mod_remove() respectively.
 */
static struct modlmisc modlmisc = {
	&mod_miscops,
	"sun4v CPU DR %I%"
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlmisc,
	NULL
};
62 
63 typedef int (*fn_t)(processorid_t, int *, boolean_t);
64 
65 /*
66  * Global DS Handle
67  */
68 static ds_svc_hdl_t ds_handle;
69 
70 /*
71  * Supported DS Capability Versions
72  */
73 static ds_ver_t		dr_cpu_vers[] = { { 1, 0 } };
74 #define	DR_CPU_NVERS	(sizeof (dr_cpu_vers) / sizeof (dr_cpu_vers[0]))
75 
76 /*
77  * DS Capability Description
78  */
79 static ds_capability_t dr_cpu_cap = {
80 	DR_CPU_DS_ID,		/* svc_id */
81 	dr_cpu_vers,		/* vers */
82 	DR_CPU_NVERS		/* nvers */
83 };
84 
85 /*
86  * DS Callbacks
87  */
88 static void dr_cpu_reg_handler(ds_cb_arg_t, ds_ver_t *, ds_svc_hdl_t);
89 static void dr_cpu_unreg_handler(ds_cb_arg_t arg);
90 static void dr_cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);
91 
92 /*
93  * DS Client Ops Vector
94  */
95 static ds_clnt_ops_t dr_cpu_ops = {
96 	dr_cpu_reg_handler,	/* ds_reg_cb */
97 	dr_cpu_unreg_handler,	/* ds_unreg_cb */
98 	dr_cpu_data_handler,	/* ds_data_cb */
99 	NULL			/* cb_arg */
100 };
101 
102 /*
103  * Operation Results
104  *
105  * Used internally to gather results while an operation on a
106  * list of CPUs is in progress. In particular, it is used to
107  * keep track of which CPUs have already failed so that they are
108  * not processed further, and the manner in which they failed.
109  */
typedef struct {
	uint32_t	cpuid;		/* id of the CPU this entry describes */
	uint32_t	result;		/* DR_CPU_RES_* outcome for this CPU */
	uint32_t	status;		/* DR_CPU_STAT_* state of this CPU */
	/*
	 * Optional NUL-terminated error string, or NULL if there is none.
	 * It is allocated with kmem_alloc() using a size of strlen() + 1
	 * and released by dr_cpu_res_array_fini().
	 */
	char		*string;
} dr_cpu_res_t;
116 
117 #define	DR_CPU_MAX_ERR_LEN	64	/* maximum error string length */
118 
119 /*
120  * Internal Functions
121  */
122 static int dr_cpu_init(void);
123 static int dr_cpu_fini(void);
124 
125 static int dr_cpu_list_wrk(dr_cpu_hdr_t *, dr_cpu_hdr_t **, int *);
126 static int dr_cpu_list_status(dr_cpu_hdr_t *, dr_cpu_hdr_t **, int *);
127 
128 static int dr_cpu_unconfigure(processorid_t, int *status, boolean_t force);
129 static int dr_cpu_configure(processorid_t, int *status, boolean_t force);
130 static int dr_cpu_status(processorid_t, int *status);
131 
132 static void dr_cpu_check_cpus(dr_cpu_hdr_t *req, dr_cpu_res_t *res);
133 static void dr_cpu_check_psrset(uint32_t *cpuids, dr_cpu_res_t *res, int nres);
134 static int dr_cpu_check_bound_thr(cpu_t *cp, dr_cpu_res_t *res);
135 
136 static dr_cpu_res_t *dr_cpu_res_array_init(dr_cpu_hdr_t *, drctl_rsrc_t *, int);
137 static void dr_cpu_res_array_fini(dr_cpu_res_t *res, int nres);
138 static size_t dr_cpu_pack_response(dr_cpu_hdr_t *req, dr_cpu_res_t *res,
139     dr_cpu_hdr_t **respp);
140 
141 static int dr_cpu_probe(processorid_t newcpuid);
142 static int dr_cpu_deprobe(processorid_t cpuid);
143 
144 static dev_info_t *dr_cpu_find_node(processorid_t cpuid);
145 static mde_cookie_t dr_cpu_find_node_md(processorid_t, md_t *, mde_cookie_t *);
146 
147 int
148 _init(void)
149 {
150 	int	status;
151 
152 	/* check that CPU DR is enabled */
153 	if (dr_is_disabled(DR_TYPE_CPU)) {
154 		cmn_err(CE_CONT, "!CPU DR is disabled\n");
155 		return (-1);
156 	}
157 
158 	if ((status = dr_cpu_init()) != 0) {
159 		cmn_err(CE_NOTE, "CPU DR initialization failed");
160 		return (status);
161 	}
162 
163 	if ((status = mod_install(&modlinkage)) != 0) {
164 		(void) dr_cpu_fini();
165 	}
166 
167 	return (status);
168 }
169 
170 int
171 _info(struct modinfo *modinfop)
172 {
173 	return (mod_info(&modlinkage, modinfop));
174 }
175 
176 int dr_cpu_allow_unload;
177 
178 int
179 _fini(void)
180 {
181 	int	status;
182 
183 	if (dr_cpu_allow_unload == 0)
184 		return (EBUSY);
185 
186 	if ((status = mod_remove(&modlinkage)) == 0) {
187 		(void) dr_cpu_fini();
188 	}
189 
190 	return (status);
191 }
192 
193 static int
194 dr_cpu_init(void)
195 {
196 	int	rv;
197 
198 	if ((rv = ds_cap_init(&dr_cpu_cap, &dr_cpu_ops)) != 0) {
199 		cmn_err(CE_NOTE, "ds_cap_init failed: %d", rv);
200 		return (-1);
201 	}
202 
203 	return (0);
204 }
205 
206 static int
207 dr_cpu_fini(void)
208 {
209 	int	rv;
210 
211 	if ((rv = ds_cap_fini(&dr_cpu_cap)) != 0) {
212 		cmn_err(CE_NOTE, "ds_cap_fini failed: %d", rv);
213 		return (-1);
214 	}
215 
216 	return (0);
217 }
218 
219 static void
220 dr_cpu_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl)
221 {
222 	DR_DBG_CPU("reg_handler: arg=0x%p, ver=%d.%d, hdl=0x%lx\n", arg,
223 	    ver->major, ver->minor, hdl);
224 
225 	ds_handle = hdl;
226 }
227 
228 static void
229 dr_cpu_unreg_handler(ds_cb_arg_t arg)
230 {
231 	DR_DBG_CPU("unreg_handler: arg=0x%p\n", arg);
232 
233 	ds_handle = DS_INVALID_HDL;
234 }
235 
236 static void
237 dr_cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
238 {
239 	_NOTE(ARGUNUSED(arg))
240 
241 	dr_cpu_hdr_t	*req = buf;
242 	dr_cpu_hdr_t	err_resp;
243 	dr_cpu_hdr_t	*resp = &err_resp;
244 	int		resp_len = 0;
245 	int		rv;
246 
247 	/*
248 	 * Sanity check the message
249 	 */
250 	if (buflen < sizeof (dr_cpu_hdr_t)) {
251 		DR_DBG_CPU("incoming message short: expected at least %ld "
252 		    "bytes, received %ld\n", sizeof (dr_cpu_hdr_t), buflen);
253 		goto done;
254 	}
255 
256 	if (req == NULL) {
257 		DR_DBG_CPU("empty message: expected at least %ld bytes\n",
258 		    sizeof (dr_cpu_hdr_t));
259 		goto done;
260 	}
261 
262 	DR_DBG_CPU("incoming request:\n");
263 	DR_DBG_DUMP_MSG(buf, buflen);
264 
265 	if (req->num_records > NCPU) {
266 		DR_DBG_CPU("CPU list too long: %d when %d is the maximum\n",
267 		    req->num_records, NCPU);
268 		goto done;
269 	}
270 
271 	if (req->num_records == 0) {
272 		DR_DBG_CPU("No CPU specified for operation\n");
273 		goto done;
274 	}
275 
276 	/*
277 	 * Process the command
278 	 */
279 	switch (req->msg_type) {
280 	case DR_CPU_CONFIGURE:
281 	case DR_CPU_UNCONFIGURE:
282 	case DR_CPU_FORCE_UNCONFIG:
283 		if ((rv = dr_cpu_list_wrk(req, &resp, &resp_len)) != 0) {
284 			DR_DBG_CPU("%s%s failed (%d)\n",
285 			    (req->msg_type == DR_CPU_CONFIGURE) ?
286 			    "CPU configure" : "CPU unconfigure",
287 			    (req->msg_type == DR_CPU_FORCE_UNCONFIG) ?
288 			    " (forced)" : "", rv);
289 		}
290 		break;
291 
292 	case DR_CPU_STATUS:
293 		if ((rv = dr_cpu_list_status(req, &resp, &resp_len)) != 0)
294 			DR_DBG_CPU("CPU status failed (%d)\n", rv);
295 		break;
296 
297 	default:
298 		cmn_err(CE_NOTE, "unsupported DR operation (%d)",
299 		    req->msg_type);
300 		break;
301 	}
302 
303 done:
304 	/* check if an error occurred */
305 	if (resp == &err_resp) {
306 		resp->req_num = (req) ? req->req_num : 0;
307 		resp->msg_type = DR_CPU_ERROR;
308 		resp->num_records = 0;
309 		resp_len = sizeof (dr_cpu_hdr_t);
310 	}
311 
312 	DR_DBG_CPU("outgoing response:\n");
313 	DR_DBG_DUMP_MSG(resp, resp_len);
314 
315 	/* send back the response */
316 	if (ds_cap_send(ds_handle, resp, resp_len) != 0) {
317 		DR_DBG_CPU("ds_send failed\n");
318 	}
319 
320 	/* free any allocated memory */
321 	if (resp != &err_resp) {
322 		kmem_free(resp, resp_len);
323 	}
324 }
325 
326 /*
327  * Common routine to config or unconfig multiple cpus.  The unconfig
328  * case checks with the OS to see if the removal of cpus will be
329  * permitted, but can be overridden by the "force" version of the
330  * command.  Otherwise, the logic for both cases is identical.
331  *
332  * Note: Do not modify result buffer or length on error.
333  */
334 static int
335 dr_cpu_list_wrk(dr_cpu_hdr_t *req, dr_cpu_hdr_t **resp, int *resp_len)
336 {
337 	int		rv;
338 	int		idx;
339 	int		count;
340 	fn_t		dr_fn;
341 	int		se_hint;
342 	boolean_t	force = B_FALSE;
343 	uint32_t	*req_cpus;
344 	dr_cpu_res_t	*res;
345 	int		drctl_cmd;
346 	int		drctl_flags = 0;
347 	drctl_rsrc_t	*drctl_req;
348 	size_t		drctl_req_len;
349 	drctl_rsrc_t	*drctl_res;
350 	size_t		drctl_res_len = 0;
351 	drctl_cookie_t	drctl_res_ck;
352 
353 	static const char me[] = "dr_cpu_list_wrk";
354 
355 	ASSERT((req != NULL) && (req->num_records != 0));
356 
357 	count = req->num_records;
358 
359 	/*
360 	 * Extract all information that is specific
361 	 * to the various types of operations.
362 	 */
363 	switch (req->msg_type) {
364 	case DR_CPU_CONFIGURE:
365 		dr_fn = dr_cpu_configure;
366 		drctl_cmd = DRCTL_CPU_CONFIG_REQUEST;
367 		se_hint = SE_HINT_INSERT;
368 		break;
369 	case DR_CPU_FORCE_UNCONFIG:
370 		drctl_flags = DRCTL_FLAG_FORCE;
371 		force = B_TRUE;
372 		_NOTE(FALLTHROUGH)
373 	case DR_CPU_UNCONFIGURE:
374 		dr_fn = dr_cpu_unconfigure;
375 		drctl_cmd = DRCTL_CPU_UNCONFIG_REQUEST;
376 		se_hint = SE_HINT_REMOVE;
377 		break;
378 	default:
379 		/* Programming error if we reach this. */
380 		cmn_err(CE_NOTE, "%s: bad msg_type %d\n", me, req->msg_type);
381 		ASSERT(0);
382 		return (-1);
383 	}
384 
385 	/* the incoming array of cpuids to operate on */
386 	req_cpus = DR_CPU_CMD_CPUIDS(req);
387 
388 	/* allocate drctl request msg based on incoming resource count */
389 	drctl_req_len = sizeof (drctl_rsrc_t) * count;
390 	drctl_req = kmem_zalloc(drctl_req_len, KM_SLEEP);
391 
392 	/* copy the cpuids for the drctl call from the incoming request msg */
393 	for (idx = 0; idx < count; idx++)
394 		drctl_req[idx].res_cpu_id = req_cpus[idx];
395 
396 	if ((rv = drctl_config_init(drctl_cmd, drctl_flags, drctl_req,
397 	    count, &drctl_res, &drctl_res_len, &drctl_res_ck)) != 0) {
398 		DR_DBG_CPU("%s: drctl_config_init returned: %d\n", me, rv);
399 		kmem_free(drctl_req, drctl_req_len);
400 		return (-1);
401 	}
402 
403 	ASSERT((drctl_res != NULL) && (drctl_res_len != 0));
404 
405 	/* create the result scratch array */
406 	res = dr_cpu_res_array_init(req, drctl_res, count);
407 
408 	/*
409 	 * For unconfigure, check if there are any conditions
410 	 * that will cause the operation to fail. These are
411 	 * performed before the actual unconfigure attempt so
412 	 * that a meaningful error message can be generated.
413 	 */
414 	if (req->msg_type != DR_CPU_CONFIGURE)
415 		dr_cpu_check_cpus(req, res);
416 
417 	/* perform the specified operation on each of the CPUs */
418 	for (idx = 0; idx < count; idx++) {
419 		int result;
420 		int status;
421 
422 		/*
423 		 * If no action will be taken against the current
424 		 * CPU, update the drctl resource information to
425 		 * ensure that it gets recovered properly during
426 		 * the drctl fini() call.
427 		 */
428 		if (res[idx].result != DR_CPU_RES_OK) {
429 			drctl_req[idx].status = DRCTL_STATUS_CONFIG_FAILURE;
430 			continue;
431 		}
432 
433 		/* call the function to perform the actual operation */
434 		result = (*dr_fn)(req_cpus[idx], &status, force);
435 
436 		/* save off results of the operation */
437 		res[idx].result = result;
438 		res[idx].status = status;
439 
440 		/* save result for drctl fini() reusing init() msg memory */
441 		drctl_req[idx].status = (result != DR_CPU_RES_OK) ?
442 		    DRCTL_STATUS_CONFIG_FAILURE : DRCTL_STATUS_CONFIG_SUCCESS;
443 
444 		DR_DBG_CPU("%s: cpuid %d status %d result %d off '%s'\n",
445 		    me, req_cpus[idx], drctl_req[idx].status, result,
446 		    (res[idx].string) ? res[idx].string : "");
447 	}
448 
449 	if ((rv = drctl_config_fini(&drctl_res_ck, drctl_req, count)) != 0)
450 		DR_DBG_CPU("%s: drctl_config_fini returned: %d\n", me, rv);
451 
452 	/*
453 	 * Operation completed without any fatal errors.
454 	 * Pack the response for transmission.
455 	 */
456 	*resp_len = dr_cpu_pack_response(req, res, resp);
457 
458 	/* notify interested parties about the operation */
459 	dr_generate_event(DR_TYPE_CPU, se_hint);
460 
461 	/*
462 	 * Deallocate any scratch memory.
463 	 */
464 	kmem_free(drctl_res, drctl_res_len);
465 	kmem_free(drctl_req, drctl_req_len);
466 
467 	dr_cpu_res_array_fini(res, count);
468 
469 	return (0);
470 }
471 
472 /*
473  * Allocate and initialize a result array based on the initial
474  * drctl operation. A valid result array is always returned.
475  */
476 static dr_cpu_res_t *
477 dr_cpu_res_array_init(dr_cpu_hdr_t *req, drctl_rsrc_t *rsrc, int nrsrc)
478 {
479 	int		idx;
480 	dr_cpu_res_t	*res;
481 	char		*err_str;
482 	size_t		err_len;
483 
484 	/* allocate zero filled buffer to initialize fields */
485 	res = kmem_zalloc(nrsrc * sizeof (dr_cpu_res_t), KM_SLEEP);
486 
487 	/*
488 	 * Fill in the result information for each resource.
489 	 */
490 	for (idx = 0; idx < nrsrc; idx++) {
491 		res[idx].cpuid = rsrc[idx].res_cpu_id;
492 		res[idx].result = DR_CPU_RES_OK;
493 
494 		if (rsrc[idx].status == DRCTL_STATUS_ALLOW)
495 			continue;
496 
497 		/*
498 		 * Update the state information for this CPU.
499 		 */
500 		res[idx].result = DR_CPU_RES_BLOCKED;
501 		res[idx].status = (req->msg_type == DR_CPU_CONFIGURE) ?
502 		    DR_CPU_STAT_UNCONFIGURED : DR_CPU_STAT_CONFIGURED;
503 
504 		/*
505 		 * If an error string exists, copy it out of the
506 		 * message buffer. This eliminates any dependency
507 		 * on the memory allocated for the message buffer
508 		 * itself.
509 		 */
510 		if (rsrc[idx].offset != NULL) {
511 			err_str = (char *)rsrc + rsrc[idx].offset;
512 			err_len = strlen(err_str) + 1;
513 
514 			res[idx].string = kmem_alloc(err_len, KM_SLEEP);
515 			bcopy(err_str, res[idx].string, err_len);
516 		}
517 	}
518 
519 	return (res);
520 }
521 
522 static void
523 dr_cpu_res_array_fini(dr_cpu_res_t *res, int nres)
524 {
525 	int	idx;
526 	size_t	str_len;
527 
528 	for (idx = 0; idx < nres; idx++) {
529 		/* deallocate the error string if present */
530 		if (res[idx].string) {
531 			str_len = strlen(res[idx].string) + 1;
532 			kmem_free(res[idx].string, str_len);
533 		}
534 	}
535 
536 	/* deallocate the result array itself */
537 	kmem_free(res, sizeof (dr_cpu_res_t) * nres);
538 }
539 
540 /*
541  * Allocate and pack a response message for transmission based
542  * on the specified result array. A valid response message and
543  * valid size information is always returned.
544  */
545 static size_t
546 dr_cpu_pack_response(dr_cpu_hdr_t *req, dr_cpu_res_t *res, dr_cpu_hdr_t **respp)
547 {
548 	int		idx;
549 	dr_cpu_hdr_t	*resp;
550 	dr_cpu_stat_t	*resp_stat;
551 	size_t		resp_len;
552 	uint32_t	curr_off;
553 	caddr_t		curr_str;
554 	size_t		str_len;
555 	size_t		stat_len;
556 	int		nstat = req->num_records;
557 
558 	/*
559 	 * Calculate the size of the response message
560 	 * and allocate an appropriately sized buffer.
561 	 */
562 	resp_len = 0;
563 
564 	/* add the header size */
565 	resp_len += sizeof (dr_cpu_hdr_t);
566 
567 	/* add the stat array size */
568 	stat_len = sizeof (dr_cpu_stat_t) * nstat;
569 	resp_len += stat_len;
570 
571 	/* add the size of any error strings */
572 	for (idx = 0; idx < nstat; idx++) {
573 		if (res[idx].string != NULL) {
574 			resp_len += strlen(res[idx].string) + 1;
575 		}
576 	}
577 
578 	/* allocate the message buffer */
579 	resp = kmem_zalloc(resp_len, KM_SLEEP);
580 
581 	/*
582 	 * Fill in the header information.
583 	 */
584 	resp->req_num = req->req_num;
585 	resp->msg_type = DR_CPU_OK;
586 	resp->num_records = nstat;
587 
588 	/*
589 	 * Fill in the stat information.
590 	 */
591 	resp_stat = DR_CPU_RESP_STATS(resp);
592 
593 	/* string offsets start immediately after stat array */
594 	curr_off = sizeof (dr_cpu_hdr_t) + stat_len;
595 	curr_str = (char *)resp_stat + stat_len;
596 
597 	for (idx = 0; idx < nstat; idx++) {
598 		resp_stat[idx].cpuid = res[idx].cpuid;
599 		resp_stat[idx].result = res[idx].result;
600 		resp_stat[idx].status = res[idx].status;
601 
602 		if (res[idx].string != NULL) {
603 			/* copy over the error string */
604 			str_len = strlen(res[idx].string) + 1;
605 			bcopy(res[idx].string, curr_str, str_len);
606 			resp_stat[idx].string_off = curr_off;
607 
608 			curr_off += str_len;
609 			curr_str += str_len;
610 		}
611 	}
612 
613 	/* buffer should be exactly filled */
614 	ASSERT(curr_off == resp_len);
615 
616 	*respp = resp;
617 	return (resp_len);
618 }
619 
620 /*
621  * Check for conditions that will prevent a CPU from being offlined.
622  * This provides the opportunity to generate useful information to
623  * help diagnose the failure rather than letting the offline attempt
624  * fail in a more generic way.
625  */
626 static void
627 dr_cpu_check_cpus(dr_cpu_hdr_t *req, dr_cpu_res_t *res)
628 {
629 	int		idx;
630 	cpu_t		*cp;
631 	uint32_t	*cpuids;
632 
633 	ASSERT((req->msg_type == DR_CPU_UNCONFIGURE) ||
634 	    (req->msg_type == DR_CPU_FORCE_UNCONFIG));
635 
636 	DR_DBG_CPU("dr_cpu_check_cpus...\n");
637 
638 	/* array of cpuids start just after the header */
639 	cpuids = DR_CPU_CMD_CPUIDS(req);
640 
641 	mutex_enter(&cpu_lock);
642 
643 	/*
644 	 * Always check processor set membership first. The
645 	 * last CPU in a processor set will fail to offline
646 	 * even if the operation if forced, so any failures
647 	 * should always be reported.
648 	 */
649 	dr_cpu_check_psrset(cpuids, res, req->num_records);
650 
651 	/* process each cpu that is part of the request */
652 	for (idx = 0; idx < req->num_records; idx++) {
653 
654 		/* nothing to check if the CPU has already failed */
655 		if (res[idx].result != DR_CPU_RES_OK)
656 			continue;
657 
658 		if ((cp = cpu_get(cpuids[idx])) == NULL)
659 			continue;
660 
661 		/*
662 		 * Only check if there are bound threads if the
663 		 * operation is not a forced unconfigure. In a
664 		 * forced request, threads are automatically
665 		 * unbound before they are offlined.
666 		 */
667 		if (req->msg_type == DR_CPU_UNCONFIGURE) {
668 			/*
669 			 * The return value is only interesting if other
670 			 * checks are added to this loop and a decision
671 			 * is needed on whether to continue checking.
672 			 */
673 			(void) dr_cpu_check_bound_thr(cp, &res[idx]);
674 		}
675 	}
676 
677 	mutex_exit(&cpu_lock);
678 }
679 
680 /*
681  * Examine the processor set configuration for the specified
682  * CPUs and see if the unconfigure operation would result in
683  * trying to remove the last CPU in any processor set.
684  */
685 static void
686 dr_cpu_check_psrset(uint32_t *cpuids, dr_cpu_res_t *res, int nres)
687 {
688 	int		cpu_idx;
689 	int		set_idx;
690 	cpu_t		*cp;
691 	cpupart_t	*cpp;
692 	char		err_str[DR_CPU_MAX_ERR_LEN];
693 	size_t		err_len;
694 	struct {
695 		cpupart_t	*cpp;
696 		int		ncpus;
697 	} *psrset;
698 
699 	ASSERT(MUTEX_HELD(&cpu_lock));
700 
701 	/*
702 	 * Allocate a scratch array to count the CPUs in
703 	 * the various processor sets. A CPU always belongs
704 	 * to exactly one processor set, so by definition,
705 	 * the scratch array never needs to be larger than
706 	 * the number of CPUs.
707 	 */
708 	psrset = kmem_zalloc(sizeof (*psrset) * nres, KM_SLEEP);
709 
710 	for (cpu_idx = 0; cpu_idx < nres; cpu_idx++) {
711 
712 		/* skip any CPUs that have already failed */
713 		if (res[cpu_idx].result != DR_CPU_RES_OK)
714 			continue;
715 
716 		if ((cp = cpu_get(cpuids[cpu_idx])) == NULL)
717 			continue;
718 
719 		cpp = cp->cpu_part;
720 
721 		/* lookup the set this CPU belongs to */
722 		for (set_idx = 0; set_idx < nres; set_idx++) {
723 
724 			/* matching set found */
725 			if (cpp == psrset[set_idx].cpp)
726 				break;
727 
728 			/* set not found, start a new entry */
729 			if (psrset[set_idx].cpp == NULL) {
730 				psrset[set_idx].cpp = cpp;
731 				psrset[set_idx].ncpus = cpp->cp_ncpus;
732 				break;
733 			}
734 		}
735 
736 		ASSERT(set_idx != nres);
737 
738 		/*
739 		 * Remove the current CPU from the set total but only
740 		 * generate an error for the last CPU. The correct CPU
741 		 * will get the error because the unconfigure attempts
742 		 * will occur in the same order in which the CPUs are
743 		 * examined in this loop.
744 		 */
745 		if (--psrset[set_idx].ncpus == 0) {
746 			/*
747 			 * Fill in the various pieces of information
748 			 * to report that the operation will fail.
749 			 */
750 			res[cpu_idx].result = DR_CPU_RES_BLOCKED;
751 			res[cpu_idx].status = DR_CPU_STAT_CONFIGURED;
752 
753 			(void) snprintf(err_str, DR_CPU_MAX_ERR_LEN,
754 			    "last online cpu in processor set %d", cpp->cp_id);
755 
756 			err_len = strlen(err_str) + 1;
757 
758 			res[cpu_idx].string = kmem_alloc(err_len, KM_SLEEP);
759 			bcopy(err_str, res[cpu_idx].string, err_len);
760 
761 			DR_DBG_CPU("cpu %d: %s\n", cpuids[cpu_idx], err_str);
762 		}
763 	}
764 
765 	kmem_free(psrset, sizeof (*psrset) * nres);
766 }
767 
768 /*
769  * Check if any threads are bound to the specified CPU. If the
770  * condition is true, DR_CPU_RES_BLOCKED is returned and an error
771  * string is generated and placed in the specified result structure.
772  * Otherwise, DR_CPU_RES_OK is returned.
773  */
774 static int
775 dr_cpu_check_bound_thr(cpu_t *cp, dr_cpu_res_t *res)
776 {
777 	int		nbound;
778 	proc_t		*pp;
779 	kthread_t	*tp;
780 	char		err_str[DR_CPU_MAX_ERR_LEN];
781 	size_t		err_len;
782 
783 	/*
784 	 * Error string allocation makes an assumption
785 	 * that no blocking condition has been identified.
786 	 */
787 	ASSERT(res->result == DR_CPU_RES_OK);
788 	ASSERT(res->string == NULL);
789 
790 	ASSERT(MUTEX_HELD(&cpu_lock));
791 
792 	mutex_enter(&pidlock);
793 
794 	nbound = 0;
795 
796 	/*
797 	 * Walk the active processes, checking if each
798 	 * thread belonging to the process is bound.
799 	 */
800 	for (pp = practive; (pp != NULL) && (nbound <= 1); pp = pp->p_next) {
801 		mutex_enter(&pp->p_lock);
802 
803 		tp = pp->p_tlist;
804 
805 		if ((tp == NULL) || (pp->p_flag & SSYS)) {
806 			mutex_exit(&pp->p_lock);
807 			continue;
808 		}
809 
810 		do {
811 			if (tp->t_bind_cpu != cp->cpu_id)
812 				continue;
813 
814 			/*
815 			 * Update the running total of bound
816 			 * threads. Continue the search until
817 			 * it can be determined if more than
818 			 * one thread is bound to the CPU.
819 			 */
820 			if (++nbound > 1)
821 				break;
822 
823 		} while ((tp = tp->t_forw) != pp->p_tlist);
824 
825 		mutex_exit(&pp->p_lock);
826 	}
827 
828 	mutex_exit(&pidlock);
829 
830 	if (nbound) {
831 		/*
832 		 * Threads are bound to the CPU. Fill in
833 		 * various pieces of information to report
834 		 * that the operation will fail.
835 		 */
836 		res->result = DR_CPU_RES_BLOCKED;
837 		res->status = DR_CPU_STAT_CONFIGURED;
838 
839 		(void) snprintf(err_str, DR_CPU_MAX_ERR_LEN, "cpu has bound "
840 		    "thread%s", (nbound > 1) ? "s" : "");
841 
842 		err_len = strlen(err_str) + 1;
843 
844 		res->string = kmem_alloc(err_len, KM_SLEEP);
845 		bcopy(err_str, res->string, err_len);
846 
847 		DR_DBG_CPU("cpu %d: %s\n", cp->cpu_id, err_str);
848 	}
849 
850 	return (res->result);
851 }
852 
853 /*
854  * Do not modify result buffer or length on error.
855  */
856 static int
857 dr_cpu_list_status(dr_cpu_hdr_t *req, dr_cpu_hdr_t **resp, int *resp_len)
858 {
859 	int		idx;
860 	int		result;
861 	int		status;
862 	int		rlen;
863 	uint32_t	*cpuids;
864 	dr_cpu_hdr_t	*rp;
865 	dr_cpu_stat_t	*stat;
866 	md_t		*mdp = NULL;
867 	int		num_nodes;
868 	int		listsz;
869 	mde_cookie_t	*listp = NULL;
870 	mde_cookie_t	cpunode;
871 	boolean_t	walk_md = B_FALSE;
872 
873 	/* the incoming array of cpuids to configure */
874 	cpuids = DR_CPU_CMD_CPUIDS(req);
875 
876 	/* allocate a response message */
877 	rlen = sizeof (dr_cpu_hdr_t);
878 	rlen += req->num_records * sizeof (dr_cpu_stat_t);
879 	rp = kmem_zalloc(rlen, KM_SLEEP);
880 
881 	/* fill in the known data */
882 	rp->req_num = req->req_num;
883 	rp->msg_type = DR_CPU_STATUS;
884 	rp->num_records = req->num_records;
885 
886 	/* stat array for the response */
887 	stat = DR_CPU_RESP_STATS(rp);
888 
889 	/* get the status for each of the CPUs */
890 	for (idx = 0; idx < req->num_records; idx++) {
891 
892 		result = dr_cpu_status(cpuids[idx], &status);
893 
894 		if (result == DR_CPU_RES_FAILURE)
895 			walk_md = B_TRUE;
896 
897 		/* save off results of the status */
898 		stat[idx].cpuid = cpuids[idx];
899 		stat[idx].result = result;
900 		stat[idx].status = status;
901 	}
902 
903 	if (walk_md == B_FALSE)
904 		goto done;
905 
906 	/*
907 	 * At least one of the cpus did not have a CPU
908 	 * structure. So, consult the MD to determine if
909 	 * they are present.
910 	 */
911 
912 	if ((mdp = md_get_handle()) == NULL) {
913 		DR_DBG_CPU("unable to initialize MD\n");
914 		goto done;
915 	}
916 
917 	num_nodes = md_node_count(mdp);
918 	ASSERT(num_nodes > 0);
919 
920 	listsz = num_nodes * sizeof (mde_cookie_t);
921 	listp = kmem_zalloc(listsz, KM_SLEEP);
922 
923 	for (idx = 0; idx < req->num_records; idx++) {
924 
925 		if (stat[idx].result != DR_CPU_RES_FAILURE)
926 			continue;
927 
928 		/* check the MD for the current cpuid */
929 		cpunode = dr_cpu_find_node_md(stat[idx].cpuid, mdp, listp);
930 
931 		stat[idx].result = DR_CPU_RES_OK;
932 
933 		if (cpunode == MDE_INVAL_ELEM_COOKIE) {
934 			stat[idx].status = DR_CPU_STAT_NOT_PRESENT;
935 		} else {
936 			stat[idx].status = DR_CPU_STAT_UNCONFIGURED;
937 		}
938 	}
939 
940 	kmem_free(listp, listsz);
941 
942 	(void) md_fini_handle(mdp);
943 
944 done:
945 	*resp = rp;
946 	*resp_len = rlen;
947 
948 	return (0);
949 }
950 
951 static int
952 dr_cpu_configure(processorid_t cpuid, int *status, boolean_t force)
953 {
954 	 _NOTE(ARGUNUSED(force))
955 	struct cpu	*cp;
956 	int		rv = 0;
957 
958 	DR_DBG_CPU("dr_cpu_configure...\n");
959 
960 	/*
961 	 * Build device tree node for the CPU
962 	 */
963 	if ((rv = dr_cpu_probe(cpuid)) != 0) {
964 		DR_DBG_CPU("failed to probe CPU %d (%d)\n", cpuid, rv);
965 		if (rv == EINVAL) {
966 			*status = DR_CPU_STAT_NOT_PRESENT;
967 			return (DR_CPU_RES_NOT_IN_MD);
968 		}
969 		*status = DR_CPU_STAT_UNCONFIGURED;
970 		return (DR_CPU_RES_FAILURE);
971 	}
972 
973 	mutex_enter(&cpu_lock);
974 
975 	/*
976 	 * Configure the CPU
977 	 */
978 	if ((cp = cpu_get(cpuid)) == NULL) {
979 
980 		if ((rv = cpu_configure(cpuid)) != 0) {
981 			DR_DBG_CPU("failed to configure CPU %d (%d)\n",
982 			    cpuid, rv);
983 			rv = DR_CPU_RES_FAILURE;
984 			*status = DR_CPU_STAT_UNCONFIGURED;
985 			goto done;
986 		}
987 
988 		DR_DBG_CPU("CPU %d configured\n", cpuid);
989 
990 		/* CPU struct should exist now */
991 		cp = cpu_get(cpuid);
992 	}
993 
994 	ASSERT(cp);
995 
996 	/*
997 	 * Power on the CPU. In sun4v, this brings the stopped
998 	 * CPU into the guest from the Hypervisor.
999 	 */
1000 	if (cpu_is_poweredoff(cp)) {
1001 
1002 		if ((rv = cpu_poweron(cp)) != 0) {
1003 			DR_DBG_CPU("failed to power on CPU %d (%d)\n",
1004 			    cpuid, rv);
1005 			rv = DR_CPU_RES_FAILURE;
1006 			*status = DR_CPU_STAT_UNCONFIGURED;
1007 			goto done;
1008 		}
1009 
1010 		DR_DBG_CPU("CPU %d powered on\n", cpuid);
1011 	}
1012 
1013 	/*
1014 	 * Online the CPU
1015 	 */
1016 	if (cpu_is_offline(cp)) {
1017 
1018 		if ((rv = cpu_online(cp)) != 0) {
1019 			DR_DBG_CPU("failed to online CPU %d (%d)\n",
1020 			    cpuid, rv);
1021 			rv = DR_CPU_RES_FAILURE;
1022 			/* offline is still configured */
1023 			*status = DR_CPU_STAT_CONFIGURED;
1024 			goto done;
1025 		}
1026 
1027 		DR_DBG_CPU("CPU %d online\n", cpuid);
1028 	}
1029 
1030 	rv = DR_CPU_RES_OK;
1031 	*status = DR_CPU_STAT_CONFIGURED;
1032 
1033 done:
1034 	mutex_exit(&cpu_lock);
1035 
1036 	return (rv);
1037 }
1038 
1039 static int
1040 dr_cpu_unconfigure(processorid_t cpuid, int *status, boolean_t force)
1041 {
1042 	struct cpu	*cp;
1043 	int		rv = 0;
1044 	int		cpu_flags;
1045 
1046 	DR_DBG_CPU("dr_cpu_unconfigure%s...\n", (force) ? " (force)" : "");
1047 
1048 	mutex_enter(&cpu_lock);
1049 
1050 	cp = cpu_get(cpuid);
1051 
1052 	if (cp == NULL) {
1053 
1054 		/*
1055 		 * The OS CPU structures are already torn down,
1056 		 * Attempt to deprobe the CPU to make sure the
1057 		 * device tree is up to date.
1058 		 */
1059 		if (dr_cpu_deprobe(cpuid) != 0) {
1060 			DR_DBG_CPU("failed to deprobe CPU %d\n", cpuid);
1061 			rv = DR_CPU_RES_FAILURE;
1062 			*status = DR_CPU_STAT_UNCONFIGURED;
1063 			goto done;
1064 		}
1065 
1066 		goto done;
1067 	}
1068 
1069 	ASSERT(cp->cpu_id == cpuid);
1070 
1071 	/*
1072 	 * Offline the CPU
1073 	 */
1074 	if (cpu_is_active(cp)) {
1075 
1076 		/* set the force flag correctly */
1077 		cpu_flags = (force) ? CPU_FORCED : 0;
1078 
1079 		if ((rv = cpu_offline(cp, cpu_flags)) != 0) {
1080 			DR_DBG_CPU("failed to offline CPU %d (%d)\n",
1081 			    cpuid, rv);
1082 
1083 			rv = DR_CPU_RES_FAILURE;
1084 			*status = DR_CPU_STAT_CONFIGURED;
1085 			goto done;
1086 		}
1087 
1088 		DR_DBG_CPU("CPU %d offline\n", cpuid);
1089 	}
1090 
1091 	/*
1092 	 * Power off the CPU. In sun4v, this puts the running
1093 	 * CPU into the stopped state in the Hypervisor.
1094 	 */
1095 	if (!cpu_is_poweredoff(cp)) {
1096 
1097 		if ((rv = cpu_poweroff(cp)) != 0) {
1098 			DR_DBG_CPU("failed to power off CPU %d (%d)\n",
1099 			    cpuid, rv);
1100 			rv = DR_CPU_RES_FAILURE;
1101 			*status = DR_CPU_STAT_CONFIGURED;
1102 			goto done;
1103 		}
1104 
1105 		DR_DBG_CPU("CPU %d powered off\n", cpuid);
1106 	}
1107 
1108 	/*
1109 	 * Unconfigure the CPU
1110 	 */
1111 	if ((rv = cpu_unconfigure(cpuid)) != 0) {
1112 		DR_DBG_CPU("failed to unconfigure CPU %d (%d)\n", cpuid, rv);
1113 		rv = DR_CPU_RES_FAILURE;
1114 		*status = DR_CPU_STAT_UNCONFIGURED;
1115 		goto done;
1116 	}
1117 
1118 	DR_DBG_CPU("CPU %d unconfigured\n", cpuid);
1119 
1120 	/*
1121 	 * Tear down device tree.
1122 	 */
1123 	if ((rv = dr_cpu_deprobe(cpuid)) != 0) {
1124 		DR_DBG_CPU("failed to deprobe CPU %d (%d)\n", cpuid, rv);
1125 		rv = DR_CPU_RES_FAILURE;
1126 		*status = DR_CPU_STAT_UNCONFIGURED;
1127 		goto done;
1128 	}
1129 
1130 	rv = DR_CPU_RES_OK;
1131 	*status = DR_CPU_STAT_UNCONFIGURED;
1132 
1133 done:
1134 	mutex_exit(&cpu_lock);
1135 
1136 	return (rv);
1137 }
1138 
1139 /*
1140  * Determine the state of a CPU. If the CPU structure is not present,
1141  * it does not attempt to determine whether or not the CPU is in the
1142  * MD. It is more efficient to do this at the higher level for all
1143  * CPUs since it may not even be necessary to search the MD if all
1144  * the CPUs are accounted for. Returns DR_CPU_RES_OK if the CPU
1145  * structure is present, and DR_CPU_RES_FAILURE otherwise as a signal
1146  * that an MD walk is necessary.
1147  */
1148 static int
1149 dr_cpu_status(processorid_t cpuid, int *status)
1150 {
1151 	int		rv;
1152 	struct cpu	*cp;
1153 
1154 	DR_DBG_CPU("dr_cpu_status...\n");
1155 
1156 	mutex_enter(&cpu_lock);
1157 
1158 	if ((cp = cpu_get(cpuid)) == NULL) {
1159 		/* need to check if cpu is in the MD */
1160 		rv = DR_CPU_RES_FAILURE;
1161 		goto done;
1162 	}
1163 
1164 	if (cpu_is_poweredoff(cp)) {
1165 		/*
1166 		 * The CPU is powered off, so it is considered
1167 		 * unconfigured from the service entity point of
1168 		 * view. The CPU is not available to the system
1169 		 * and intervention by the service entity would
1170 		 * be required to change that.
1171 		 */
1172 		*status = DR_CPU_STAT_UNCONFIGURED;
1173 	} else {
1174 		/*
1175 		 * The CPU is powered on, so it is considered
1176 		 * configured from the service entity point of
1177 		 * view. It is available for use by the system
1178 		 * and service entities are not concerned about
1179 		 * the operational status (offline, online, etc.)
1180 		 * of the CPU in terms of DR.
1181 		 */
1182 		*status = DR_CPU_STAT_CONFIGURED;
1183 	}
1184 
1185 	rv = DR_CPU_RES_OK;
1186 
1187 done:
1188 	mutex_exit(&cpu_lock);
1189 
1190 	return (rv);
1191 }
1192 
1193 typedef struct {
1194 	md_t		*mdp;
1195 	mde_cookie_t	cpunode;
1196 	dev_info_t	*dip;
1197 } cb_arg_t;
1198 
1199 #define	STR_ARR_LEN	5
1200 
1201 static int
1202 new_cpu_node(dev_info_t *new_node, void *arg, uint_t flags)
1203 {
1204 	_NOTE(ARGUNUSED(flags))
1205 
1206 	char		*compat;
1207 	uint64_t	freq;
1208 	uint64_t	cpuid = 0;
1209 	int		regbuf[4];
1210 	int		len = 0;
1211 	cb_arg_t	*cba;
1212 	char		*str_arr[STR_ARR_LEN];
1213 	char		*curr;
1214 	int		idx = 0;
1215 
1216 	DR_DBG_CPU("new_cpu_node...\n");
1217 
1218 	cba = (cb_arg_t *)arg;
1219 
1220 	/*
1221 	 * Add 'name' property
1222 	 */
1223 	if (ndi_prop_update_string(DDI_DEV_T_NONE, new_node,
1224 	    "name", "cpu") != DDI_SUCCESS) {
1225 		DR_DBG_CPU("new_cpu_node: failed to create 'name' property\n");
1226 		return (DDI_WALK_ERROR);
1227 	}
1228 
1229 	/*
1230 	 * Add 'compatible' property
1231 	 */
1232 	if (md_get_prop_data(cba->mdp, cba->cpunode, "compatible",
1233 	    (uint8_t **)(&compat), &len)) {
1234 		DR_DBG_CPU("new_cpu_node: failed to read 'compatible' property "
1235 		    "from MD\n");
1236 		return (DDI_WALK_ERROR);
1237 	}
1238 
1239 	DR_DBG_CPU("'compatible' len is %d\n", len);
1240 
1241 	/* parse the MD string array */
1242 	curr = compat;
1243 	while (curr < (compat + len)) {
1244 
1245 		DR_DBG_CPU("adding '%s' to 'compatible' property\n", curr);
1246 
1247 		str_arr[idx++] = curr;
1248 		curr += strlen(curr) + 1;
1249 
1250 		if (idx == STR_ARR_LEN) {
1251 			DR_DBG_CPU("exceeded str_arr len (%d)\n", STR_ARR_LEN);
1252 			break;
1253 		}
1254 	}
1255 
1256 	if (ndi_prop_update_string_array(DDI_DEV_T_NONE, new_node,
1257 	    "compatible", str_arr, idx) != DDI_SUCCESS) {
1258 		DR_DBG_CPU("new_cpu_node: failed to create 'compatible' "
1259 		    "property\n");
1260 		return (DDI_WALK_ERROR);
1261 	}
1262 
1263 	/*
1264 	 * Add 'device_type' property
1265 	 */
1266 	if (ndi_prop_update_string(DDI_DEV_T_NONE, new_node,
1267 	    "device_type", "cpu") != DDI_SUCCESS) {
1268 		DR_DBG_CPU("new_cpu_node: failed to create 'device_type' "
1269 		    "property\n");
1270 		return (DDI_WALK_ERROR);
1271 	}
1272 
1273 	/*
1274 	 * Add 'clock-frequency' property
1275 	 */
1276 	if (md_get_prop_val(cba->mdp, cba->cpunode, "clock-frequency", &freq)) {
1277 		DR_DBG_CPU("new_cpu_node: failed to read 'clock-frequency' "
1278 		    "property from MD\n");
1279 		return (DDI_WALK_ERROR);
1280 	}
1281 
1282 	if (ndi_prop_update_int(DDI_DEV_T_NONE, new_node,
1283 	    "clock-frequency", freq) != DDI_SUCCESS) {
1284 		DR_DBG_CPU("new_cpu_node: failed to create 'clock-frequency' "
1285 		    "property\n");
1286 		return (DDI_WALK_ERROR);
1287 	}
1288 
1289 	/*
1290 	 * Add 'reg' (cpuid) property
1291 	 */
1292 	if (md_get_prop_val(cba->mdp, cba->cpunode, "id", &cpuid)) {
1293 		DR_DBG_CPU("new_cpu_node: failed to read 'id' property "
1294 		    "from MD\n");
1295 		return (DDI_WALK_ERROR);
1296 	}
1297 
1298 	DR_DBG_CPU("new cpuid=0x%lx\n", cpuid);
1299 
1300 	bzero(regbuf, 4 * sizeof (int));
1301 	regbuf[0] = 0xc0000000 | cpuid;
1302 
1303 	if (ndi_prop_update_int_array(DDI_DEV_T_NONE, new_node,
1304 	    "reg", regbuf, 4) != DDI_SUCCESS) {
1305 		DR_DBG_CPU("new_cpu_node: failed to create 'reg' property\n");
1306 		return (DDI_WALK_ERROR);
1307 	}
1308 
1309 	cba->dip = new_node;
1310 
1311 	return (DDI_WALK_TERMINATE);
1312 }
1313 
1314 static int
1315 dr_cpu_probe(processorid_t cpuid)
1316 {
1317 	dev_info_t	*pdip;
1318 	dev_info_t	*dip;
1319 	devi_branch_t	br;
1320 	md_t		*mdp = NULL;
1321 	int		num_nodes;
1322 	int		rv = 0;
1323 	int		listsz;
1324 	mde_cookie_t	*listp = NULL;
1325 	cb_arg_t	cba;
1326 	mde_cookie_t	cpunode;
1327 
1328 	if ((dip = dr_cpu_find_node(cpuid)) != NULL) {
1329 		/* nothing to do */
1330 		e_ddi_branch_rele(dip);
1331 		return (0);
1332 	}
1333 
1334 	if ((mdp = md_get_handle()) == NULL) {
1335 		DR_DBG_CPU("unable to initialize machine description\n");
1336 		return (-1);
1337 	}
1338 
1339 	num_nodes = md_node_count(mdp);
1340 	ASSERT(num_nodes > 0);
1341 
1342 	listsz = num_nodes * sizeof (mde_cookie_t);
1343 	listp = kmem_zalloc(listsz, KM_SLEEP);
1344 
1345 	cpunode = dr_cpu_find_node_md(cpuid, mdp, listp);
1346 
1347 	if (cpunode == MDE_INVAL_ELEM_COOKIE) {
1348 		rv = EINVAL;
1349 		goto done;
1350 	}
1351 
1352 	/* pass in MD cookie for CPU */
1353 	cba.mdp = mdp;
1354 	cba.cpunode = cpunode;
1355 
1356 	br.arg = (void *)&cba;
1357 	br.type = DEVI_BRANCH_SID;
1358 	br.create.sid_branch_create = new_cpu_node;
1359 	br.devi_branch_callback = NULL;
1360 	pdip = ddi_root_node();
1361 
1362 	if ((rv = e_ddi_branch_create(pdip, &br, NULL, 0))) {
1363 		DR_DBG_CPU("e_ddi_branch_create failed: %d\n", rv);
1364 		rv = -1;
1365 		goto done;
1366 	}
1367 
1368 	DR_DBG_CPU("CPU %d probed\n", cpuid);
1369 
1370 	rv = 0;
1371 
1372 done:
1373 	if (listp)
1374 		kmem_free(listp, listsz);
1375 
1376 	if (mdp)
1377 		(void) md_fini_handle(mdp);
1378 
1379 	return (rv);
1380 }
1381 
1382 static int
1383 dr_cpu_deprobe(processorid_t cpuid)
1384 {
1385 	dev_info_t	*fdip = NULL;
1386 	dev_info_t	*dip;
1387 
1388 	if ((dip = dr_cpu_find_node(cpuid)) == NULL) {
1389 		DR_DBG_CPU("cpuid %d already deprobed\n", cpuid);
1390 		return (0);
1391 	}
1392 
1393 	ASSERT(e_ddi_branch_held(dip));
1394 
1395 	if (e_ddi_branch_destroy(dip, &fdip, 0)) {
1396 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1397 
1398 		/*
1399 		 * If non-NULL, fdip is held and must be released.
1400 		 */
1401 		if (fdip != NULL) {
1402 			(void) ddi_pathname(fdip, path);
1403 			ddi_release_devi(fdip);
1404 		} else {
1405 			(void) ddi_pathname(dip, path);
1406 		}
1407 		cmn_err(CE_NOTE, "node removal failed: %s (%p)",
1408 		    path, (fdip) ? (void *)fdip : (void *)dip);
1409 
1410 		kmem_free(path, MAXPATHLEN);
1411 
1412 		return (-1);
1413 	}
1414 
1415 	DR_DBG_CPU("CPU %d deprobed\n", cpuid);
1416 
1417 	return (0);
1418 }
1419 
1420 typedef struct {
1421 	processorid_t	cpuid;
1422 	dev_info_t	*dip;
1423 } dr_search_arg_t;
1424 
1425 static int
1426 dr_cpu_check_node(dev_info_t *dip, void *arg)
1427 {
1428 	char 		*name;
1429 	processorid_t	cpuid;
1430 	dr_search_arg_t	*sarg = (dr_search_arg_t *)arg;
1431 
1432 	if (dip == ddi_root_node()) {
1433 		return (DDI_WALK_CONTINUE);
1434 	}
1435 
1436 	name = ddi_node_name(dip);
1437 
1438 	if (strcmp(name, "cpu") != 0) {
1439 		return (DDI_WALK_PRUNECHILD);
1440 	}
1441 
1442 	cpuid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1443 	    "reg", -1);
1444 
1445 	cpuid = PROM_CFGHDL_TO_CPUID(cpuid);
1446 
1447 	DR_DBG_CPU("found cpuid=0x%x, looking for 0x%x\n", cpuid, sarg->cpuid);
1448 
1449 	if (cpuid == sarg->cpuid) {
1450 		DR_DBG_CPU("matching node\n");
1451 
1452 		/* matching node must be returned held */
1453 		if (!e_ddi_branch_held(dip))
1454 			e_ddi_branch_hold(dip);
1455 
1456 		sarg->dip = dip;
1457 		return (DDI_WALK_TERMINATE);
1458 	}
1459 
1460 	return (DDI_WALK_CONTINUE);
1461 }
1462 
1463 /*
1464  * Walk the device tree to find the dip corresponding to the cpuid
1465  * passed in. If present, the dip is returned held. The caller must
1466  * release the hold on the dip once it is no longer required. If no
1467  * matching node if found, NULL is returned.
1468  */
1469 static dev_info_t *
1470 dr_cpu_find_node(processorid_t cpuid)
1471 {
1472 	dr_search_arg_t	arg;
1473 
1474 	DR_DBG_CPU("dr_cpu_find_node...\n");
1475 
1476 	arg.cpuid = cpuid;
1477 	arg.dip = NULL;
1478 
1479 	ddi_walk_devs(ddi_root_node(), dr_cpu_check_node, &arg);
1480 
1481 	ASSERT((arg.dip == NULL) || (e_ddi_branch_held(arg.dip)));
1482 
1483 	return ((arg.dip) ? arg.dip : NULL);
1484 }
1485 
1486 /*
1487  * Look up a particular cpuid in the MD. Returns the mde_cookie_t
1488  * representing that CPU if present, and MDE_INVAL_ELEM_COOKIE
1489  * otherwise. It is assumed the scratch array has already been
1490  * allocated so that it can accommodate the worst case scenario,
1491  * every node in the MD.
1492  */
1493 static mde_cookie_t
1494 dr_cpu_find_node_md(processorid_t cpuid, md_t *mdp, mde_cookie_t *listp)
1495 {
1496 	int		idx;
1497 	int		nnodes;
1498 	mde_cookie_t	rootnode;
1499 	uint64_t	cpuid_prop;
1500 	mde_cookie_t	result = MDE_INVAL_ELEM_COOKIE;
1501 
1502 	rootnode = md_root_node(mdp);
1503 	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
1504 
1505 	/*
1506 	 * Scan the DAG for all the CPU nodes
1507 	 */
1508 	nnodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "cpu"),
1509 	    md_find_name(mdp, "fwd"), listp);
1510 
1511 	if (nnodes < 0) {
1512 		DR_DBG_CPU("Scan for CPUs failed\n");
1513 		return (result);
1514 	}
1515 
1516 	DR_DBG_CPU("dr_cpu_find_node_md: found %d CPUs in the MD\n", nnodes);
1517 
1518 	/*
1519 	 * Find the CPU of interest
1520 	 */
1521 	for (idx = 0; idx < nnodes; idx++) {
1522 
1523 		if (md_get_prop_val(mdp, listp[idx], "id", &cpuid_prop)) {
1524 			DR_DBG_CPU("Missing 'id' property for CPU node %d\n",
1525 			    idx);
1526 			break;
1527 		}
1528 
1529 		if (cpuid_prop == cpuid) {
1530 			/* found a match */
1531 			DR_DBG_CPU("dr_cpu_find_node_md: found CPU %d "
1532 			    "in MD\n", cpuid);
1533 			result = listp[idx];
1534 			break;
1535 		}
1536 	}
1537 
1538 	if (result == MDE_INVAL_ELEM_COOKIE) {
1539 		DR_DBG_CPU("CPU %d not in MD\n", cpuid);
1540 	}
1541 
1542 	return (result);
1543 }
1544