xref: /titanic_44/usr/src/uts/sun4v/io/dr_cpu.c (revision bc9ec910d60a3256c0b6bb68a13afa62e106730a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v CPU DR Module
31  */
32 
33 #include <sys/modctl.h>
34 #include <sys/processor.h>
35 #include <sys/cpuvar.h>
36 #include <sys/sunddi.h>
37 #include <sys/sunndi.h>
38 #include <sys/note.h>
39 #include <sys/sysevent/dr.h>
40 #include <sys/hypervisor_api.h>
41 #include <sys/mach_descrip.h>
42 #include <sys/mdesc.h>
43 #include <sys/ds.h>
44 #include <sys/drctl.h>
45 #include <sys/dr_util.h>
46 #include <sys/dr_cpu.h>
47 #include <sys/promif.h>
48 #include <sys/machsystm.h>
49 
50 
51 static struct modlmisc modlmisc = {
52 	&mod_miscops,
53 	"sun4v CPU DR %I%"
54 };
55 
56 static struct modlinkage modlinkage = {
57 	MODREV_1,
58 	(void *)&modlmisc,
59 	NULL
60 };
61 
62 typedef int (*fn_t)(processorid_t, int *, boolean_t);
63 
64 /*
65  * Global DS Handle
66  */
67 static ds_svc_hdl_t ds_handle;
68 
69 /*
70  * Supported DS Capability Versions
71  */
72 static ds_ver_t		dr_cpu_vers[] = { { 1, 0 } };
73 #define	DR_CPU_NVERS	(sizeof (dr_cpu_vers) / sizeof (dr_cpu_vers[0]))
74 
75 /*
76  * DS Capability Description
77  */
78 static ds_capability_t dr_cpu_cap = {
79 	DR_CPU_DS_ID,		/* svc_id */
80 	dr_cpu_vers,		/* vers */
81 	DR_CPU_NVERS		/* nvers */
82 };
83 
84 /*
85  * DS Callbacks
86  */
87 static void dr_cpu_reg_handler(ds_cb_arg_t, ds_ver_t *, ds_svc_hdl_t);
88 static void dr_cpu_unreg_handler(ds_cb_arg_t arg);
89 static void dr_cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);
90 
91 /*
92  * DS Client Ops Vector
93  */
94 static ds_clnt_ops_t dr_cpu_ops = {
95 	dr_cpu_reg_handler,	/* ds_reg_cb */
96 	dr_cpu_unreg_handler,	/* ds_unreg_cb */
97 	dr_cpu_data_handler,	/* ds_data_cb */
98 	NULL			/* cb_arg */
99 };
100 
101 /*
102  * Internal Functions
103  */
104 static int dr_cpu_init(void);
105 static int dr_cpu_fini(void);
106 
107 static int dr_cpu_list_wrk(dr_cpu_hdr_t *, dr_cpu_hdr_t **, int *, fn_t);
108 static int dr_cpu_list_status(dr_cpu_hdr_t *, dr_cpu_hdr_t **, int *);
109 
110 static int dr_cpu_unconfigure(processorid_t, int *status, boolean_t force);
111 static int dr_cpu_configure(processorid_t, int *status, boolean_t force);
112 static int dr_cpu_status(processorid_t, int *status);
113 
114 static int dr_cpu_probe(processorid_t newcpuid);
115 static int dr_cpu_deprobe(processorid_t cpuid);
116 
117 static dev_info_t *dr_cpu_find_node(processorid_t cpuid);
118 static mde_cookie_t dr_cpu_find_node_md(processorid_t, md_t *, mde_cookie_t *);
119 static void dr_cpu_check_cpus(uint32_t *cpuids, int ncpus, dr_cpu_stat_t *stat);
120 
121 
122 int
123 _init(void)
124 {
125 	int	status;
126 
127 	/* check that CPU DR is enabled */
128 	if (dr_is_disabled(DR_TYPE_CPU)) {
129 		cmn_err(CE_CONT, "!CPU DR is disabled\n");
130 		return (-1);
131 	}
132 
133 	if ((status = dr_cpu_init()) != 0) {
134 		cmn_err(CE_NOTE, "CPU DR initialization failed");
135 		return (status);
136 	}
137 
138 	if ((status = mod_install(&modlinkage)) != 0) {
139 		(void) dr_cpu_fini();
140 	}
141 
142 	return (status);
143 }
144 
145 int
146 _info(struct modinfo *modinfop)
147 {
148 	return (mod_info(&modlinkage, modinfop));
149 }
150 
151 int dr_cpu_allow_unload;
152 
153 int
154 _fini(void)
155 {
156 	int	status;
157 
158 	if (dr_cpu_allow_unload == 0)
159 		return (EBUSY);
160 
161 	if ((status = mod_remove(&modlinkage)) == 0) {
162 		(void) dr_cpu_fini();
163 	}
164 
165 	return (status);
166 }
167 
168 static int
169 dr_cpu_init(void)
170 {
171 	int	rv;
172 
173 	if ((rv = ds_cap_init(&dr_cpu_cap, &dr_cpu_ops)) != 0) {
174 		cmn_err(CE_NOTE, "ds_cap_init failed: %d", rv);
175 		return (-1);
176 	}
177 
178 	return (0);
179 }
180 
181 static int
182 dr_cpu_fini(void)
183 {
184 	int	rv;
185 
186 	if ((rv = ds_cap_fini(&dr_cpu_cap)) != 0) {
187 		cmn_err(CE_NOTE, "ds_cap_fini failed: %d", rv);
188 		return (-1);
189 	}
190 
191 	return (0);
192 }
193 
194 static void
195 dr_cpu_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl)
196 {
197 	DR_DBG_CPU("reg_handler: arg=0x%p, ver=%d.%d, hdl=0x%lx\n", arg,
198 	    ver->major, ver->minor, hdl);
199 
200 	ds_handle = hdl;
201 }
202 
203 static void
204 dr_cpu_unreg_handler(ds_cb_arg_t arg)
205 {
206 	DR_DBG_CPU("unreg_handler: arg=0x%p\n", arg);
207 
208 	ds_handle = DS_INVALID_HDL;
209 }
210 
211 static void
212 dr_cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
213 {
214 	_NOTE(ARGUNUSED(arg))
215 
216 	dr_cpu_hdr_t	*req = buf;
217 	dr_cpu_hdr_t	err_resp;
218 	dr_cpu_hdr_t	*resp = &err_resp;
219 	int		resp_len = 0;
220 	int		rv;
221 
222 	/*
223 	 * Sanity check the message
224 	 */
225 	if (buflen < sizeof (dr_cpu_hdr_t)) {
226 		DR_DBG_CPU("incoming message short: expected at least %ld "
227 		    "bytes, received %ld\n", sizeof (dr_cpu_hdr_t), buflen);
228 		goto done;
229 	}
230 
231 	if (req == NULL) {
232 		DR_DBG_CPU("empty message: expected at least %ld bytes\n",
233 		    sizeof (dr_cpu_hdr_t));
234 		goto done;
235 	}
236 
237 	DR_DBG_CPU("incoming request:\n");
238 	DR_DBG_DUMP_MSG(buf, buflen);
239 
240 	if (req->num_records > NCPU) {
241 		DR_DBG_CPU("CPU list too long: %d when %d is the maximum\n",
242 		    req->num_records, NCPU);
243 		goto done;
244 	}
245 
246 	if (req->num_records == 0) {
247 		DR_DBG_CPU("No CPU specified for operation\n");
248 		goto done;
249 	}
250 
251 	/*
252 	 * Process the command
253 	 */
254 	switch (req->msg_type) {
255 	case DR_CPU_CONFIGURE:
256 		rv = dr_cpu_list_wrk(req, &resp, &resp_len, dr_cpu_configure);
257 		if (rv != 0)
258 			DR_DBG_CPU("dr_cpu_list_configure failed (%d)\n", rv);
259 		break;
260 
261 	case DR_CPU_UNCONFIGURE:
262 	case DR_CPU_FORCE_UNCONFIG:
263 		rv = dr_cpu_list_wrk(req, &resp, &resp_len, dr_cpu_unconfigure);
264 		if (rv != 0)
265 			DR_DBG_CPU("dr_cpu_list_unconfigure failed (%d)\n", rv);
266 		break;
267 
268 	case DR_CPU_STATUS:
269 		if ((rv = dr_cpu_list_status(req, &resp, &resp_len)) != 0)
270 			DR_DBG_CPU("dr_cpu_list_status failed (%d)\n", rv);
271 		break;
272 
273 	default:
274 		cmn_err(CE_NOTE, "unsupported DR operation (%d)",
275 		    req->msg_type);
276 		break;
277 	}
278 
279 done:
280 	/* check if an error occurred */
281 	if (resp == &err_resp) {
282 		resp->req_num = (req) ? req->req_num : 0;
283 		resp->msg_type = DR_CPU_ERROR;
284 		resp->num_records = 0;
285 		resp_len = sizeof (dr_cpu_hdr_t);
286 	}
287 
288 	/* send back the response */
289 	if (ds_cap_send(ds_handle, resp, resp_len) != 0) {
290 		DR_DBG_CPU("ds_send failed\n");
291 	}
292 
293 	/* free any allocated memory */
294 	if (resp != &err_resp) {
295 		kmem_free(resp, resp_len);
296 	}
297 }
298 
299 /*
300  * Common routine to config or unconfig multiple cpus.  The unconfig
301  * case checks with the OS to see if the removal of cpus will be
302  * permitted, but can be overridden by the "force" version of the
303  * command.  Otherwise, the logic for both cases is identical.
304  *
305  * Note: Do not modify result buffer or length on error.
306  */
307 static int
308 dr_cpu_list_wrk(dr_cpu_hdr_t *rq, dr_cpu_hdr_t **resp, int *resp_len, fn_t f)
309 {
310 	/* related to request message (based on cpu_hdr_t *rq function arg) */
311 
312 	uint32_t	*rq_cpus;	/* address of cpuid array in request */
313 
314 	/* the response message to our caller (passed back via **resp) */
315 
316 	dr_cpu_hdr_t	*rs;		/* address of allocated response msg */
317 	size_t		rs_len;		/* length of response msg */
318 	dr_cpu_stat_t	*rs_stat;	/* addr. of status array in response */
319 	caddr_t		rs_str;		/* addr. of string area in response */
320 	size_t		rs_stat_len;	/* length of status area in response */
321 	size_t		rs_str_len;	/* length of string area in response */
322 
323 	/* the request message sent to drctl_config_[init|fini] */
324 
325 	drctl_rsrc_t	*dr_rq;		/* addr. of allocated msg for drctl */
326 	size_t		dr_rq_len;	/* length of same */
327 
328 	/* the response message received from drctl_config_init */
329 
330 	drctl_rsrc_t	*dr_rs;		/* &response from drctl_config_init */
331 	size_t		dr_rs_len = 0;	/* length of response from same */
332 	caddr_t		dr_rs_str;	/* &(string area) in same */
333 	drctl_cookie_t	dr_rs_ck;	/* the cookie from same */
334 
335 	/* common temp variables */
336 
337 	int		idx;
338 	int		cmd;
339 	int		result;
340 	int		status;
341 	int		count;
342 	int		rv;
343 	int		flags;
344 	int		force;
345 	int		fail_status;
346 	static const char me[] = "dr_cpu_list_wrk";
347 
348 
349 	ASSERT(rq != NULL && rq->num_records != 0);
350 
351 	count = rq->num_records;
352 	flags = 0;
353 	force = B_FALSE;
354 
355 	switch (rq->msg_type) {
356 	case DR_CPU_CONFIGURE:
357 		cmd = DRCTL_CPU_CONFIG_REQUEST;
358 		fail_status = DR_CPU_STAT_UNCONFIGURED;
359 		break;
360 	case DR_CPU_FORCE_UNCONFIG:
361 		flags = DRCTL_FLAG_FORCE;
362 		force = B_TRUE;
363 		_NOTE(FALLTHROUGH)
364 	case DR_CPU_UNCONFIGURE:
365 		cmd = DRCTL_CPU_UNCONFIG_REQUEST;
366 		fail_status = DR_CPU_STAT_CONFIGURED;
367 		break;
368 	default:
369 		/* Programming error if we reach this. */
370 		ASSERT(0);
371 		cmn_err(CE_NOTE, "%s: bad msg_type %d\n", me, rq->msg_type);
372 		return (-1);
373 	}
374 
375 	/* the incoming array of cpuids to configure */
376 	rq_cpus = (uint32_t *)((caddr_t)rq + sizeof (dr_cpu_hdr_t));
377 
378 	/* allocate drctl request msg based on incoming resource count */
379 	dr_rq_len = sizeof (drctl_rsrc_t) * count;
380 	dr_rq = kmem_zalloc(dr_rq_len, KM_SLEEP);
381 
382 	/* copy the cpuids for the drctl call from the incoming request msg */
383 	for (idx = 0; idx < count; idx++)
384 		dr_rq[idx].res_cpu_id = rq_cpus[idx];
385 
386 	rv = drctl_config_init(cmd,
387 	    flags, dr_rq, count, &dr_rs, &dr_rs_len, &dr_rs_ck);
388 
389 	if (rv != 0) {
390 		cmn_err(CE_CONT,
391 		    "?%s: drctl_config_init returned: %d\n", me, rv);
392 		kmem_free(dr_rq, dr_rq_len);
393 		return (-1);
394 	}
395 
396 	ASSERT(dr_rs != NULL && dr_rs_len != 0);
397 
398 	/*
399 	 * Allocate a response buffer for our caller.  It consists of
400 	 * the header plus the (per resource) status array and a string
401 	 * area the size of which is equal to the size of the string
402 	 * area in the drctl_config_init response.  The latter is
403 	 * simply the size difference between the config_init request
404 	 * and config_init response messages (and may be zero).
405 	 */
406 	rs_stat_len =  count * sizeof (dr_cpu_stat_t);
407 	rs_str_len = dr_rs_len - dr_rq_len;
408 	rs_len = sizeof (dr_cpu_hdr_t) + rs_stat_len + rs_str_len;
409 	rs = kmem_zalloc(rs_len, KM_SLEEP);
410 
411 	/* fill in the known data */
412 	rs->req_num = rq->req_num;
413 	rs->msg_type = DR_CPU_OK;
414 	rs->num_records = count;
415 
416 	/* stat array for the response */
417 	rs_stat = (dr_cpu_stat_t *)((caddr_t)rs + sizeof (dr_cpu_hdr_t));
418 
419 	if (rq->msg_type == DR_CPU_FORCE_UNCONFIG)
420 		dr_cpu_check_cpus(rq_cpus, count, rs_stat);
421 
422 	/* [un]configure each of the CPUs */
423 	for (idx = 0; idx < count; idx++) {
424 
425 		if (dr_rs[idx].status != DRCTL_STATUS_ALLOW ||
426 		    rs_stat[idx].result == DR_CPU_RES_BLOCKED) {
427 			result = DR_CPU_RES_FAILURE;
428 			status = fail_status;
429 		} else {
430 			result = (*f)(rq_cpus[idx], &status, force);
431 		}
432 
433 		/* save off results of the configure */
434 		rs_stat[idx].cpuid = rq_cpus[idx];
435 		rs_stat[idx].result = result;
436 		rs_stat[idx].status = status;
437 
438 		/*
439 		 * Convert any string offset from being relative to
440 		 * the start of the drctl response to being relative
441 		 * to the start of the response sent to our caller.
442 		 */
443 		if (dr_rs[idx].offset != 0)
444 			rs_stat[idx].string_off = (uint32_t)dr_rs[idx].offset -
445 			    dr_rq_len + (rs_len - rs_str_len);
446 
447 		/* save result for _fini() reusing _init msg memory */
448 		dr_rq[idx].status = (status == fail_status) ?
449 		    DRCTL_STATUS_CONFIG_FAILURE : DRCTL_STATUS_CONFIG_SUCCESS;
450 		DR_DBG_CPU("%s: cpuid %d status %d result %d off %d",
451 		    me, rq_cpus[idx], dr_rq[idx].status,
452 		    result, rs_stat[idx].string_off);
453 	}
454 
455 	/* copy the strings (if any) from drctl resp. into resp. for caller */
456 	dr_rs_str = (caddr_t)dr_rs + dr_rq_len;
457 	rs_str = (caddr_t)rs + rs_len - rs_str_len;
458 	bcopy(dr_rs_str, rs_str, rs_str_len);
459 
460 	rv = drctl_config_fini(&dr_rs_ck, dr_rq, count);
461 	if (rv != 0)
462 		cmn_err(CE_CONT,
463 		    "?%s: drctl_config_fini returned: %d\n", me, rv);
464 
465 	kmem_free(dr_rs, dr_rs_len);
466 
467 	kmem_free(dr_rq, dr_rq_len);
468 
469 	*resp = rs;
470 	*resp_len = rs_len;
471 
472 	dr_generate_event(DR_TYPE_CPU, SE_HINT_INSERT);
473 
474 	return (0);
475 }
476 
477 static void
478 dr_cpu_check_cpus(uint32_t *cpuids, int ncpus, dr_cpu_stat_t *stat)
479 {
480 	int		idx;
481 	kthread_t	*tp;
482 	proc_t		*pp;
483 
484 	DR_DBG_CPU("dr_cpu_check_cpus...\n");
485 
486 	mutex_enter(&cpu_lock);
487 
488 	/* process each cpu that is part of the request */
489 	for (idx = 0; idx < ncpus; idx++) {
490 
491 		if (cpu_get(cpuids[idx]) == NULL)
492 			continue;
493 
494 		mutex_enter(&pidlock);
495 
496 		/*
497 		 * Walk the active processes, checking if each
498 		 * thread belonging to the process is bound.
499 		 */
500 		for (pp = practive; pp != NULL; pp = pp->p_next) {
501 			mutex_enter(&pp->p_lock);
502 			tp = pp->p_tlist;
503 
504 			if (tp == NULL || (pp->p_flag & SSYS)) {
505 				mutex_exit(&pp->p_lock);
506 				continue;
507 			}
508 
509 			do {
510 				if (tp->t_bind_cpu != cpuids[idx])
511 					continue;
512 
513 				DR_DBG_CPU("thread(s) bound to cpu %d\n",
514 				    cpuids[idx]);
515 
516 				stat[idx].cpuid = cpuids[idx];
517 				stat[idx].result = DR_CPU_RES_BLOCKED;
518 				stat[idx].status = DR_CPU_STAT_CONFIGURED;
519 				break;
520 
521 			} while ((tp = tp->t_forw) != pp->p_tlist);
522 			mutex_exit(&pp->p_lock);
523 		}
524 
525 		mutex_exit(&pidlock);
526 	}
527 
528 	mutex_exit(&cpu_lock);
529 }
530 
531 
532 /*
533  * Do not modify result buffer or length on error.
534  */
535 static int
536 dr_cpu_list_status(dr_cpu_hdr_t *req, dr_cpu_hdr_t **resp, int *resp_len)
537 {
538 	int		idx;
539 	int		result;
540 	int		status;
541 	int		rlen;
542 	uint32_t	*cpuids;
543 	dr_cpu_hdr_t	*rp;
544 	dr_cpu_stat_t	*stat;
545 	md_t		*mdp = NULL;
546 	int		num_nodes;
547 	int		listsz;
548 	mde_cookie_t	*listp = NULL;
549 	mde_cookie_t	cpunode;
550 	boolean_t	walk_md = B_FALSE;
551 
552 	/* the incoming array of cpuids to configure */
553 	cpuids = (uint32_t *)((caddr_t)req + sizeof (dr_cpu_hdr_t));
554 
555 	/* allocate a response message */
556 	rlen = sizeof (dr_cpu_hdr_t);
557 	rlen += req->num_records * sizeof (dr_cpu_stat_t);
558 	rp = kmem_zalloc(rlen, KM_SLEEP);
559 
560 	/* fill in the known data */
561 	rp->req_num = req->req_num;
562 	rp->msg_type = DR_CPU_STATUS;
563 	rp->num_records = req->num_records;
564 
565 	/* stat array for the response */
566 	stat = (dr_cpu_stat_t *)((caddr_t)rp + sizeof (dr_cpu_hdr_t));
567 
568 	/* get the status for each of the CPUs */
569 	for (idx = 0; idx < req->num_records; idx++) {
570 
571 		result = dr_cpu_status(cpuids[idx], &status);
572 
573 		if (result == DR_CPU_RES_FAILURE)
574 			walk_md = B_TRUE;
575 
576 		/* save off results of the status */
577 		stat[idx].cpuid = cpuids[idx];
578 		stat[idx].result = result;
579 		stat[idx].status = status;
580 	}
581 
582 	if (walk_md == B_FALSE)
583 		goto done;
584 
585 	/*
586 	 * At least one of the cpus did not have a CPU
587 	 * structure. So, consult the MD to determine if
588 	 * they are present.
589 	 */
590 
591 	if ((mdp = md_get_handle()) == NULL) {
592 		DR_DBG_CPU("unable to initialize MD\n");
593 		goto done;
594 	}
595 
596 	num_nodes = md_node_count(mdp);
597 	ASSERT(num_nodes > 0);
598 
599 	listsz = num_nodes * sizeof (mde_cookie_t);
600 	listp = kmem_zalloc(listsz, KM_SLEEP);
601 
602 	for (idx = 0; idx < req->num_records; idx++) {
603 
604 		if (stat[idx].result != DR_CPU_RES_FAILURE)
605 			continue;
606 
607 		/* check the MD for the current cpuid */
608 		cpunode = dr_cpu_find_node_md(stat[idx].cpuid, mdp, listp);
609 
610 		stat[idx].result = DR_CPU_RES_OK;
611 
612 		if (cpunode == MDE_INVAL_ELEM_COOKIE) {
613 			stat[idx].status = DR_CPU_STAT_NOT_PRESENT;
614 		} else {
615 			stat[idx].status = DR_CPU_STAT_UNCONFIGURED;
616 		}
617 	}
618 
619 	kmem_free(listp, listsz);
620 
621 	(void) md_fini_handle(mdp);
622 
623 done:
624 	*resp = rp;
625 	*resp_len = rlen;
626 
627 	return (0);
628 }
629 
630 
631 static int
632 dr_cpu_configure(processorid_t cpuid, int *status, boolean_t force)
633 {
634 	 _NOTE(ARGUNUSED(force))
635 	struct cpu	*cp;
636 	int		rv = 0;
637 
638 	DR_DBG_CPU("dr_cpu_configure...\n");
639 
640 	/*
641 	 * Build device tree node for the CPU
642 	 */
643 	if ((rv = dr_cpu_probe(cpuid)) != 0) {
644 		DR_DBG_CPU("failed to probe CPU %d (%d)\n", cpuid, rv);
645 		if (rv == EINVAL) {
646 			*status = DR_CPU_STAT_NOT_PRESENT;
647 			return (DR_CPU_RES_NOT_IN_MD);
648 		}
649 		*status = DR_CPU_STAT_UNCONFIGURED;
650 		return (DR_CPU_RES_FAILURE);
651 	}
652 
653 	mutex_enter(&cpu_lock);
654 
655 	/*
656 	 * Configure the CPU
657 	 */
658 	if ((cp = cpu_get(cpuid)) == NULL) {
659 
660 		if ((rv = cpu_configure(cpuid)) != 0) {
661 			DR_DBG_CPU("failed to configure CPU %d (%d)\n",
662 			    cpuid, rv);
663 			rv = DR_CPU_RES_FAILURE;
664 			*status = DR_CPU_STAT_UNCONFIGURED;
665 			goto done;
666 		}
667 
668 		DR_DBG_CPU("CPU %d configured\n", cpuid);
669 
670 		/* CPU struct should exist now */
671 		cp = cpu_get(cpuid);
672 	}
673 
674 	ASSERT(cp);
675 
676 	/*
677 	 * Power on the CPU. In sun4v, this brings the stopped
678 	 * CPU into the guest from the Hypervisor.
679 	 */
680 	if (cpu_is_poweredoff(cp)) {
681 
682 		if ((rv = cpu_poweron(cp)) != 0) {
683 			DR_DBG_CPU("failed to power on CPU %d (%d)\n",
684 			    cpuid, rv);
685 			rv = DR_CPU_RES_FAILURE;
686 			*status = DR_CPU_STAT_UNCONFIGURED;
687 			goto done;
688 		}
689 
690 		DR_DBG_CPU("CPU %d powered on\n", cpuid);
691 	}
692 
693 	/*
694 	 * Online the CPU
695 	 */
696 	if (cpu_is_offline(cp)) {
697 
698 		if ((rv = cpu_online(cp)) != 0) {
699 			DR_DBG_CPU("failed to online CPU %d (%d)\n",
700 			    cpuid, rv);
701 			rv = DR_CPU_RES_FAILURE;
702 			/* offline is still configured */
703 			*status = DR_CPU_STAT_CONFIGURED;
704 			goto done;
705 		}
706 
707 		DR_DBG_CPU("CPU %d online\n", cpuid);
708 	}
709 
710 	rv = DR_CPU_RES_OK;
711 	*status = DR_CPU_STAT_CONFIGURED;
712 
713 done:
714 	mutex_exit(&cpu_lock);
715 
716 	return (rv);
717 }
718 
719 static int
720 dr_cpu_unconfigure(processorid_t cpuid, int *status, boolean_t force)
721 {
722 	struct cpu	*cp;
723 	int		rv = 0;
724 	int		cpu_flags;
725 
726 	DR_DBG_CPU("dr_cpu_unconfigure%s...\n", (force) ? " (force)" : "");
727 
728 	mutex_enter(&cpu_lock);
729 
730 	cp = cpu_get(cpuid);
731 
732 	if (cp == NULL) {
733 
734 		/*
735 		 * The OS CPU structures are already torn down,
736 		 * Attempt to deprobe the CPU to make sure the
737 		 * device tree is up to date.
738 		 */
739 		if (dr_cpu_deprobe(cpuid) != 0) {
740 			DR_DBG_CPU("failed to deprobe CPU %d\n", cpuid);
741 			rv = DR_CPU_RES_FAILURE;
742 			*status = DR_CPU_STAT_UNCONFIGURED;
743 			goto done;
744 		}
745 
746 		goto done;
747 	}
748 
749 	ASSERT(cp->cpu_id == cpuid);
750 
751 	/*
752 	 * Offline the CPU
753 	 */
754 	if (cpu_is_active(cp)) {
755 
756 		/* set the force flag correctly */
757 		cpu_flags = (force) ? CPU_FORCED : 0;
758 
759 		if ((rv = cpu_offline(cp, cpu_flags)) != 0) {
760 			DR_DBG_CPU("failed to offline CPU %d (%d)\n",
761 			    cpuid, rv);
762 
763 			rv = DR_CPU_RES_FAILURE;
764 			*status = DR_CPU_STAT_CONFIGURED;
765 			goto done;
766 		}
767 
768 		DR_DBG_CPU("CPU %d offline\n", cpuid);
769 	}
770 
771 	/*
772 	 * Power off the CPU. In sun4v, this puts the running
773 	 * CPU into the stopped state in the Hypervisor.
774 	 */
775 	if (!cpu_is_poweredoff(cp)) {
776 
777 		if ((rv = cpu_poweroff(cp)) != 0) {
778 			DR_DBG_CPU("failed to power off CPU %d (%d)\n",
779 			    cpuid, rv);
780 			rv = DR_CPU_RES_FAILURE;
781 			*status = DR_CPU_STAT_CONFIGURED;
782 			goto done;
783 		}
784 
785 		DR_DBG_CPU("CPU %d powered off\n", cpuid);
786 	}
787 
788 	/*
789 	 * Unconfigure the CPU
790 	 */
791 	if ((rv = cpu_unconfigure(cpuid)) != 0) {
792 		DR_DBG_CPU("failed to unconfigure CPU %d (%d)\n", cpuid, rv);
793 		rv = DR_CPU_RES_FAILURE;
794 		*status = DR_CPU_STAT_UNCONFIGURED;
795 		goto done;
796 	}
797 
798 	DR_DBG_CPU("CPU %d unconfigured\n", cpuid);
799 
800 	/*
801 	 * Tear down device tree.
802 	 */
803 	if ((rv = dr_cpu_deprobe(cpuid)) != 0) {
804 		DR_DBG_CPU("failed to deprobe CPU %d (%d)\n", cpuid, rv);
805 		rv = DR_CPU_RES_FAILURE;
806 		*status = DR_CPU_STAT_UNCONFIGURED;
807 		goto done;
808 	}
809 
810 	rv = DR_CPU_RES_OK;
811 	*status = DR_CPU_STAT_UNCONFIGURED;
812 
813 done:
814 	mutex_exit(&cpu_lock);
815 
816 	return (rv);
817 }
818 
819 /*
820  * Determine the state of a CPU. If the CPU structure is not present,
821  * it does not attempt to determine whether or not the CPU is in the
822  * MD. It is more efficient to do this at the higher level for all
823  * CPUs since it may not even be necessary to search the MD if all
824  * the CPUs are accounted for. Returns DR_CPU_RES_OK if the CPU
825  * structure is present, and DR_CPU_RES_FAILURE otherwise as a signal
826  * that an MD walk is necessary.
827  */
828 static int
829 dr_cpu_status(processorid_t cpuid, int *status)
830 {
831 	int		rv;
832 	struct cpu	*cp;
833 
834 	DR_DBG_CPU("dr_cpu_status...\n");
835 
836 	mutex_enter(&cpu_lock);
837 
838 	if ((cp = cpu_get(cpuid)) == NULL) {
839 		/* need to check if cpu is in the MD */
840 		rv = DR_CPU_RES_FAILURE;
841 		goto done;
842 	}
843 
844 	if (cpu_is_poweredoff(cp)) {
845 		/*
846 		 * The CPU is powered off, so it is considered
847 		 * unconfigured from the service entity point of
848 		 * view. The CPU is not available to the system
849 		 * and intervention by the service entity would
850 		 * be required to change that.
851 		 */
852 		*status = DR_CPU_STAT_UNCONFIGURED;
853 	} else {
854 		/*
855 		 * The CPU is powered on, so it is considered
856 		 * configured from the service entity point of
857 		 * view. It is available for use by the system
858 		 * and service entities are not concerned about
859 		 * the operational status (offline, online, etc.)
860 		 * of the CPU in terms of DR.
861 		 */
862 		*status = DR_CPU_STAT_CONFIGURED;
863 	}
864 
865 	rv = DR_CPU_RES_OK;
866 
867 done:
868 	mutex_exit(&cpu_lock);
869 
870 	return (rv);
871 }
872 
873 typedef struct {
874 	md_t		*mdp;
875 	mde_cookie_t	cpunode;
876 	dev_info_t	*dip;
877 } cb_arg_t;
878 
879 #define	STR_ARR_LEN	5
880 
881 static int
882 new_cpu_node(dev_info_t *new_node, void *arg, uint_t flags)
883 {
884 	_NOTE(ARGUNUSED(flags))
885 
886 	char		*compat;
887 	uint64_t	freq;
888 	uint64_t	cpuid = 0;
889 	int		regbuf[4];
890 	int		len = 0;
891 	cb_arg_t	*cba;
892 	char		*str_arr[STR_ARR_LEN];
893 	char		*curr;
894 	int		idx = 0;
895 
896 	DR_DBG_CPU("new_cpu_node...\n");
897 
898 	cba = (cb_arg_t *)arg;
899 
900 	/*
901 	 * Add 'name' property
902 	 */
903 	if (ndi_prop_update_string(DDI_DEV_T_NONE, new_node,
904 	    "name", "cpu") != DDI_SUCCESS) {
905 		DR_DBG_CPU("new_cpu_node: failed to create 'name' property\n");
906 		return (DDI_WALK_ERROR);
907 	}
908 
909 	/*
910 	 * Add 'compatible' property
911 	 */
912 	if (md_get_prop_data(cba->mdp, cba->cpunode, "compatible",
913 	    (uint8_t **)(&compat), &len)) {
914 		DR_DBG_CPU("new_cpu_node: failed to read 'compatible' property "
915 		    "from MD\n");
916 		return (DDI_WALK_ERROR);
917 	}
918 
919 	DR_DBG_CPU("'compatible' len is %d\n", len);
920 
921 	/* parse the MD string array */
922 	curr = compat;
923 	while (curr < (compat + len)) {
924 
925 		DR_DBG_CPU("adding '%s' to 'compatible' property\n", curr);
926 
927 		str_arr[idx++] = curr;
928 		curr += strlen(curr) + 1;
929 
930 		if (idx == STR_ARR_LEN) {
931 			DR_DBG_CPU("exceeded str_arr len (%d)\n", STR_ARR_LEN);
932 			break;
933 		}
934 	}
935 
936 	if (ndi_prop_update_string_array(DDI_DEV_T_NONE, new_node,
937 	    "compatible", str_arr, idx) != DDI_SUCCESS) {
938 		DR_DBG_CPU("new_cpu_node: failed to create 'compatible' "
939 		    "property\n");
940 		return (DDI_WALK_ERROR);
941 	}
942 
943 	/*
944 	 * Add 'device_type' property
945 	 */
946 	if (ndi_prop_update_string(DDI_DEV_T_NONE, new_node,
947 	    "device_type", "cpu") != DDI_SUCCESS) {
948 		DR_DBG_CPU("new_cpu_node: failed to create 'device_type' "
949 		    "property\n");
950 		return (DDI_WALK_ERROR);
951 	}
952 
953 	/*
954 	 * Add 'clock-frequency' property
955 	 */
956 	if (md_get_prop_val(cba->mdp, cba->cpunode, "clock-frequency", &freq)) {
957 		DR_DBG_CPU("new_cpu_node: failed to read 'clock-frequency' "
958 		    "property from MD\n");
959 		return (DDI_WALK_ERROR);
960 	}
961 
962 	if (ndi_prop_update_int(DDI_DEV_T_NONE, new_node,
963 	    "clock-frequency", freq) != DDI_SUCCESS) {
964 		DR_DBG_CPU("new_cpu_node: failed to create 'clock-frequency' "
965 		    "property\n");
966 		return (DDI_WALK_ERROR);
967 	}
968 
969 	/*
970 	 * Add 'reg' (cpuid) property
971 	 */
972 	if (md_get_prop_val(cba->mdp, cba->cpunode, "id", &cpuid)) {
973 		DR_DBG_CPU("new_cpu_node: failed to read 'id' property "
974 		    "from MD\n");
975 		return (DDI_WALK_ERROR);
976 	}
977 
978 	DR_DBG_CPU("new cpuid=0x%lx\n", cpuid);
979 
980 	bzero(regbuf, 4 * sizeof (int));
981 	regbuf[0] = 0xc0000000 | cpuid;
982 
983 	if (ndi_prop_update_int_array(DDI_DEV_T_NONE, new_node,
984 	    "reg", regbuf, 4) != DDI_SUCCESS) {
985 		DR_DBG_CPU("new_cpu_node: failed to create 'reg' property\n");
986 		return (DDI_WALK_ERROR);
987 	}
988 
989 	cba->dip = new_node;
990 
991 	return (DDI_WALK_TERMINATE);
992 }
993 
994 static int
995 dr_cpu_probe(processorid_t cpuid)
996 {
997 	dev_info_t	*pdip;
998 	dev_info_t	*dip;
999 	devi_branch_t	br;
1000 	md_t		*mdp = NULL;
1001 	int		num_nodes;
1002 	int		rv = 0;
1003 	int		listsz;
1004 	mde_cookie_t	*listp = NULL;
1005 	cb_arg_t	cba;
1006 	mde_cookie_t	cpunode;
1007 
1008 	if ((dip = dr_cpu_find_node(cpuid)) != NULL) {
1009 		/* nothing to do */
1010 		e_ddi_branch_rele(dip);
1011 		return (0);
1012 	}
1013 
1014 	if ((mdp = md_get_handle()) == NULL) {
1015 		DR_DBG_CPU("unable to initialize machine description\n");
1016 		return (-1);
1017 	}
1018 
1019 	num_nodes = md_node_count(mdp);
1020 	ASSERT(num_nodes > 0);
1021 
1022 	listsz = num_nodes * sizeof (mde_cookie_t);
1023 	listp = kmem_zalloc(listsz, KM_SLEEP);
1024 
1025 	cpunode = dr_cpu_find_node_md(cpuid, mdp, listp);
1026 
1027 	if (cpunode == MDE_INVAL_ELEM_COOKIE) {
1028 		rv = EINVAL;
1029 		goto done;
1030 	}
1031 
1032 	/* pass in MD cookie for CPU */
1033 	cba.mdp = mdp;
1034 	cba.cpunode = cpunode;
1035 
1036 	br.arg = (void *)&cba;
1037 	br.type = DEVI_BRANCH_SID;
1038 	br.create.sid_branch_create = new_cpu_node;
1039 	br.devi_branch_callback = NULL;
1040 	pdip = ddi_root_node();
1041 
1042 	if ((rv = e_ddi_branch_create(pdip, &br, NULL, 0))) {
1043 		DR_DBG_CPU("e_ddi_branch_create failed: %d\n", rv);
1044 		rv = -1;
1045 		goto done;
1046 	}
1047 
1048 	DR_DBG_CPU("CPU %d probed\n", cpuid);
1049 
1050 	rv = 0;
1051 
1052 done:
1053 	if (listp)
1054 		kmem_free(listp, listsz);
1055 
1056 	if (mdp)
1057 		(void) md_fini_handle(mdp);
1058 
1059 	return (rv);
1060 }
1061 
1062 static int
1063 dr_cpu_deprobe(processorid_t cpuid)
1064 {
1065 	dev_info_t	*fdip = NULL;
1066 	dev_info_t	*dip;
1067 
1068 	if ((dip = dr_cpu_find_node(cpuid)) == NULL) {
1069 		DR_DBG_CPU("cpuid %d already deprobed\n", cpuid);
1070 		return (0);
1071 	}
1072 
1073 	ASSERT(e_ddi_branch_held(dip));
1074 
1075 	if (e_ddi_branch_destroy(dip, &fdip, 0)) {
1076 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1077 
1078 		/*
1079 		 * If non-NULL, fdip is held and must be released.
1080 		 */
1081 		if (fdip != NULL) {
1082 			(void) ddi_pathname(fdip, path);
1083 			ddi_release_devi(fdip);
1084 		} else {
1085 			(void) ddi_pathname(dip, path);
1086 		}
1087 		cmn_err(CE_NOTE, "node removal failed: %s (%p)",
1088 		    path, (fdip) ? (void *)fdip : (void *)dip);
1089 
1090 		kmem_free(path, MAXPATHLEN);
1091 
1092 		return (-1);
1093 	}
1094 
1095 	DR_DBG_CPU("CPU %d deprobed\n", cpuid);
1096 
1097 	return (0);
1098 }
1099 
1100 typedef struct {
1101 	processorid_t	cpuid;
1102 	dev_info_t	*dip;
1103 } dr_search_arg_t;
1104 
1105 static int
1106 dr_cpu_check_node(dev_info_t *dip, void *arg)
1107 {
1108 	char 		*name;
1109 	processorid_t	cpuid;
1110 	dr_search_arg_t	*sarg = (dr_search_arg_t *)arg;
1111 
1112 	if (dip == ddi_root_node()) {
1113 		return (DDI_WALK_CONTINUE);
1114 	}
1115 
1116 	name = ddi_node_name(dip);
1117 
1118 	if (strcmp(name, "cpu") != 0) {
1119 		return (DDI_WALK_PRUNECHILD);
1120 	}
1121 
1122 	cpuid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1123 	    "reg", -1);
1124 
1125 	cpuid = PROM_CFGHDL_TO_CPUID(cpuid);
1126 
1127 	DR_DBG_CPU("found cpuid=0x%x, looking for 0x%x\n", cpuid, sarg->cpuid);
1128 
1129 	if (cpuid == sarg->cpuid) {
1130 		DR_DBG_CPU("matching node\n");
1131 
1132 		/* matching node must be returned held */
1133 		if (!e_ddi_branch_held(dip))
1134 			e_ddi_branch_hold(dip);
1135 
1136 		sarg->dip = dip;
1137 		return (DDI_WALK_TERMINATE);
1138 	}
1139 
1140 	return (DDI_WALK_CONTINUE);
1141 }
1142 
1143 /*
1144  * Walk the device tree to find the dip corresponding to the cpuid
1145  * passed in. If present, the dip is returned held. The caller must
1146  * release the hold on the dip once it is no longer required. If no
1147  * matching node if found, NULL is returned.
1148  */
1149 static dev_info_t *
1150 dr_cpu_find_node(processorid_t cpuid)
1151 {
1152 	dr_search_arg_t	arg;
1153 
1154 	DR_DBG_CPU("dr_cpu_find_node...\n");
1155 
1156 	arg.cpuid = cpuid;
1157 	arg.dip = NULL;
1158 
1159 	ddi_walk_devs(ddi_root_node(), dr_cpu_check_node, &arg);
1160 
1161 	ASSERT((arg.dip == NULL) || (e_ddi_branch_held(arg.dip)));
1162 
1163 	return ((arg.dip) ? arg.dip : NULL);
1164 }
1165 
1166 /*
1167  * Look up a particular cpuid in the MD. Returns the mde_cookie_t
1168  * representing that CPU if present, and MDE_INVAL_ELEM_COOKIE
1169  * otherwise. It is assumed the scratch array has already been
1170  * allocated so that it can accommodate the worst case scenario,
1171  * every node in the MD.
1172  */
1173 static mde_cookie_t
1174 dr_cpu_find_node_md(processorid_t cpuid, md_t *mdp, mde_cookie_t *listp)
1175 {
1176 	int		idx;
1177 	int		nnodes;
1178 	mde_cookie_t	rootnode;
1179 	uint64_t	cpuid_prop;
1180 	mde_cookie_t	result = MDE_INVAL_ELEM_COOKIE;
1181 
1182 	rootnode = md_root_node(mdp);
1183 	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
1184 
1185 	/*
1186 	 * Scan the DAG for all the CPU nodes
1187 	 */
1188 	nnodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "cpu"),
1189 	    md_find_name(mdp, "fwd"), listp);
1190 
1191 	if (nnodes < 0) {
1192 		DR_DBG_CPU("Scan for CPUs failed\n");
1193 		return (result);
1194 	}
1195 
1196 	DR_DBG_CPU("dr_cpu_find_node_md: found %d CPUs in the MD\n", nnodes);
1197 
1198 	/*
1199 	 * Find the CPU of interest
1200 	 */
1201 	for (idx = 0; idx < nnodes; idx++) {
1202 
1203 		if (md_get_prop_val(mdp, listp[idx], "id", &cpuid_prop)) {
1204 			DR_DBG_CPU("Missing 'id' property for CPU node %d\n",
1205 			    idx);
1206 			break;
1207 		}
1208 
1209 		if (cpuid_prop == cpuid) {
1210 			/* found a match */
1211 			DR_DBG_CPU("dr_cpu_find_node_md: found CPU %d "
1212 			    "in MD\n", cpuid);
1213 			result = listp[idx];
1214 			break;
1215 		}
1216 	}
1217 
1218 	if (result == MDE_INVAL_ELEM_COOKIE) {
1219 		DR_DBG_CPU("CPU %d not in MD\n", cpuid);
1220 	}
1221 
1222 	return (result);
1223 }
1224