/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

/*
 * sun4v CPU DR Module
 */

#include <sys/modctl.h>
#include <sys/processor.h>
#include <sys/cpuvar.h>
#include <sys/cpupart.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/note.h>
#include <sys/sysevent/dr.h>
#include <sys/hypervisor_api.h>
#include <sys/mach_descrip.h>
#include <sys/mdesc.h>
#include <sys/ds.h>
#include <sys/drctl.h>
#include <sys/dr_util.h>
#include <sys/dr_cpu.h>
#include <sys/promif.h>
#include <sys/machsystm.h>


static struct modlmisc modlmisc = {
        &mod_miscops,
        "sun4v CPU DR"
};

static struct modlinkage modlinkage = {
        MODREV_1,
        (void *)&modlmisc,
        NULL
};

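/*
 * Signature shared by dr_cpu_configure() and dr_cpu_unconfigure(),
 * allowing dr_cpu_list_wrk() to dispatch either operation.
 */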
typedef int (*fn_t)(processorid_t, int *, boolean_t);

/*
 * Global DS Handle
 */
static ds_svc_hdl_t ds_handle;

/*
 * Supported DS Capability Versions
 */
static ds_ver_t dr_cpu_vers[] = { { 1, 1 }, { 1, 0 } };
#define DR_CPU_NVERS    (sizeof (dr_cpu_vers) / sizeof (dr_cpu_vers[0]))

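/*
 * Version negotiated during capability registration; set by
 * dr_cpu_reg_handler().
 */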
static ds_ver_t version;

/*
 * DS Capability Description
 */
static ds_capability_t dr_cpu_cap = {
        DR_CPU_DS_ID,           /* svc_id */
        dr_cpu_vers,            /* vers */
        DR_CPU_NVERS            /* nvers */
};

#define DRCPU_VERS_EQ(_maj, _min) \
        ((version.major == (_maj)) && (version.minor == (_min)))

#define DRCPU_VERS_GTEQ(_maj, _min) \
        ((version.major > (_maj)) || \
        ((version.major == (_maj)) && (version.minor >= (_min))))

/*
 * DS Callbacks
 */
static void dr_cpu_reg_handler(ds_cb_arg_t, ds_ver_t *, ds_svc_hdl_t);
static void dr_cpu_unreg_handler(ds_cb_arg_t arg);
static void dr_cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);

/*
 * DS Client Ops Vector
 */
static ds_clnt_ops_t dr_cpu_ops = {
        dr_cpu_reg_handler,     /* ds_reg_cb */
        dr_cpu_unreg_handler,   /* ds_unreg_cb */
        dr_cpu_data_handler,    /* ds_data_cb */
        NULL                    /* cb_arg */
};

/*
 * Operation Results
 *
 * Used internally to gather results while an operation on a
 * list of CPUs is in progress. In particular, it is used to
 * keep track of which CPUs have already failed so that they are
 * not processed further, and the manner in which they failed.
 */
typedef struct {
        uint32_t cpuid;
        uint32_t result;
        uint32_t status;
        char *string;
} dr_cpu_res_t;

#define DR_CPU_MAX_ERR_LEN 64   /* maximum error string length */

/*
 * Internal Functions
 */
static int dr_cpu_init(void);
static int dr_cpu_fini(void);

static int dr_cpu_list_wrk(dr_cpu_hdr_t *, dr_cpu_hdr_t **, int *);
static int dr_cpu_list_status(dr_cpu_hdr_t *, dr_cpu_hdr_t **, int *);

static int dr_cpu_unconfigure(processorid_t, int *status, boolean_t force);
static int dr_cpu_configure(processorid_t, int *status, boolean_t force);
static int dr_cpu_status(processorid_t, int *status);

static void dr_cpu_check_cpus(dr_cpu_hdr_t *req, dr_cpu_res_t *res);
static void dr_cpu_check_psrset(uint32_t *cpuids, dr_cpu_res_t *res, int nres);
static int dr_cpu_check_bound_thr(cpu_t *cp, dr_cpu_res_t *res);

static dr_cpu_res_t *dr_cpu_res_array_init(dr_cpu_hdr_t *, drctl_rsrc_t *, int);
static void dr_cpu_res_array_fini(dr_cpu_res_t *res, int nres);
static size_t dr_cpu_pack_response(dr_cpu_hdr_t *req, dr_cpu_res_t *res,
    dr_cpu_hdr_t **respp);

static int dr_cpu_probe(processorid_t newcpuid);
static int dr_cpu_deprobe(processorid_t cpuid);

static dev_info_t *dr_cpu_find_node(processorid_t cpuid);
static mde_cookie_t dr_cpu_find_node_md(processorid_t, md_t *, mde_cookie_t *);

int
_init(void)
{
        int status;

        /* check that CPU DR is enabled */
        if (dr_is_disabled(DR_TYPE_CPU)) {
                cmn_err(CE_CONT, "!CPU DR is disabled\n");
                return (-1);
        }

        if ((status = dr_cpu_init()) != 0) {
                cmn_err(CE_NOTE, "CPU DR initialization failed");
                return (status);
        }

        if ((status = mod_install(&modlinkage)) != 0) {
                (void) dr_cpu_fini();
        }

        return (status);
}

int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}

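/*
 * Must be set to a non-zero value before the module can be
 * unloaded; see _fini() below.
 */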
int dr_cpu_allow_unload;

int
_fini(void)
{
        int status;

        if (dr_cpu_allow_unload == 0)
                return (EBUSY);

        if ((status = mod_remove(&modlinkage)) == 0) {
                (void) dr_cpu_fini();
        }

        return (status);
}

static int
dr_cpu_init(void)
{
        int rv;

        if ((rv = ds_cap_init(&dr_cpu_cap, &dr_cpu_ops)) != 0) {
                cmn_err(CE_NOTE, "ds_cap_init failed: %d", rv);
                return (-1);
        }

        return (0);
}

static int
dr_cpu_fini(void)
{
        int rv;

        if ((rv = ds_cap_fini(&dr_cpu_cap)) != 0) {
                cmn_err(CE_NOTE, "ds_cap_fini failed: %d", rv);
                return (-1);
        }

        return (0);
}

static void
dr_cpu_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl)
{
        DR_DBG_CPU("reg_handler: arg=0x%p, ver=%d.%d, hdl=0x%lx\n", arg,
            ver->major, ver->minor, hdl);

        version.major = ver->major;
        version.minor = ver->minor;
        ds_handle = hdl;
}

static void
dr_cpu_unreg_handler(ds_cb_arg_t arg)
{
        DR_DBG_CPU("unreg_handler: arg=0x%p\n", arg);

        ds_handle = DS_INVALID_HDL;
}

static void
dr_cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
{
        _NOTE(ARGUNUSED(arg))

        dr_cpu_hdr_t *req = buf;
        dr_cpu_hdr_t err_resp;
        dr_cpu_hdr_t *resp = &err_resp;
        int resp_len = 0;
        int rv;

        /*
         * Sanity check the message
         */
        if (buflen < sizeof (dr_cpu_hdr_t)) {
                DR_DBG_CPU("incoming message short: expected at least %ld "
                    "bytes, received %ld\n", sizeof (dr_cpu_hdr_t), buflen);
                goto done;
        }

        if (req == NULL) {
                DR_DBG_CPU("empty message: expected at least %ld bytes\n",
                    sizeof (dr_cpu_hdr_t));
                goto done;
        }

        DR_DBG_CPU("incoming request:\n");
        DR_DBG_DUMP_MSG(buf, buflen);

        if (req->num_records > NCPU) {
                DR_DBG_CPU("CPU list too long: %d when %d is the maximum\n",
                    req->num_records, NCPU);
                goto done;
        }

        if (req->num_records == 0) {
                DR_DBG_CPU("No CPU specified for operation\n");
                goto done;
        }

        /*
         * Process the command
         */
        switch (req->msg_type) {
        case DR_CPU_CONFIGURE:
        case DR_CPU_UNCONFIGURE:
        case DR_CPU_FORCE_UNCONFIG:
                if ((rv = dr_cpu_list_wrk(req, &resp, &resp_len)) != 0) {
                        DR_DBG_CPU("%s%s failed (%d)\n",
                            (req->msg_type == DR_CPU_CONFIGURE) ?
                            "CPU configure" : "CPU unconfigure",
                            (req->msg_type == DR_CPU_FORCE_UNCONFIG) ?
                            " (forced)" : "", rv);
                }
                break;

        case DR_CPU_STATUS:
                if ((rv = dr_cpu_list_status(req, &resp, &resp_len)) != 0)
                        DR_DBG_CPU("CPU status failed (%d)\n", rv);
                break;

        default:
                cmn_err(CE_NOTE, "unsupported DR operation (%d)",
                    req->msg_type);
                break;
        }

done:
        /* check if an error occurred */
        if (resp == &err_resp) {
                resp->req_num = (req) ? req->req_num : 0;
                resp->msg_type = DR_CPU_ERROR;
                resp->num_records = 0;
                resp_len = sizeof (dr_cpu_hdr_t);
        }

        DR_DBG_CPU("outgoing response:\n");
        DR_DBG_DUMP_MSG(resp, resp_len);

        /* send back the response */
        if (ds_cap_send(ds_handle, resp, resp_len) != 0) {
                DR_DBG_CPU("ds_send failed\n");
        }

        /* free any allocated memory */
        if (DRCPU_VERS_GTEQ(1, 1) || (resp != &err_resp)) {
                DR_DBG_KMEM("%s: free addr %p size %d\n",
                    __func__, (void *)resp, resp_len);
                kmem_free(resp, resp_len);
        }
}

/*
 * Create a response message which consists of a header followed
 * by the error string passed in.
 */
static size_t
dr_cpu_err_resp(dr_cpu_hdr_t *req, dr_cpu_hdr_t **respp, char *msg)
{
        size_t size;
        dr_cpu_hdr_t *resp;

        ASSERT((msg != NULL) && (strlen(msg) > 0));

        size = sizeof (*req) + strlen(msg) + 1;
        resp = kmem_alloc(size, KM_SLEEP);
        DR_DBG_KMEM("%s: alloc addr %p size %ld\n",
            __func__, (void *)resp, size);

        resp->req_num = req->req_num;
        resp->msg_type = DR_CPU_ERROR;
        resp->num_records = 0;

        (void) strcpy((char *)(resp) + sizeof (*resp), msg);

        *respp = resp;

        return (size);
}

/*
 * Common routine to config or unconfig multiple cpus. The unconfig
 * case checks with the OS to see if the removal of cpus will be
 * permitted, but can be overridden by the "force" version of the
 * command. Otherwise, the logic for both cases is identical.
 *
 * Note: Do not modify result buffer or length on error.
 */
static int
dr_cpu_list_wrk(dr_cpu_hdr_t *req, dr_cpu_hdr_t **resp, int *resp_len)
{
        int rv;
        int idx;
        int count;
        fn_t dr_fn;
        int se_hint;
        boolean_t force = B_FALSE;
        uint32_t *req_cpus;
        dr_cpu_res_t *res;
        int drctl_cmd;
        int drctl_flags = 0;
        drctl_rsrc_t *drctl_req;
        size_t drctl_req_len;
        drctl_resp_t *drctl_resp;
        drctl_rsrc_t *drctl_rsrc;
        size_t drctl_resp_len = 0;
        drctl_cookie_t drctl_res_ck;

        ASSERT((req != NULL) && (req->num_records != 0));

        count = req->num_records;

        /*
         * Extract all information that is specific
         * to the various types of operations.
         */
        switch (req->msg_type) {
        case DR_CPU_CONFIGURE:
                dr_fn = dr_cpu_configure;
                drctl_cmd = DRCTL_CPU_CONFIG_REQUEST;
                se_hint = SE_HINT_INSERT;
                break;
        case DR_CPU_FORCE_UNCONFIG:
                drctl_flags = DRCTL_FLAG_FORCE;
                force = B_TRUE;
                /* FALLTHROUGH */
        case DR_CPU_UNCONFIGURE:
                dr_fn = dr_cpu_unconfigure;
                drctl_cmd = DRCTL_CPU_UNCONFIG_REQUEST;
                se_hint = SE_HINT_REMOVE;
                break;
        default:
                /* Programming error if we reach this. */
                cmn_err(CE_NOTE,
                    "%s: bad msg_type %d\n", __func__, req->msg_type);
                ASSERT(0);
                return (-1);
        }

        /* the incoming array of cpuids to operate on */
        req_cpus = DR_CPU_CMD_CPUIDS(req);

        /* allocate drctl request msg based on incoming resource count */
        drctl_req_len = sizeof (drctl_rsrc_t) * count;
        drctl_req = kmem_zalloc(drctl_req_len, KM_SLEEP);
        DR_DBG_KMEM("%s: alloc addr %p size %ld\n",
            __func__, (void *)drctl_req, drctl_req_len);

        /* copy the cpuids for the drctl call from the incoming request msg */
        for (idx = 0; idx < count; idx++)
                drctl_req[idx].res_cpu_id = req_cpus[idx];

        rv = drctl_config_init(drctl_cmd, drctl_flags, drctl_req,
            count, &drctl_resp, &drctl_resp_len, &drctl_res_ck);

        ASSERT((drctl_resp != NULL) && (drctl_resp_len != 0));

        if (rv != 0) {
                DR_DBG_CPU("%s: drctl_config_init "
                    "returned: %d\n", __func__, rv);

                if (DRCPU_VERS_EQ(1, 0)) {
                        rv = -1;
                } else {
                        ASSERT(DRCPU_VERS_GTEQ(1, 1));
                        ASSERT(drctl_resp->resp_type == DRCTL_RESP_ERR);

                        *resp_len = dr_cpu_err_resp(req,
                            resp, drctl_resp->resp_err_msg);
                }

                DR_DBG_KMEM("%s: free addr %p size %ld\n",
                    __func__, (void *)drctl_resp, drctl_resp_len);
                kmem_free(drctl_resp, drctl_resp_len);
                DR_DBG_KMEM("%s: free addr %p size %ld\n",
                    __func__, (void *)drctl_req, drctl_req_len);
                kmem_free(drctl_req, drctl_req_len);

                return (rv);
        }

        ASSERT(drctl_resp->resp_type == DRCTL_RESP_OK);

        drctl_rsrc = drctl_resp->resp_resources;

        /* create the result scratch array */
        res = dr_cpu_res_array_init(req, drctl_rsrc, count);

        /*
         * For unconfigure, check if there are any conditions
         * that will cause the operation to fail. These are
         * performed before the actual unconfigure attempt so
         * that a meaningful error message can be generated.
         */
        if (req->msg_type != DR_CPU_CONFIGURE)
                dr_cpu_check_cpus(req, res);

        /* perform the specified operation on each of the CPUs */
        for (idx = 0; idx < count; idx++) {
                int result;
                int status;

                /*
                 * If no action will be taken against the current
                 * CPU, update the drctl resource information to
                 * ensure that it gets recovered properly during
                 * the drctl fini() call.
                 */
                if (res[idx].result != DR_CPU_RES_OK) {
                        drctl_req[idx].status = DRCTL_STATUS_CONFIG_FAILURE;
                        continue;
                }

                /* call the function to perform the actual operation */
                result = (*dr_fn)(req_cpus[idx], &status, force);

                /* save off results of the operation */
                res[idx].result = result;
                res[idx].status = status;

                /* save result for drctl fini() reusing init() msg memory */
                drctl_req[idx].status = (result != DR_CPU_RES_OK) ?
                    DRCTL_STATUS_CONFIG_FAILURE : DRCTL_STATUS_CONFIG_SUCCESS;

                DR_DBG_CPU("%s: cpuid %d status %d result %d off '%s'\n",
                    __func__, req_cpus[idx], drctl_req[idx].status, result,
                    (res[idx].string) ? res[idx].string : "");
        }

        if ((rv = drctl_config_fini(&drctl_res_ck, drctl_req, count)) != 0)
                DR_DBG_CPU("%s: drctl_config_fini "
                    "returned: %d\n", __func__, rv);

        /*
         * Operation completed without any fatal errors.
         * Pack the response for transmission.
         */
        *resp_len = dr_cpu_pack_response(req, res, resp);

        /* notify interested parties about the operation */
        dr_generate_event(DR_TYPE_CPU, se_hint);

        /*
         * Deallocate any scratch memory.
         */
        DR_DBG_KMEM("%s: free addr %p size %ld\n",
            __func__, (void *)drctl_resp, drctl_resp_len);
        kmem_free(drctl_resp, drctl_resp_len);
        DR_DBG_KMEM("%s: free addr %p size %ld\n",
            __func__, (void *)drctl_req, drctl_req_len);
        kmem_free(drctl_req, drctl_req_len);

        dr_cpu_res_array_fini(res, count);

        return (0);
}

/*
 * Allocate and initialize a result array based on the initial
 * drctl operation. A valid result array is always returned.
 */
static dr_cpu_res_t *
dr_cpu_res_array_init(dr_cpu_hdr_t *req, drctl_rsrc_t *rsrc, int nrsrc)
{
        int idx;
        dr_cpu_res_t *res;
        char *err_str;
        size_t err_len;

        /* allocate zero filled buffer to initialize fields */
        res = kmem_zalloc(nrsrc * sizeof (dr_cpu_res_t), KM_SLEEP);
        DR_DBG_KMEM("%s: alloc addr %p size %ld\n",
            __func__, (void *)res, nrsrc * sizeof (dr_cpu_res_t));

        /*
         * Fill in the result information for each resource.
         */
        for (idx = 0; idx < nrsrc; idx++) {
                res[idx].cpuid = rsrc[idx].res_cpu_id;
                res[idx].result = DR_CPU_RES_OK;

                if (rsrc[idx].status == DRCTL_STATUS_ALLOW)
                        continue;

                /*
                 * Update the state information for this CPU.
                 */
                res[idx].result = DR_CPU_RES_BLOCKED;
                res[idx].status = (req->msg_type == DR_CPU_CONFIGURE) ?
                    DR_CPU_STAT_UNCONFIGURED : DR_CPU_STAT_CONFIGURED;

                /*
                 * If an error string exists, copy it out of the
                 * message buffer. This eliminates any dependency
                 * on the memory allocated for the message buffer
                 * itself.
                 */
                if (rsrc[idx].offset != 0) {
                        err_str = (char *)rsrc + rsrc[idx].offset;
                        err_len = strlen(err_str) + 1;

                        res[idx].string = kmem_alloc(err_len, KM_SLEEP);
                        DR_DBG_KMEM("%s: alloc addr %p size %ld\n",
                            __func__, (void *)(res[idx].string), err_len);
                        bcopy(err_str, res[idx].string, err_len);
                }
        }

        return (res);
}

static void
dr_cpu_res_array_fini(dr_cpu_res_t *res, int nres)
{
        int idx;
        size_t str_len;

        for (idx = 0; idx < nres; idx++) {
                /* deallocate the error string if present */
                if (res[idx].string) {
                        str_len = strlen(res[idx].string) + 1;
                        DR_DBG_KMEM("%s: free addr %p size %ld\n",
                            __func__, (void *)(res[idx].string), str_len);
                        kmem_free(res[idx].string, str_len);
                }
        }

        /* deallocate the result array itself */
        DR_DBG_KMEM("%s: free addr %p size %ld\n",
            __func__, (void *)res, sizeof (dr_cpu_res_t) * nres);
        kmem_free(res, sizeof (dr_cpu_res_t) * nres);
}

/*
 * Allocate and pack a response message for transmission based
 * on the specified result array. A valid response message and
 * valid size information is always returned.
 */
static size_t
dr_cpu_pack_response(dr_cpu_hdr_t *req, dr_cpu_res_t *res, dr_cpu_hdr_t **respp)
{
        int idx;
        dr_cpu_hdr_t *resp;
        dr_cpu_stat_t *resp_stat;
        size_t resp_len;
        uint32_t curr_off;
        caddr_t curr_str;
        size_t str_len;
        size_t stat_len;
        int nstat = req->num_records;

        /*
         * Calculate the size of the response message
         * and allocate an appropriately sized buffer.
         */
        resp_len = 0;

        /* add the header size */
        resp_len += sizeof (dr_cpu_hdr_t);

        /* add the stat array size */
        stat_len = sizeof (dr_cpu_stat_t) * nstat;
        resp_len += stat_len;

        /* add the size of any error strings */
        for (idx = 0; idx < nstat; idx++) {
                if (res[idx].string != NULL) {
                        resp_len += strlen(res[idx].string) + 1;
                }
        }

        /* allocate the message buffer */
        resp = kmem_zalloc(resp_len, KM_SLEEP);
        DR_DBG_KMEM("%s: alloc addr %p size %ld\n",
            __func__, (void *)resp, resp_len);

        /*
         * Fill in the header information.
         */
        resp->req_num = req->req_num;
        resp->msg_type = DR_CPU_OK;
        resp->num_records = nstat;

        /*
         * Fill in the stat information.
         */
        resp_stat = DR_CPU_RESP_STATS(resp);

        /* string offsets start immediately after stat array */
        curr_off = sizeof (dr_cpu_hdr_t) + stat_len;
        curr_str = (char *)resp_stat + stat_len;

        for (idx = 0; idx < nstat; idx++) {
                resp_stat[idx].cpuid = res[idx].cpuid;
                resp_stat[idx].result = res[idx].result;
                resp_stat[idx].status = res[idx].status;

                if (res[idx].string != NULL) {
                        /* copy over the error string */
                        str_len = strlen(res[idx].string) + 1;
                        bcopy(res[idx].string, curr_str, str_len);
                        resp_stat[idx].string_off = curr_off;

                        curr_off += str_len;
                        curr_str += str_len;
                }
        }

        /* buffer should be exactly filled */
        ASSERT(curr_off == resp_len);

        *respp = resp;
        return (resp_len);
}

/*
 * Check for conditions that will prevent a CPU from being offlined.
 * This provides the opportunity to generate useful information to
 * help diagnose the failure rather than letting the offline attempt
 * fail in a more generic way.
 */
static void
dr_cpu_check_cpus(dr_cpu_hdr_t *req, dr_cpu_res_t *res)
{
        int idx;
        cpu_t *cp;
        uint32_t *cpuids;

        ASSERT((req->msg_type == DR_CPU_UNCONFIGURE) ||
            (req->msg_type == DR_CPU_FORCE_UNCONFIG));

        DR_DBG_CPU("dr_cpu_check_cpus...\n");

        /* array of cpuids starts just after the header */
        cpuids = DR_CPU_CMD_CPUIDS(req);

        mutex_enter(&cpu_lock);

        /*
         * Always check processor set membership first. The
         * last CPU in a processor set will fail to offline
         * even if the operation is forced, so any failures
         * should always be reported.
         */
        dr_cpu_check_psrset(cpuids, res, req->num_records);

        /* process each cpu that is part of the request */
        for (idx = 0; idx < req->num_records; idx++) {

                /* nothing to check if the CPU has already failed */
                if (res[idx].result != DR_CPU_RES_OK)
                        continue;

                if ((cp = cpu_get(cpuids[idx])) == NULL)
                        continue;

                /*
                 * Only check if there are bound threads if the
                 * operation is not a forced unconfigure. In a
                 * forced request, threads are automatically
                 * unbound before they are offlined.
                 */
                if (req->msg_type == DR_CPU_UNCONFIGURE) {
                        /*
                         * The return value is only interesting if other
                         * checks are added to this loop and a decision
                         * is needed on whether to continue checking.
                         */
                        (void) dr_cpu_check_bound_thr(cp, &res[idx]);
                }
        }

        mutex_exit(&cpu_lock);
}

/*
 * Examine the processor set configuration for the specified
 * CPUs and see if the unconfigure operation would result in
 * trying to remove the last CPU in any processor set.
 */
static void
dr_cpu_check_psrset(uint32_t *cpuids, dr_cpu_res_t *res, int nres)
{
        int cpu_idx;
        int set_idx;
        cpu_t *cp;
        cpupart_t *cpp;
        char err_str[DR_CPU_MAX_ERR_LEN];
        size_t err_len;
        struct {
                cpupart_t *cpp;
                int ncpus;
        } *psrset;

        ASSERT(MUTEX_HELD(&cpu_lock));

        /*
         * Allocate a scratch array to count the CPUs in
         * the various processor sets. A CPU always belongs
         * to exactly one processor set, so by definition,
         * the scratch array never needs to be larger than
         * the number of CPUs.
         */
        psrset = kmem_zalloc(sizeof (*psrset) * nres, KM_SLEEP);
        DR_DBG_KMEM("%s: alloc addr %p size %ld\n",
            __func__, (void *)psrset, sizeof (*psrset) * nres);

        for (cpu_idx = 0; cpu_idx < nres; cpu_idx++) {

                /* skip any CPUs that have already failed */
                if (res[cpu_idx].result != DR_CPU_RES_OK)
                        continue;

                if ((cp = cpu_get(cpuids[cpu_idx])) == NULL)
                        continue;

                cpp = cp->cpu_part;

                /* lookup the set this CPU belongs to */
                for (set_idx = 0; set_idx < nres; set_idx++) {

                        /* matching set found */
                        if (cpp == psrset[set_idx].cpp)
                                break;

                        /* set not found, start a new entry */
                        if (psrset[set_idx].cpp == NULL) {
                                psrset[set_idx].cpp = cpp;
                                psrset[set_idx].ncpus = cpp->cp_ncpus;
                                break;
                        }
                }

                ASSERT(set_idx != nres);

                /*
                 * Remove the current CPU from the set total but only
                 * generate an error for the last CPU. The correct CPU
                 * will get the error because the unconfigure attempts
                 * will occur in the same order in which the CPUs are
                 * examined in this loop. The cp_ncpus field of a
                 * cpupart_t counts only online cpus, so it is safe
                 * to remove an offline cpu without testing ncpus.
                 */
                if (cpu_is_offline(cp))
                        continue;

                if (--psrset[set_idx].ncpus == 0) {
                        /*
                         * Fill in the various pieces of information
                         * to report that the operation will fail.
                         */
                        res[cpu_idx].result = DR_CPU_RES_BLOCKED;
                        res[cpu_idx].status = DR_CPU_STAT_CONFIGURED;

                        (void) snprintf(err_str, DR_CPU_MAX_ERR_LEN,
                            "last online cpu in processor set %d", cpp->cp_id);

                        err_len = strlen(err_str) + 1;

                        res[cpu_idx].string = kmem_alloc(err_len, KM_SLEEP);
                        DR_DBG_KMEM("%s: alloc addr %p size %ld\n",
                            __func__, (void *)(res[cpu_idx].string), err_len);
                        bcopy(err_str, res[cpu_idx].string, err_len);

                        DR_DBG_CPU("cpu %d: %s\n", cpuids[cpu_idx], err_str);
                }
        }

        DR_DBG_KMEM("%s: free addr %p size %ld\n",
            __func__, (void *)psrset, sizeof (*psrset) * nres);
        kmem_free(psrset, sizeof (*psrset) * nres);
}

/*
 * Check if any threads are bound to the specified CPU. If the
 * condition is true, DR_CPU_RES_BLOCKED is returned and an error
 * string is generated and placed in the specified result structure.
 * Otherwise, DR_CPU_RES_OK is returned.
 */
static int
dr_cpu_check_bound_thr(cpu_t *cp, dr_cpu_res_t *res)
{
        int nbound;
        proc_t *pp;
        kthread_t *tp;
        char err_str[DR_CPU_MAX_ERR_LEN];
        size_t err_len;

        /*
         * Error string allocation makes an assumption
         * that no blocking condition has been identified.
         */
        ASSERT(res->result == DR_CPU_RES_OK);
        ASSERT(res->string == NULL);

        ASSERT(MUTEX_HELD(&cpu_lock));

        mutex_enter(&pidlock);

        nbound = 0;

        /*
         * Walk the active processes, checking if each
         * thread belonging to the process is bound.
         */
        for (pp = practive; (pp != NULL) && (nbound <= 1); pp = pp->p_next) {
                mutex_enter(&pp->p_lock);

                tp = pp->p_tlist;

                if ((tp == NULL) || (pp->p_flag & SSYS)) {
                        mutex_exit(&pp->p_lock);
                        continue;
                }

                do {
                        if (tp->t_bind_cpu != cp->cpu_id)
                                continue;

                        /*
                         * Update the running total of bound
                         * threads. Continue the search until
                         * it can be determined if more than
                         * one thread is bound to the CPU.
                         */
                        if (++nbound > 1)
                                break;

                } while ((tp = tp->t_forw) != pp->p_tlist);

                mutex_exit(&pp->p_lock);
        }

        mutex_exit(&pidlock);

        if (nbound) {
                /*
                 * Threads are bound to the CPU. Fill in
                 * various pieces of information to report
                 * that the operation will fail.
                 */
                res->result = DR_CPU_RES_BLOCKED;
                res->status = DR_CPU_STAT_CONFIGURED;

                (void) snprintf(err_str, DR_CPU_MAX_ERR_LEN, "cpu has bound "
                    "thread%s", (nbound > 1) ? "s" : "");

                err_len = strlen(err_str) + 1;

                res->string = kmem_alloc(err_len, KM_SLEEP);
                DR_DBG_KMEM("%s: alloc addr %p size %ld\n",
                    __func__, (void *)(res->string), err_len);
                bcopy(err_str, res->string, err_len);

                DR_DBG_CPU("cpu %d: %s\n", cp->cpu_id, err_str);
        }

        return (res->result);
}

/*
 * Do not modify result buffer or length on error.
 */
static int
dr_cpu_list_status(dr_cpu_hdr_t *req, dr_cpu_hdr_t **resp, int *resp_len)
{
        int idx;
        int result;
        int status;
        int rlen;
        uint32_t *cpuids;
        dr_cpu_hdr_t *rp;
        dr_cpu_stat_t *stat;
        md_t *mdp = NULL;
        int num_nodes;
        int listsz;
        mde_cookie_t *listp = NULL;
        mde_cookie_t cpunode;
        boolean_t walk_md = B_FALSE;

        /* the incoming array of cpuids to configure */
        cpuids = DR_CPU_CMD_CPUIDS(req);

        /* allocate a response message */
        rlen = sizeof (dr_cpu_hdr_t);
        rlen += req->num_records * sizeof (dr_cpu_stat_t);
        rp = kmem_zalloc(rlen, KM_SLEEP);
        DR_DBG_KMEM("%s: alloc addr %p size %d\n", __func__, (void *)rp, rlen);

        /* fill in the known data */
        rp->req_num = req->req_num;
        rp->msg_type = DR_CPU_STATUS;
        rp->num_records = req->num_records;

        /* stat array for the response */
        stat = DR_CPU_RESP_STATS(rp);

        /* get the status for each of the CPUs */
        for (idx = 0; idx < req->num_records; idx++) {

                result = dr_cpu_status(cpuids[idx], &status);

                if (result == DR_CPU_RES_FAILURE)
                        walk_md = B_TRUE;

                /* save off results of the status */
                stat[idx].cpuid = cpuids[idx];
                stat[idx].result = result;
                stat[idx].status = status;
        }

        if (walk_md == B_FALSE)
                goto done;

        /*
         * At least one of the cpus did not have a CPU
         * structure. So, consult the MD to determine if
         * they are present.
         */

        if ((mdp = md_get_handle()) == NULL) {
                DR_DBG_CPU("unable to initialize MD\n");
                goto done;
        }

        num_nodes = md_node_count(mdp);
        ASSERT(num_nodes > 0);

        listsz = num_nodes * sizeof (mde_cookie_t);
        listp = kmem_zalloc(listsz, KM_SLEEP);
        DR_DBG_KMEM("%s: alloc addr %p size %d\n",
            __func__, (void *)listp, listsz);

        for (idx = 0; idx < req->num_records; idx++) {

                if (stat[idx].result != DR_CPU_RES_FAILURE)
                        continue;

                /* check the MD for the current cpuid */
                cpunode = dr_cpu_find_node_md(stat[idx].cpuid, mdp, listp);

                stat[idx].result = DR_CPU_RES_OK;

                if (cpunode == MDE_INVAL_ELEM_COOKIE) {
                        stat[idx].status = DR_CPU_STAT_NOT_PRESENT;
                } else {
                        stat[idx].status = DR_CPU_STAT_UNCONFIGURED;
                }
        }

        DR_DBG_KMEM("%s: free addr %p size %d\n",
            __func__, (void *)listp, listsz);
        kmem_free(listp, listsz);

        (void) md_fini_handle(mdp);

done:
        *resp = rp;
        *resp_len = rlen;

        return (0);
}

static int
dr_cpu_configure(processorid_t cpuid, int *status, boolean_t force)
{
        _NOTE(ARGUNUSED(force))
        struct cpu *cp;
        int rv = 0;

        DR_DBG_CPU("dr_cpu_configure...\n");

        /*
         * Build device tree node for the CPU
         */
        if ((rv = dr_cpu_probe(cpuid)) != 0) {
                DR_DBG_CPU("failed to probe CPU %d (%d)\n", cpuid, rv);
                if (rv == EINVAL) {
                        *status = DR_CPU_STAT_NOT_PRESENT;
                        return (DR_CPU_RES_NOT_IN_MD);
                }
                *status = DR_CPU_STAT_UNCONFIGURED;
                return (DR_CPU_RES_FAILURE);
        }

        mutex_enter(&cpu_lock);

        /*
         * Configure the CPU
         */
        if ((cp = cpu_get(cpuid)) == NULL) {

                if ((rv = cpu_configure(cpuid)) != 0) {
                        DR_DBG_CPU("failed to configure CPU %d (%d)\n",
                            cpuid, rv);
                        rv = DR_CPU_RES_FAILURE;
                        *status = DR_CPU_STAT_UNCONFIGURED;
                        goto done;
                }

                DR_DBG_CPU("CPU %d configured\n", cpuid);

                /* CPU struct should exist now */
                cp = cpu_get(cpuid);
        }

        ASSERT(cp);

        /*
         * Power on the CPU. In sun4v, this brings the stopped
         * CPU into the guest from the Hypervisor.
         */
        if (cpu_is_poweredoff(cp)) {

                if ((rv = cpu_poweron(cp)) != 0) {
                        DR_DBG_CPU("failed to power on CPU %d (%d)\n",
                            cpuid, rv);
                        rv = DR_CPU_RES_FAILURE;
                        *status = DR_CPU_STAT_UNCONFIGURED;
                        goto done;
                }

                DR_DBG_CPU("CPU %d powered on\n", cpuid);
        }

        /*
         * Online the CPU
         */
        if (cpu_is_offline(cp)) {

                if ((rv = cpu_online(cp, 0)) != 0) {
                        DR_DBG_CPU("failed to online CPU %d (%d)\n",
                            cpuid, rv);
                        rv = DR_CPU_RES_FAILURE;
                        /* offline is still configured */
                        *status = DR_CPU_STAT_CONFIGURED;
                        goto done;
                }

                DR_DBG_CPU("CPU %d online\n", cpuid);
        }

        rv = DR_CPU_RES_OK;
        *status = DR_CPU_STAT_CONFIGURED;

done:
        mutex_exit(&cpu_lock);

        return (rv);
}

static int
dr_cpu_unconfigure(processorid_t cpuid, int *status, boolean_t force)
{
        struct cpu *cp;
        int rv = 0;
        int cpu_flags;

        DR_DBG_CPU("dr_cpu_unconfigure%s...\n", (force) ? " (force)" : "");

        mutex_enter(&cpu_lock);

        cp = cpu_get(cpuid);

        if (cp == NULL) {
                /*
                 * The OS CPU structures are already torn down,
                 * so proceed to deprobe the device tree to make
                 * sure it is up to date.
                 */
                goto deprobe;
        }

        ASSERT(cp->cpu_id == cpuid);

        /*
         * Offline the CPU
         */
        if (cpu_is_active(cp)) {

                /* set the force flag correctly */
                cpu_flags = (force) ? CPU_FORCED : 0;

                /*
                 * Before we take the CPU offline, we first enable interrupts.
                 * Otherwise, cpu_offline() might reject the request. Note:
                 * if the offline subsequently fails, the target cpu will be
                 * left with interrupts enabled. This is consistent with the
                 * behavior of psradm(8) and p_online(2).
                 */
                cpu_intr_enable(cp);

                if ((rv = cpu_offline(cp, cpu_flags)) != 0) {
                        DR_DBG_CPU("failed to offline CPU %d (%d)\n",
                            cpuid, rv);

                        rv = DR_CPU_RES_FAILURE;
                        *status = DR_CPU_STAT_CONFIGURED;
                        mutex_exit(&cpu_lock);
                        return (rv);
                }

                DR_DBG_CPU("CPU %d offline\n", cpuid);
        }

        /*
         * Power off the CPU. In sun4v, this puts the running
         * CPU into the stopped state in the Hypervisor.
         */
        if (!cpu_is_poweredoff(cp)) {

                if ((rv = cpu_poweroff(cp)) != 0) {
                        DR_DBG_CPU("failed to power off CPU %d (%d)\n",
                            cpuid, rv);
                        rv = DR_CPU_RES_FAILURE;
                        *status = DR_CPU_STAT_CONFIGURED;
                        mutex_exit(&cpu_lock);
                        return (rv);
                }

                DR_DBG_CPU("CPU %d powered off\n", cpuid);
        }

        /*
         * Unconfigure the CPU
         */
        if ((rv = cpu_unconfigure(cpuid)) != 0) {
                DR_DBG_CPU("failed to unconfigure CPU %d (%d)\n", cpuid, rv);
                rv = DR_CPU_RES_FAILURE;
                *status = DR_CPU_STAT_UNCONFIGURED;
                mutex_exit(&cpu_lock);
                return (rv);
        }

        DR_DBG_CPU("CPU %d unconfigured\n", cpuid);

deprobe:
        mutex_exit(&cpu_lock);
        /*
         * Tear down device tree.
         */
        if ((rv = dr_cpu_deprobe(cpuid)) != 0) {
                DR_DBG_CPU("failed to deprobe CPU %d (%d)\n", cpuid, rv);
                rv = DR_CPU_RES_FAILURE;
                *status = DR_CPU_STAT_UNCONFIGURED;
                return (rv);
        }

        rv = DR_CPU_RES_OK;
        *status = DR_CPU_STAT_UNCONFIGURED;

        return (rv);
}

/*
 * Determine the state of a CPU. If the CPU structure is not present,
 * this routine does not attempt to determine whether or not the CPU
 * is in the MD. It is more efficient to do that at the higher level
 * for all CPUs, since it may not even be necessary to search the MD
 * if all the CPUs are accounted for. Returns DR_CPU_RES_OK if the
 * CPU structure is present, and DR_CPU_RES_FAILURE otherwise as a
 * signal that an MD walk is necessary.
 */
static int
dr_cpu_status(processorid_t cpuid, int *status)
{
        int rv;
        struct cpu *cp;

        DR_DBG_CPU("dr_cpu_status...\n");

        mutex_enter(&cpu_lock);

        if ((cp = cpu_get(cpuid)) == NULL) {
                /* need to check if cpu is in the MD */
                rv = DR_CPU_RES_FAILURE;
                goto done;
        }

        if (cpu_is_poweredoff(cp)) {
                /*
                 * The CPU is powered off, so it is considered
                 * unconfigured from the service entity point of
                 * view. The CPU is not available to the system
                 * and intervention by the service entity would
                 * be required to change that.
                 */
                *status = DR_CPU_STAT_UNCONFIGURED;
        } else {
                /*
                 * The CPU is powered on, so it is considered
                 * configured from the service entity point of
                 * view. It is available for use by the system
                 * and service entities are not concerned about
                 * the operational status (offline, online, etc.)
                 * of the CPU in terms of DR.
                 */
                *status = DR_CPU_STAT_CONFIGURED;
        }

        rv = DR_CPU_RES_OK;

done:
        mutex_exit(&cpu_lock);

        return (rv);
}

typedef struct {
        md_t *mdp;
        mde_cookie_t cpunode;
        dev_info_t *dip;
} cb_arg_t;

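/* maximum number of strings parsed from the MD 'compatible' property */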
#define STR_ARR_LEN 5

static int
new_cpu_node(dev_info_t *new_node, void *arg, uint_t flags)
{
        _NOTE(ARGUNUSED(flags))

        char *compat;
        uint64_t freq;
        uint64_t cpuid = 0;
        int regbuf[4];
        int len = 0;
        cb_arg_t *cba;
        char *str_arr[STR_ARR_LEN];
        char *curr;
        int idx = 0;

        DR_DBG_CPU("new_cpu_node...\n");

        cba = (cb_arg_t *)arg;

        /*
         * Add 'name' property
         */
        if (ndi_prop_update_string(DDI_DEV_T_NONE, new_node,
            "name", "cpu") != DDI_SUCCESS) {
                DR_DBG_CPU("new_cpu_node: failed to create 'name' property\n");
                return (DDI_WALK_ERROR);
        }

        /*
         * Add 'compatible' property
         */
        if (md_get_prop_data(cba->mdp, cba->cpunode, "compatible",
            (uint8_t **)(&compat), &len)) {
                DR_DBG_CPU("new_cpu_node: failed to read 'compatible' "
                    "property from MD\n");
                return (DDI_WALK_ERROR);
        }

        DR_DBG_CPU("'compatible' len is %d\n", len);

        /* parse the MD string array */
        curr = compat;
        while (curr < (compat + len)) {

                DR_DBG_CPU("adding '%s' to 'compatible' property\n", curr);

                str_arr[idx++] = curr;
                curr += strlen(curr) + 1;

                if (idx == STR_ARR_LEN) {
                        DR_DBG_CPU("exceeded str_arr len (%d)\n", STR_ARR_LEN);
                        break;
                }
        }

        if (ndi_prop_update_string_array(DDI_DEV_T_NONE, new_node,
            "compatible", str_arr, idx) != DDI_SUCCESS) {
                DR_DBG_CPU("new_cpu_node: failed to create 'compatible' "
                    "property\n");
                return (DDI_WALK_ERROR);
        }

        /*
         * Add 'device_type' property
         */
        if (ndi_prop_update_string(DDI_DEV_T_NONE, new_node,
            "device_type", "cpu") != DDI_SUCCESS) {
                DR_DBG_CPU("new_cpu_node: failed to create 'device_type' "
                    "property\n");
                return (DDI_WALK_ERROR);
        }

        /*
         * Add 'clock-frequency' property
         */
        if (md_get_prop_val(cba->mdp, cba->cpunode, "clock-frequency", &freq)) {
                DR_DBG_CPU("new_cpu_node: failed to read 'clock-frequency' "
                    "property from MD\n");
                return (DDI_WALK_ERROR);
        }

        if (ndi_prop_update_int(DDI_DEV_T_NONE, new_node,
            "clock-frequency", freq) != DDI_SUCCESS) {
                DR_DBG_CPU("new_cpu_node: failed to create 'clock-frequency' "
                    "property\n");
                return (DDI_WALK_ERROR);
        }

        /*
         * Add 'reg' (cpuid) property
         */
        if (md_get_prop_val(cba->mdp, cba->cpunode, "id", &cpuid)) {
                DR_DBG_CPU("new_cpu_node: failed to read 'id' property "
                    "from MD\n");
                return (DDI_WALK_ERROR);
        }

        DR_DBG_CPU("new cpuid=0x%lx\n", cpuid);

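        /*
         * The first word of 'reg' carries the cpuid encoded as a
         * PROM config handle; dr_cpu_check_node() recovers the
         * cpuid from it with PROM_CFGHDL_TO_CPUID().
         */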
        bzero(regbuf, 4 * sizeof (int));
        regbuf[0] = 0xc0000000 | cpuid;

        if (ndi_prop_update_int_array(DDI_DEV_T_NONE, new_node,
            "reg", regbuf, 4) != DDI_SUCCESS) {
                DR_DBG_CPU("new_cpu_node: failed to create 'reg' property\n");
                return (DDI_WALK_ERROR);
        }

        cba->dip = new_node;

        return (DDI_WALK_TERMINATE);
}

static int
dr_cpu_probe(processorid_t cpuid)
{
        dev_info_t *pdip;
        dev_info_t *dip;
        devi_branch_t br;
        md_t *mdp = NULL;
        int num_nodes;
        int rv = 0;
        int listsz;
        mde_cookie_t *listp = NULL;
        cb_arg_t cba;
        mde_cookie_t cpunode;

        if ((dip = dr_cpu_find_node(cpuid)) != NULL) {
                /* nothing to do */
                e_ddi_branch_rele(dip);
                return (0);
        }

        if ((mdp = md_get_handle()) == NULL) {
                DR_DBG_CPU("unable to initialize machine description\n");
                return (-1);
        }

        num_nodes = md_node_count(mdp);
        ASSERT(num_nodes > 0);

        listsz = num_nodes * sizeof (mde_cookie_t);
        listp = kmem_zalloc(listsz, KM_SLEEP);
        DR_DBG_KMEM("%s: alloc addr %p size %d\n",
            __func__, (void *)listp, listsz);

        cpunode = dr_cpu_find_node_md(cpuid, mdp, listp);

        if (cpunode == MDE_INVAL_ELEM_COOKIE) {
                rv = EINVAL;
                goto done;
        }

        /* pass in MD cookie for CPU */
        cba.mdp = mdp;
        cba.cpunode = cpunode;

        br.arg = (void *)&cba;
        br.type = DEVI_BRANCH_SID;
        br.create.sid_branch_create = new_cpu_node;
        br.devi_branch_callback = NULL;
        pdip = ddi_root_node();

        if ((rv = e_ddi_branch_create(pdip, &br, NULL, 0))) {
                DR_DBG_CPU("e_ddi_branch_create failed: %d\n", rv);
                rv = -1;
                goto done;
        }

        DR_DBG_CPU("CPU %d probed\n", cpuid);

        rv = 0;

done:
        if (listp) {
                DR_DBG_KMEM("%s: free addr %p size %d\n",
                    __func__, (void *)listp, listsz);
                kmem_free(listp, listsz);
        }

        if (mdp)
                (void) md_fini_handle(mdp);

        return (rv);
}

static int
dr_cpu_deprobe(processorid_t cpuid)
{
        dev_info_t *fdip = NULL;
        dev_info_t *dip;

        if ((dip = dr_cpu_find_node(cpuid)) == NULL) {
                DR_DBG_CPU("cpuid %d already deprobed\n", cpuid);
                return (0);
        }

        ASSERT(e_ddi_branch_held(dip));

        if (e_ddi_branch_destroy(dip, &fdip, 0)) {
                char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);

                DR_DBG_KMEM("%s: alloc addr %p size %d\n",
                    __func__, (void *)path, MAXPATHLEN);
                /*
                 * If non-NULL, fdip is held and must be released.
                 */
                if (fdip != NULL) {
                        (void) ddi_pathname(fdip, path);
                        ddi_release_devi(fdip);
                } else {
                        (void) ddi_pathname(dip, path);
                }
                cmn_err(CE_NOTE, "node removal failed: %s (%p)",
                    path, (fdip) ? (void *)fdip : (void *)dip);

                DR_DBG_KMEM("%s: free addr %p size %d\n",
                    __func__, (void *)path, MAXPATHLEN);
                kmem_free(path, MAXPATHLEN);

                return (-1);
        }

        DR_DBG_CPU("CPU %d deprobed\n", cpuid);

        return (0);
}

typedef struct {
        processorid_t cpuid;
        dev_info_t *dip;
} dr_search_arg_t;

static int
dr_cpu_check_node(dev_info_t *dip, void *arg)
{
        char *name;
        processorid_t cpuid;
        dr_search_arg_t *sarg = (dr_search_arg_t *)arg;

        if (dip == ddi_root_node()) {
                return (DDI_WALK_CONTINUE);
        }

        name = ddi_node_name(dip);

        if (strcmp(name, "cpu") != 0) {
                return (DDI_WALK_PRUNECHILD);
        }

        cpuid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
            "reg", -1);

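        /* the 'reg' property encodes the cpuid as a PROM config handle */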
        cpuid = PROM_CFGHDL_TO_CPUID(cpuid);

        DR_DBG_CPU("found cpuid=0x%x, looking for 0x%x\n", cpuid, sarg->cpuid);

        if (cpuid == sarg->cpuid) {
                DR_DBG_CPU("matching node\n");

                /* matching node must be returned held */
                if (!e_ddi_branch_held(dip))
                        e_ddi_branch_hold(dip);

                sarg->dip = dip;
                return (DDI_WALK_TERMINATE);
        }

        return (DDI_WALK_CONTINUE);
}

/*
 * Walk the device tree to find the dip corresponding to the cpuid
 * passed in. If present, the dip is returned held. The caller must
 * release the hold on the dip once it is no longer required. If no
 * matching node is found, NULL is returned.
 */
static dev_info_t *
dr_cpu_find_node(processorid_t cpuid)
{
        dr_search_arg_t arg;

        DR_DBG_CPU("dr_cpu_find_node...\n");

        arg.cpuid = cpuid;
        arg.dip = NULL;

        ddi_walk_devs(ddi_root_node(), dr_cpu_check_node, &arg);

        ASSERT((arg.dip == NULL) || (e_ddi_branch_held(arg.dip)));

        return ((arg.dip) ? arg.dip : NULL);
}

/*
 * Look up a particular cpuid in the MD. Returns the mde_cookie_t
 * representing that CPU if present, and MDE_INVAL_ELEM_COOKIE
 * otherwise. It is assumed the scratch array has already been
 * allocated so that it can accommodate the worst case scenario,
 * every node in the MD.
 */
static mde_cookie_t
dr_cpu_find_node_md(processorid_t cpuid, md_t *mdp, mde_cookie_t *listp)
{
        int idx;
        int nnodes;
        mde_cookie_t rootnode;
        uint64_t cpuid_prop;
        mde_cookie_t result = MDE_INVAL_ELEM_COOKIE;

        rootnode = md_root_node(mdp);
        ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);

        /*
         * Scan the DAG for all the CPU nodes
         */
        nnodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "cpu"),
            md_find_name(mdp, "fwd"), listp);

        if (nnodes < 0) {
                DR_DBG_CPU("Scan for CPUs failed\n");
                return (result);
        }

        DR_DBG_CPU("dr_cpu_find_node_md: found %d CPUs in the MD\n", nnodes);

        /*
         * Find the CPU of interest
         */
        for (idx = 0; idx < nnodes; idx++) {

                if (md_get_prop_val(mdp, listp[idx], "id", &cpuid_prop)) {
                        DR_DBG_CPU("Missing 'id' property for CPU node %d\n",
                            idx);
                        break;
                }

                if (cpuid_prop == cpuid) {
                        /* found a match */
                        DR_DBG_CPU("dr_cpu_find_node_md: found CPU %d "
                            "in MD\n", cpuid);
                        result = listp[idx];
                        break;
                }
        }

        if (result == MDE_INVAL_ELEM_COOKIE) {
                DR_DBG_CPU("CPU %d not in MD\n", cpuid);
        }

        return (result);
}