xref: /titanic_50/usr/src/lib/fm/libldom/sparc/ldom.c (revision c0c79a3f09914f35651895ffc111883455b7f62d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <stdio.h>
29 #include <unistd.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <fcntl.h>
33 #include <pthread.h>
34 #include <errno.h>
35 #include <libnvpair.h>
36 #include <dlfcn.h>
37 #include <link.h>
38 
39 #include <sys/processor.h>
40 #include <sys/stat.h>
41 #include <sys/mdesc.h>
42 #include <sys/param.h>
43 #include <sys/systeminfo.h>
44 #include <sys/mem.h>
45 #include <sys/bl.h>
46 #include <sys/fm/protocol.h>
47 #include <fm/fmd_fmri.h>
48 #include <sys/pri.h>
49 
50 #include "ldom.h"
51 #include "ldmsvcs_utils.h"
52 
/* machine description node and property consulted by ldom_getinfo() */
#define	MD_STR_PLATFORM		"platform"
#define	MD_STR_DOM_ENABLE	"domaining-enabled"

/*
 * State for the optional, dynamically loaded libpri.  The handle and every
 * entry point stay NULL until ldom_pri_config() succeeds in loading the
 * library, and are reset to NULL again by ldom_pri_unconfig().
 */
static void *ldom_dl_hp = NULL;				/* dlopen() handle */
static const char *ldom_dl_path = "libpri.so.1";	/* library to load */
static int ldom_dl_mode = (RTLD_NOW | RTLD_LOCAL);	/* dlopen() mode */

/* entry points resolved from libpri with dlsym() */
static int (*ldom_pri_init)(void) = NULL;
static void (*ldom_pri_fini)(void) = NULL;
static ssize_t (*ldom_pri_get)(uint8_t wait, uint64_t *token, uint64_t **buf,
	void *(*allocp)(size_t), void (*freep)(void *, size_t)) = NULL;
66 
67 static void
68 ldom_pri_config(void)
69 {
70 	char isa[MAXNAMELEN];	/* used to see if machine is sun4v */
71 
72 	if (sysinfo(SI_MACHINE, isa, MAXNAMELEN) < 0)
73 		return;
74 	if (strcmp(isa, "sun4v") != 0)
75 		return;
76 	if ((ldom_dl_hp = dlopen(ldom_dl_path, ldom_dl_mode)) == NULL)
77 		return;
78 
79 	ldom_pri_init = (int (*)(void))dlsym(ldom_dl_hp, "pri_init");
80 	ldom_pri_fini = (void (*)(void))dlsym(ldom_dl_hp, "pri_fini");
81 	ldom_pri_get = (ssize_t (*)(uint8_t wait, uint64_t *token,
82 	    uint64_t **buf, void *(*allocp)(size_t),
83 	    void (*freep)(void *, size_t)))dlsym(ldom_dl_hp, "pri_get");
84 }
85 
86 static void
87 ldom_pri_unconfig(void)
88 {
89 	if (ldom_dl_hp == NULL)
90 		return;
91 
92 	ldom_pri_init = (int (*)(void))NULL;
93 	ldom_pri_fini = (void (*)(void))NULL;
94 	ldom_pri_get = (ssize_t (*)(uint8_t wait, uint64_t *token,
95 	    uint64_t **buf, void *(*allocp)(size_t),
96 	    void (*freep)(void *, size_t)))NULL;
97 	(void) dlclose(ldom_dl_hp);
98 	ldom_dl_hp = (void *)NULL;
99 }
100 
101 static ssize_t
102 get_local_core_md(ldom_hdl_t *lhp, uint64_t **buf)
103 {
104 	int fh;
105 	size_t size;
106 	ssize_t ssize;
107 	uint64_t tok;
108 	uint64_t *bufp;
109 
110 	if (ldom_pri_get != NULL)
111 		if ((ssize = (*ldom_pri_get)(PRI_GET, &tok, buf,
112 		    lhp->allocp, lhp->freep)) >= 0)
113 			return (ssize);
114 
115 	if ((fh = open("/devices/pseudo/mdesc@0:mdesc", O_RDONLY, 0)) < 0)
116 		return (-1);
117 
118 	if (ioctl(fh, MDESCIOCGSZ, &size) < 0) {
119 		(void) close(fh);
120 		return (-1);
121 	}
122 
123 	bufp = (uint64_t *)lhp->allocp(size);
124 
125 	if (read(fh, bufp, size) < 0) {
126 		lhp->freep(bufp, size);
127 		(void) close(fh);
128 		return (-1);
129 	}
130 	(void) close(fh);
131 
132 	*buf = bufp;
133 
134 	return ((ssize_t)size);
135 }
136 
137 
138 static int
139 get_local_md_prop_value(ldom_hdl_t *lhp, char *node, char *prop, uint64_t *val)
140 {
141 	int rc = 1;
142 	uint64_t *bufp;
143 	ssize_t bufsiz;
144 
145 	if ((bufsiz = get_local_core_md(lhp, &bufp)) > 0) {
146 		md_t *mdp;
147 
148 		if (mdp = md_init_intern(bufp, lhp->allocp, lhp->freep)) {
149 			int num_nodes;
150 			mde_cookie_t *listp;
151 
152 			num_nodes = md_node_count(mdp);
153 			listp = lhp->allocp(sizeof (mde_cookie_t) * num_nodes);
154 
155 			if (md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE,
156 					md_find_name(mdp, node),
157 					md_find_name(mdp, "fwd"),
158 					listp) > 0 &&
159 			    md_get_prop_val(mdp, listp[0], prop, val) >= 0) {
160 				/* found the property */
161 				rc = 0;
162 			}
163 
164 			lhp->freep(listp, sizeof (mde_cookie_t) * num_nodes);
165 			(void) md_fini(mdp);
166 		}
167 		lhp->freep(bufp, bufsiz);
168 	}
169 	return (rc);
170 }
171 
172 static int
173 ldom_getinfo(struct ldom_hdl *lhp)
174 {
175 	static pthread_mutex_t mt = PTHREAD_MUTEX_INITIALIZER;
176 	static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
177 	static int major_version = -1;
178 	static int service_ldom = -1;
179 	static int busy_init = 0;
180 
181 	int ier, rc = 0;
182 	uint64_t domain_enable;
183 
184 	(void) pthread_mutex_lock(&mt);
185 
186 	while (busy_init == 1)
187 		(void) pthread_cond_wait(&cv, &mt);
188 
189 	if (major_version != -1 && service_ldom != -1) {
190 		lhp->major_version = major_version;
191 		lhp->service_ldom = service_ldom;
192 		(void) pthread_mutex_unlock(&mt);
193 		return (0);
194 	}
195 
196 	/*
197 	 * get to this point if major_version and service_ldom have not yet
198 	 * been determined
199 	 */
200 	busy_init = 1;
201 	(void) pthread_mutex_unlock(&mt);
202 
203 	/*
204 	 * set defaults which correspond to the case of "LDOMS not
205 	 * available".  note that these can (and will) also apply to
206 	 * non-sun4v machines.
207 	 */
208 	major_version = 0;
209 	service_ldom = 0;
210 	domain_enable = 0;
211 
212 	if (get_local_md_prop_value(lhp, MD_STR_PLATFORM, MD_STR_DOM_ENABLE,
213 				&domain_enable) == 0 &&
214 	    domain_enable != 0) {
215 
216 		/*
217 		 * Domaining is enable and ldmd is not in config mode
218 		 * so this is a ldom env.
219 		 */
220 		major_version = 1;
221 
222 		if ((ier = ldmsvcs_check_channel()) == 0) {
223 			/*
224 			 * control ldom
225 			 * ldmfma channel between FMA and ldmd only exists
226 			 * on the control domain.
227 			 */
228 			service_ldom = 1;
229 		} else if (ier == 1) {
230 			/*
231 			 * guest ldom
232 			 * non-control ldom such as guest and io service ldom
233 			 */
234 			service_ldom = 0;
235 		}
236 	}
237 
238 	(void) pthread_mutex_lock(&mt);
239 	lhp->major_version = major_version;
240 	lhp->service_ldom = service_ldom;
241 	busy_init = 0;
242 	(void) pthread_mutex_unlock(&mt);
243 
244 	(void) pthread_cond_broadcast(&cv);
245 
246 	return (rc);
247 }
248 
249 
250 /*
251  * search the machine description for a "pid" entry (physical cpuid) and
252  * return the corresponding "id" entry (virtual cpuid)
253  */
254 static processorid_t
255 cpu_phys2virt(ldom_hdl_t *lhp, uint32_t cpuid)
256 {
257 	char isa[MAXNAMELEN];
258 	md_t *mdp;
259 	mde_cookie_t *listp;
260 	ssize_t bufsize;
261 	processorid_t vid;
262 	uint64_t *bufp;
263 	uint64_t pval;
264 	int num_nodes, ncpus, i;
265 
266 	(void) sysinfo(SI_ARCHITECTURE, isa, MAXNAMELEN);
267 
268 	if (strcmp(isa, "sun4v") != 0)
269 		return ((processorid_t)cpuid);
270 
271 	/*
272 	 * convert the physical cpuid to a virtual cpuid
273 	 */
274 	if ((bufsize = ldom_get_core_md(lhp, &bufp)) < 1)
275 		return (-1);
276 
277 	if ((mdp = md_init_intern(bufp, lhp->allocp, lhp->freep)) == NULL ||
278 	    (num_nodes = md_node_count(mdp)) < 1) {
279 		lhp->freep(bufp, bufsize);
280 		return (-1);
281 	}
282 
283 	listp = (mde_cookie_t *)lhp->allocp(sizeof (mde_cookie_t) * num_nodes);
284 	ncpus = md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE,
285 			    md_find_name(mdp, "cpu"),
286 			    md_find_name(mdp, "fwd"), listp);
287 
288 	vid = -1;
289 	for (i = 0; i < ncpus; i++) {
290 		if (md_get_prop_val(mdp, listp[i], "pid", &pval) >= 0 &&
291 		    pval == (uint64_t)cpuid) {
292 			if (md_get_prop_val(mdp, listp[i], "id", &pval) >= 0)
293 				vid = (processorid_t)pval;
294 
295 			break;
296 		}
297 	}
298 
299 	lhp->freep(listp, sizeof (mde_cookie_t) * num_nodes);
300 	(void) md_fini(mdp);
301 	lhp->freep(bufp, bufsize);
302 
303 	return (vid);
304 }
305 
306 /*
307  * if checking for status of a retired page:
308  *   0 - page is retired
309  *   EAGAIN - page is scheduled for retirement
310  *   EIO - page not scheduled for retirement
311  *   EINVAL - error
312  *
313  * if retiring a page:
314  *   0 - success in retiring page
315  *   EIO - page is already retired
316  *   EAGAIN - page is scheduled for retirement
317  *   EINVAL - error
318  *
319  * the original decoder for ioctl() return values is
320  * http://fma.eng/documents/engineering/cpumem/page_retire_api.txt
321  */
322 static int
323 os_mem_page_retire(ldom_hdl_t *lhp, int cmd, nvlist_t *nvl)
324 {
325 	mem_page_t mpage;
326 	char *fmribuf;
327 	size_t fmrisz;
328 	int fd, rc, err;
329 
330 	if (cmd != MEM_PAGE_RETIRE && cmd != MEM_PAGE_FMRI_RETIRE &&
331 	    cmd != MEM_PAGE_ISRETIRED && cmd != MEM_PAGE_FMRI_ISRETIRED)
332 			return (EINVAL);
333 
334 	if ((fd = open("/dev/mem", O_RDONLY)) < 0)
335 		return (EINVAL);
336 
337 	if ((errno = nvlist_size(nvl, &fmrisz, NV_ENCODE_NATIVE)) != 0 ||
338 	    fmrisz > MEM_FMRI_MAX_BUFSIZE ||
339 	    (fmribuf = lhp->allocp(fmrisz)) == NULL) {
340 		(void) close(fd);
341 		return (EINVAL);
342 	}
343 
344 	if ((errno = nvlist_pack(nvl, &fmribuf, &fmrisz,
345 				    NV_ENCODE_NATIVE, 0)) != 0) {
346 		lhp->freep(fmribuf, fmrisz);
347 		(void) close(fd);
348 		return (EINVAL);
349 	}
350 
351 	mpage.m_fmri = fmribuf;
352 	mpage.m_fmrisz = fmrisz;
353 
354 	rc = ioctl(fd, cmd, &mpage);
355 	err = errno;
356 
357 	lhp->freep(fmribuf, fmrisz);
358 	(void) close(fd);
359 
360 	if (rc < 0) {
361 		rc = err;
362 	}
363 
364 	return (rc);
365 }
366 
367 int
368 ldom_fmri_status(ldom_hdl_t *lhp, nvlist_t *nvl)
369 {
370 	char *name;
371 	int ret;
372 
373 	if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0)
374 		return (EINVAL);
375 
376 	switch (ldom_major_version(lhp)) {
377 	case 0:
378 		/*
379 		 * version == 0 means LDOMS support is not available
380 		 */
381 		if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
382 			processorid_t vid;
383 			uint32_t cpuid;
384 
385 			if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID,
386 						    &cpuid) == 0 &&
387 			    (vid = cpu_phys2virt(lhp, cpuid)) != -1)
388 				return (p_online(vid, P_STATUS));
389 		} else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) {
390 			return (os_mem_page_retire(lhp,
391 						MEM_PAGE_FMRI_ISRETIRED, nvl));
392 		}
393 
394 		return (EINVAL);
395 		/*NOTREACHED*/
396 		break;
397 	case 1:
398 		/* LDOMS 1.0 */
399 		if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
400 			uint32_t cpuid;
401 
402 			if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID,
403 						&cpuid) == 0)
404 				ret = ldmsvcs_cpu_req_status(lhp, cpuid);
405 		} else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) {
406 			uint64_t pa;
407 
408 			if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR,
409 						&pa) == 0)
410 				ret = ldmsvcs_mem_req_status(lhp, pa);
411 			else
412 				ret = EINVAL;
413 		} else {
414 			ret = ENOTSUP;
415 		}
416 		return (ret);
417 
418 		/*NOTREACHED*/
419 		break;
420 	default:
421 		break;
422 	}
423 
424 	return (ENOTSUP);
425 }
426 
427 
428 int
429 ldom_fmri_retire(ldom_hdl_t *lhp, nvlist_t *nvl)
430 {
431 	char *name;
432 	int ret;
433 
434 	if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0)
435 		return (EINVAL);
436 
437 	switch (ldom_major_version(lhp)) {
438 	case 0:
439 		/*
440 		 * version == 0 means LDOMS support is not available
441 		 */
442 		if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
443 			processorid_t vid;
444 			uint32_t cpuid;
445 
446 			if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID,
447 						    &cpuid) == 0 &&
448 			    (vid = cpu_phys2virt(lhp, cpuid)) != -1)
449 				return (p_online(vid, P_FAULTED));
450 		} else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) {
451 			return (os_mem_page_retire(lhp,
452 						MEM_PAGE_FMRI_RETIRE, nvl));
453 		}
454 
455 		return (EINVAL);
456 		/*NOTREACHED*/
457 		break;
458 	case 1:
459 		/* LDOMS 1.0 */
460 		if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
461 			uint32_t cpuid;
462 
463 			if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID,
464 						&cpuid) == 0)
465 				ret = ldmsvcs_cpu_req_offline(lhp, cpuid);
466 		} else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) {
467 			uint64_t pa;
468 
469 			if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR,
470 						&pa) == 0)
471 				ret = ldmsvcs_mem_req_retire(lhp, pa);
472 			else
473 				ret = EINVAL;
474 		} else {
475 			ret = ENOTSUP;
476 		}
477 		return (ret);
478 
479 		/*NOTREACHED*/
480 		break;
481 	default:
482 		break;
483 	}
484 
485 	return (ENOTSUP);
486 }
487 
488 
489 /*
490  * blacklist cpus in a non-LDOMS environment
491  */
492 int
493 ldom_fmri_blacklist(ldom_hdl_t *lhp, nvlist_t *nvl)
494 {
495 	char *name;
496 
497 	if (ldom_major_version(lhp) != 0)
498 		return (0);
499 
500 	if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0)
501 		return (EINVAL);
502 
503 	if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
504 		bl_req_t blr;
505 		char *class;
506 		int fd, rc, err;
507 
508 		if ((nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) ||
509 		    (class == NULL) || (*class == '\0'))
510 			return (EINVAL);
511 
512 		if ((fd = open("/dev/bl", O_RDONLY)) < 0)
513 			return (EIO);
514 
515 		if (nvlist_size(nvl, &blr.bl_fmrisz, NV_ENCODE_NATIVE) != 0 ||
516 		    blr.bl_fmrisz == 0 ||
517 		    (blr.bl_fmri = (caddr_t)lhp->allocp(blr.bl_fmrisz)) ==
518 		    NULL) {
519 			(void) close(fd);
520 			return (EINVAL);
521 		}
522 
523 		blr.bl_class = class;
524 
525 		rc = ioctl(fd, BLIOC_INSERT, &blr);
526 		err = errno;
527 
528 		lhp->freep((void *)&blr.bl_fmri, blr.bl_fmrisz);
529 		(void) close(fd);
530 
531 		if (rc < 0 && err != ENOTSUP) {
532 			errno = err;
533 			return (-1);
534 		}
535 	}
536 
537 	return (0);
538 }
539 
540 
541 ssize_t
542 ldom_get_core_md(ldom_hdl_t *lhp, uint64_t **buf)
543 {
544 	ssize_t		rv;	/* return value */
545 
546 	switch (ldom_major_version(lhp)) {
547 	case 0:
548 		return (get_local_core_md(lhp, buf));
549 		/*NOTREACHED*/
550 		break;
551 	case 1:
552 		/* LDOMS 1.0 */
553 		if (ldom_on_service(lhp) == 1) {
554 			if ((rv = ldmsvcs_get_core_md(lhp, buf)) < 0)
555 				rv = get_local_core_md(lhp, buf);
556 			return (rv);
557 		} else {
558 			return (get_local_core_md(lhp, buf));
559 		}
560 
561 		/*NOTREACHED*/
562 		break;
563 	default:
564 		*buf = NULL;
565 		break;
566 	}
567 
568 	return (-1);
569 }
570 
571 /*
572  * version 0 means no LDOMS
573  */
574 int
575 ldom_major_version(ldom_hdl_t *lhp)
576 {
577 	if (lhp == NULL)
578 		return (-1);
579 
580 	if (ldom_getinfo(lhp) == 0)
581 		return (lhp->major_version);
582 	else
583 		return (0);
584 }
585 
586 /*
587  * in the absence of ldoms we are on a single OS instance which is the
588  * equivalent of the service ldom
589  */
590 int
591 ldom_on_service(ldom_hdl_t *lhp)
592 {
593 	if (lhp == NULL)
594 		return (-1);
595 
596 	if (ldom_getinfo(lhp) == 0)
597 		return (lhp->service_ldom);
598 	else
599 		return (1);
600 }
601 
602 
603 ldom_hdl_t *
604 ldom_init(void *(*allocp)(size_t size),
605 	void (*freep)(void *addr, size_t size))
606 {
607 	struct ldom_hdl *lhp;
608 
609 	ldom_pri_config();
610 	if (ldom_pri_init != NULL)
611 		if ((*ldom_pri_init)() < 0)
612 			return (NULL);
613 
614 	if ((lhp = allocp(sizeof (struct ldom_hdl))) == NULL) {
615 		if (ldom_pri_fini != NULL)
616 			(*ldom_pri_fini)();
617 		return (NULL);
618 	}
619 
620 	lhp->major_version = -1;	/* version not yet determined */
621 	lhp->allocp = allocp;
622 	lhp->freep = freep;
623 
624 	ldmsvcs_init(lhp);
625 
626 	return (lhp);
627 }
628 
629 
630 void
631 ldom_fini(ldom_hdl_t *lhp)
632 {
633 	if (lhp == NULL)
634 		return;
635 
636 	ldmsvcs_fini(lhp);
637 	lhp->freep(lhp, sizeof (struct ldom_hdl));
638 
639 	if (ldom_pri_fini != NULL)
640 		(*ldom_pri_fini)();
641 	ldom_pri_unconfig();
642 }
643 
644 /* end file */
645