xref: /titanic_44/usr/src/lib/fm/libldom/sparc/ldom.c (revision b1b8ab34de515a5e83206da22c3d7e563241b021)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <stdio.h>
29 #include <unistd.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <fcntl.h>
33 #include <pthread.h>
34 #include <errno.h>
35 #include <libnvpair.h>
36 
37 #include <sys/processor.h>
38 #include <sys/stat.h>
39 #include <sys/mdesc.h>
40 #include <sys/param.h>
41 #include <sys/systeminfo.h>
42 #include <sys/mem.h>
43 #include <sys/bl.h>
44 #include <sys/fm/protocol.h>
45 #include <fm/fmd_fmri.h>
46 #include <sys/pri.h>
47 
48 #include "ldom.h"
49 #include "ldmsvcs_utils.h"
50 
51 #define	MD_STR_PLATFORM		"platform"
52 #define	MD_STR_DOM_ENABLE	"domaining-enabled"
53 
54 static ssize_t
55 get_local_core_md(ldom_hdl_t *lhp, uint64_t **buf)
56 {
57 	int fh;
58 	size_t size;
59 	ssize_t ssize;
60 	uint64_t tok;
61 	uint64_t *bufp;
62 
63 	if ((ssize = pri_get(PRI_GET, &tok, buf, lhp->allocp, lhp->freep)) >= 0)
64 		return (ssize);
65 
66 	if ((fh = open("/devices/pseudo/mdesc@0:mdesc", O_RDONLY, 0)) < 0)
67 		return (-1);
68 
69 	if (ioctl(fh, MDESCIOCGSZ, &size) < 0) {
70 		(void) close(fh);
71 		return (-1);
72 	}
73 
74 	bufp = (uint64_t *)lhp->allocp(size);
75 
76 	if (read(fh, bufp, size) < 0) {
77 		lhp->freep(bufp, size);
78 		(void) close(fh);
79 		return (-1);
80 	}
81 	(void) close(fh);
82 
83 	*buf = bufp;
84 
85 	return ((ssize_t)size);
86 }
87 
88 
89 static int
90 get_local_md_prop_value(ldom_hdl_t *lhp, char *node, char *prop, uint64_t *val)
91 {
92 	int rc = 1;
93 	uint64_t *bufp;
94 	ssize_t bufsiz;
95 
96 	if ((bufsiz = get_local_core_md(lhp, &bufp)) > 0) {
97 		md_t *mdp;
98 
99 		if (mdp = md_init_intern(bufp, lhp->allocp, lhp->freep)) {
100 			int num_nodes;
101 			mde_cookie_t *listp;
102 
103 			num_nodes = md_node_count(mdp);
104 			listp = lhp->allocp(sizeof (mde_cookie_t) * num_nodes);
105 
106 			if (md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE,
107 					md_find_name(mdp, node),
108 					md_find_name(mdp, "fwd"),
109 					listp) > 0 &&
110 			    md_get_prop_val(mdp, listp[0], prop, val) >= 0) {
111 				/* found the property */
112 				rc = 0;
113 			}
114 
115 			lhp->freep(listp, sizeof (mde_cookie_t) * num_nodes);
116 			(void) md_fini(mdp);
117 		}
118 		lhp->freep(bufp, bufsiz);
119 	}
120 	return (rc);
121 }
122 
123 static int
124 ldom_getinfo(struct ldom_hdl *lhp)
125 {
126 	static pthread_mutex_t mt = PTHREAD_MUTEX_INITIALIZER;
127 	static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
128 	static int major_version = -1;
129 	static int service_ldom = -1;
130 	static int busy_init = 0;
131 
132 	int ier, rc = 0;
133 	uint64_t domain_enable;
134 
135 	(void) pthread_mutex_lock(&mt);
136 
137 	while (busy_init == 1)
138 		(void) pthread_cond_wait(&cv, &mt);
139 
140 	if (major_version != -1 && service_ldom != -1) {
141 		lhp->major_version = major_version;
142 		lhp->service_ldom = service_ldom;
143 		(void) pthread_mutex_unlock(&mt);
144 		return (0);
145 	}
146 
147 	/*
148 	 * get to this point if major_version and service_ldom have not yet
149 	 * been determined
150 	 */
151 	busy_init = 1;
152 	(void) pthread_mutex_unlock(&mt);
153 
154 	/*
155 	 * set defaults which correspond to the case of "LDOMS not
156 	 * available".  note that these can (and will) also apply to
157 	 * non-sun4v machines.
158 	 */
159 	major_version = 0;
160 	service_ldom = 0;
161 	domain_enable = 0;
162 
163 	if (get_local_md_prop_value(lhp, MD_STR_PLATFORM, MD_STR_DOM_ENABLE,
164 				&domain_enable) == 0 &&
165 	    domain_enable != 0) {
166 
167 		/*
168 		 * Domaining is enable and ldmd is not in config mode
169 		 * so this is a ldom env.
170 		 */
171 		major_version = 1;
172 
173 		if ((ier = ldmsvcs_check_channel()) == 0) {
174 			/*
175 			 * control ldom
176 			 * ldmfma channel between FMA and ldmd only exists
177 			 * on the control domain.
178 			 */
179 			service_ldom = 1;
180 		} else if (ier == 1) {
181 			/*
182 			 * guest ldom
183 			 * non-control ldom such as guest and io service ldom
184 			 */
185 			service_ldom = 0;
186 		}
187 	}
188 
189 	(void) pthread_mutex_lock(&mt);
190 	lhp->major_version = major_version;
191 	lhp->service_ldom = service_ldom;
192 	busy_init = 0;
193 	(void) pthread_mutex_unlock(&mt);
194 
195 	(void) pthread_cond_broadcast(&cv);
196 
197 	return (rc);
198 }
199 
200 
201 /*
202  * search the machine description for a "pid" entry (physical cpuid) and
203  * return the corresponding "id" entry (virtual cpuid)
204  */
205 static processorid_t
206 cpu_phys2virt(ldom_hdl_t *lhp, uint32_t cpuid)
207 {
208 	char isa[MAXNAMELEN];
209 	md_t *mdp;
210 	mde_cookie_t *listp;
211 	ssize_t bufsize;
212 	processorid_t vid;
213 	uint64_t *bufp;
214 	uint64_t pval;
215 	int num_nodes, ncpus, i;
216 
217 	(void) sysinfo(SI_ARCHITECTURE, isa, MAXNAMELEN);
218 
219 	if (strcmp(isa, "sun4v") != 0)
220 		return ((processorid_t)cpuid);
221 
222 	/*
223 	 * convert the physical cpuid to a virtual cpuid
224 	 */
225 	if ((bufsize = ldom_get_core_md(lhp, &bufp)) < 1)
226 		return (-1);
227 
228 	if ((mdp = md_init_intern(bufp, lhp->allocp, lhp->freep)) == NULL ||
229 	    (num_nodes = md_node_count(mdp)) < 1) {
230 		lhp->freep(bufp, bufsize);
231 		return (-1);
232 	}
233 
234 	listp = (mde_cookie_t *)lhp->allocp(sizeof (mde_cookie_t) * num_nodes);
235 	ncpus = md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE,
236 			    md_find_name(mdp, "cpu"),
237 			    md_find_name(mdp, "fwd"), listp);
238 
239 	vid = -1;
240 	for (i = 0; i < ncpus; i++) {
241 		if (md_get_prop_val(mdp, listp[i], "pid", &pval) >= 0 &&
242 		    pval == (uint64_t)cpuid) {
243 			if (md_get_prop_val(mdp, listp[i], "id", &pval) >= 0)
244 				vid = (processorid_t)pval;
245 
246 			break;
247 		}
248 	}
249 
250 	lhp->freep(listp, sizeof (mde_cookie_t) * num_nodes);
251 	(void) md_fini(mdp);
252 	lhp->freep(bufp, bufsize);
253 
254 	return (vid);
255 }
256 
257 /*
258  * if checking for status of a retired page:
259  *   0 - page is retired
260  *   EAGAIN - page is scheduled for retirement
261  *   EIO - page not scheduled for retirement
262  *   EINVAL - error
263  *
264  * if retiring a page:
265  *   0 - success in retiring page
266  *   EIO - page is already retired
267  *   EAGAIN - page is scheduled for retirement
268  *   EINVAL - error
269  *
270  * the original decoder for ioctl() return values is
271  * http://fma.eng/documents/engineering/cpumem/page_retire_api.txt
272  */
273 static int
274 os_mem_page_retire(ldom_hdl_t *lhp, int cmd, nvlist_t *nvl)
275 {
276 	mem_page_t mpage;
277 	char *fmribuf;
278 	size_t fmrisz;
279 	int fd, rc, err;
280 
281 	if (cmd != MEM_PAGE_RETIRE && cmd != MEM_PAGE_FMRI_RETIRE &&
282 	    cmd != MEM_PAGE_ISRETIRED && cmd != MEM_PAGE_FMRI_ISRETIRED)
283 			return (EINVAL);
284 
285 	if ((fd = open("/dev/mem", O_RDONLY)) < 0)
286 		return (EINVAL);
287 
288 	if ((errno = nvlist_size(nvl, &fmrisz, NV_ENCODE_NATIVE)) != 0 ||
289 	    fmrisz > MEM_FMRI_MAX_BUFSIZE ||
290 	    (fmribuf = lhp->allocp(fmrisz)) == NULL) {
291 		(void) close(fd);
292 		return (EINVAL);
293 	}
294 
295 	if ((errno = nvlist_pack(nvl, &fmribuf, &fmrisz,
296 				    NV_ENCODE_NATIVE, 0)) != 0) {
297 		lhp->freep(fmribuf, fmrisz);
298 		(void) close(fd);
299 		return (EINVAL);
300 	}
301 
302 	mpage.m_fmri = fmribuf;
303 	mpage.m_fmrisz = fmrisz;
304 
305 	rc = ioctl(fd, cmd, &mpage);
306 	err = errno;
307 
308 	lhp->freep(fmribuf, fmrisz);
309 	(void) close(fd);
310 
311 	if (rc < 0) {
312 		rc = err;
313 	}
314 
315 	return (rc);
316 }
317 
318 int
319 ldom_fmri_status(ldom_hdl_t *lhp, nvlist_t *nvl)
320 {
321 	char *name;
322 	int ret;
323 
324 	if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0)
325 		return (EINVAL);
326 
327 	switch (ldom_major_version(lhp)) {
328 	case 0:
329 		/*
330 		 * version == 0 means LDOMS support is not available
331 		 */
332 		if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
333 			processorid_t vid;
334 			uint32_t cpuid;
335 
336 			if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID,
337 						    &cpuid) == 0 &&
338 			    (vid = cpu_phys2virt(lhp, cpuid)) != -1)
339 				return (p_online(vid, P_STATUS));
340 		} else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) {
341 			return (os_mem_page_retire(lhp,
342 						MEM_PAGE_FMRI_ISRETIRED, nvl));
343 		}
344 
345 		return (EINVAL);
346 		/*NOTREACHED*/
347 		break;
348 	case 1:
349 		/* LDOMS 1.0 */
350 		if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
351 			uint32_t cpuid;
352 
353 			if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID,
354 						&cpuid) == 0)
355 				ret = ldmsvcs_cpu_req_status(lhp, cpuid);
356 		} else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) {
357 			uint64_t pa;
358 
359 			if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR,
360 						&pa) == 0)
361 				ret = ldmsvcs_mem_req_status(lhp, pa);
362 			else
363 				ret = EINVAL;
364 		} else {
365 			ret = ENOTSUP;
366 		}
367 		return (ret);
368 
369 		/*NOTREACHED*/
370 		break;
371 	default:
372 		break;
373 	}
374 
375 	return (ENOTSUP);
376 }
377 
378 
379 int
380 ldom_fmri_retire(ldom_hdl_t *lhp, nvlist_t *nvl)
381 {
382 	char *name;
383 	int ret;
384 
385 	if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0)
386 		return (EINVAL);
387 
388 	switch (ldom_major_version(lhp)) {
389 	case 0:
390 		/*
391 		 * version == 0 means LDOMS support is not available
392 		 */
393 		if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
394 			processorid_t vid;
395 			uint32_t cpuid;
396 
397 			if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID,
398 						    &cpuid) == 0 &&
399 			    (vid = cpu_phys2virt(lhp, cpuid)) != -1)
400 				return (p_online(vid, P_FAULTED));
401 		} else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) {
402 			return (os_mem_page_retire(lhp,
403 						MEM_PAGE_FMRI_RETIRE, nvl));
404 		}
405 
406 		return (EINVAL);
407 		/*NOTREACHED*/
408 		break;
409 	case 1:
410 		/* LDOMS 1.0 */
411 		if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
412 			uint32_t cpuid;
413 
414 			if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID,
415 						&cpuid) == 0)
416 				ret = ldmsvcs_cpu_req_offline(lhp, cpuid);
417 		} else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) {
418 			uint64_t pa;
419 
420 			if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR,
421 						&pa) == 0)
422 				ret = ldmsvcs_mem_req_retire(lhp, pa);
423 			else
424 				ret = EINVAL;
425 		} else {
426 			ret = ENOTSUP;
427 		}
428 		return (ret);
429 
430 		/*NOTREACHED*/
431 		break;
432 	default:
433 		break;
434 	}
435 
436 	return (ENOTSUP);
437 }
438 
439 
440 /*
441  * blacklist cpus in a non-LDOMS environment
442  */
443 int
444 ldom_fmri_blacklist(ldom_hdl_t *lhp, nvlist_t *nvl)
445 {
446 	char *name;
447 
448 	if (ldom_major_version(lhp) != 0)
449 		return (0);
450 
451 	if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0)
452 		return (EINVAL);
453 
454 	if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
455 		bl_req_t blr;
456 		char *class;
457 		int fd, rc, err;
458 
459 		if ((nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) ||
460 		    (class == NULL) || (*class == '\0'))
461 			return (EINVAL);
462 
463 		if ((fd = open("/dev/bl", O_RDONLY)) < 0)
464 			return (EIO);
465 
466 		if (nvlist_size(nvl, &blr.bl_fmrisz, NV_ENCODE_NATIVE) != 0 ||
467 		    blr.bl_fmrisz == 0 ||
468 		    (blr.bl_fmri = (caddr_t)lhp->allocp(blr.bl_fmrisz)) ==
469 		    NULL) {
470 			(void) close(fd);
471 			return (EINVAL);
472 		}
473 
474 		blr.bl_class = class;
475 
476 		rc = ioctl(fd, BLIOC_INSERT, &blr);
477 		err = errno;
478 
479 		lhp->freep((void *)&blr.bl_fmri, blr.bl_fmrisz);
480 		(void) close(fd);
481 
482 		if (rc < 0 && err != ENOTSUP) {
483 			errno = err;
484 			return (-1);
485 		}
486 	}
487 
488 	return (0);
489 }
490 
491 
492 ssize_t
493 ldom_get_core_md(ldom_hdl_t *lhp, uint64_t **buf)
494 {
495 	ssize_t		rv;	/* return value */
496 
497 	switch (ldom_major_version(lhp)) {
498 	case 0:
499 		return (get_local_core_md(lhp, buf));
500 		/*NOTREACHED*/
501 		break;
502 	case 1:
503 		/* LDOMS 1.0 */
504 		if (ldom_on_service(lhp) == 1) {
505 			if ((rv = ldmsvcs_get_core_md(lhp, buf)) < 0)
506 				rv = get_local_core_md(lhp, buf);
507 			return (rv);
508 		} else {
509 			return (get_local_core_md(lhp, buf));
510 		}
511 
512 		/*NOTREACHED*/
513 		break;
514 	default:
515 		*buf = NULL;
516 		break;
517 	}
518 
519 	return (-1);
520 }
521 
522 /*
523  * version 0 means no LDOMS
524  */
525 int
526 ldom_major_version(ldom_hdl_t *lhp)
527 {
528 	if (lhp == NULL)
529 		return (-1);
530 
531 	if (ldom_getinfo(lhp) == 0)
532 		return (lhp->major_version);
533 	else
534 		return (0);
535 }
536 
537 /*
538  * in the absence of ldoms we are on a single OS instance which is the
539  * equivalent of the service ldom
540  */
541 int
542 ldom_on_service(ldom_hdl_t *lhp)
543 {
544 	if (lhp == NULL)
545 		return (-1);
546 
547 	if (ldom_getinfo(lhp) == 0)
548 		return (lhp->service_ldom);
549 	else
550 		return (1);
551 }
552 
553 
554 ldom_hdl_t *
555 ldom_init(void *(*allocp)(size_t size),
556 	void (*freep)(void *addr, size_t size))
557 {
558 	struct ldom_hdl *lhp;
559 
560 	if (pri_init() < 0)
561 		return (NULL);
562 
563 	if ((lhp = allocp(sizeof (struct ldom_hdl))) == NULL) {
564 		pri_fini();
565 		return (NULL);
566 	}
567 
568 	lhp->major_version = -1;	/* version not yet determined */
569 	lhp->allocp = allocp;
570 	lhp->freep = freep;
571 
572 	ldmsvcs_init(lhp);
573 
574 	return (lhp);
575 }
576 
577 
578 void
579 ldom_fini(ldom_hdl_t *lhp)
580 {
581 	if (lhp == NULL)
582 		return;
583 
584 	ldmsvcs_fini(lhp);
585 	lhp->freep(lhp, sizeof (struct ldom_hdl));
586 
587 	pri_fini();
588 }
589 
590 /* end file */
591