xref: /titanic_50/usr/src/lib/fm/libldom/sparc/ldom.c (revision 8b464eb836173b92f2b7a65623cd06c8c3c59289)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <stdio.h>
29 #include <unistd.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <fcntl.h>
33 #include <pthread.h>
34 #include <errno.h>
35 #include <libnvpair.h>
36 
37 #include <sys/processor.h>
38 #include <sys/stat.h>
39 #include <sys/mdesc.h>
40 #include <sys/param.h>
41 #include <sys/systeminfo.h>
42 #include <sys/mem.h>
43 #include <sys/bl.h>
44 #include <sys/fm/protocol.h>
45 #include <fm/fmd_fmri.h>
46 
47 #include "ldom.h"
48 #include "ldmsvcs_utils.h"
49 
50 
51 static ssize_t
52 get_local_core_md(ldom_hdl_t *lhp, uint64_t **buf)
53 {
54 	int fh;
55 	size_t size;
56 	uint64_t *bufp;
57 
58 	if ((fh = open("/devices/pseudo/mdesc@0:mdesc", O_RDONLY, 0)) < 0)
59 		return (-1);
60 
61 	if (ioctl(fh, MDESCIOCGSZ, &size) < 0) {
62 		(void) close(fh);
63 		return (-1);
64 	}
65 
66 	bufp = (uint64_t *)lhp->allocp(size);
67 
68 	if (read(fh, bufp, size) < 0) {
69 		lhp->freep(bufp, size);
70 		(void) close(fh);
71 		return (-1);
72 	}
73 	(void) close(fh);
74 
75 	*buf = bufp;
76 
77 	return ((ssize_t)size);
78 }
79 
80 
81 static int
82 ldom_getinfo(struct ldom_hdl *lhp)
83 {
84 	static pthread_mutex_t mt = PTHREAD_MUTEX_INITIALIZER;
85 	static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
86 	static int major_version = -1;
87 	static int service_ldom = -1;
88 	static int busy_init = 0;
89 
90 	int ier, rc = 0;
91 
92 	(void) pthread_mutex_lock(&mt);
93 
94 	while (busy_init == 1)
95 		(void) pthread_cond_wait(&cv, &mt);
96 
97 	if (major_version != -1 && service_ldom != -1) {
98 		lhp->major_version = major_version;
99 		lhp->service_ldom = service_ldom;
100 		(void) pthread_mutex_unlock(&mt);
101 		return (0);
102 	}
103 
104 	/*
105 	 * get to this point if major_version and service_ldom have not yet
106 	 * been determined
107 	 */
108 	busy_init = 1;
109 	(void) pthread_mutex_unlock(&mt);
110 
111 	/*
112 	 * set defaults which correspond to the case of "LDOMS not
113 	 * available".  note that these can (and will) also apply to
114 	 * non-sun4v machines.
115 	 */
116 	major_version = 0;
117 	service_ldom = 1;
118 
119 	/* figure out version */
120 	if ((ier = ldmsvcs_check_channel()) == 0) {
121 		/*
122 		 * get into this block if vldc exists.  LDOMS is available
123 		 * and we are on the service LDOM.
124 		 */
125 		major_version = 1;
126 		service_ldom = 1;
127 	} else if (ier == 1) {
128 		/*
129 		 * get into this block if vldc does not exist
130 		 *
131 		 * if we do not get into the following if() block [i.e.,
132 		 * if (bufsiz <= 0)] then we are on a non-sun4v machine.
133 		 */
134 		uint64_t *bufp;
135 		ssize_t bufsiz;
136 
137 		if ((bufsiz = get_local_core_md(lhp, &bufp)) > 0) {
138 			md_t *mdp;
139 
140 			if ((mdp = md_init_intern(bufp, lhp->allocp,
141 						    lhp->freep)) != NULL) {
142 				mde_cookie_t *listp;
143 				uint64_t dval;
144 				int num_nodes;
145 
146 				num_nodes = md_node_count(mdp);
147 				listp = lhp->allocp(sizeof (mde_cookie_t) *
148 						    num_nodes);
149 
150 				/*
151 				 * if we do not enter the following if block,
152 				 * we conclude that LDOMS is not available
153 				 */
154 				if (md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE,
155 					md_find_name(mdp, "platform"),
156 					md_find_name(mdp, "fwd"),
157 					listp) > 0 &&
158 				    md_get_prop_val(mdp, listp[0],
159 					"domaining-enabled", &dval) >= 0 &&
160 				    dval == 1) {
161 					/*
162 					 * LDOMS is available.  an earlier
163 					 * block detected the situation of
164 					 * being on a service LDOM, so
165 					 * we get to this point only if we
166 					 * are not on a service LDOM.
167 					 */
168 					major_version = 1;
169 					service_ldom = 0;
170 				}
171 
172 				lhp->freep(listp, sizeof (mde_cookie_t) *
173 					    num_nodes);
174 				(void) md_fini(mdp);
175 			}
176 
177 			lhp->freep(bufp, bufsiz);
178 		}
179 	} else {
180 		rc = 1;
181 	}
182 
183 	(void) pthread_mutex_lock(&mt);
184 	lhp->major_version = major_version;
185 	lhp->service_ldom = service_ldom;
186 	busy_init = 0;
187 	(void) pthread_mutex_unlock(&mt);
188 
189 	(void) pthread_cond_broadcast(&cv);
190 
191 	return (rc);
192 }
193 
194 
195 /*
196  * search the machine description for a "pid" entry (physical cpuid) and
197  * return the corresponding "id" entry (virtual cpuid)
198  */
199 static processorid_t
200 cpu_phys2virt(ldom_hdl_t *lhp, uint32_t cpuid)
201 {
202 	char isa[MAXNAMELEN];
203 	md_t *mdp;
204 	mde_cookie_t *listp;
205 	ssize_t bufsize;
206 	processorid_t vid;
207 	uint64_t *bufp;
208 	uint64_t pval;
209 	int num_nodes, ncpus, i;
210 
211 	(void) sysinfo(SI_ARCHITECTURE, isa, MAXNAMELEN);
212 
213 	if (strcmp(isa, "sun4v") != 0)
214 		return ((processorid_t)cpuid);
215 
216 	/*
217 	 * convert the physical cpuid to a virtual cpuid
218 	 */
219 	if ((bufsize = ldom_get_core_md(lhp, &bufp)) < 1)
220 		return (-1);
221 
222 	if ((mdp = md_init_intern(bufp, lhp->allocp, lhp->freep)) == NULL ||
223 	    (num_nodes = md_node_count(mdp)) < 1) {
224 		lhp->freep(bufp, bufsize);
225 		return (-1);
226 	}
227 
228 	listp = (mde_cookie_t *)lhp->allocp(sizeof (mde_cookie_t) * num_nodes);
229 	ncpus = md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE,
230 			    md_find_name(mdp, "cpu"),
231 			    md_find_name(mdp, "fwd"), listp);
232 
233 	vid = -1;
234 	for (i = 0; i < ncpus; i++) {
235 		if (md_get_prop_val(mdp, listp[i], "pid", &pval) >= 0 &&
236 		    pval == (uint64_t)cpuid) {
237 			if (md_get_prop_val(mdp, listp[i], "id", &pval) >= 0)
238 				vid = (processorid_t)pval;
239 
240 			break;
241 		}
242 	}
243 
244 	lhp->freep(listp, sizeof (mde_cookie_t) * num_nodes);
245 	(void) md_fini(mdp);
246 	lhp->freep(bufp, bufsize);
247 
248 	return (vid);
249 }
250 
251 /*
252  * if checking for status of a retired page:
253  *   0 - page is retired
254  *   EAGAIN - page is scheduled for retirement
255  *   EIO - page not scheduled for retirement
256  *   EINVAL - error
257  *
258  * if retiring a page:
259  *   0 - success in retiring page
260  *   EIO - page is already retired
261  *   EAGAIN - page is scheduled for retirement
262  *   EINVAL - error
263  *
264  * the original decoder for ioctl() return values is
265  * http://fma.eng/documents/engineering/cpumem/page_retire_api.txt
266  */
267 static int
268 os_mem_page_retire(ldom_hdl_t *lhp, int cmd, nvlist_t *nvl)
269 {
270 	mem_page_t mpage;
271 	char *fmribuf;
272 	size_t fmrisz;
273 	int fd, rc;
274 
275 	if ((fd = open("/dev/mem", O_RDONLY)) < 0)
276 		return (EINVAL);
277 
278 	if ((errno = nvlist_size(nvl, &fmrisz, NV_ENCODE_NATIVE)) != 0 ||
279 	    fmrisz > MEM_FMRI_MAX_BUFSIZE ||
280 	    (fmribuf = lhp->allocp(fmrisz)) == NULL) {
281 		(void) close(fd);
282 		return (EINVAL);
283 	}
284 
285 	if ((errno = nvlist_pack(nvl, &fmribuf, &fmrisz,
286 				    NV_ENCODE_NATIVE, 0)) != 0) {
287 		lhp->freep(fmribuf, fmrisz);
288 		(void) close(fd);
289 		return (EINVAL);
290 	}
291 
292 	mpage.m_fmri = fmribuf;
293 	mpage.m_fmrisz = fmrisz;
294 
295 	rc = ioctl(fd, cmd, &mpage);
296 	lhp->freep(fmribuf, fmrisz);
297 	(void) close(fd);
298 
299 	if (rc < 0)
300 		return (EINVAL);
301 
302 	if ((cmd == MEM_PAGE_RETIRE || cmd == MEM_PAGE_FMRI_RETIRE ||
303 	    cmd == MEM_PAGE_ISRETIRED || cmd == MEM_PAGE_FMRI_ISRETIRED) &&
304 	    (rc == 0 || rc == EIO || rc == EAGAIN))
305 			return (rc);
306 
307 	return (EINVAL);
308 }
309 
310 int
311 ldom_fmri_status(ldom_hdl_t *lhp, nvlist_t *nvl)
312 {
313 	char *name;
314 	int ret;
315 
316 	if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0)
317 		return (EINVAL);
318 
319 	switch (ldom_major_version(lhp)) {
320 	case 0:
321 		/*
322 		 * version == 0 means LDOMS support is not available
323 		 */
324 		if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
325 			processorid_t vid;
326 			uint32_t cpuid;
327 
328 			if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID,
329 						    &cpuid) == 0 &&
330 			    (vid = cpu_phys2virt(lhp, cpuid)) != -1)
331 				return (p_online(vid, P_STATUS));
332 		} else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) {
333 			return (os_mem_page_retire(lhp,
334 						MEM_PAGE_FMRI_ISRETIRED, nvl));
335 		}
336 
337 		return (EINVAL);
338 		/*NOTREACHED*/
339 		break;
340 	case 1:
341 		/* LDOMS 1.0 */
342 		if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
343 			uint32_t cpuid;
344 
345 			if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID,
346 						&cpuid) == 0)
347 				ret = ldmsvcs_cpu_req_status(lhp, cpuid);
348 		} else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) {
349 			uint64_t pa;
350 
351 			if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR,
352 						&pa) == 0)
353 				ret = ldmsvcs_mem_req_status(lhp, pa);
354 			else
355 				ret = EINVAL;
356 		} else {
357 			ret = ENOTSUP;
358 		}
359 		return (ret);
360 
361 		/*NOTREACHED*/
362 		break;
363 	default:
364 		break;
365 	}
366 
367 	return (ENOTSUP);
368 }
369 
370 
371 int
372 ldom_fmri_retire(ldom_hdl_t *lhp, nvlist_t *nvl)
373 {
374 	char *name;
375 	int ret;
376 
377 	if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0)
378 		return (EINVAL);
379 
380 	switch (ldom_major_version(lhp)) {
381 	case 0:
382 		/*
383 		 * version == 0 means LDOMS support is not available
384 		 */
385 		if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
386 			processorid_t vid;
387 			uint32_t cpuid;
388 
389 			if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID,
390 						    &cpuid) == 0 &&
391 			    (vid = cpu_phys2virt(lhp, cpuid)) != -1)
392 				return (p_online(vid, P_FAULTED));
393 		} else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) {
394 			return (os_mem_page_retire(lhp,
395 						MEM_PAGE_FMRI_RETIRE, nvl));
396 		}
397 
398 		return (EINVAL);
399 		/*NOTREACHED*/
400 		break;
401 	case 1:
402 		/* LDOMS 1.0 */
403 		if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
404 			uint32_t cpuid;
405 
406 			if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID,
407 						&cpuid) == 0)
408 				ret = ldmsvcs_cpu_req_offline(lhp, cpuid);
409 		} else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) {
410 			uint64_t pa;
411 
412 			if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR,
413 						&pa) == 0)
414 				ret = ldmsvcs_mem_req_retire(lhp, pa);
415 			else
416 				ret = EINVAL;
417 		} else {
418 			ret = ENOTSUP;
419 		}
420 		return (ret);
421 
422 		/*NOTREACHED*/
423 		break;
424 	default:
425 		break;
426 	}
427 
428 	return (ENOTSUP);
429 }
430 
431 
432 /*
433  * blacklist cpus in a non-LDOMS environment
434  */
435 int
436 ldom_fmri_blacklist(ldom_hdl_t *lhp, nvlist_t *nvl)
437 {
438 	char *name;
439 
440 	if (ldom_major_version(lhp) != 0)
441 		return (0);
442 
443 	if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0)
444 		return (EINVAL);
445 
446 	if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
447 		bl_req_t blr;
448 		char *class;
449 		int fd, rc, err;
450 
451 		if ((nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) ||
452 		    (class == NULL) || (*class == '\0'))
453 			return (EINVAL);
454 
455 		if ((fd = open("/dev/bl", O_RDONLY)) < 0)
456 			return (EIO);
457 
458 		if (nvlist_size(nvl, &blr.bl_fmrisz, NV_ENCODE_NATIVE) != 0 ||
459 		    blr.bl_fmrisz == 0 ||
460 		    (blr.bl_fmri = (caddr_t)lhp->allocp(blr.bl_fmrisz)) ==
461 		    NULL) {
462 			(void) close(fd);
463 			return (EINVAL);
464 		}
465 
466 		blr.bl_class = class;
467 
468 		rc = ioctl(fd, BLIOC_INSERT, &blr);
469 		err = errno;
470 
471 		lhp->freep((void *)&blr.bl_fmri, blr.bl_fmrisz);
472 		(void) close(fd);
473 
474 		if (rc < 0 && err != ENOTSUP) {
475 			errno = err;
476 			return (-1);
477 		}
478 	}
479 
480 	return (0);
481 }
482 
483 
484 ssize_t
485 ldom_get_core_md(ldom_hdl_t *lhp, uint64_t **buf)
486 {
487 	switch (ldom_major_version(lhp)) {
488 	case 0:
489 		return (get_local_core_md(lhp, buf));
490 		/*NOTREACHED*/
491 		break;
492 	case 1:
493 		/* LDOMS 1.0 */
494 		if (ldom_on_service(lhp) == 1)
495 			return (ldmsvcs_get_core_md(lhp, buf));
496 		else
497 			return (get_local_core_md(lhp, buf));
498 
499 		/*NOTREACHED*/
500 		break;
501 	default:
502 		*buf = NULL;
503 		break;
504 	}
505 
506 	return (-1);
507 }
508 
509 
510 /*
511  * version 0 means no LDOMS
512  */
513 int
514 ldom_major_version(ldom_hdl_t *lhp)
515 {
516 	if (lhp == NULL)
517 		return (-1);
518 
519 	if (ldom_getinfo(lhp) == 0)
520 		return (lhp->major_version);
521 	else
522 		return (0);
523 }
524 
525 /*
526  * in the absence of ldoms we are on a single OS instance which is the
527  * equivalent of the service ldom
528  */
529 int
530 ldom_on_service(ldom_hdl_t *lhp)
531 {
532 	if (lhp == NULL)
533 		return (-1);
534 
535 	if (ldom_getinfo(lhp) == 0)
536 		return (lhp->service_ldom);
537 	else
538 		return (1);
539 }
540 
541 
542 ldom_hdl_t *
543 ldom_init(void *(*allocp)(size_t size),
544 	void (*freep)(void *addr, size_t size))
545 {
546 	struct ldom_hdl *lhp;
547 
548 	if ((lhp = allocp(sizeof (struct ldom_hdl))) == NULL)
549 		return (NULL);
550 
551 	lhp->major_version = -1;	/* version not yet determined */
552 	lhp->allocp = allocp;
553 	lhp->freep = freep;
554 
555 	ldmsvcs_init(lhp);
556 
557 	return (lhp);
558 }
559 
560 
561 void
562 ldom_fini(ldom_hdl_t *lhp)
563 {
564 	if (lhp == NULL)
565 		return;
566 
567 	ldmsvcs_fini(lhp);
568 	lhp->freep(lhp, sizeof (struct ldom_hdl));
569 }
570 
571 /* end file */
572