xref: /titanic_51/usr/src/lib/fm/libldom/sparc/ldom.c (revision bfe60e20c2f727eab7a71b13a2183a856ae0c22f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <stdio.h>
29 #include <unistd.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <fcntl.h>
33 #include <pthread.h>
34 #include <errno.h>
35 #include <libnvpair.h>
36 
37 #include <sys/processor.h>
38 #include <sys/stat.h>
39 #include <sys/mdesc.h>
40 #include <sys/param.h>
41 #include <sys/systeminfo.h>
42 #include <sys/mem.h>
43 #include <sys/bl.h>
44 #include <sys/fm/protocol.h>
45 #include <fm/fmd_fmri.h>
46 
47 #include "ldom.h"
48 #include "ldmsvcs_utils.h"
49 
50 
51 static ssize_t
52 get_local_core_md(ldom_hdl_t *lhp, uint64_t **buf)
53 {
54 	int fh;
55 	size_t size;
56 	uint64_t *bufp;
57 
58 	if ((fh = open("/devices/pseudo/mdesc@0:mdesc", O_RDONLY, 0)) < 0)
59 		return (-1);
60 
61 	if (ioctl(fh, MDESCIOCGSZ, &size) < 0) {
62 		(void) close(fh);
63 		return (-1);
64 	}
65 
66 	bufp = (uint64_t *)lhp->allocp(size);
67 
68 	if (read(fh, bufp, size) < 0) {
69 		lhp->freep(bufp, size);
70 		(void) close(fh);
71 		return (-1);
72 	}
73 	(void) close(fh);
74 
75 	*buf = bufp;
76 
77 	return ((ssize_t)size);
78 }
79 
80 
81 static int
82 ldom_getinfo(struct ldom_hdl *lhp)
83 {
84 	static pthread_mutex_t mt = PTHREAD_MUTEX_INITIALIZER;
85 	static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
86 	static int major_version = -1;
87 	static int service_ldom = -1;
88 	static int busy_init = 0;
89 
90 	int ier, rc = 0;
91 
92 	(void) pthread_mutex_lock(&mt);
93 
94 	while (busy_init == 1)
95 		(void) pthread_cond_wait(&cv, &mt);
96 
97 	if (major_version != -1 && service_ldom != -1) {
98 		lhp->major_version = major_version;
99 		lhp->service_ldom = service_ldom;
100 		(void) pthread_mutex_unlock(&mt);
101 		return (0);
102 	}
103 
104 	/*
105 	 * get to this point if major_version and service_ldom have not yet
106 	 * been determined
107 	 */
108 	busy_init = 1;
109 	(void) pthread_mutex_unlock(&mt);
110 
111 	/*
112 	 * set defaults which correspond to the case of "LDOMS not
113 	 * available".  note that these can (and will) also apply to
114 	 * non-sun4v machines.
115 	 */
116 	major_version = 0;
117 	service_ldom = 1;
118 
119 	/* figure out version */
120 	if ((ier = ldmsvcs_check_channel()) == 0) {
121 		/*
122 		 * get into this block if vldc exists.  LDOMS is available
123 		 * and we are on the service LDOM.
124 		 */
125 		major_version = 1;
126 		service_ldom = 1;
127 	} else if (ier == 1) {
128 		/*
129 		 * get into this block if vldc does not exist
130 		 *
131 		 * if we do not get into the following if() block [i.e.,
132 		 * if (bufsiz <= 0)] then we are on a non-sun4v machine.
133 		 */
134 		uint64_t *bufp;
135 		ssize_t bufsiz;
136 
137 		if ((bufsiz = get_local_core_md(lhp, &bufp)) > 0) {
138 			md_t *mdp;
139 
140 			if ((mdp = md_init_intern(bufp, lhp->allocp,
141 						    lhp->freep)) != NULL) {
142 				mde_cookie_t *listp;
143 				uint64_t dval;
144 				int num_nodes;
145 
146 				num_nodes = md_node_count(mdp);
147 				listp = lhp->allocp(sizeof (mde_cookie_t) *
148 						    num_nodes);
149 
150 				/*
151 				 * if we do not enter the following if block,
152 				 * we conclude that LDOMS is not available
153 				 */
154 				if (md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE,
155 					md_find_name(mdp, "platform"),
156 					md_find_name(mdp, "fwd"),
157 					listp) > 0 &&
158 				    md_get_prop_val(mdp, listp[0],
159 					"domaining-enabled", &dval) >= 0 &&
160 				    dval == 1) {
161 					/*
162 					 * LDOMS is available.  an earlier
163 					 * block detected the situation of
164 					 * being on a service LDOM, so
165 					 * we get to this point only if we
166 					 * are not on a service LDOM.
167 					 */
168 					major_version = 1;
169 					service_ldom = 0;
170 				}
171 
172 				lhp->freep(listp, sizeof (mde_cookie_t) *
173 					    num_nodes);
174 				(void) md_fini(mdp);
175 			}
176 
177 			lhp->freep(bufp, bufsiz);
178 		}
179 	} else {
180 		rc = 1;
181 	}
182 
183 	(void) pthread_mutex_lock(&mt);
184 	lhp->major_version = major_version;
185 	lhp->service_ldom = service_ldom;
186 	busy_init = 0;
187 	(void) pthread_mutex_unlock(&mt);
188 
189 	(void) pthread_cond_broadcast(&cv);
190 
191 	return (rc);
192 }
193 
194 
195 /*
196  * search the machine description for a "pid" entry (physical cpuid) and
197  * return the corresponding "id" entry (virtual cpuid)
198  */
199 static processorid_t
200 cpu_phys2virt(ldom_hdl_t *lhp, uint32_t cpuid)
201 {
202 	char isa[MAXNAMELEN];
203 	md_t *mdp;
204 	mde_cookie_t *listp;
205 	ssize_t bufsize;
206 	processorid_t vid;
207 	uint64_t *bufp;
208 	uint64_t pval;
209 	int num_nodes, ncpus, i;
210 
211 	(void) sysinfo(SI_ARCHITECTURE, isa, MAXNAMELEN);
212 
213 	if (strcmp(isa, "sun4v") != 0)
214 		return ((processorid_t)cpuid);
215 
216 	/*
217 	 * convert the physical cpuid to a virtual cpuid
218 	 */
219 	if ((bufsize = ldom_get_core_md(lhp, &bufp)) < 1)
220 		return (-1);
221 
222 	if ((mdp = md_init_intern(bufp, lhp->allocp, lhp->freep)) == NULL ||
223 	    (num_nodes = md_node_count(mdp)) < 1) {
224 		lhp->freep(bufp, bufsize);
225 		return (-1);
226 	}
227 
228 	listp = (mde_cookie_t *)lhp->allocp(sizeof (mde_cookie_t) * num_nodes);
229 	ncpus = md_scan_dag(mdp, MDE_INVAL_ELEM_COOKIE,
230 			    md_find_name(mdp, "cpu"),
231 			    md_find_name(mdp, "fwd"), listp);
232 
233 	vid = -1;
234 	for (i = 0; i < ncpus; i++) {
235 		if (md_get_prop_val(mdp, listp[i], "pid", &pval) >= 0 &&
236 		    pval == (uint64_t)cpuid) {
237 			if (md_get_prop_val(mdp, listp[i], "id", &pval) >= 0)
238 				vid = (processorid_t)pval;
239 
240 			break;
241 		}
242 	}
243 
244 	lhp->freep(listp, sizeof (mde_cookie_t) * num_nodes);
245 	(void) md_fini(mdp);
246 	lhp->freep(bufp, bufsize);
247 
248 	return (vid);
249 }
250 
251 /*
252  * if checking for status of a retired page:
253  *   0 - page is retired
254  *   EAGAIN - page is scheduled for retirement
255  *   EIO - page not scheduled for retirement
256  *   EINVAL - error
257  *
258  * if retiring a page:
259  *   0 - success in retiring page
260  *   EIO - page is already retired
261  *   EAGAIN - page is scheduled for retirement
262  *   EINVAL - error
263  *
264  * the original decoder for ioctl() return values is
265  * http://fma.eng/documents/engineering/cpumem/page_retire_api.txt
266  */
267 static int
268 os_mem_page_retire(ldom_hdl_t *lhp, int cmd, nvlist_t *nvl)
269 {
270 	mem_page_t mpage;
271 	char *fmribuf;
272 	size_t fmrisz;
273 	int fd, rc, err;
274 
275 	if (cmd != MEM_PAGE_RETIRE && cmd != MEM_PAGE_FMRI_RETIRE &&
276 	    cmd != MEM_PAGE_ISRETIRED && cmd != MEM_PAGE_FMRI_ISRETIRED)
277 			return (EINVAL);
278 
279 	if ((fd = open("/dev/mem", O_RDONLY)) < 0)
280 		return (EINVAL);
281 
282 	if ((errno = nvlist_size(nvl, &fmrisz, NV_ENCODE_NATIVE)) != 0 ||
283 	    fmrisz > MEM_FMRI_MAX_BUFSIZE ||
284 	    (fmribuf = lhp->allocp(fmrisz)) == NULL) {
285 		(void) close(fd);
286 		return (EINVAL);
287 	}
288 
289 	if ((errno = nvlist_pack(nvl, &fmribuf, &fmrisz,
290 				    NV_ENCODE_NATIVE, 0)) != 0) {
291 		lhp->freep(fmribuf, fmrisz);
292 		(void) close(fd);
293 		return (EINVAL);
294 	}
295 
296 	mpage.m_fmri = fmribuf;
297 	mpage.m_fmrisz = fmrisz;
298 
299 	rc = ioctl(fd, cmd, &mpage);
300 	err = errno;
301 
302 	lhp->freep(fmribuf, fmrisz);
303 	(void) close(fd);
304 
305 	if (rc < 0) {
306 		rc = err;
307 	}
308 
309 	return (rc);
310 }
311 
312 int
313 ldom_fmri_status(ldom_hdl_t *lhp, nvlist_t *nvl)
314 {
315 	char *name;
316 	int ret;
317 
318 	if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0)
319 		return (EINVAL);
320 
321 	switch (ldom_major_version(lhp)) {
322 	case 0:
323 		/*
324 		 * version == 0 means LDOMS support is not available
325 		 */
326 		if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
327 			processorid_t vid;
328 			uint32_t cpuid;
329 
330 			if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID,
331 						    &cpuid) == 0 &&
332 			    (vid = cpu_phys2virt(lhp, cpuid)) != -1)
333 				return (p_online(vid, P_STATUS));
334 		} else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) {
335 			return (os_mem_page_retire(lhp,
336 						MEM_PAGE_FMRI_ISRETIRED, nvl));
337 		}
338 
339 		return (EINVAL);
340 		/*NOTREACHED*/
341 		break;
342 	case 1:
343 		/* LDOMS 1.0 */
344 		if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
345 			uint32_t cpuid;
346 
347 			if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID,
348 						&cpuid) == 0)
349 				ret = ldmsvcs_cpu_req_status(lhp, cpuid);
350 		} else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) {
351 			uint64_t pa;
352 
353 			if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR,
354 						&pa) == 0)
355 				ret = ldmsvcs_mem_req_status(lhp, pa);
356 			else
357 				ret = EINVAL;
358 		} else {
359 			ret = ENOTSUP;
360 		}
361 		return (ret);
362 
363 		/*NOTREACHED*/
364 		break;
365 	default:
366 		break;
367 	}
368 
369 	return (ENOTSUP);
370 }
371 
372 
373 int
374 ldom_fmri_retire(ldom_hdl_t *lhp, nvlist_t *nvl)
375 {
376 	char *name;
377 	int ret;
378 
379 	if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0)
380 		return (EINVAL);
381 
382 	switch (ldom_major_version(lhp)) {
383 	case 0:
384 		/*
385 		 * version == 0 means LDOMS support is not available
386 		 */
387 		if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
388 			processorid_t vid;
389 			uint32_t cpuid;
390 
391 			if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID,
392 						    &cpuid) == 0 &&
393 			    (vid = cpu_phys2virt(lhp, cpuid)) != -1)
394 				return (p_online(vid, P_FAULTED));
395 		} else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) {
396 			return (os_mem_page_retire(lhp,
397 						MEM_PAGE_FMRI_RETIRE, nvl));
398 		}
399 
400 		return (EINVAL);
401 		/*NOTREACHED*/
402 		break;
403 	case 1:
404 		/* LDOMS 1.0 */
405 		if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
406 			uint32_t cpuid;
407 
408 			if (nvlist_lookup_uint32(nvl, FM_FMRI_CPU_ID,
409 						&cpuid) == 0)
410 				ret = ldmsvcs_cpu_req_offline(lhp, cpuid);
411 		} else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) {
412 			uint64_t pa;
413 
414 			if (nvlist_lookup_uint64(nvl, FM_FMRI_MEM_PHYSADDR,
415 						&pa) == 0)
416 				ret = ldmsvcs_mem_req_retire(lhp, pa);
417 			else
418 				ret = EINVAL;
419 		} else {
420 			ret = ENOTSUP;
421 		}
422 		return (ret);
423 
424 		/*NOTREACHED*/
425 		break;
426 	default:
427 		break;
428 	}
429 
430 	return (ENOTSUP);
431 }
432 
433 
434 /*
435  * blacklist cpus in a non-LDOMS environment
436  */
437 int
438 ldom_fmri_blacklist(ldom_hdl_t *lhp, nvlist_t *nvl)
439 {
440 	char *name;
441 
442 	if (ldom_major_version(lhp) != 0)
443 		return (0);
444 
445 	if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) != 0)
446 		return (EINVAL);
447 
448 	if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
449 		bl_req_t blr;
450 		char *class;
451 		int fd, rc, err;
452 
453 		if ((nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) ||
454 		    (class == NULL) || (*class == '\0'))
455 			return (EINVAL);
456 
457 		if ((fd = open("/dev/bl", O_RDONLY)) < 0)
458 			return (EIO);
459 
460 		if (nvlist_size(nvl, &blr.bl_fmrisz, NV_ENCODE_NATIVE) != 0 ||
461 		    blr.bl_fmrisz == 0 ||
462 		    (blr.bl_fmri = (caddr_t)lhp->allocp(blr.bl_fmrisz)) ==
463 		    NULL) {
464 			(void) close(fd);
465 			return (EINVAL);
466 		}
467 
468 		blr.bl_class = class;
469 
470 		rc = ioctl(fd, BLIOC_INSERT, &blr);
471 		err = errno;
472 
473 		lhp->freep((void *)&blr.bl_fmri, blr.bl_fmrisz);
474 		(void) close(fd);
475 
476 		if (rc < 0 && err != ENOTSUP) {
477 			errno = err;
478 			return (-1);
479 		}
480 	}
481 
482 	return (0);
483 }
484 
485 
486 ssize_t
487 ldom_get_core_md(ldom_hdl_t *lhp, uint64_t **buf)
488 {
489 	switch (ldom_major_version(lhp)) {
490 	case 0:
491 		return (get_local_core_md(lhp, buf));
492 		/*NOTREACHED*/
493 		break;
494 	case 1:
495 		/* LDOMS 1.0 */
496 		if (ldom_on_service(lhp) == 1)
497 			return (ldmsvcs_get_core_md(lhp, buf));
498 		else
499 			return (get_local_core_md(lhp, buf));
500 
501 		/*NOTREACHED*/
502 		break;
503 	default:
504 		*buf = NULL;
505 		break;
506 	}
507 
508 	return (-1);
509 }
510 
511 
512 /*
513  * version 0 means no LDOMS
514  */
515 int
516 ldom_major_version(ldom_hdl_t *lhp)
517 {
518 	if (lhp == NULL)
519 		return (-1);
520 
521 	if (ldom_getinfo(lhp) == 0)
522 		return (lhp->major_version);
523 	else
524 		return (0);
525 }
526 
527 /*
528  * in the absence of ldoms we are on a single OS instance which is the
529  * equivalent of the service ldom
530  */
531 int
532 ldom_on_service(ldom_hdl_t *lhp)
533 {
534 	if (lhp == NULL)
535 		return (-1);
536 
537 	if (ldom_getinfo(lhp) == 0)
538 		return (lhp->service_ldom);
539 	else
540 		return (1);
541 }
542 
543 
544 ldom_hdl_t *
545 ldom_init(void *(*allocp)(size_t size),
546 	void (*freep)(void *addr, size_t size))
547 {
548 	struct ldom_hdl *lhp;
549 
550 	if ((lhp = allocp(sizeof (struct ldom_hdl))) == NULL)
551 		return (NULL);
552 
553 	lhp->major_version = -1;	/* version not yet determined */
554 	lhp->allocp = allocp;
555 	lhp->freep = freep;
556 
557 	ldmsvcs_init(lhp);
558 
559 	return (lhp);
560 }
561 
562 
563 void
564 ldom_fini(ldom_hdl_t *lhp)
565 {
566 	if (lhp == NULL)
567 		return;
568 
569 	ldmsvcs_fini(lhp);
570 	lhp->freep(lhp, sizeof (struct ldom_hdl));
571 }
572 
573 /* end file */
574