xref: /titanic_52/usr/src/cmd/fm/modules/common/cpumem-retire/cma_cpu_arch.c (revision 5f149bca52352f45598e5563debe72ce04bd7a21)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <cma.h>
30 
31 #include <fcntl.h>
32 #include <unistd.h>
33 #include <strings.h>
34 #include <errno.h>
35 #include <time.h>
36 #include <fm/fmd_api.h>
37 #include <sys/fm/protocol.h>
38 #include <sys/bl.h>
39 #include <sys/processor.h>
40 
41 int
42 cma_cpu_blacklist(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru,
43     boolean_t repair)
44 {
45 	bl_req_t blr;
46 	nvlist_t *fmri;
47 	char *fmribuf;
48 	size_t fmrisz;
49 	int fd, rc, err;
50 	char *class;
51 
52 	/*
53 	 * Some platforms have special unums for the E$ DIMMs.  If we're dealing
54 	 * with a platform that has these unums, one will have been added to the
55 	 * fault as the resource.  We'll use that for the blacklisting.  If we
56 	 * can't find a resource, we'll fall back to the ASRU.
57 	 */
58 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &fmri) != 0)
59 		fmri = asru;
60 
61 	if ((nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) ||
62 	    (class == NULL) || (*class == '\0')) {
63 		fmd_hdl_debug(hdl, "failed to get the fault class name\n");
64 		errno = EINVAL;
65 		return (-1);
66 	}
67 
68 	if ((fd = open("/dev/bl", O_RDONLY)) < 0)
69 		return (-1); /* errno is set for us */
70 
71 	if ((errno = nvlist_size(fmri, &fmrisz, NV_ENCODE_NATIVE)) != 0 ||
72 	    (fmribuf = fmd_hdl_alloc(hdl, fmrisz, FMD_SLEEP)) == NULL) {
73 		(void) close(fd);
74 		return (-1); /* errno is set for us */
75 	}
76 
77 	if ((errno = nvlist_pack(fmri, &fmribuf, &fmrisz,
78 	    NV_ENCODE_NATIVE, 0)) != 0) {
79 		fmd_hdl_free(hdl, fmribuf, fmrisz);
80 		(void) close(fd);
81 		return (-1); /* errno is set for us */
82 	}
83 
84 	blr.bl_fmri = fmribuf;
85 	blr.bl_fmrisz = fmrisz;
86 	blr.bl_class = class;
87 
88 	rc = ioctl(fd, repair ? BLIOC_DELETE : BLIOC_INSERT, &blr);
89 	err = errno;
90 
91 	fmd_hdl_free(hdl, fmribuf, fmrisz);
92 	(void) close(fd);
93 
94 	if (rc < 0 && err != ENOTSUP) {
95 		errno = err;
96 		return (-1);
97 	}
98 
99 	return (0);
100 }
101 
102 /* ARGSUSED */
103 int
104 cma_cpu_statechange(fmd_hdl_t *hdl, nvlist_t *asru, const char *uuid,
105     int cpustate, boolean_t repair)
106 {
107 	int i;
108 	uint_t cpuid;
109 
110 	if (nvlist_lookup_uint32(asru, FM_FMRI_CPU_ID, &cpuid) != 0) {
111 		fmd_hdl_debug(hdl, "missing '%s'\n", FM_FMRI_CPU_ID);
112 		cma_stats.bad_flts.fmds_value.ui64++;
113 		return (CMA_RA_FAILURE);
114 	}
115 
116 	for (i = 0; i < cma.cma_cpu_tries;
117 	    i++, (void) nanosleep(&cma.cma_cpu_delay, NULL)) {
118 		int oldstate;
119 		if ((oldstate = p_online(cpuid, cpustate)) != -1) {
120 			fmd_hdl_debug(hdl, "changed cpu %u state from \"%s\" "
121 			    "to \"%s\"\n", cpuid, p_online_state_fmt(oldstate),
122 			    p_online_state_fmt(cpustate));
123 			if (repair)
124 				cma_stats.cpu_repairs.fmds_value.ui64++;
125 			else
126 				cma_stats.cpu_flts.fmds_value.ui64++;
127 			return (CMA_RA_SUCCESS);
128 		}
129 	}
130 
131 	fmd_hdl_debug(hdl, "failed to changed cpu %u state to \"%s\": %s\n",
132 	    cpuid, p_online_state_fmt(cpustate), strerror(errno));
133 	cma_stats.cpu_fails.fmds_value.ui64++;
134 	return (CMA_RA_FAILURE);
135 }
136