xref: /titanic_50/usr/src/cmd/fm/modules/sun4u/datapath-retire/cda_main.c (revision d9638e547d8811f2c689977f8dd2a353938b61fd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 
30 #include <cda.h>
31 
32 #include <strings.h>
33 #include <errno.h>
34 #include <time.h>
35 #include <sys/utsname.h>
36 #include <sys/systeminfo.h>
37 #include <fm/fmd_api.h>
38 #include <sys/fm/protocol.h>
39 
40 cda_t cda;
41 
42 cda_stats_t cda_stats = {
43 	{ "dp_offs", FMD_TYPE_UINT64, "successful cpu offlines" },
44 	{ "dp_fails", FMD_TYPE_UINT64, "datapath faults unresolveable" },
45 	{ "cpu_supp", FMD_TYPE_UINT64, "cpu offlines suppressed" },
46 	{ "bad_flts", FMD_TYPE_UINT64, "invalid fault events received" },
47 	{ "nop_flts", FMD_TYPE_UINT64, "inapplicable fault events received" },
48 };
49 
50 typedef struct cda_subscriber {
51 	const char *subr_class;
52 	const char *subr_sname;
53 	uint_t subr_svers;
54 	void (*subr_func)(fmd_hdl_t *, nvlist_t *, nvlist_t *, const char *);
55 } cda_subscriber_t;
56 
57 static const cda_subscriber_t cda_subrs[] = {
58 	{ "fault.asic.*.dp", FM_FMRI_SCHEME_HC, FM_HC_SCHEME_VERSION,
59 	    cda_dp_retire },
60 	{ NULL, NULL, 0, NULL }
61 };
62 
63 static const cda_subscriber_t *
cda_get_subr(fmd_hdl_t * hdl,nvlist_t * nvl,nvlist_t ** asrup)64 cda_get_subr(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t **asrup)
65 {
66 	const cda_subscriber_t *sp;
67 	nvlist_t *asru;
68 	char *scheme;
69 	uint8_t version;
70 
71 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &asru) != 0 ||
72 	    nvlist_lookup_string(asru, FM_FMRI_SCHEME, &scheme) != 0 ||
73 	    nvlist_lookup_uint8(asru, FM_VERSION, &version) != 0) {
74 		cda_stats.bad_flts.fmds_value.ui64++;
75 		return (NULL);
76 	}
77 
78 	for (sp = cda_subrs; sp->subr_class != NULL; sp++) {
79 		if (fmd_nvl_class_match(hdl, nvl, sp->subr_class) &&
80 		    strcmp(scheme, sp->subr_sname) == 0 &&
81 		    version <= sp->subr_svers) {
82 			*asrup = asru;
83 			return (sp);
84 		}
85 	}
86 
87 	cda_stats.nop_flts.fmds_value.ui64++;
88 	return (NULL);
89 }
90 
91 static void
cda_recv_list(fmd_hdl_t * hdl,nvlist_t * nvl)92 cda_recv_list(fmd_hdl_t *hdl, nvlist_t *nvl)
93 {
94 	char *uuid = NULL;
95 	nvlist_t **nva;
96 	uint_t nvc;
97 	int err = 0;
98 
99 	err |= nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid);
100 	err |= nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
101 	    &nva, &nvc);
102 	if (err != 0) {
103 		cda_stats.bad_flts.fmds_value.ui64++;
104 		return;
105 	}
106 
107 	while (nvc-- != 0) {
108 		nvlist_t *nvl = *nva++;
109 		const cda_subscriber_t *subr;
110 		nvlist_t *asru;
111 
112 		if (fmd_case_uuclosed(hdl, uuid))
113 			break;
114 
115 		if ((subr = cda_get_subr(hdl, nvl, &asru)) == NULL)
116 			continue;
117 
118 		if (subr->subr_func != NULL)
119 			subr->subr_func(hdl, nvl, asru, uuid);
120 	}
121 }
122 
123 static void
cda_recv_one(fmd_hdl_t * hdl,nvlist_t * nvl)124 cda_recv_one(fmd_hdl_t *hdl, nvlist_t *nvl)
125 {
126 	const cda_subscriber_t *subr;
127 	nvlist_t *asru;
128 
129 	if ((subr = cda_get_subr(hdl, nvl, &asru)) == NULL)
130 		return;
131 
132 	if (subr->subr_func != NULL)
133 		subr->subr_func(hdl, nvl, asru, NULL);
134 }
135 
136 /*ARGSUSED*/
137 static void
cda_recv(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class)138 cda_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
139 {
140 	fmd_hdl_debug(hdl, "received %s\n", class);
141 
142 	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0)
143 		cda_recv_list(hdl, nvl);
144 	else
145 		cda_recv_one(hdl, nvl);
146 }
147 
148 static const fmd_hdl_ops_t fmd_ops = {
149 	cda_recv,	/* fmdo_recv */
150 	NULL,		/* fmdo_timeout */
151 	NULL,		/* fmdo_close */
152 	NULL,		/* fmdo_stats */
153 	NULL,		/* fmdo_gc */
154 };
155 
156 static const fmd_prop_t fmd_props[] = {
157 	{ "cpu_tries", FMD_TYPE_UINT32, "10" },
158 	{ "cpu_delay", FMD_TYPE_TIME, "1sec" },
159 	{ "cpu_offline_enable", FMD_TYPE_BOOL, "true" },
160 	{ "cpu_forced_offline", FMD_TYPE_BOOL, "true" },
161 	{ NULL, 0, NULL }
162 };
163 
164 static const fmd_hdl_info_t fmd_info = {
165 	"Datapath Retire Agent", CDA_VERSION, &fmd_ops, fmd_props
166 };
167 
168 static int
cda_platform_check_support(fmd_hdl_t * hdl)169 cda_platform_check_support(fmd_hdl_t *hdl)
170 {
171 	char buf[SYS_NMLN];
172 
173 	if (sysinfo(SI_PLATFORM, buf, sizeof (buf)) == -1) {
174 		fmd_hdl_debug(hdl, "sysinfo failed");
175 		return (0);
176 	}
177 
178 	if (strcmp(buf, "SUNW,Sun-Fire-15000") == 0 ||
179 	    strcmp(buf, "SUNW,Sun-Fire") == 0 ||
180 	    strcmp(buf, "SUNW,Netra-T12") == 0)
181 		return (1);
182 	else
183 		return (0);
184 }
185 
186 void
_fmd_init(fmd_hdl_t * hdl)187 _fmd_init(fmd_hdl_t *hdl)
188 {
189 	hrtime_t nsec;
190 
191 	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
192 		return; /* invalid data in configuration file */
193 
194 	if (cda_platform_check_support(hdl) == 0) {
195 		fmd_hdl_debug(hdl, "platform not supported");
196 		fmd_hdl_unregister(hdl);
197 		return;
198 	}
199 
200 	fmd_hdl_subscribe(hdl, "fault.asic.*.dp");
201 
202 	(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (cda_stats) /
203 	    sizeof (fmd_stat_t), (fmd_stat_t *)&cda_stats);
204 
205 	cda.cda_cpu_tries = fmd_prop_get_int32(hdl, "cpu_tries");
206 
207 	nsec = fmd_prop_get_int64(hdl, "cpu_delay");
208 	cda.cda_cpu_delay.tv_sec = nsec / NANOSEC;
209 	cda.cda_cpu_delay.tv_nsec = nsec % NANOSEC;
210 
211 	cda.cda_cpu_dooffline = fmd_prop_get_int32(hdl,
212 		"cpu_offline_enable");
213 	cda.cda_cpu_forcedoffline = fmd_prop_get_int32(hdl,
214 	    "cpu_forced_offline");
215 }
216 
217 /*ARGSUSED*/
218 void
_fmd_fini(fmd_hdl_t * hdl)219 _fmd_fini(fmd_hdl_t *hdl)
220 {
221 }
222