xref: /illumos-gate/usr/src/cmd/fm/modules/common/io-retire/rio_main.c (revision 6e375c8351497b82ffa4f33cbf61d712999b4605)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/fm/protocol.h>
30 #include <fm/fmd_api.h>
31 #include <strings.h>
32 #include <libdevinfo.h>
33 #include <sys/modctl.h>
34 
/*
 * Non-zero when the "global-disable" module property is set.  It is
 * loaded in _fmd_init() and consulted by rio_recv() to skip retires
 * triggered by list.suspect events.
 */
static int	global_disable;

/*
 * One entry of the fault exception list.  Entries are created by
 * parse_exception_string() and released by free_exception_list().
 */
struct except_list {
	char			*el_fault;	/* class pattern (own copy) */
	struct except_list	*el_next;	/* next entry, NULL at end */
};

/* Head of the singly linked exception list; NULL while the list is empty */
static struct except_list *except_list;
43 
44 static void
45 parse_exception_string(fmd_hdl_t *hdl, char *estr)
46 {
47 	char	*p;
48 	char	*next;
49 	size_t	len;
50 	struct except_list *elem;
51 
52 	len = strlen(estr);
53 
54 	p = estr;
55 	for (;;) {
56 		/* Remove leading ':' */
57 		while (*p == ':')
58 			p++;
59 		if (*p == '\0')
60 			break;
61 
62 		next = strchr(p, ':');
63 
64 		if (next)
65 			*next = '\0';
66 
67 		elem = fmd_hdl_alloc(hdl,
68 		    sizeof (struct except_list), FMD_SLEEP);
69 		elem->el_fault = fmd_hdl_strdup(hdl, p, FMD_SLEEP);
70 		elem->el_next = except_list;
71 		except_list = elem;
72 
73 		if (next) {
74 			*next = ':';
75 			p = next + 1;
76 		} else {
77 			break;
78 		}
79 	}
80 
81 	if (len != strlen(estr)) {
82 		fmd_hdl_abort(hdl, "Error parsing exception list: %s\n", estr);
83 	}
84 }
85 
86 /*
87  * Returns
88  *	1  if fault on exception list
89  *	0  otherwise
90  */
91 static int
92 fault_exception(fmd_hdl_t *hdl, nvlist_t *fault)
93 {
94 	struct except_list *elem;
95 
96 	for (elem = except_list; elem; elem = elem->el_next) {
97 		if (fmd_nvl_class_match(hdl, fault, elem->el_fault)) {
98 			fmd_hdl_debug(hdl, "rio_recv: Skipping fault "
99 			    "on exception list (%s)\n", elem->el_fault);
100 			return (1);
101 		}
102 	}
103 
104 	return (0);
105 }
106 
107 static void
108 free_exception_list(fmd_hdl_t *hdl)
109 {
110 	struct except_list *elem;
111 
112 	while (except_list) {
113 		elem = except_list;
114 		except_list = elem->el_next;
115 		fmd_hdl_strfree(hdl, elem->el_fault);
116 		fmd_hdl_free(hdl, elem, sizeof (*elem));
117 	}
118 }
119 
120 
121 /*ARGSUSED*/
122 static void
123 rio_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
124 {
125 	nvlist_t	**faults = NULL;
126 	nvlist_t	*asru;
127 	uint_t		nfaults = 0;
128 	int		f;
129 	char		*path;
130 	char		*uuid;
131 	char		*scheme;
132 	di_retire_t	drt = {0};
133 	int		retire;
134 	int		rval = 0;
135 	int		error;
136 	char		*snglfault = FM_FAULT_CLASS"."FM_ERROR_IO".";
137 	boolean_t	rtr;
138 
139 
140 	/*
141 	 * If disabled, we don't do retire. We still do unretires though
142 	 */
143 	if (global_disable && strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) {
144 		fmd_hdl_debug(hdl, "rio_recv: retire disabled\n");
145 		return;
146 	}
147 
148 	drt.rt_abort = (void (*)(void *, const char *, ...))fmd_hdl_abort;
149 	drt.rt_debug = (void (*)(void *, const char *, ...))fmd_hdl_debug;
150 	drt.rt_hdl = hdl;
151 
152 	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) {
153 		retire = 1;
154 	} else if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) {
155 		retire = 0;
156 	} else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0) {
157 		retire = 0;
158 	} else if (strncmp(class, snglfault, strlen(snglfault)) == 0) {
159 		retire = 1;
160 		faults = &nvl;
161 		nfaults = 1;
162 	} else {
163 		fmd_hdl_debug(hdl, "rio_recv: not list.* class: %s\n", class);
164 		return;
165 	}
166 
167 	if (nfaults == 0 && nvlist_lookup_nvlist_array(nvl,
168 	    FM_SUSPECT_FAULT_LIST, &faults, &nfaults) != 0) {
169 		fmd_hdl_debug(hdl, "rio_recv: no fault list");
170 		return;
171 	}
172 
173 	for (f = 0; f < nfaults; f++) {
174 		if (nvlist_lookup_boolean_value(faults[f], FM_SUSPECT_RETIRE,
175 		    &rtr) == 0 && !rtr) {
176 			fmd_hdl_debug(hdl, "rio_recv: retire suppressed");
177 			continue;
178 		}
179 
180 		if (nvlist_lookup_nvlist(faults[f], FM_FAULT_ASRU,
181 		    &asru) != 0) {
182 			fmd_hdl_debug(hdl, "rio_recv: no asru in fault");
183 			continue;
184 		}
185 
186 		scheme = NULL;
187 		if (nvlist_lookup_string(asru, FM_FMRI_SCHEME, &scheme) != 0 ||
188 		    strcmp(scheme, FM_FMRI_SCHEME_DEV) != 0) {
189 			fmd_hdl_debug(hdl, "rio_recv: not \"dev\" scheme: %s",
190 			    scheme ? scheme : "<NULL>");
191 			continue;
192 		}
193 
194 		if (fault_exception(hdl, faults[f]))
195 			continue;
196 
197 		if (nvlist_lookup_string(asru, FM_FMRI_DEV_PATH,
198 		    &path) != 0 || path[0] == '\0') {
199 			fmd_hdl_debug(hdl, "rio_recv: no dev path in asru");
200 			continue;
201 		}
202 
203 		if (retire) {
204 			if (fmd_nvl_fmri_has_fault(hdl, asru,
205 			    FMD_HAS_FAULT_ASRU, NULL) == 1) {
206 				error = di_retire_device(path, &drt, 0);
207 				if (error != 0) {
208 					fmd_hdl_debug(hdl, "rio_recv:"
209 					    " di_retire_device failed:"
210 					    " error: %d %s", error, path);
211 					rval = -1;
212 				}
213 			}
214 		} else {
215 			if (fmd_nvl_fmri_has_fault(hdl, asru,
216 			    FMD_HAS_FAULT_ASRU, NULL) == 0) {
217 				error = di_unretire_device(path, &drt);
218 				if (error != 0) {
219 					fmd_hdl_debug(hdl, "rio_recv:"
220 					    " di_unretire_device failed:"
221 					    " error: %d %s", error, path);
222 					rval = -1;
223 				}
224 			}
225 		}
226 	}
227 
228 	/*
229 	 * The fmd framework takes care of moving a case to the repaired
230 	 * state. To move the case to the closed state however, we (the
231 	 * retire agent) need to call fmd_case_uuclose()
232 	 */
233 	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 && rval == 0) {
234 		if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0 &&
235 		    !fmd_case_uuclosed(hdl, uuid)) {
236 			fmd_case_uuclose(hdl, uuid);
237 		}
238 	}
239 
240 	/*
241 	 * Similarly to move the case to the resolved state, we (the
242 	 * retire agent) need to call fmd_case_uuresolved()
243 	 */
244 	if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && rval == 0 &&
245 	    nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0)
246 		fmd_case_uuresolved(hdl, uuid);
247 }
248 
/*
 * Module entry points handed to fmd through fmd_info.  Only the receive
 * entry point is implemented; every other slot is left NULL.
 */
static const fmd_hdl_ops_t fmd_ops = {
	rio_recv,	/* fmdo_recv */
	NULL,		/* fmdo_timeout */
	NULL,		/* fmdo_close */
	NULL,		/* fmdo_stats */
	NULL,		/* fmdo_gc */
};
256 
/*
 * Module properties, read once in _fmd_init():
 *	global-disable		boolean, default "false"; when set,
 *				rio_recv() skips list.suspect events
 *	fault-exceptions	string, no default; ':'-separated class
 *				patterns parsed into the exception list
 * The all-NULL entry terminates the table.
 */
static const fmd_prop_t rio_props[] = {
	{ "global-disable", FMD_TYPE_BOOL, "false" },
	{ "fault-exceptions", FMD_TYPE_STRING, NULL },
	{ NULL, 0, NULL }
};
262 
/*
 * Registration record passed to fmd_hdl_register() in _fmd_init(): a
 * descriptive name and version string, followed by the entry point and
 * property tables defined above.
 */
static const fmd_hdl_info_t fmd_info = {
	"I/O Retire Agent", "2.0", &fmd_ops, rio_props
};
266 
267 void
268 _fmd_init(fmd_hdl_t *hdl)
269 {
270 	char	*estr;
271 	char	*estrdup;
272 
273 	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
274 		fmd_hdl_debug(hdl, "failed to register handle\n");
275 		return;
276 	}
277 
278 	global_disable = fmd_prop_get_int32(hdl, "global-disable");
279 
280 	estrdup = NULL;
281 	if (estr = fmd_prop_get_string(hdl, "fault-exceptions")) {
282 		estrdup = fmd_hdl_strdup(hdl, estr, FMD_SLEEP);
283 		fmd_prop_free_string(hdl, estr);
284 		parse_exception_string(hdl, estrdup);
285 		fmd_hdl_strfree(hdl, estrdup);
286 	}
287 }
288 
/*
 * Module unload entry point.  Frees the exception list that _fmd_init()
 * built; calling it with an empty list is harmless.
 */
void
_fmd_fini(fmd_hdl_t *hdl)
{
	free_exception_list(hdl);
}
294