xref: /illumos-gate/usr/src/cmd/fm/modules/common/io-retire/rio_main.c (revision 07a48826732249fcd3aa8dd53c8389595e9f1fbc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/fm/protocol.h>
28 #include <fm/fmd_api.h>
29 #include <strings.h>
30 #include <libdevinfo.h>
31 #include <sys/modctl.h>
32 
/* Non-zero when the "global-disable" property asks us not to retire devices. */
static int global_disable;

/*
 * One entry of the fault exception list: a singly linked list of fault
 * class strings that are compared against incoming faults with
 * fmd_nvl_class_match().
 */
struct except_list {
	char			*el_fault;	/* private copy of the class */
	struct except_list	*el_next;	/* next entry, NULL at the tail */
};

/* Head of the exception list; stays NULL until an exception is parsed. */
static struct except_list *except_list;
41 
42 static void
43 parse_exception_string(fmd_hdl_t *hdl, char *estr)
44 {
45 	char	*p;
46 	char	*next;
47 	size_t	len;
48 	struct except_list *elem;
49 
50 	len = strlen(estr);
51 
52 	p = estr;
53 	for (;;) {
54 		/* Remove leading ':' */
55 		while (*p == ':')
56 			p++;
57 		if (*p == '\0')
58 			break;
59 
60 		next = strchr(p, ':');
61 
62 		if (next)
63 			*next = '\0';
64 
65 		elem = fmd_hdl_alloc(hdl,
66 		    sizeof (struct except_list), FMD_SLEEP);
67 		elem->el_fault = fmd_hdl_strdup(hdl, p, FMD_SLEEP);
68 		elem->el_next = except_list;
69 		except_list = elem;
70 
71 		if (next) {
72 			*next = ':';
73 			p = next + 1;
74 		} else {
75 			break;
76 		}
77 	}
78 
79 	if (len != strlen(estr)) {
80 		fmd_hdl_abort(hdl, "Error parsing exception list: %s\n", estr);
81 	}
82 }
83 
84 /*
85  * Returns
86  *	1  if fault on exception list
87  *	0  otherwise
88  */
89 static int
90 fault_exception(fmd_hdl_t *hdl, nvlist_t *fault)
91 {
92 	struct except_list *elem;
93 
94 	for (elem = except_list; elem; elem = elem->el_next) {
95 		if (fmd_nvl_class_match(hdl, fault, elem->el_fault)) {
96 			fmd_hdl_debug(hdl, "rio_recv: Skipping fault "
97 			    "on exception list (%s)\n", elem->el_fault);
98 			return (1);
99 		}
100 	}
101 
102 	return (0);
103 }
104 
105 static void
106 free_exception_list(fmd_hdl_t *hdl)
107 {
108 	struct except_list *elem;
109 
110 	while (except_list) {
111 		elem = except_list;
112 		except_list = elem->el_next;
113 		fmd_hdl_strfree(hdl, elem->el_fault);
114 		fmd_hdl_free(hdl, elem, sizeof (*elem));
115 	}
116 }
117 
118 
119 /*ARGSUSED*/
120 static void
121 rio_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
122 {
123 	nvlist_t	**faults = NULL;
124 	nvlist_t	*asru;
125 	uint_t		nfaults = 0;
126 	int		f;
127 	char		*path;
128 	char		*uuid;
129 	char		*scheme;
130 	di_retire_t	drt = {0};
131 	int		retire;
132 	int		rval = 0;
133 	int		valid_suspect = 0;
134 	int		error;
135 	char		*snglfault = FM_FAULT_CLASS"."FM_ERROR_IO".";
136 	boolean_t	rtr;
137 
138 
139 	/*
140 	 * If disabled, we don't do retire. We still do unretires though
141 	 */
142 	if (global_disable && strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) {
143 		fmd_hdl_debug(hdl, "rio_recv: retire disabled\n");
144 		return;
145 	}
146 
147 	drt.rt_abort = (void (*)(void *, const char *, ...))fmd_hdl_abort;
148 	drt.rt_debug = (void (*)(void *, const char *, ...))fmd_hdl_debug;
149 	drt.rt_hdl = hdl;
150 
151 	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) {
152 		retire = 1;
153 	} else if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) {
154 		retire = 0;
155 	} else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0) {
156 		retire = 0;
157 	} else if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0) {
158 		return;
159 	} else if (strncmp(class, snglfault, strlen(snglfault)) == 0) {
160 		retire = 1;
161 		faults = &nvl;
162 		nfaults = 1;
163 	} else {
164 		fmd_hdl_debug(hdl, "rio_recv: not list.* class: %s\n", class);
165 		return;
166 	}
167 
168 	if (nfaults == 0 && nvlist_lookup_nvlist_array(nvl,
169 	    FM_SUSPECT_FAULT_LIST, &faults, &nfaults) != 0) {
170 		fmd_hdl_debug(hdl, "rio_recv: no fault list");
171 		return;
172 	}
173 
174 	for (f = 0; f < nfaults; f++) {
175 		if (nvlist_lookup_boolean_value(faults[f], FM_SUSPECT_RETIRE,
176 		    &rtr) == 0 && !rtr) {
177 			fmd_hdl_debug(hdl, "rio_recv: retire suppressed");
178 			continue;
179 		}
180 
181 		if (nvlist_lookup_nvlist(faults[f], FM_FAULT_ASRU,
182 		    &asru) != 0) {
183 			fmd_hdl_debug(hdl, "rio_recv: no asru in fault");
184 			continue;
185 		}
186 
187 		scheme = NULL;
188 		if (nvlist_lookup_string(asru, FM_FMRI_SCHEME, &scheme) != 0 ||
189 		    strcmp(scheme, FM_FMRI_SCHEME_DEV) != 0) {
190 			fmd_hdl_debug(hdl, "rio_recv: not \"dev\" scheme: %s",
191 			    scheme ? scheme : "<NULL>");
192 			continue;
193 		}
194 
195 		if (fault_exception(hdl, faults[f]))
196 			continue;
197 
198 		if (nvlist_lookup_string(asru, FM_FMRI_DEV_PATH,
199 		    &path) != 0 || path[0] == '\0') {
200 			fmd_hdl_debug(hdl, "rio_recv: no dev path in asru");
201 			continue;
202 		}
203 
204 		valid_suspect = 1;
205 		if (retire) {
206 			if (fmd_nvl_fmri_has_fault(hdl, asru,
207 			    FMD_HAS_FAULT_ASRU, NULL) == 1) {
208 				error = di_retire_device(path, &drt, 0);
209 				if (error != 0) {
210 					fmd_hdl_debug(hdl, "rio_recv:"
211 					    " di_retire_device failed:"
212 					    " error: %d %s", error, path);
213 					rval = -1;
214 				}
215 			}
216 		} else {
217 			if (fmd_nvl_fmri_has_fault(hdl, asru,
218 			    FMD_HAS_FAULT_ASRU, NULL) == 0) {
219 				error = di_unretire_device(path, &drt);
220 				if (error != 0) {
221 					fmd_hdl_debug(hdl, "rio_recv:"
222 					    " di_unretire_device failed:"
223 					    " error: %d %s", error, path);
224 					rval = -1;
225 				}
226 			}
227 		}
228 	}
229 
230 	/*
231 	 * Don't send uuclose or uuresolved unless at least one suspect
232 	 * was valid for this retire agent and no retires/unretires failed.
233 	 */
234 	if (valid_suspect == 0)
235 		return;
236 
237 	/*
238 	 * The fmd framework takes care of moving a case to the repaired
239 	 * state. To move the case to the closed state however, we (the
240 	 * retire agent) need to call fmd_case_uuclose()
241 	 */
242 	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 && rval == 0) {
243 		if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0 &&
244 		    !fmd_case_uuclosed(hdl, uuid)) {
245 			fmd_case_uuclose(hdl, uuid);
246 		}
247 	}
248 
249 	/*
250 	 * Similarly to move the case to the resolved state, we (the
251 	 * retire agent) need to call fmd_case_uuresolved()
252 	 */
253 	if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && rval == 0 &&
254 	    nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0)
255 		fmd_case_uuresolved(hdl, uuid);
256 }
257 
258 static const fmd_hdl_ops_t fmd_ops = {
259 	rio_recv,	/* fmdo_recv */
260 	NULL,		/* fmdo_timeout */
261 	NULL,		/* fmdo_close */
262 	NULL,		/* fmdo_stats */
263 	NULL,		/* fmdo_gc */
264 };
265 
266 static const fmd_prop_t rio_props[] = {
267 	{ "global-disable", FMD_TYPE_BOOL, "false" },
268 	{ "fault-exceptions", FMD_TYPE_STRING, NULL },
269 	{ NULL, 0, NULL }
270 };
271 
272 static const fmd_hdl_info_t fmd_info = {
273 	"I/O Retire Agent", "2.0", &fmd_ops, rio_props
274 };
275 
276 void
277 _fmd_init(fmd_hdl_t *hdl)
278 {
279 	char	*estr;
280 	char	*estrdup;
281 
282 	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
283 		fmd_hdl_debug(hdl, "failed to register handle\n");
284 		return;
285 	}
286 
287 	global_disable = fmd_prop_get_int32(hdl, "global-disable");
288 
289 	estrdup = NULL;
290 	if (estr = fmd_prop_get_string(hdl, "fault-exceptions")) {
291 		estrdup = fmd_hdl_strdup(hdl, estr, FMD_SLEEP);
292 		fmd_prop_free_string(hdl, estr);
293 		parse_exception_string(hdl, estrdup);
294 		fmd_hdl_strfree(hdl, estrdup);
295 	}
296 }
297 
298 void
299 _fmd_fini(fmd_hdl_t *hdl)
300 {
301 	free_exception_list(hdl);
302 }
303