1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/fm/protocol.h>
28 #include <fm/fmd_api.h>
29 #include <strings.h>
30 #include <libdevinfo.h>
31 #include <sys/modctl.h>
32
/*
 * Value of the "global-disable" module property, read once in _fmd_init().
 * When nonzero, rio_recv() returns early for FM_LIST_SUSPECT_CLASS and
 * FM_LIST_UPDATED_CLASS events, so no retires are performed; other event
 * classes are still processed.
 */
static int	global_disable;
34
/*
 * One entry of the fault exception list: a singly linked list node holding
 * a fault class pattern that this agent must not act on.
 */
struct except_list {
	char			*el_fault;	/* pattern, fmd_hdl_strdup()ed */
	struct except_list	*el_next;	/* next entry, NULL at tail */
};
39
/*
 * Head of the fault exception list. Populated by parse_exception_string(),
 * searched by fault_exception() and torn down by free_exception_list().
 */
static struct except_list *except_list;
41
42 static void
parse_exception_string(fmd_hdl_t * hdl,char * estr)43 parse_exception_string(fmd_hdl_t *hdl, char *estr)
44 {
45 char *p;
46 char *next;
47 size_t len;
48 struct except_list *elem;
49
50 len = strlen(estr);
51
52 p = estr;
53 for (;;) {
54 /* Remove leading ':' */
55 while (*p == ':')
56 p++;
57 if (*p == '\0')
58 break;
59
60 next = strchr(p, ':');
61
62 if (next)
63 *next = '\0';
64
65 elem = fmd_hdl_alloc(hdl,
66 sizeof (struct except_list), FMD_SLEEP);
67 elem->el_fault = fmd_hdl_strdup(hdl, p, FMD_SLEEP);
68 elem->el_next = except_list;
69 except_list = elem;
70
71 if (next) {
72 *next = ':';
73 p = next + 1;
74 } else {
75 break;
76 }
77 }
78
79 if (len != strlen(estr)) {
80 fmd_hdl_abort(hdl, "Error parsing exception list: %s\n", estr);
81 }
82 }
83
84 /*
85 * Returns
86 * 1 if fault on exception list
87 * 0 otherwise
88 */
89 static int
fault_exception(fmd_hdl_t * hdl,nvlist_t * fault)90 fault_exception(fmd_hdl_t *hdl, nvlist_t *fault)
91 {
92 struct except_list *elem;
93
94 for (elem = except_list; elem; elem = elem->el_next) {
95 if (fmd_nvl_class_match(hdl, fault, elem->el_fault)) {
96 fmd_hdl_debug(hdl, "rio_recv: Skipping fault "
97 "on exception list (%s)\n", elem->el_fault);
98 return (1);
99 }
100 }
101
102 return (0);
103 }
104
105 static void
free_exception_list(fmd_hdl_t * hdl)106 free_exception_list(fmd_hdl_t *hdl)
107 {
108 struct except_list *elem;
109
110 while (except_list) {
111 elem = except_list;
112 except_list = elem->el_next;
113 fmd_hdl_strfree(hdl, elem->el_fault);
114 fmd_hdl_free(hdl, elem, sizeof (*elem));
115 }
116 }
117
118
119 /*ARGSUSED*/
120 static void
rio_recv(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class)121 rio_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
122 {
123 nvlist_t **faults = NULL;
124 nvlist_t *asru;
125 uint_t nfaults = 0;
126 int f;
127 char *path;
128 char *uuid;
129 char *scheme;
130 di_retire_t drt = {0};
131 int retire;
132 int rval = 0;
133 int valid_suspect = 0;
134 int error;
135 char *snglfault = FM_FAULT_CLASS"."FM_ERROR_IO".";
136 boolean_t rtr;
137
138
139 /*
140 * If disabled, we don't do retire. We still do unretires though
141 */
142 if (global_disable && (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
143 strcmp(class, FM_LIST_UPDATED_CLASS) == 0)) {
144 fmd_hdl_debug(hdl, "rio_recv: retire disabled\n");
145 return;
146 }
147
148 drt.rt_abort = (void (*)(void *, const char *, ...))fmd_hdl_abort;
149 drt.rt_debug = (void (*)(void *, const char *, ...))fmd_hdl_debug;
150 drt.rt_hdl = hdl;
151
152 if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) {
153 retire = 1;
154 } else if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) {
155 retire = 0;
156 } else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0) {
157 retire = 0;
158 } else if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0) {
159 return;
160 } else if (strncmp(class, snglfault, strlen(snglfault)) == 0) {
161 retire = 1;
162 faults = &nvl;
163 nfaults = 1;
164 } else {
165 fmd_hdl_debug(hdl, "rio_recv: not list.* class: %s\n", class);
166 return;
167 }
168
169 if (nfaults == 0 && nvlist_lookup_nvlist_array(nvl,
170 FM_SUSPECT_FAULT_LIST, &faults, &nfaults) != 0) {
171 fmd_hdl_debug(hdl, "rio_recv: no fault list");
172 return;
173 }
174
175 for (f = 0; f < nfaults; f++) {
176 if (nvlist_lookup_boolean_value(faults[f], FM_SUSPECT_RETIRE,
177 &rtr) == 0 && !rtr) {
178 fmd_hdl_debug(hdl, "rio_recv: retire suppressed");
179 continue;
180 }
181
182 if (nvlist_lookup_nvlist(faults[f], FM_FAULT_ASRU,
183 &asru) != 0) {
184 fmd_hdl_debug(hdl, "rio_recv: no asru in fault");
185 continue;
186 }
187
188 scheme = NULL;
189 if (nvlist_lookup_string(asru, FM_FMRI_SCHEME, &scheme) != 0 ||
190 strcmp(scheme, FM_FMRI_SCHEME_DEV) != 0) {
191 fmd_hdl_debug(hdl, "rio_recv: not \"dev\" scheme: %s",
192 scheme ? scheme : "<NULL>");
193 continue;
194 }
195
196 if (fault_exception(hdl, faults[f]))
197 continue;
198
199 if (nvlist_lookup_string(asru, FM_FMRI_DEV_PATH,
200 &path) != 0 || path[0] == '\0') {
201 fmd_hdl_debug(hdl, "rio_recv: no dev path in asru");
202 continue;
203 }
204
205 valid_suspect = 1;
206 if (retire) {
207 if (fmd_nvl_fmri_has_fault(hdl, asru,
208 FMD_HAS_FAULT_ASRU, NULL) == 1) {
209 error = di_retire_device(path, &drt, 0);
210 if (error != 0) {
211 fmd_hdl_debug(hdl, "rio_recv:"
212 " di_retire_device failed:"
213 " error: %d %s", error, path);
214 rval = -1;
215 }
216 }
217 } else {
218 if (fmd_nvl_fmri_has_fault(hdl, asru,
219 FMD_HAS_FAULT_ASRU, NULL) == 0) {
220 error = di_unretire_device(path, &drt);
221 if (error != 0) {
222 fmd_hdl_debug(hdl, "rio_recv:"
223 " di_unretire_device failed:"
224 " error: %d %s", error, path);
225 rval = -1;
226 }
227 }
228 }
229 }
230 /*
231 * Run through again to handle new faults in a list.updated.
232 */
233 for (f = 0; f < nfaults; f++) {
234 if (nvlist_lookup_boolean_value(faults[f], FM_SUSPECT_RETIRE,
235 &rtr) == 0 && !rtr) {
236 fmd_hdl_debug(hdl, "rio_recv: retire suppressed");
237 continue;
238 }
239
240 if (nvlist_lookup_nvlist(faults[f], FM_FAULT_ASRU,
241 &asru) != 0) {
242 fmd_hdl_debug(hdl, "rio_recv: no asru in fault");
243 continue;
244 }
245
246 scheme = NULL;
247 if (nvlist_lookup_string(asru, FM_FMRI_SCHEME, &scheme) != 0 ||
248 strcmp(scheme, FM_FMRI_SCHEME_DEV) != 0) {
249 fmd_hdl_debug(hdl, "rio_recv: not \"dev\" scheme: %s",
250 scheme ? scheme : "<NULL>");
251 continue;
252 }
253
254 if (fault_exception(hdl, faults[f]))
255 continue;
256
257 if (nvlist_lookup_string(asru, FM_FMRI_DEV_PATH,
258 &path) != 0 || path[0] == '\0') {
259 fmd_hdl_debug(hdl, "rio_recv: no dev path in asru");
260 continue;
261 }
262
263 if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0) {
264 if (fmd_nvl_fmri_has_fault(hdl, asru,
265 FMD_HAS_FAULT_ASRU, NULL) == 1) {
266 error = di_retire_device(path, &drt, 0);
267 if (error != 0) {
268 fmd_hdl_debug(hdl, "rio_recv:"
269 " di_retire_device failed:"
270 " error: %d %s", error, path);
271 }
272 }
273 }
274 }
275
276 /*
277 * Don't send uuclose or uuresolved unless at least one suspect
278 * was valid for this retire agent and no retires/unretires failed.
279 */
280 if (valid_suspect == 0)
281 return;
282
283 /*
284 * The fmd framework takes care of moving a case to the repaired
285 * state. To move the case to the closed state however, we (the
286 * retire agent) need to call fmd_case_uuclose()
287 */
288 if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 && rval == 0) {
289 if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0 &&
290 !fmd_case_uuclosed(hdl, uuid)) {
291 fmd_case_uuclose(hdl, uuid);
292 }
293 }
294
295 /*
296 * Similarly to move the case to the resolved state, we (the
297 * retire agent) need to call fmd_case_uuresolved()
298 */
299 if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && rval == 0 &&
300 nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0)
301 fmd_case_uuresolved(hdl, uuid);
302 }
303
/*
 * Module entry points, referenced from fmd_info below.  Only the receive
 * callback is provided; every other slot is left NULL.
 */
static const fmd_hdl_ops_t fmd_ops = {
	rio_recv,	/* fmdo_recv */
	NULL,		/* fmdo_timeout */
	NULL,		/* fmdo_close */
	NULL,		/* fmdo_stats */
	NULL,		/* fmdo_gc */
};
311
/*
 * Module properties, read in _fmd_init():
 *	global-disable		boolean, default "false"; stored in
 *				global_disable
 *	fault-exceptions	string, no default; ':'-separated list handed
 *				to parse_exception_string()
 * The all-NULL/0 entry terminates the table.
 */
static const fmd_prop_t rio_props[] = {
	{ "global-disable", FMD_TYPE_BOOL, "false" },
	{ "fault-exceptions", FMD_TYPE_STRING, NULL },
	{ NULL, 0, NULL }
};
317
/*
 * Module description passed to fmd_hdl_register() in _fmd_init(): a
 * description string, a version string, the entry point table and the
 * property table.
 */
static const fmd_hdl_info_t fmd_info = {
	"I/O Retire Agent", "2.0", &fmd_ops, rio_props
};
321
322 void
_fmd_init(fmd_hdl_t * hdl)323 _fmd_init(fmd_hdl_t *hdl)
324 {
325 char *estr;
326 char *estrdup;
327
328 if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
329 fmd_hdl_debug(hdl, "failed to register handle\n");
330 return;
331 }
332
333 global_disable = fmd_prop_get_int32(hdl, "global-disable");
334
335 estrdup = NULL;
336 if (estr = fmd_prop_get_string(hdl, "fault-exceptions")) {
337 estrdup = fmd_hdl_strdup(hdl, estr, FMD_SLEEP);
338 fmd_prop_free_string(hdl, estr);
339 parse_exception_string(hdl, estrdup);
340 fmd_hdl_strfree(hdl, estrdup);
341 }
342 }
343
/*
 * Module teardown: release the fault exception list built by _fmd_init().
 */
void
_fmd_fini(fmd_hdl_t *hdl)
{
	free_exception_list(hdl);
}
349