/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
2125e8c5aaSvikram
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
2625e8c5aaSvikram
2725e8c5aaSvikram #include <sys/fm/protocol.h>
2825e8c5aaSvikram #include <fm/fmd_api.h>
2925e8c5aaSvikram #include <strings.h>
3025e8c5aaSvikram #include <libdevinfo.h>
3125e8c5aaSvikram #include <sys/modctl.h>
3225e8c5aaSvikram
/*
 * One entry of the fault exception list: a fault class string plus the
 * link to the next entry.  Entries form a singly linked list.
 */
struct except_list {
	char			*el_fault;	/* fault class string */
	struct except_list	*el_next;	/* next entry, or NULL */
};

/* Non-zero when the "global-disable" property turns retiring off. */
static int global_disable;

/* Head of the fault exception list; NULL when the list is empty. */
static struct except_list *except_list;
4125e8c5aaSvikram
4225e8c5aaSvikram static void
parse_exception_string(fmd_hdl_t * hdl,char * estr)4325e8c5aaSvikram parse_exception_string(fmd_hdl_t *hdl, char *estr)
4425e8c5aaSvikram {
4525e8c5aaSvikram char *p;
4625e8c5aaSvikram char *next;
4725e8c5aaSvikram size_t len;
4825e8c5aaSvikram struct except_list *elem;
4925e8c5aaSvikram
5025e8c5aaSvikram len = strlen(estr);
5125e8c5aaSvikram
5225e8c5aaSvikram p = estr;
5325e8c5aaSvikram for (;;) {
5425e8c5aaSvikram /* Remove leading ':' */
5525e8c5aaSvikram while (*p == ':')
5625e8c5aaSvikram p++;
5725e8c5aaSvikram if (*p == '\0')
5825e8c5aaSvikram break;
5925e8c5aaSvikram
6025e8c5aaSvikram next = strchr(p, ':');
6125e8c5aaSvikram
6225e8c5aaSvikram if (next)
6325e8c5aaSvikram *next = '\0';
6425e8c5aaSvikram
6525e8c5aaSvikram elem = fmd_hdl_alloc(hdl,
6625e8c5aaSvikram sizeof (struct except_list), FMD_SLEEP);
6725e8c5aaSvikram elem->el_fault = fmd_hdl_strdup(hdl, p, FMD_SLEEP);
6825e8c5aaSvikram elem->el_next = except_list;
6925e8c5aaSvikram except_list = elem;
7025e8c5aaSvikram
7125e8c5aaSvikram if (next) {
7225e8c5aaSvikram *next = ':';
7325e8c5aaSvikram p = next + 1;
7425e8c5aaSvikram } else {
7525e8c5aaSvikram break;
7625e8c5aaSvikram }
7725e8c5aaSvikram }
7825e8c5aaSvikram
7925e8c5aaSvikram if (len != strlen(estr)) {
8025e8c5aaSvikram fmd_hdl_abort(hdl, "Error parsing exception list: %s\n", estr);
8125e8c5aaSvikram }
8225e8c5aaSvikram }
8325e8c5aaSvikram
8425e8c5aaSvikram /*
8525e8c5aaSvikram * Returns
8625e8c5aaSvikram * 1 if fault on exception list
8725e8c5aaSvikram * 0 otherwise
8825e8c5aaSvikram */
8925e8c5aaSvikram static int
fault_exception(fmd_hdl_t * hdl,nvlist_t * fault)9025e8c5aaSvikram fault_exception(fmd_hdl_t *hdl, nvlist_t *fault)
9125e8c5aaSvikram {
9225e8c5aaSvikram struct except_list *elem;
9325e8c5aaSvikram
9425e8c5aaSvikram for (elem = except_list; elem; elem = elem->el_next) {
9525e8c5aaSvikram if (fmd_nvl_class_match(hdl, fault, elem->el_fault)) {
9625e8c5aaSvikram fmd_hdl_debug(hdl, "rio_recv: Skipping fault "
9725e8c5aaSvikram "on exception list (%s)\n", elem->el_fault);
9825e8c5aaSvikram return (1);
9925e8c5aaSvikram }
10025e8c5aaSvikram }
10125e8c5aaSvikram
10225e8c5aaSvikram return (0);
10325e8c5aaSvikram }
10425e8c5aaSvikram
10525e8c5aaSvikram static void
free_exception_list(fmd_hdl_t * hdl)10625e8c5aaSvikram free_exception_list(fmd_hdl_t *hdl)
10725e8c5aaSvikram {
10825e8c5aaSvikram struct except_list *elem;
10925e8c5aaSvikram
11025e8c5aaSvikram while (except_list) {
11125e8c5aaSvikram elem = except_list;
11225e8c5aaSvikram except_list = elem->el_next;
11325e8c5aaSvikram fmd_hdl_strfree(hdl, elem->el_fault);
11425e8c5aaSvikram fmd_hdl_free(hdl, elem, sizeof (*elem));
11525e8c5aaSvikram }
11625e8c5aaSvikram }
11725e8c5aaSvikram
11825e8c5aaSvikram
11925e8c5aaSvikram /*ARGSUSED*/
12025e8c5aaSvikram static void
rio_recv(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class)12125e8c5aaSvikram rio_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
12225e8c5aaSvikram {
12325c6ff4bSstephh nvlist_t **faults = NULL;
12425e8c5aaSvikram nvlist_t *asru;
12525c6ff4bSstephh uint_t nfaults = 0;
12625e8c5aaSvikram int f;
12725e8c5aaSvikram char *path;
12825e8c5aaSvikram char *uuid;
12925e8c5aaSvikram char *scheme;
13025e8c5aaSvikram di_retire_t drt = {0};
13125e8c5aaSvikram int retire;
13225c6ff4bSstephh int rval = 0;
133cbf75e67SStephen Hanson int valid_suspect = 0;
13425e8c5aaSvikram int error;
13525e8c5aaSvikram char *snglfault = FM_FAULT_CLASS"."FM_ERROR_IO".";
136b7d3956bSstephh boolean_t rtr;
13725e8c5aaSvikram
13825e8c5aaSvikram
13925e8c5aaSvikram /*
14025e8c5aaSvikram * If disabled, we don't do retire. We still do unretires though
14125e8c5aaSvikram */
142*5750ef5cSStephen Hanson if (global_disable && (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
143*5750ef5cSStephen Hanson strcmp(class, FM_LIST_UPDATED_CLASS) == 0)) {
14425e8c5aaSvikram fmd_hdl_debug(hdl, "rio_recv: retire disabled\n");
14525e8c5aaSvikram return;
14625e8c5aaSvikram }
14725e8c5aaSvikram
14825e8c5aaSvikram drt.rt_abort = (void (*)(void *, const char *, ...))fmd_hdl_abort;
14925e8c5aaSvikram drt.rt_debug = (void (*)(void *, const char *, ...))fmd_hdl_debug;
15025e8c5aaSvikram drt.rt_hdl = hdl;
15125e8c5aaSvikram
15225e8c5aaSvikram if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) {
15325e8c5aaSvikram retire = 1;
15425e8c5aaSvikram } else if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) {
15525e8c5aaSvikram retire = 0;
15625c6ff4bSstephh } else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0) {
15725c6ff4bSstephh retire = 0;
158cbf75e67SStephen Hanson } else if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0) {
159cbf75e67SStephen Hanson return;
16025e8c5aaSvikram } else if (strncmp(class, snglfault, strlen(snglfault)) == 0) {
16125c6ff4bSstephh retire = 1;
16225c6ff4bSstephh faults = &nvl;
16325c6ff4bSstephh nfaults = 1;
16425e8c5aaSvikram } else {
16525e8c5aaSvikram fmd_hdl_debug(hdl, "rio_recv: not list.* class: %s\n", class);
16625e8c5aaSvikram return;
16725e8c5aaSvikram }
16825e8c5aaSvikram
16925c6ff4bSstephh if (nfaults == 0 && nvlist_lookup_nvlist_array(nvl,
17025c6ff4bSstephh FM_SUSPECT_FAULT_LIST, &faults, &nfaults) != 0) {
17125e8c5aaSvikram fmd_hdl_debug(hdl, "rio_recv: no fault list");
17225e8c5aaSvikram return;
17325e8c5aaSvikram }
17425e8c5aaSvikram
17525e8c5aaSvikram for (f = 0; f < nfaults; f++) {
176b7d3956bSstephh if (nvlist_lookup_boolean_value(faults[f], FM_SUSPECT_RETIRE,
177b7d3956bSstephh &rtr) == 0 && !rtr) {
178b7d3956bSstephh fmd_hdl_debug(hdl, "rio_recv: retire suppressed");
179b7d3956bSstephh continue;
180b7d3956bSstephh }
181b7d3956bSstephh
18225e8c5aaSvikram if (nvlist_lookup_nvlist(faults[f], FM_FAULT_ASRU,
18325e8c5aaSvikram &asru) != 0) {
18425e8c5aaSvikram fmd_hdl_debug(hdl, "rio_recv: no asru in fault");
18525e8c5aaSvikram continue;
18625e8c5aaSvikram }
18725e8c5aaSvikram
18825e8c5aaSvikram scheme = NULL;
18925e8c5aaSvikram if (nvlist_lookup_string(asru, FM_FMRI_SCHEME, &scheme) != 0 ||
19025e8c5aaSvikram strcmp(scheme, FM_FMRI_SCHEME_DEV) != 0) {
19125e8c5aaSvikram fmd_hdl_debug(hdl, "rio_recv: not \"dev\" scheme: %s",
19225e8c5aaSvikram scheme ? scheme : "<NULL>");
19325e8c5aaSvikram continue;
19425e8c5aaSvikram }
19525e8c5aaSvikram
19625c6ff4bSstephh if (fault_exception(hdl, faults[f]))
19725e8c5aaSvikram continue;
19825e8c5aaSvikram
19925e8c5aaSvikram if (nvlist_lookup_string(asru, FM_FMRI_DEV_PATH,
20025e8c5aaSvikram &path) != 0 || path[0] == '\0') {
20125e8c5aaSvikram fmd_hdl_debug(hdl, "rio_recv: no dev path in asru");
20225e8c5aaSvikram continue;
20325e8c5aaSvikram }
20425e8c5aaSvikram
205cbf75e67SStephen Hanson valid_suspect = 1;
20625e8c5aaSvikram if (retire) {
20725c6ff4bSstephh if (fmd_nvl_fmri_has_fault(hdl, asru,
20825c6ff4bSstephh FMD_HAS_FAULT_ASRU, NULL) == 1) {
20925c6ff4bSstephh error = di_retire_device(path, &drt, 0);
21025c6ff4bSstephh if (error != 0) {
21125c6ff4bSstephh fmd_hdl_debug(hdl, "rio_recv:"
21225c6ff4bSstephh " di_retire_device failed:"
21325c6ff4bSstephh " error: %d %s", error, path);
21425c6ff4bSstephh rval = -1;
21525c6ff4bSstephh }
21625e8c5aaSvikram }
21725e8c5aaSvikram } else {
21825c6ff4bSstephh if (fmd_nvl_fmri_has_fault(hdl, asru,
21925c6ff4bSstephh FMD_HAS_FAULT_ASRU, NULL) == 0) {
22025e8c5aaSvikram error = di_unretire_device(path, &drt);
22125e8c5aaSvikram if (error != 0) {
22225e8c5aaSvikram fmd_hdl_debug(hdl, "rio_recv:"
22325c6ff4bSstephh " di_unretire_device failed:"
22425c6ff4bSstephh " error: %d %s", error, path);
22525e8c5aaSvikram rval = -1;
22625e8c5aaSvikram }
22725e8c5aaSvikram }
22825e8c5aaSvikram }
22925e8c5aaSvikram }
230*5750ef5cSStephen Hanson /*
231*5750ef5cSStephen Hanson * Run through again to handle new faults in a list.updated.
232*5750ef5cSStephen Hanson */
233*5750ef5cSStephen Hanson for (f = 0; f < nfaults; f++) {
234*5750ef5cSStephen Hanson if (nvlist_lookup_boolean_value(faults[f], FM_SUSPECT_RETIRE,
235*5750ef5cSStephen Hanson &rtr) == 0 && !rtr) {
236*5750ef5cSStephen Hanson fmd_hdl_debug(hdl, "rio_recv: retire suppressed");
237*5750ef5cSStephen Hanson continue;
238*5750ef5cSStephen Hanson }
239*5750ef5cSStephen Hanson
240*5750ef5cSStephen Hanson if (nvlist_lookup_nvlist(faults[f], FM_FAULT_ASRU,
241*5750ef5cSStephen Hanson &asru) != 0) {
242*5750ef5cSStephen Hanson fmd_hdl_debug(hdl, "rio_recv: no asru in fault");
243*5750ef5cSStephen Hanson continue;
244*5750ef5cSStephen Hanson }
245*5750ef5cSStephen Hanson
246*5750ef5cSStephen Hanson scheme = NULL;
247*5750ef5cSStephen Hanson if (nvlist_lookup_string(asru, FM_FMRI_SCHEME, &scheme) != 0 ||
248*5750ef5cSStephen Hanson strcmp(scheme, FM_FMRI_SCHEME_DEV) != 0) {
249*5750ef5cSStephen Hanson fmd_hdl_debug(hdl, "rio_recv: not \"dev\" scheme: %s",
250*5750ef5cSStephen Hanson scheme ? scheme : "<NULL>");
251*5750ef5cSStephen Hanson continue;
252*5750ef5cSStephen Hanson }
253*5750ef5cSStephen Hanson
254*5750ef5cSStephen Hanson if (fault_exception(hdl, faults[f]))
255*5750ef5cSStephen Hanson continue;
256*5750ef5cSStephen Hanson
257*5750ef5cSStephen Hanson if (nvlist_lookup_string(asru, FM_FMRI_DEV_PATH,
258*5750ef5cSStephen Hanson &path) != 0 || path[0] == '\0') {
259*5750ef5cSStephen Hanson fmd_hdl_debug(hdl, "rio_recv: no dev path in asru");
260*5750ef5cSStephen Hanson continue;
261*5750ef5cSStephen Hanson }
262*5750ef5cSStephen Hanson
263*5750ef5cSStephen Hanson if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0) {
264*5750ef5cSStephen Hanson if (fmd_nvl_fmri_has_fault(hdl, asru,
265*5750ef5cSStephen Hanson FMD_HAS_FAULT_ASRU, NULL) == 1) {
266*5750ef5cSStephen Hanson error = di_retire_device(path, &drt, 0);
267*5750ef5cSStephen Hanson if (error != 0) {
268*5750ef5cSStephen Hanson fmd_hdl_debug(hdl, "rio_recv:"
269*5750ef5cSStephen Hanson " di_retire_device failed:"
270*5750ef5cSStephen Hanson " error: %d %s", error, path);
271*5750ef5cSStephen Hanson }
272*5750ef5cSStephen Hanson }
273*5750ef5cSStephen Hanson }
274*5750ef5cSStephen Hanson }
27525e8c5aaSvikram
27625e8c5aaSvikram /*
277cbf75e67SStephen Hanson * Don't send uuclose or uuresolved unless at least one suspect
278cbf75e67SStephen Hanson * was valid for this retire agent and no retires/unretires failed.
279cbf75e67SStephen Hanson */
280cbf75e67SStephen Hanson if (valid_suspect == 0)
281cbf75e67SStephen Hanson return;
282cbf75e67SStephen Hanson
283cbf75e67SStephen Hanson /*
28425e8c5aaSvikram * The fmd framework takes care of moving a case to the repaired
28525e8c5aaSvikram * state. To move the case to the closed state however, we (the
28625e8c5aaSvikram * retire agent) need to call fmd_case_uuclose()
28725e8c5aaSvikram */
28825c6ff4bSstephh if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 && rval == 0) {
28925e8c5aaSvikram if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0 &&
29025e8c5aaSvikram !fmd_case_uuclosed(hdl, uuid)) {
29125e8c5aaSvikram fmd_case_uuclose(hdl, uuid);
29225e8c5aaSvikram }
29325e8c5aaSvikram }
29425c6ff4bSstephh
29525c6ff4bSstephh /*
29625c6ff4bSstephh * Similarly to move the case to the resolved state, we (the
29725c6ff4bSstephh * retire agent) need to call fmd_case_uuresolved()
29825c6ff4bSstephh */
29925c6ff4bSstephh if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && rval == 0 &&
30025c6ff4bSstephh nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0)
30125c6ff4bSstephh fmd_case_uuresolved(hdl, uuid);
30225e8c5aaSvikram }
30325e8c5aaSvikram
30425e8c5aaSvikram static const fmd_hdl_ops_t fmd_ops = {
30525e8c5aaSvikram rio_recv, /* fmdo_recv */
30625e8c5aaSvikram NULL, /* fmdo_timeout */
30725e8c5aaSvikram NULL, /* fmdo_close */
30825e8c5aaSvikram NULL, /* fmdo_stats */
30925e8c5aaSvikram NULL, /* fmdo_gc */
31025e8c5aaSvikram };
31125e8c5aaSvikram
31225e8c5aaSvikram static const fmd_prop_t rio_props[] = {
31325e8c5aaSvikram { "global-disable", FMD_TYPE_BOOL, "false" },
31425e8c5aaSvikram { "fault-exceptions", FMD_TYPE_STRING, NULL },
31525e8c5aaSvikram { NULL, 0, NULL }
31625e8c5aaSvikram };
31725e8c5aaSvikram
31825e8c5aaSvikram static const fmd_hdl_info_t fmd_info = {
31925e8c5aaSvikram "I/O Retire Agent", "2.0", &fmd_ops, rio_props
32025e8c5aaSvikram };
32125e8c5aaSvikram
32225e8c5aaSvikram void
_fmd_init(fmd_hdl_t * hdl)32325e8c5aaSvikram _fmd_init(fmd_hdl_t *hdl)
32425e8c5aaSvikram {
32525e8c5aaSvikram char *estr;
32625e8c5aaSvikram char *estrdup;
32725e8c5aaSvikram
32825e8c5aaSvikram if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
32925e8c5aaSvikram fmd_hdl_debug(hdl, "failed to register handle\n");
33025e8c5aaSvikram return;
33125e8c5aaSvikram }
33225e8c5aaSvikram
33325e8c5aaSvikram global_disable = fmd_prop_get_int32(hdl, "global-disable");
33425e8c5aaSvikram
33525e8c5aaSvikram estrdup = NULL;
33625e8c5aaSvikram if (estr = fmd_prop_get_string(hdl, "fault-exceptions")) {
33725e8c5aaSvikram estrdup = fmd_hdl_strdup(hdl, estr, FMD_SLEEP);
33825e8c5aaSvikram fmd_prop_free_string(hdl, estr);
33925e8c5aaSvikram parse_exception_string(hdl, estrdup);
34025e8c5aaSvikram fmd_hdl_strfree(hdl, estrdup);
34125e8c5aaSvikram }
34225e8c5aaSvikram }
34325e8c5aaSvikram
34425e8c5aaSvikram void
_fmd_fini(fmd_hdl_t * hdl)34525e8c5aaSvikram _fmd_fini(fmd_hdl_t *hdl)
34625e8c5aaSvikram {
34725e8c5aaSvikram free_exception_list(hdl);
34825e8c5aaSvikram }
349