1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/fm/protocol.h> 30 #include <fm/fmd_api.h> 31 #include <strings.h> 32 #include <libdevinfo.h> 33 #include <sys/modctl.h> 34 35 static int global_disable; 36 37 struct except_list { 38 char *el_fault; 39 struct except_list *el_next; 40 }; 41 42 static struct except_list *except_list; 43 44 static void 45 parse_exception_string(fmd_hdl_t *hdl, char *estr) 46 { 47 char *p; 48 char *next; 49 size_t len; 50 struct except_list *elem; 51 52 len = strlen(estr); 53 54 p = estr; 55 for (;;) { 56 /* Remove leading ':' */ 57 while (*p == ':') 58 p++; 59 if (*p == '\0') 60 break; 61 62 next = strchr(p, ':'); 63 64 if (next) 65 *next = '\0'; 66 67 elem = fmd_hdl_alloc(hdl, 68 sizeof (struct except_list), FMD_SLEEP); 69 elem->el_fault = fmd_hdl_strdup(hdl, p, FMD_SLEEP); 70 elem->el_next = except_list; 71 except_list = elem; 72 73 if (next) { 74 *next = ':'; 75 p = next + 1; 76 } else { 77 break; 78 } 79 } 80 81 if (len != strlen(estr)) { 82 fmd_hdl_abort(hdl, "Error parsing exception list: %s\n", estr); 83 } 84 } 85 86 /* 87 * Returns 88 * 1 if fault on exception list 89 * 0 otherwise 90 */ 91 static int 92 fault_exception(fmd_hdl_t *hdl, nvlist_t *fault) 93 { 94 struct except_list *elem; 95 96 for (elem = except_list; elem; elem = elem->el_next) { 97 if (fmd_nvl_class_match(hdl, fault, elem->el_fault)) { 98 fmd_hdl_debug(hdl, "rio_recv: Skipping fault " 99 "on exception list (%s)\n", elem->el_fault); 100 return (1); 101 } 102 } 103 104 return (0); 105 } 106 107 static void 108 free_exception_list(fmd_hdl_t *hdl) 109 { 110 struct except_list *elem; 111 112 while (except_list) { 113 elem = except_list; 114 except_list = elem->el_next; 115 fmd_hdl_strfree(hdl, elem->el_fault); 116 fmd_hdl_free(hdl, elem, sizeof (*elem)); 117 } 118 } 119 120 121 /*ARGSUSED*/ 122 static void 123 rio_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) 124 { 125 nvlist_t **faults = NULL; 126 nvlist_t *asru; 127 uint_t nfaults = 0; 128 int f; 129 char *path; 130 char *uuid; 131 char *scheme; 132 di_retire_t drt = {0}; 133 int retire; 134 int rval = 0; 135 int error; 136 char *snglfault = FM_FAULT_CLASS"."FM_ERROR_IO"."; 137 boolean_t rtr; 138 139 140 /* 141 * If disabled, we don't do retire. We still do unretires though 142 */ 143 if (global_disable && strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) { 144 fmd_hdl_debug(hdl, "rio_recv: retire disabled\n"); 145 return; 146 } 147 148 drt.rt_abort = (void (*)(void *, const char *, ...))fmd_hdl_abort; 149 drt.rt_debug = (void (*)(void *, const char *, ...))fmd_hdl_debug; 150 drt.rt_hdl = hdl; 151 152 if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) { 153 retire = 1; 154 } else if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) { 155 retire = 0; 156 } else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0) { 157 retire = 0; 158 } else if (strncmp(class, snglfault, strlen(snglfault)) == 0) { 159 retire = 1; 160 faults = &nvl; 161 nfaults = 1; 162 } else { 163 fmd_hdl_debug(hdl, "rio_recv: not list.* class: %s\n", class); 164 return; 165 } 166 167 if (nfaults == 0 && nvlist_lookup_nvlist_array(nvl, 168 FM_SUSPECT_FAULT_LIST, &faults, &nfaults) != 0) { 169 fmd_hdl_debug(hdl, "rio_recv: no fault list"); 170 return; 171 } 172 173 for (f = 0; f < nfaults; f++) { 174 if (nvlist_lookup_boolean_value(faults[f], FM_SUSPECT_RETIRE, 175 &rtr) == 0 && !rtr) { 176 fmd_hdl_debug(hdl, "rio_recv: retire suppressed"); 177 continue; 178 } 179 180 if (nvlist_lookup_nvlist(faults[f], FM_FAULT_ASRU, 181 &asru) != 0) { 182 fmd_hdl_debug(hdl, "rio_recv: no asru in fault"); 183 continue; 184 } 185 186 scheme = NULL; 187 if (nvlist_lookup_string(asru, FM_FMRI_SCHEME, &scheme) != 0 || 188 strcmp(scheme, FM_FMRI_SCHEME_DEV) != 0) { 189 fmd_hdl_debug(hdl, "rio_recv: not \"dev\" scheme: %s", 190 scheme ? scheme : "<NULL>"); 191 continue; 192 } 193 194 if (fault_exception(hdl, faults[f])) 195 continue; 196 197 if (nvlist_lookup_string(asru, FM_FMRI_DEV_PATH, 198 &path) != 0 || path[0] == '\0') { 199 fmd_hdl_debug(hdl, "rio_recv: no dev path in asru"); 200 continue; 201 } 202 203 if (retire) { 204 if (fmd_nvl_fmri_has_fault(hdl, asru, 205 FMD_HAS_FAULT_ASRU, NULL) == 1) { 206 error = di_retire_device(path, &drt, 0); 207 if (error != 0) { 208 fmd_hdl_debug(hdl, "rio_recv:" 209 " di_retire_device failed:" 210 " error: %d %s", error, path); 211 rval = -1; 212 } 213 } 214 } else { 215 if (fmd_nvl_fmri_has_fault(hdl, asru, 216 FMD_HAS_FAULT_ASRU, NULL) == 0) { 217 error = di_unretire_device(path, &drt); 218 if (error != 0) { 219 fmd_hdl_debug(hdl, "rio_recv:" 220 " di_unretire_device failed:" 221 " error: %d %s", error, path); 222 rval = -1; 223 } 224 } 225 } 226 } 227 228 /* 229 * The fmd framework takes care of moving a case to the repaired 230 * state. To move the case to the closed state however, we (the 231 * retire agent) need to call fmd_case_uuclose() 232 */ 233 if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 && rval == 0) { 234 if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0 && 235 !fmd_case_uuclosed(hdl, uuid)) { 236 fmd_case_uuclose(hdl, uuid); 237 } 238 } 239 240 /* 241 * Similarly to move the case to the resolved state, we (the 242 * retire agent) need to call fmd_case_uuresolved() 243 */ 244 if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && rval == 0 && 245 nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0) 246 fmd_case_uuresolved(hdl, uuid); 247 } 248 249 static const fmd_hdl_ops_t fmd_ops = { 250 rio_recv, /* fmdo_recv */ 251 NULL, /* fmdo_timeout */ 252 NULL, /* fmdo_close */ 253 NULL, /* fmdo_stats */ 254 NULL, /* fmdo_gc */ 255 }; 256 257 static const fmd_prop_t rio_props[] = { 258 { "global-disable", FMD_TYPE_BOOL, "false" }, 259 { "fault-exceptions", FMD_TYPE_STRING, NULL }, 260 { NULL, 0, NULL } 261 }; 262 263 static const fmd_hdl_info_t fmd_info = { 264 "I/O Retire Agent", "2.0", &fmd_ops, rio_props 265 }; 266 267 void 268 _fmd_init(fmd_hdl_t *hdl) 269 { 270 char *estr; 271 char *estrdup; 272 273 if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) { 274 fmd_hdl_debug(hdl, "failed to register handle\n"); 275 return; 276 } 277 278 global_disable = fmd_prop_get_int32(hdl, "global-disable"); 279 280 estrdup = NULL; 281 if (estr = fmd_prop_get_string(hdl, "fault-exceptions")) { 282 estrdup = fmd_hdl_strdup(hdl, estr, FMD_SLEEP); 283 fmd_prop_free_string(hdl, estr); 284 parse_exception_string(hdl, estrdup); 285 fmd_hdl_strfree(hdl, estrdup); 286 } 287 } 288 289 void 290 _fmd_fini(fmd_hdl_t *hdl) 291 { 292 free_exception_list(hdl); 293 } 294