1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/fm/protocol.h> 28 #include <fm/fmd_api.h> 29 #include <strings.h> 30 #include <libdevinfo.h> 31 #include <sys/modctl.h> 32 33 static int global_disable; 34 35 struct except_list { 36 char *el_fault; 37 struct except_list *el_next; 38 }; 39 40 static struct except_list *except_list; 41 42 static void 43 parse_exception_string(fmd_hdl_t *hdl, char *estr) 44 { 45 char *p; 46 char *next; 47 size_t len; 48 struct except_list *elem; 49 50 len = strlen(estr); 51 52 p = estr; 53 for (;;) { 54 /* Remove leading ':' */ 55 while (*p == ':') 56 p++; 57 if (*p == '\0') 58 break; 59 60 next = strchr(p, ':'); 61 62 if (next) 63 *next = '\0'; 64 65 elem = fmd_hdl_alloc(hdl, 66 sizeof (struct except_list), FMD_SLEEP); 67 elem->el_fault = fmd_hdl_strdup(hdl, p, FMD_SLEEP); 68 elem->el_next = except_list; 69 except_list = elem; 70 71 if (next) { 72 *next = ':'; 73 p = next + 1; 74 } else { 75 break; 76 } 77 } 78 79 if (len != strlen(estr)) { 80 fmd_hdl_abort(hdl, "Error parsing exception list: %s\n", estr); 81 } 82 } 83 84 /* 85 * Returns 86 * 1 if fault on exception list 87 * 0 otherwise 88 */ 89 static int 90 fault_exception(fmd_hdl_t *hdl, nvlist_t *fault) 91 { 92 struct except_list *elem; 93 94 for (elem = except_list; elem; elem = elem->el_next) { 95 if (fmd_nvl_class_match(hdl, fault, elem->el_fault)) { 96 fmd_hdl_debug(hdl, "rio_recv: Skipping fault " 97 "on exception list (%s)\n", elem->el_fault); 98 return (1); 99 } 100 } 101 102 return (0); 103 } 104 105 static void 106 free_exception_list(fmd_hdl_t *hdl) 107 { 108 struct except_list *elem; 109 110 while (except_list) { 111 elem = except_list; 112 except_list = elem->el_next; 113 fmd_hdl_strfree(hdl, elem->el_fault); 114 fmd_hdl_free(hdl, elem, sizeof (*elem)); 115 } 116 } 117 118 119 /*ARGSUSED*/ 120 static void 121 rio_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) 122 { 123 nvlist_t **faults = NULL; 124 nvlist_t *asru; 125 uint_t nfaults = 0; 126 int f; 127 char *path; 128 char *uuid; 129 char *scheme; 130 di_retire_t drt = {0}; 131 int retire; 132 int rval = 0; 133 int valid_suspect = 0; 134 int error; 135 char *snglfault = FM_FAULT_CLASS"."FM_ERROR_IO"."; 136 boolean_t rtr; 137 138 139 /* 140 * If disabled, we don't do retire. We still do unretires though 141 */ 142 if (global_disable && strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) { 143 fmd_hdl_debug(hdl, "rio_recv: retire disabled\n"); 144 return; 145 } 146 147 drt.rt_abort = (void (*)(void *, const char *, ...))fmd_hdl_abort; 148 drt.rt_debug = (void (*)(void *, const char *, ...))fmd_hdl_debug; 149 drt.rt_hdl = hdl; 150 151 if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) { 152 retire = 1; 153 } else if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) { 154 retire = 0; 155 } else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0) { 156 retire = 0; 157 } else if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0) { 158 return; 159 } else if (strncmp(class, snglfault, strlen(snglfault)) == 0) { 160 retire = 1; 161 faults = &nvl; 162 nfaults = 1; 163 } else { 164 fmd_hdl_debug(hdl, "rio_recv: not list.* class: %s\n", class); 165 return; 166 } 167 168 if (nfaults == 0 && nvlist_lookup_nvlist_array(nvl, 169 FM_SUSPECT_FAULT_LIST, &faults, &nfaults) != 0) { 170 fmd_hdl_debug(hdl, "rio_recv: no fault list"); 171 return; 172 } 173 174 for (f = 0; f < nfaults; f++) { 175 if (nvlist_lookup_boolean_value(faults[f], FM_SUSPECT_RETIRE, 176 &rtr) == 0 && !rtr) { 177 fmd_hdl_debug(hdl, "rio_recv: retire suppressed"); 178 continue; 179 } 180 181 if (nvlist_lookup_nvlist(faults[f], FM_FAULT_ASRU, 182 &asru) != 0) { 183 fmd_hdl_debug(hdl, "rio_recv: no asru in fault"); 184 continue; 185 } 186 187 scheme = NULL; 188 if (nvlist_lookup_string(asru, FM_FMRI_SCHEME, &scheme) != 0 || 189 strcmp(scheme, FM_FMRI_SCHEME_DEV) != 0) { 190 fmd_hdl_debug(hdl, "rio_recv: not \"dev\" scheme: %s", 191 scheme ? scheme : "<NULL>"); 192 continue; 193 } 194 195 if (fault_exception(hdl, faults[f])) 196 continue; 197 198 if (nvlist_lookup_string(asru, FM_FMRI_DEV_PATH, 199 &path) != 0 || path[0] == '\0') { 200 fmd_hdl_debug(hdl, "rio_recv: no dev path in asru"); 201 continue; 202 } 203 204 valid_suspect = 1; 205 if (retire) { 206 if (fmd_nvl_fmri_has_fault(hdl, asru, 207 FMD_HAS_FAULT_ASRU, NULL) == 1) { 208 error = di_retire_device(path, &drt, 0); 209 if (error != 0) { 210 fmd_hdl_debug(hdl, "rio_recv:" 211 " di_retire_device failed:" 212 " error: %d %s", error, path); 213 rval = -1; 214 } 215 } 216 } else { 217 if (fmd_nvl_fmri_has_fault(hdl, asru, 218 FMD_HAS_FAULT_ASRU, NULL) == 0) { 219 error = di_unretire_device(path, &drt); 220 if (error != 0) { 221 fmd_hdl_debug(hdl, "rio_recv:" 222 " di_unretire_device failed:" 223 " error: %d %s", error, path); 224 rval = -1; 225 } 226 } 227 } 228 } 229 230 /* 231 * Don't send uuclose or uuresolved unless at least one suspect 232 * was valid for this retire agent and no retires/unretires failed. 233 */ 234 if (valid_suspect == 0) 235 return; 236 237 /* 238 * The fmd framework takes care of moving a case to the repaired 239 * state. To move the case to the closed state however, we (the 240 * retire agent) need to call fmd_case_uuclose() 241 */ 242 if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 && rval == 0) { 243 if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0 && 244 !fmd_case_uuclosed(hdl, uuid)) { 245 fmd_case_uuclose(hdl, uuid); 246 } 247 } 248 249 /* 250 * Similarly to move the case to the resolved state, we (the 251 * retire agent) need to call fmd_case_uuresolved() 252 */ 253 if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && rval == 0 && 254 nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0) 255 fmd_case_uuresolved(hdl, uuid); 256 } 257 258 static const fmd_hdl_ops_t fmd_ops = { 259 rio_recv, /* fmdo_recv */ 260 NULL, /* fmdo_timeout */ 261 NULL, /* fmdo_close */ 262 NULL, /* fmdo_stats */ 263 NULL, /* fmdo_gc */ 264 }; 265 266 static const fmd_prop_t rio_props[] = { 267 { "global-disable", FMD_TYPE_BOOL, "false" }, 268 { "fault-exceptions", FMD_TYPE_STRING, NULL }, 269 { NULL, 0, NULL } 270 }; 271 272 static const fmd_hdl_info_t fmd_info = { 273 "I/O Retire Agent", "2.0", &fmd_ops, rio_props 274 }; 275 276 void 277 _fmd_init(fmd_hdl_t *hdl) 278 { 279 char *estr; 280 char *estrdup; 281 282 if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) { 283 fmd_hdl_debug(hdl, "failed to register handle\n"); 284 return; 285 } 286 287 global_disable = fmd_prop_get_int32(hdl, "global-disable"); 288 289 estrdup = NULL; 290 if (estr = fmd_prop_get_string(hdl, "fault-exceptions")) { 291 estrdup = fmd_hdl_strdup(hdl, estr, FMD_SLEEP); 292 fmd_prop_free_string(hdl, estr); 293 parse_exception_string(hdl, estrdup); 294 fmd_hdl_strfree(hdl, estrdup); 295 } 296 } 297 298 void 299 _fmd_fini(fmd_hdl_t *hdl) 300 { 301 free_exception_list(hdl); 302 } 303