1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/fm/protocol.h> 28 #include <fm/fmd_api.h> 29 #include <strings.h> 30 #include <libdevinfo.h> 31 #include <sys/modctl.h> 32 33 static int global_disable; 34 35 struct except_list { 36 char *el_fault; 37 struct except_list *el_next; 38 }; 39 40 static struct except_list *except_list; 41 42 static void 43 parse_exception_string(fmd_hdl_t *hdl, char *estr) 44 { 45 char *p; 46 char *next; 47 size_t len; 48 struct except_list *elem; 49 50 len = strlen(estr); 51 52 p = estr; 53 for (;;) { 54 /* Remove leading ':' */ 55 while (*p == ':') 56 p++; 57 if (*p == '\0') 58 break; 59 60 next = strchr(p, ':'); 61 62 if (next) 63 *next = '\0'; 64 65 elem = fmd_hdl_alloc(hdl, 66 sizeof (struct except_list), FMD_SLEEP); 67 elem->el_fault = fmd_hdl_strdup(hdl, p, FMD_SLEEP); 68 elem->el_next = except_list; 69 except_list = elem; 70 71 if (next) { 72 *next = ':'; 73 p = next + 1; 74 } else { 75 break; 76 } 77 } 78 79 if (len != strlen(estr)) { 80 fmd_hdl_abort(hdl, "Error parsing exception list: %s\n", estr); 81 } 82 } 83 84 /* 85 * Returns 86 * 1 if fault on exception list 87 * 0 otherwise 88 */ 89 static int 90 fault_exception(fmd_hdl_t *hdl, nvlist_t *fault) 91 { 92 struct except_list *elem; 93 94 for (elem = except_list; elem; elem = elem->el_next) { 95 if (fmd_nvl_class_match(hdl, fault, elem->el_fault)) { 96 fmd_hdl_debug(hdl, "rio_recv: Skipping fault " 97 "on exception list (%s)\n", elem->el_fault); 98 return (1); 99 } 100 } 101 102 return (0); 103 } 104 105 static void 106 free_exception_list(fmd_hdl_t *hdl) 107 { 108 struct except_list *elem; 109 110 while (except_list) { 111 elem = except_list; 112 except_list = elem->el_next; 113 fmd_hdl_strfree(hdl, elem->el_fault); 114 fmd_hdl_free(hdl, elem, sizeof (*elem)); 115 } 116 } 117 118 119 /*ARGSUSED*/ 120 static void 121 rio_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) 122 { 123 nvlist_t **faults = NULL; 124 nvlist_t *asru; 125 uint_t nfaults = 0; 126 int f; 127 char *path; 128 char *uuid; 129 char *scheme; 130 di_retire_t drt = {0}; 131 int retire; 132 int rval = 0; 133 int valid_suspect = 0; 134 int error; 135 char *snglfault = FM_FAULT_CLASS"."FM_ERROR_IO"."; 136 boolean_t rtr; 137 138 139 /* 140 * If disabled, we don't do retire. We still do unretires though 141 */ 142 if (global_disable && (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 || 143 strcmp(class, FM_LIST_UPDATED_CLASS) == 0)) { 144 fmd_hdl_debug(hdl, "rio_recv: retire disabled\n"); 145 return; 146 } 147 148 drt.rt_abort = (void (*)(void *, const char *, ...))fmd_hdl_abort; 149 drt.rt_debug = (void (*)(void *, const char *, ...))fmd_hdl_debug; 150 drt.rt_hdl = hdl; 151 152 if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) { 153 retire = 1; 154 } else if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) { 155 retire = 0; 156 } else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0) { 157 retire = 0; 158 } else if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0) { 159 return; 160 } else if (strncmp(class, snglfault, strlen(snglfault)) == 0) { 161 retire = 1; 162 faults = &nvl; 163 nfaults = 1; 164 } else { 165 fmd_hdl_debug(hdl, "rio_recv: not list.* class: %s\n", class); 166 return; 167 } 168 169 if (nfaults == 0 && nvlist_lookup_nvlist_array(nvl, 170 FM_SUSPECT_FAULT_LIST, &faults, &nfaults) != 0) { 171 fmd_hdl_debug(hdl, "rio_recv: no fault list"); 172 return; 173 } 174 175 for (f = 0; f < nfaults; f++) { 176 if (nvlist_lookup_boolean_value(faults[f], FM_SUSPECT_RETIRE, 177 &rtr) == 0 && !rtr) { 178 fmd_hdl_debug(hdl, "rio_recv: retire suppressed"); 179 continue; 180 } 181 182 if (nvlist_lookup_nvlist(faults[f], FM_FAULT_ASRU, 183 &asru) != 0) { 184 fmd_hdl_debug(hdl, "rio_recv: no asru in fault"); 185 continue; 186 } 187 188 scheme = NULL; 189 if (nvlist_lookup_string(asru, FM_FMRI_SCHEME, &scheme) != 0 || 190 strcmp(scheme, FM_FMRI_SCHEME_DEV) != 0) { 191 fmd_hdl_debug(hdl, "rio_recv: not \"dev\" scheme: %s", 192 scheme ? scheme : "<NULL>"); 193 continue; 194 } 195 196 if (fault_exception(hdl, faults[f])) 197 continue; 198 199 if (nvlist_lookup_string(asru, FM_FMRI_DEV_PATH, 200 &path) != 0 || path[0] == '\0') { 201 fmd_hdl_debug(hdl, "rio_recv: no dev path in asru"); 202 continue; 203 } 204 205 valid_suspect = 1; 206 if (retire) { 207 if (fmd_nvl_fmri_has_fault(hdl, asru, 208 FMD_HAS_FAULT_ASRU, NULL) == 1) { 209 error = di_retire_device(path, &drt, 0); 210 if (error != 0) { 211 fmd_hdl_debug(hdl, "rio_recv:" 212 " di_retire_device failed:" 213 " error: %d %s", error, path); 214 rval = -1; 215 } 216 } 217 } else { 218 if (fmd_nvl_fmri_has_fault(hdl, asru, 219 FMD_HAS_FAULT_ASRU, NULL) == 0) { 220 error = di_unretire_device(path, &drt); 221 if (error != 0) { 222 fmd_hdl_debug(hdl, "rio_recv:" 223 " di_unretire_device failed:" 224 " error: %d %s", error, path); 225 rval = -1; 226 } 227 } 228 } 229 } 230 /* 231 * Run through again to handle new faults in a list.updated. 232 */ 233 for (f = 0; f < nfaults; f++) { 234 if (nvlist_lookup_boolean_value(faults[f], FM_SUSPECT_RETIRE, 235 &rtr) == 0 && !rtr) { 236 fmd_hdl_debug(hdl, "rio_recv: retire suppressed"); 237 continue; 238 } 239 240 if (nvlist_lookup_nvlist(faults[f], FM_FAULT_ASRU, 241 &asru) != 0) { 242 fmd_hdl_debug(hdl, "rio_recv: no asru in fault"); 243 continue; 244 } 245 246 scheme = NULL; 247 if (nvlist_lookup_string(asru, FM_FMRI_SCHEME, &scheme) != 0 || 248 strcmp(scheme, FM_FMRI_SCHEME_DEV) != 0) { 249 fmd_hdl_debug(hdl, "rio_recv: not \"dev\" scheme: %s", 250 scheme ? scheme : "<NULL>"); 251 continue; 252 } 253 254 if (fault_exception(hdl, faults[f])) 255 continue; 256 257 if (nvlist_lookup_string(asru, FM_FMRI_DEV_PATH, 258 &path) != 0 || path[0] == '\0') { 259 fmd_hdl_debug(hdl, "rio_recv: no dev path in asru"); 260 continue; 261 } 262 263 if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0) { 264 if (fmd_nvl_fmri_has_fault(hdl, asru, 265 FMD_HAS_FAULT_ASRU, NULL) == 1) { 266 error = di_retire_device(path, &drt, 0); 267 if (error != 0) { 268 fmd_hdl_debug(hdl, "rio_recv:" 269 " di_retire_device failed:" 270 " error: %d %s", error, path); 271 } 272 } 273 } 274 } 275 276 /* 277 * Don't send uuclose or uuresolved unless at least one suspect 278 * was valid for this retire agent and no retires/unretires failed. 279 */ 280 if (valid_suspect == 0) 281 return; 282 283 /* 284 * The fmd framework takes care of moving a case to the repaired 285 * state. To move the case to the closed state however, we (the 286 * retire agent) need to call fmd_case_uuclose() 287 */ 288 if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 && rval == 0) { 289 if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0 && 290 !fmd_case_uuclosed(hdl, uuid)) { 291 fmd_case_uuclose(hdl, uuid); 292 } 293 } 294 295 /* 296 * Similarly to move the case to the resolved state, we (the 297 * retire agent) need to call fmd_case_uuresolved() 298 */ 299 if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && rval == 0 && 300 nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0) 301 fmd_case_uuresolved(hdl, uuid); 302 } 303 304 static const fmd_hdl_ops_t fmd_ops = { 305 rio_recv, /* fmdo_recv */ 306 NULL, /* fmdo_timeout */ 307 NULL, /* fmdo_close */ 308 NULL, /* fmdo_stats */ 309 NULL, /* fmdo_gc */ 310 }; 311 312 static const fmd_prop_t rio_props[] = { 313 { "global-disable", FMD_TYPE_BOOL, "false" }, 314 { "fault-exceptions", FMD_TYPE_STRING, NULL }, 315 { NULL, 0, NULL } 316 }; 317 318 static const fmd_hdl_info_t fmd_info = { 319 "I/O Retire Agent", "2.0", &fmd_ops, rio_props 320 }; 321 322 void 323 _fmd_init(fmd_hdl_t *hdl) 324 { 325 char *estr; 326 char *estrdup; 327 328 if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) { 329 fmd_hdl_debug(hdl, "failed to register handle\n"); 330 return; 331 } 332 333 global_disable = fmd_prop_get_int32(hdl, "global-disable"); 334 335 estrdup = NULL; 336 if (estr = fmd_prop_get_string(hdl, "fault-exceptions")) { 337 estrdup = fmd_hdl_strdup(hdl, estr, FMD_SLEEP); 338 fmd_prop_free_string(hdl, estr); 339 parse_exception_string(hdl, estrdup); 340 fmd_hdl_strfree(hdl, estrdup); 341 } 342 } 343 344 void 345 _fmd_fini(fmd_hdl_t *hdl) 346 { 347 free_exception_list(hdl); 348 } 349