1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/fm/protocol.h> 28 #include <uuid/uuid.h> 29 30 #include <dirent.h> 31 #include <limits.h> 32 #include <unistd.h> 33 #include <alloca.h> 34 #include <stddef.h> 35 #include <fm/libtopo.h> 36 37 #include <fmd_alloc.h> 38 #include <fmd_string.h> 39 #include <fmd_error.h> 40 #include <fmd_subr.h> 41 #include <fmd_protocol.h> 42 #include <fmd_event.h> 43 #include <fmd_conf.h> 44 #include <fmd_fmri.h> 45 #include <fmd_dispq.h> 46 #include <fmd_case.h> 47 #include <fmd_module.h> 48 #include <fmd_asru.h> 49 50 #include <fmd.h> 51 52 static const char *const _fmd_asru_events[] = { 53 FMD_RSRC_CLASS "asru.ok", /* UNUSABLE=0 FAULTED=0 */ 54 FMD_RSRC_CLASS "asru.degraded", /* UNUSABLE=0 FAULTED=1 */ 55 FMD_RSRC_CLASS "asru.unknown", /* UNUSABLE=1 FAULTED=0 */ 56 FMD_RSRC_CLASS "asru.faulted" /* UNUSABLE=1 FAULTED=1 */ 57 }; 58 59 static const char *const _fmd_asru_snames[] = { 60 "uf", "uF", "Uf", "UF" /* same order as above */ 61 }; 62 63 volatile uint32_t fmd_asru_fake_not_present = 0; 64 65 static uint_t 66 fmd_asru_strhash(fmd_asru_hash_t *ahp, const char *val) 67 { 68 return (topo_fmri_strhash(ahp->ah_topo->ft_hdl, val) % ahp->ah_hashlen); 69 } 70 71 static boolean_t 72 fmd_asru_strcmp(fmd_asru_hash_t *ahp, const char *a, const char *b) 73 { 74 return (topo_fmri_strcmp(ahp->ah_topo->ft_hdl, a, b)); 75 } 76 77 static fmd_asru_t * 78 fmd_asru_create(fmd_asru_hash_t *ahp, const char *uuid, 79 const char *name, nvlist_t *fmri) 80 { 81 fmd_asru_t *ap = fmd_zalloc(sizeof (fmd_asru_t), FMD_SLEEP); 82 char *s; 83 84 (void) pthread_mutex_init(&ap->asru_lock, NULL); 85 (void) pthread_cond_init(&ap->asru_cv, NULL); 86 87 ap->asru_name = fmd_strdup(name, FMD_SLEEP); 88 if (fmri) 89 (void) nvlist_xdup(fmri, &ap->asru_fmri, &fmd.d_nva); 90 ap->asru_root = fmd_strdup(ahp->ah_dirpath, FMD_SLEEP); 91 ap->asru_uuid = fmd_strdup(uuid, FMD_SLEEP); 92 ap->asru_uuidlen = ap->asru_uuid ? strlen(ap->asru_uuid) : 0; 93 ap->asru_refs = 1; 94 95 if (fmri && nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &s) == 0 && 96 strcmp(s, FM_FMRI_SCHEME_FMD) == 0) 97 ap->asru_flags |= FMD_ASRU_INTERNAL; 98 99 return (ap); 100 } 101 102 static void 103 fmd_asru_destroy(fmd_asru_t *ap) 104 { 105 ASSERT(MUTEX_HELD(&ap->asru_lock)); 106 ASSERT(ap->asru_refs == 0); 107 108 nvlist_free(ap->asru_event); 109 fmd_strfree(ap->asru_name); 110 nvlist_free(ap->asru_fmri); 111 fmd_strfree(ap->asru_root); 112 fmd_free(ap->asru_uuid, ap->asru_uuidlen + 1); 113 fmd_free(ap, sizeof (fmd_asru_t)); 114 } 115 116 static void 117 fmd_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_t *ap) 118 { 119 uint_t h = fmd_asru_strhash(ahp, ap->asru_name); 120 121 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 122 ap->asru_next = ahp->ah_hash[h]; 123 ahp->ah_hash[h] = ap; 124 ahp->ah_count++; 125 } 126 127 static fmd_asru_t * 128 fmd_asru_hold(fmd_asru_t *ap) 129 { 130 (void) pthread_mutex_lock(&ap->asru_lock); 131 ap->asru_refs++; 132 ASSERT(ap->asru_refs != 0); 133 (void) pthread_mutex_unlock(&ap->asru_lock); 134 return (ap); 135 } 136 137 /* 138 * Lookup an asru in the hash by name and place a hold on it. If the asru is 139 * not found, no entry is created and NULL is returned. This internal function 140 * is for callers who have the ah_lock held and is used by lookup_name below. 141 */ 142 fmd_asru_t * 143 fmd_asru_hash_lookup(fmd_asru_hash_t *ahp, const char *name) 144 { 145 fmd_asru_t *ap; 146 uint_t h; 147 148 ASSERT(RW_LOCK_HELD(&ahp->ah_lock)); 149 h = fmd_asru_strhash(ahp, name); 150 151 for (ap = ahp->ah_hash[h]; ap != NULL; ap = ap->asru_next) { 152 if (fmd_asru_strcmp(ahp, ap->asru_name, name)) 153 break; 154 } 155 156 if (ap != NULL) 157 (void) fmd_asru_hold(ap); 158 else 159 (void) fmd_set_errno(EFMD_ASRU_NOENT); 160 161 return (ap); 162 } 163 164 static int 165 fmd_asru_replacement_state(nvlist_t *event) 166 { 167 int ps = -1; 168 nvlist_t *asru, *fru, *rsrc; 169 170 /* 171 * Check if there is evidence that this object is no longer present. 172 * In general fmd_fmri_present() should be supported on resources and/or 173 * frus, as those are the things that are physically present or not 174 * present - an asru can be spread over a number of frus some of which 175 * are present and some not, so fmd_fmri_present() is not generally 176 * meaningful. However retain a check for asru first for compatibility. 177 * If we have checked all three and we still get -1 then nothing knows 178 * whether it's present or not, so err on the safe side and treat it 179 * as still present. 180 */ 181 if (fmd_asru_fake_not_present) 182 return (fmd_asru_fake_not_present); 183 if (nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0) 184 ps = fmd_fmri_replaced(asru); 185 if (ps == -1) { 186 if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, &rsrc) == 0) 187 ps = fmd_fmri_replaced(rsrc); 188 } else if (ps == FMD_OBJ_STATE_UNKNOWN) { 189 /* see if we can improve on UNKNOWN */ 190 if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, 191 &rsrc) == 0) { 192 int ps2 = fmd_fmri_replaced(rsrc); 193 if (ps2 == FMD_OBJ_STATE_STILL_PRESENT || 194 ps2 == FMD_OBJ_STATE_REPLACED) 195 ps = ps2; 196 } 197 } 198 if (ps == -1) { 199 if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0) 200 ps = fmd_fmri_replaced(fru); 201 } else if (ps == FMD_OBJ_STATE_UNKNOWN) { 202 /* see if we can improve on UNKNOWN */ 203 if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0) { 204 int ps2 = fmd_fmri_replaced(fru); 205 if (ps2 == FMD_OBJ_STATE_STILL_PRESENT || 206 ps2 == FMD_OBJ_STATE_REPLACED) 207 ps = ps2; 208 } 209 } 210 if (ps == -1) 211 ps = FMD_OBJ_STATE_UNKNOWN; 212 return (ps); 213 } 214 215 static void 216 fmd_asru_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 217 char *name) 218 { 219 uint_t h = fmd_asru_strhash(ahp, name); 220 221 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 222 alp->al_asru_next = ahp->ah_asru_hash[h]; 223 ahp->ah_asru_hash[h] = alp; 224 ahp->ah_al_count++; 225 } 226 227 static void 228 fmd_asru_case_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 229 char *name) 230 { 231 uint_t h = fmd_asru_strhash(ahp, name); 232 233 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 234 alp->al_case_next = ahp->ah_case_hash[h]; 235 ahp->ah_case_hash[h] = alp; 236 } 237 238 static void 239 fmd_asru_fru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, char *name) 240 { 241 uint_t h = fmd_asru_strhash(ahp, name); 242 243 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 244 alp->al_fru_next = ahp->ah_fru_hash[h]; 245 ahp->ah_fru_hash[h] = alp; 246 } 247 248 static void 249 fmd_asru_label_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 250 char *name) 251 { 252 uint_t h = fmd_asru_strhash(ahp, name); 253 254 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 255 alp->al_label_next = ahp->ah_label_hash[h]; 256 ahp->ah_label_hash[h] = alp; 257 } 258 259 static void 260 fmd_asru_rsrc_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 261 char *name) 262 { 263 uint_t h = fmd_asru_strhash(ahp, name); 264 265 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 266 alp->al_rsrc_next = ahp->ah_rsrc_hash[h]; 267 ahp->ah_rsrc_hash[h] = alp; 268 } 269 270 static void 271 fmd_asru_al_destroy(fmd_asru_link_t *alp) 272 { 273 ASSERT(alp->al_refs == 0); 274 ASSERT(MUTEX_HELD(&alp->al_asru->asru_lock)); 275 276 if (alp->al_log != NULL) 277 fmd_log_rele(alp->al_log); 278 279 fmd_free(alp->al_uuid, alp->al_uuidlen + 1); 280 nvlist_free(alp->al_event); 281 fmd_strfree(alp->al_rsrc_name); 282 fmd_strfree(alp->al_case_uuid); 283 fmd_strfree(alp->al_fru_name); 284 fmd_strfree(alp->al_asru_name); 285 fmd_strfree(alp->al_label); 286 nvlist_free(alp->al_asru_fmri); 287 fmd_free(alp, sizeof (fmd_asru_link_t)); 288 } 289 290 static fmd_asru_link_t * 291 fmd_asru_al_hold(fmd_asru_link_t *alp) 292 { 293 fmd_asru_t *ap = alp->al_asru; 294 295 (void) pthread_mutex_lock(&ap->asru_lock); 296 ap->asru_refs++; 297 alp->al_refs++; 298 ASSERT(alp->al_refs != 0); 299 (void) pthread_mutex_unlock(&ap->asru_lock); 300 return (alp); 301 } 302 303 static void fmd_asru_destroy(fmd_asru_t *ap); 304 305 /*ARGSUSED*/ 306 static void 307 fmd_asru_al_hash_release(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp) 308 { 309 fmd_asru_t *ap = alp->al_asru; 310 311 (void) pthread_mutex_lock(&ap->asru_lock); 312 ASSERT(alp->al_refs != 0); 313 if (--alp->al_refs == 0) 314 fmd_asru_al_destroy(alp); 315 ASSERT(ap->asru_refs != 0); 316 if (--ap->asru_refs == 0) 317 fmd_asru_destroy(ap); 318 else 319 (void) pthread_mutex_unlock(&ap->asru_lock); 320 } 321 322 static int 323 fmd_asru_get_namestr(nvlist_t *nvl, char **name, ssize_t *namelen) 324 { 325 if ((*namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) == -1) 326 return (EFMD_ASRU_FMRI); 327 *name = fmd_alloc(*namelen + 1, FMD_SLEEP); 328 if (fmd_fmri_nvl2str(nvl, *name, *namelen + 1) == -1) { 329 if (*name != NULL) 330 fmd_free(*name, *namelen + 1); 331 return (EFMD_ASRU_FMRI); 332 } 333 return (0); 334 } 335 336 static fmd_asru_link_t * 337 fmd_asru_al_create(fmd_asru_hash_t *ahp, nvlist_t *nvl, fmd_case_t *cp, 338 const char *al_uuid) 339 { 340 nvlist_t *asru = NULL, *fru, *rsrc; 341 int got_rsrc = 0, got_asru = 0, got_fru = 0; 342 ssize_t fru_namelen, rsrc_namelen, asru_namelen; 343 char *asru_name, *rsrc_name, *fru_name, *name, *label; 344 fmd_asru_link_t *alp; 345 fmd_asru_t *ap; 346 boolean_t msg; 347 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 348 349 if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &asru) == 0 && 350 fmd_asru_get_namestr(asru, &asru_name, &asru_namelen) == 0) 351 got_asru = 1; 352 if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &fru) == 0 && 353 fmd_asru_get_namestr(fru, &fru_name, &fru_namelen) == 0) 354 got_fru = 1; 355 if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0 && 356 fmd_asru_get_namestr(rsrc, &rsrc_name, &rsrc_namelen) == 0) 357 got_rsrc = 1; 358 if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, &label) != 0) 359 label = ""; 360 361 /* 362 * Grab the rwlock as a writer; Then create and insert the asru with 363 * ahp->ah_lock held and hash it in. We'll then drop the rwlock and 364 * proceed to initializing the asru. 365 */ 366 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 367 368 /* 369 * Create and initialise the per-fault "link" structure. 370 */ 371 alp = fmd_zalloc(sizeof (fmd_asru_link_t), FMD_SLEEP); 372 if (got_asru) 373 (void) nvlist_xdup(asru, &alp->al_asru_fmri, &fmd.d_nva); 374 alp->al_uuid = fmd_strdup(al_uuid, FMD_SLEEP); 375 alp->al_uuidlen = strlen(alp->al_uuid); 376 alp->al_refs = 1; 377 378 /* 379 * If this is the first fault for this asru, then create the per-asru 380 * structure and link into the hash. 381 */ 382 name = got_asru ? asru_name : ""; 383 if ((ap = fmd_asru_hash_lookup(ahp, name)) == NULL) { 384 ap = fmd_asru_create(ahp, al_uuid, name, got_asru ? asru : 385 NULL); 386 fmd_asru_hash_insert(ahp, ap); 387 } else 388 nvlist_free(ap->asru_event); 389 (void) nvlist_xdup(nvl, &ap->asru_event, &fmd.d_nva); 390 391 /* 392 * Put the link structure on the list associated with the per-asru 393 * structure. Then put the link structure on the various hashes. 394 */ 395 fmd_list_append(&ap->asru_list, (fmd_list_t *)alp); 396 alp->al_asru = ap; 397 alp->al_asru_name = got_asru ? asru_name : fmd_strdup("", FMD_SLEEP); 398 fmd_asru_asru_hash_insert(ahp, alp, alp->al_asru_name); 399 alp->al_fru_name = got_fru ? fru_name : fmd_strdup("", FMD_SLEEP); 400 fmd_asru_fru_hash_insert(ahp, alp, alp->al_fru_name); 401 alp->al_rsrc_name = got_rsrc ? rsrc_name : fmd_strdup("", FMD_SLEEP); 402 fmd_asru_rsrc_hash_insert(ahp, alp, alp->al_rsrc_name); 403 alp->al_label = fmd_strdup(label, FMD_SLEEP); 404 fmd_asru_label_hash_insert(ahp, alp, label); 405 alp->al_case_uuid = fmd_strdup(cip->ci_uuid, FMD_SLEEP); 406 fmd_asru_case_hash_insert(ahp, alp, cip->ci_uuid); 407 (void) pthread_mutex_lock(&ap->asru_lock); 408 (void) pthread_rwlock_unlock(&ahp->ah_lock); 409 410 ap->asru_case = alp->al_case = cp; 411 if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE, &msg) == 0 && 412 msg == B_FALSE) 413 ap->asru_flags |= FMD_ASRU_INVISIBLE; 414 (void) nvlist_xdup(nvl, &alp->al_event, &fmd.d_nva); 415 ap->asru_flags |= FMD_ASRU_VALID; 416 (void) pthread_cond_broadcast(&ap->asru_cv); 417 (void) pthread_mutex_unlock(&ap->asru_lock); 418 return (alp); 419 } 420 421 static void 422 fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp) 423 { 424 nvlist_t *nvl = FMD_EVENT_NVL(ep); 425 boolean_t faulty = FMD_B_FALSE, unusable = FMD_B_FALSE; 426 int ps; 427 boolean_t repaired = FMD_B_FALSE, replaced = FMD_B_FALSE; 428 boolean_t acquitted = FMD_B_FALSE; 429 nvlist_t *flt, *flt_copy, *asru; 430 char *case_uuid = NULL, *case_code = NULL; 431 fmd_asru_t *ap; 432 fmd_asru_link_t *alp; 433 fmd_case_t *cp; 434 int64_t *diag_time; 435 uint_t nelem; 436 topo_hdl_t *thp; 437 char *class; 438 nvlist_t *rsrc; 439 int err; 440 441 /* 442 * Extract the most recent values of 'faulty' from the event log. 443 */ 444 if (nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY, 445 &faulty) != 0) { 446 fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: " 447 "invalid event log record\n", lp->log_name); 448 ahp->ah_error = EFMD_ASRU_EVENT; 449 return; 450 } 451 if (nvlist_lookup_nvlist(nvl, FM_RSRC_ASRU_EVENT, &flt) != 0) { 452 fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: " 453 "invalid event log record\n", lp->log_name); 454 ahp->ah_error = EFMD_ASRU_EVENT; 455 return; 456 } 457 (void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_UUID, &case_uuid); 458 (void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_CODE, &case_code); 459 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_UNUSABLE, 460 &unusable); 461 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPAIRED, 462 &repaired); 463 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPLACED, 464 &replaced); 465 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_ACQUITTED, 466 &acquitted); 467 468 /* 469 * Attempt to recreate the case in either the CLOSED or REPAIRED state 470 * (depending on whether the faulty bit is still set). 471 * If the case is already present, fmd_case_recreate() will return it. 472 * If not, we'll create a new orphaned case. Either way, we use the 473 * ASRU event to insert a suspect into the partially-restored case. 474 */ 475 fmd_module_lock(fmd.d_rmod); 476 cp = fmd_case_recreate(fmd.d_rmod, NULL, faulty ? FMD_CASE_CLOSED : 477 FMD_CASE_REPAIRED, case_uuid, case_code); 478 fmd_case_hold(cp); 479 fmd_module_unlock(fmd.d_rmod); 480 if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time, 481 &nelem) == 0 && nelem >= 2) 482 fmd_case_settime(cp, diag_time[0], diag_time[1]); 483 else 484 fmd_case_settime(cp, lp->log_stat.st_ctime, 0); 485 (void) nvlist_xdup(flt, &flt_copy, &fmd.d_nva); 486 487 /* 488 * For faults with a resource, re-evaluate the asru from the resource. 489 */ 490 thp = fmd_fmri_topo_hold(TOPO_VERSION); 491 if (nvlist_lookup_string(flt_copy, FM_CLASS, &class) == 0 && 492 strncmp(class, "fault", 5) == 0 && 493 nvlist_lookup_nvlist(flt_copy, FM_FAULT_RESOURCE, &rsrc) == 0 && 494 rsrc != NULL && topo_fmri_asru(thp, rsrc, &asru, &err) == 0) { 495 (void) nvlist_remove(flt_copy, FM_FAULT_ASRU, DATA_TYPE_NVLIST); 496 (void) nvlist_add_nvlist(flt_copy, FM_FAULT_ASRU, asru); 497 nvlist_free(asru); 498 } 499 fmd_fmri_topo_rele(thp); 500 501 (void) nvlist_xdup(flt_copy, &flt, &fmd.d_nva); 502 503 fmd_case_recreate_suspect(cp, flt_copy); 504 505 /* 506 * Now create the resource cache entries. 507 */ 508 alp = fmd_asru_al_create(ahp, flt, cp, fmd_strbasename(lp->log_name)); 509 ap = alp->al_asru; 510 511 /* 512 * Check to see if the resource is still present in the system. 513 */ 514 ps = fmd_asru_replacement_state(flt); 515 if (ps == FMD_OBJ_STATE_REPLACED) { 516 replaced = FMD_B_TRUE; 517 } else if (ps == FMD_OBJ_STATE_STILL_PRESENT || 518 ps == FMD_OBJ_STATE_UNKNOWN) { 519 ap->asru_flags |= FMD_ASRU_PRESENT; 520 if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, 521 &asru) == 0) { 522 int us; 523 524 switch (fmd_fmri_service_state(asru)) { 525 case FMD_SERVICE_STATE_UNUSABLE: 526 unusable = FMD_B_TRUE; 527 break; 528 case FMD_SERVICE_STATE_OK: 529 case FMD_SERVICE_STATE_ISOLATE_PENDING: 530 case FMD_SERVICE_STATE_DEGRADED: 531 unusable = FMD_B_FALSE; 532 break; 533 case FMD_SERVICE_STATE_UNKNOWN: 534 case -1: 535 /* not supported by scheme */ 536 us = fmd_fmri_unusable(asru); 537 if (us > 0) 538 unusable = FMD_B_TRUE; 539 else if (us == 0) 540 unusable = FMD_B_FALSE; 541 break; 542 } 543 } 544 } 545 546 nvlist_free(flt); 547 548 ap->asru_flags |= FMD_ASRU_RECREATED; 549 if (faulty) { 550 alp->al_flags |= FMD_ASRU_FAULTY; 551 ap->asru_flags |= FMD_ASRU_FAULTY; 552 } 553 if (unusable) { 554 alp->al_flags |= FMD_ASRU_UNUSABLE; 555 ap->asru_flags |= FMD_ASRU_UNUSABLE; 556 } 557 if (replaced) 558 alp->al_reason = FMD_ASRU_REPLACED; 559 else if (repaired) 560 alp->al_reason = FMD_ASRU_REPAIRED; 561 else if (acquitted) 562 alp->al_reason = FMD_ASRU_ACQUITTED; 563 564 TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", alp->al_uuid, 565 (void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE])); 566 } 567 568 static void 569 fmd_asru_hash_discard(fmd_asru_hash_t *ahp, const char *uuid, int err) 570 { 571 char src[PATH_MAX], dst[PATH_MAX]; 572 573 (void) snprintf(src, PATH_MAX, "%s/%s", ahp->ah_dirpath, uuid); 574 (void) snprintf(dst, PATH_MAX, "%s/%s-", ahp->ah_dirpath, uuid); 575 576 if (err != 0) 577 err = rename(src, dst); 578 else 579 err = unlink(src); 580 581 if (err != 0 && errno != ENOENT) 582 fmd_error(EFMD_ASRU_EVENT, "failed to rename log %s", src); 583 } 584 585 /* 586 * Open a saved log file and restore it into the ASRU hash. If we can't even 587 * open the log, rename the log file to <uuid>- to indicate it is corrupt. If 588 * fmd_log_replay() fails, we either delete the file (if it has reached the 589 * upper limit on cache age) or rename it for debugging if it was corrupted. 590 */ 591 static void 592 fmd_asru_hash_logopen(fmd_asru_hash_t *ahp, const char *uuid) 593 { 594 fmd_log_t *lp = fmd_log_tryopen(ahp->ah_dirpath, uuid, FMD_LOG_ASRU); 595 uint_t n; 596 597 if (lp == NULL) { 598 fmd_asru_hash_discard(ahp, uuid, errno); 599 return; 600 } 601 602 ahp->ah_error = 0; 603 n = ahp->ah_al_count; 604 605 fmd_log_replay(lp, (fmd_log_f *)fmd_asru_hash_recreate, ahp); 606 fmd_log_rele(lp); 607 608 if (ahp->ah_al_count == n) 609 fmd_asru_hash_discard(ahp, uuid, ahp->ah_error); 610 } 611 612 void 613 fmd_asru_hash_refresh(fmd_asru_hash_t *ahp) 614 { 615 struct dirent *dp; 616 DIR *dirp; 617 int zero; 618 619 if ((dirp = opendir(ahp->ah_dirpath)) == NULL) { 620 fmd_error(EFMD_ASRU_NODIR, 621 "failed to open asru cache directory %s", ahp->ah_dirpath); 622 return; 623 } 624 625 (void) fmd_conf_getprop(fmd.d_conf, "rsrc.zero", &zero); 626 627 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 628 629 while ((dp = readdir(dirp)) != NULL) { 630 if (dp->d_name[0] == '.') 631 continue; /* skip "." and ".." */ 632 633 if (zero) 634 fmd_asru_hash_discard(ahp, dp->d_name, 0); 635 else if (!fmd_strmatch(dp->d_name, "*-")) 636 fmd_asru_hash_logopen(ahp, dp->d_name); 637 } 638 639 (void) pthread_rwlock_unlock(&ahp->ah_lock); 640 (void) closedir(dirp); 641 } 642 643 /* 644 * If the resource is present and faulty but not unusable, replay the fault 645 * event that caused it be marked faulty. This will cause the agent 646 * subscribing to this fault class to again disable the resource. 647 */ 648 /*ARGSUSED*/ 649 static void 650 fmd_asru_hash_replay_asru(fmd_asru_t *ap, void *data) 651 { 652 fmd_event_t *e; 653 nvlist_t *nvl; 654 char *class; 655 656 if (ap->asru_event != NULL && (ap->asru_flags & (FMD_ASRU_STATE | 657 FMD_ASRU_PRESENT)) == (FMD_ASRU_FAULTY | FMD_ASRU_PRESENT)) { 658 659 fmd_dprintf(FMD_DBG_ASRU, 660 "replaying fault event for %s", ap->asru_name); 661 662 (void) nvlist_xdup(ap->asru_event, &nvl, &fmd.d_nva); 663 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 664 665 (void) nvlist_add_string(nvl, FMD_EVN_UUID, 666 ((fmd_case_impl_t *)ap->asru_case)->ci_uuid); 667 668 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 669 fmd_dispq_dispatch(fmd.d_disp, e, class); 670 } 671 } 672 673 void 674 fmd_asru_hash_replay(fmd_asru_hash_t *ahp) 675 { 676 fmd_asru_hash_apply(ahp, fmd_asru_hash_replay_asru, NULL); 677 } 678 679 /* 680 * Check if the resource is still present. If not, and if the rsrc.age time 681 * has expired, then do an implicit repair on the resource. 682 */ 683 /*ARGSUSED*/ 684 static void 685 fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *arg) 686 { 687 struct timeval tv; 688 fmd_log_t *lp; 689 hrtime_t hrt; 690 int ps; 691 int err; 692 693 ps = fmd_asru_replacement_state(alp->al_event); 694 if (ps == FMD_OBJ_STATE_REPLACED) { 695 fmd_asru_replaced(alp, &err); 696 } else if (ps == FMD_OBJ_STATE_NOT_PRESENT) { 697 fmd_time_gettimeofday(&tv); 698 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, 699 FMD_LOG_ASRU); 700 hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime); 701 fmd_log_rele(lp); 702 if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime) 703 fmd_asru_removed(alp); 704 } 705 } 706 707 void 708 fmd_asru_clear_aged_rsrcs() 709 { 710 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, NULL); 711 } 712 713 fmd_asru_hash_t * 714 fmd_asru_hash_create(const char *root, const char *dir) 715 { 716 fmd_asru_hash_t *ahp; 717 char path[PATH_MAX]; 718 719 ahp = fmd_alloc(sizeof (fmd_asru_hash_t), FMD_SLEEP); 720 (void) pthread_rwlock_init(&ahp->ah_lock, NULL); 721 ahp->ah_hashlen = fmd.d_str_buckets; 722 ahp->ah_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, FMD_SLEEP); 723 ahp->ah_asru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 724 FMD_SLEEP); 725 ahp->ah_case_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 726 FMD_SLEEP); 727 ahp->ah_fru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 728 FMD_SLEEP); 729 ahp->ah_label_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 730 FMD_SLEEP); 731 ahp->ah_rsrc_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 732 FMD_SLEEP); 733 (void) snprintf(path, sizeof (path), "%s/%s", root, dir); 734 ahp->ah_dirpath = fmd_strdup(path, FMD_SLEEP); 735 (void) fmd_conf_getprop(fmd.d_conf, "rsrc.age", &ahp->ah_lifetime); 736 (void) fmd_conf_getprop(fmd.d_conf, "fakenotpresent", 737 (uint32_t *)&fmd_asru_fake_not_present); 738 ahp->ah_al_count = 0; 739 ahp->ah_count = 0; 740 ahp->ah_error = 0; 741 ahp->ah_topo = fmd_topo_hold(); 742 743 return (ahp); 744 } 745 746 void 747 fmd_asru_hash_destroy(fmd_asru_hash_t *ahp) 748 { 749 fmd_asru_link_t *alp, *np; 750 uint_t i; 751 752 for (i = 0; i < ahp->ah_hashlen; i++) { 753 for (alp = ahp->ah_case_hash[i]; alp != NULL; alp = np) { 754 np = alp->al_case_next; 755 alp->al_case_next = NULL; 756 fmd_case_rele(alp->al_case); 757 alp->al_case = NULL; 758 fmd_asru_al_hash_release(ahp, alp); 759 } 760 } 761 762 fmd_strfree(ahp->ah_dirpath); 763 fmd_free(ahp->ah_hash, sizeof (void *) * ahp->ah_hashlen); 764 fmd_free(ahp->ah_asru_hash, sizeof (void *) * ahp->ah_hashlen); 765 fmd_free(ahp->ah_case_hash, sizeof (void *) * ahp->ah_hashlen); 766 fmd_free(ahp->ah_fru_hash, sizeof (void *) * ahp->ah_hashlen); 767 fmd_free(ahp->ah_label_hash, sizeof (void *) * ahp->ah_hashlen); 768 fmd_free(ahp->ah_rsrc_hash, sizeof (void *) * ahp->ah_hashlen); 769 fmd_topo_rele(ahp->ah_topo); 770 fmd_free(ahp, sizeof (fmd_asru_hash_t)); 771 } 772 773 /* 774 * Take a snapshot of the ASRU database by placing an additional hold on each 775 * member in an auxiliary array, and then call 'func' for each ASRU. 776 */ 777 void 778 fmd_asru_hash_apply(fmd_asru_hash_t *ahp, 779 void (*func)(fmd_asru_t *, void *), void *arg) 780 { 781 fmd_asru_t *ap, **aps, **app; 782 uint_t apc, i; 783 784 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 785 786 aps = app = fmd_alloc(ahp->ah_count * sizeof (fmd_asru_t *), FMD_SLEEP); 787 apc = ahp->ah_count; 788 789 for (i = 0; i < ahp->ah_hashlen; i++) { 790 for (ap = ahp->ah_hash[i]; ap != NULL; ap = ap->asru_next) 791 *app++ = fmd_asru_hold(ap); 792 } 793 794 ASSERT(app == aps + apc); 795 (void) pthread_rwlock_unlock(&ahp->ah_lock); 796 797 for (i = 0; i < apc; i++) { 798 if (aps[i]->asru_fmri != NULL) 799 func(aps[i], arg); 800 fmd_asru_hash_release(ahp, aps[i]); 801 } 802 803 fmd_free(aps, apc * sizeof (fmd_asru_t *)); 804 } 805 806 void 807 fmd_asru_al_hash_apply(fmd_asru_hash_t *ahp, 808 void (*func)(fmd_asru_link_t *, void *), void *arg) 809 { 810 fmd_asru_link_t *alp, **alps, **alpp; 811 uint_t alpc, i; 812 813 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 814 815 alps = alpp = fmd_alloc(ahp->ah_al_count * sizeof (fmd_asru_link_t *), 816 FMD_SLEEP); 817 alpc = ahp->ah_al_count; 818 819 for (i = 0; i < ahp->ah_hashlen; i++) { 820 for (alp = ahp->ah_case_hash[i]; alp != NULL; 821 alp = alp->al_case_next) 822 *alpp++ = fmd_asru_al_hold(alp); 823 } 824 825 ASSERT(alpp == alps + alpc); 826 (void) pthread_rwlock_unlock(&ahp->ah_lock); 827 828 for (i = 0; i < alpc; i++) { 829 func(alps[i], arg); 830 fmd_asru_al_hash_release(ahp, alps[i]); 831 } 832 833 fmd_free(alps, alpc * sizeof (fmd_asru_link_t *)); 834 } 835 836 static void 837 fmd_asru_do_hash_apply(fmd_asru_hash_t *ahp, char *name, 838 void (*func)(fmd_asru_link_t *, void *), void *arg, 839 fmd_asru_link_t **hash, size_t match_offset, size_t next_offset) 840 { 841 fmd_asru_link_t *alp, **alps, **alpp; 842 uint_t alpc = 0, i; 843 uint_t h; 844 845 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 846 847 h = fmd_asru_strhash(ahp, name); 848 849 for (alp = hash[h]; alp != NULL; alp = 850 /* LINTED pointer alignment */ 851 FMD_ASRU_AL_HASH_NEXT(alp, next_offset)) 852 if (fmd_asru_strcmp(ahp, 853 /* LINTED pointer alignment */ 854 FMD_ASRU_AL_HASH_NAME(alp, match_offset), name)) 855 alpc++; 856 857 alps = alpp = fmd_alloc(alpc * sizeof (fmd_asru_link_t *), FMD_SLEEP); 858 859 for (alp = hash[h]; alp != NULL; alp = 860 /* LINTED pointer alignment */ 861 FMD_ASRU_AL_HASH_NEXT(alp, next_offset)) 862 if (fmd_asru_strcmp(ahp, 863 /* LINTED pointer alignment */ 864 FMD_ASRU_AL_HASH_NAME(alp, match_offset), name)) 865 *alpp++ = fmd_asru_al_hold(alp); 866 867 ASSERT(alpp == alps + alpc); 868 (void) pthread_rwlock_unlock(&ahp->ah_lock); 869 870 for (i = 0; i < alpc; i++) { 871 func(alps[i], arg); 872 fmd_asru_al_hash_release(ahp, alps[i]); 873 } 874 875 fmd_free(alps, alpc * sizeof (fmd_asru_link_t *)); 876 } 877 878 void 879 fmd_asru_hash_apply_by_asru(fmd_asru_hash_t *ahp, char *name, 880 void (*func)(fmd_asru_link_t *, void *), void *arg) 881 { 882 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_asru_hash, 883 offsetof(fmd_asru_link_t, al_asru_name), 884 offsetof(fmd_asru_link_t, al_asru_next)); 885 } 886 887 void 888 fmd_asru_hash_apply_by_case(fmd_asru_hash_t *ahp, fmd_case_t *cp, 889 void (*func)(fmd_asru_link_t *, void *), void *arg) 890 { 891 fmd_asru_do_hash_apply(ahp, ((fmd_case_impl_t *)cp)->ci_uuid, func, arg, 892 ahp->ah_case_hash, offsetof(fmd_asru_link_t, al_case_uuid), 893 offsetof(fmd_asru_link_t, al_case_next)); 894 } 895 896 void 897 fmd_asru_hash_apply_by_fru(fmd_asru_hash_t *ahp, char *name, 898 void (*func)(fmd_asru_link_t *, void *), void *arg) 899 { 900 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_fru_hash, 901 offsetof(fmd_asru_link_t, al_fru_name), 902 offsetof(fmd_asru_link_t, al_fru_next)); 903 } 904 905 void 906 fmd_asru_hash_apply_by_rsrc(fmd_asru_hash_t *ahp, char *name, 907 void (*func)(fmd_asru_link_t *, void *), void *arg) 908 { 909 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_rsrc_hash, 910 offsetof(fmd_asru_link_t, al_rsrc_name), 911 offsetof(fmd_asru_link_t, al_rsrc_next)); 912 } 913 914 void 915 fmd_asru_hash_apply_by_label(fmd_asru_hash_t *ahp, char *name, 916 void (*func)(fmd_asru_link_t *, void *), void *arg) 917 { 918 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_label_hash, 919 offsetof(fmd_asru_link_t, al_label), 920 offsetof(fmd_asru_link_t, al_label_next)); 921 } 922 923 /* 924 * Lookup an asru in the hash by name and place a hold on it. If the asru is 925 * not found, no entry is created and NULL is returned. 926 */ 927 fmd_asru_t * 928 fmd_asru_hash_lookup_name(fmd_asru_hash_t *ahp, const char *name) 929 { 930 fmd_asru_t *ap; 931 932 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 933 ap = fmd_asru_hash_lookup(ahp, name); 934 (void) pthread_rwlock_unlock(&ahp->ah_lock); 935 936 return (ap); 937 } 938 939 /* 940 * Create a resource cache entry using the fault event "nvl" for one of the 941 * suspects from the case "cp". 942 * 943 * The fault event can have the following components : FM_FAULT_ASRU, 944 * FM_FAULT_FRU, FM_FAULT_RESOURCE. These should be set by the Diagnosis Engine 945 * when calling fmd_nvl_create_fault(). In the general case, these are all 946 * optional and an entry will always be added into the cache even if one or all 947 * of these fields is missing. 948 * 949 * However, for hardware faults the recommended practice is that the fault 950 * event should always have the FM_FAULT_RESOURCE field present and that this 951 * should be represented in hc-scheme. 952 * 953 * Currently the DE should also add the FM_FAULT_ASRU and FM_FAULT_FRU fields 954 * where known, though at some future stage fmd might be able to fill these 955 * in automatically from the topology. 956 */ 957 fmd_asru_link_t * 958 fmd_asru_hash_create_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, nvlist_t *nvl) 959 { 960 char *parsed_uuid; 961 uuid_t uuid; 962 int uuidlen; 963 fmd_asru_link_t *alp; 964 965 /* 966 * Generate a UUID for the ASRU. libuuid cleverly gives us no 967 * interface for specifying or learning the buffer size. Sigh. 968 * The spec says 36 bytes but we use a tunable just to be safe. 969 */ 970 (void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &uuidlen); 971 parsed_uuid = fmd_zalloc(uuidlen + 1, FMD_SLEEP); 972 uuid_generate(uuid); 973 uuid_unparse(uuid, parsed_uuid); 974 975 /* 976 * Now create the resource cache entries. 977 */ 978 fmd_case_hold_locked(cp); 979 alp = fmd_asru_al_create(ahp, nvl, cp, parsed_uuid); 980 TRACE((FMD_DBG_ASRU, "asru %s created as %p", 981 alp->al_uuid, (void *)alp->al_asru)); 982 983 fmd_free(parsed_uuid, uuidlen + 1); 984 return (alp); 985 986 } 987 988 /* 989 * Release the reference count on an asru obtained using fmd_asru_hash_lookup. 990 * We take 'ahp' for symmetry and in case we need to use it in future work. 991 */ 992 /*ARGSUSED*/ 993 void 994 fmd_asru_hash_release(fmd_asru_hash_t *ahp, fmd_asru_t *ap) 995 { 996 (void) pthread_mutex_lock(&ap->asru_lock); 997 998 ASSERT(ap->asru_refs != 0); 999 if (--ap->asru_refs == 0) 1000 fmd_asru_destroy(ap); 1001 else 1002 (void) pthread_mutex_unlock(&ap->asru_lock); 1003 } 1004 1005 static void 1006 fmd_asru_do_delete_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, 1007 fmd_asru_link_t **hash, size_t next_offset, char *name) 1008 { 1009 uint_t h; 1010 fmd_asru_link_t *alp, **pp, *alpnext, **alpnextp; 1011 1012 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 1013 h = fmd_asru_strhash(ahp, name); 1014 pp = &hash[h]; 1015 for (alp = *pp; alp != NULL; alp = alpnext) { 1016 /* LINTED pointer alignment */ 1017 alpnextp = FMD_ASRU_AL_HASH_NEXTP(alp, next_offset); 1018 alpnext = *alpnextp; 1019 if (alp->al_case == cp) { 1020 *pp = *alpnextp; 1021 *alpnextp = NULL; 1022 } else 1023 pp = alpnextp; 1024 } 1025 (void) pthread_rwlock_unlock(&ahp->ah_lock); 1026 } 1027 1028 static void 1029 fmd_asru_do_hash_delete(fmd_asru_hash_t *ahp, fmd_case_susp_t *cis, 1030 fmd_case_t *cp, fmd_asru_link_t **hash, size_t next_offset, char *nvname) 1031 { 1032 nvlist_t *nvl; 1033 char *name = NULL; 1034 ssize_t namelen; 1035 1036 if (nvlist_lookup_nvlist(cis->cis_nvl, nvname, &nvl) == 0 && 1037 (namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) != -1 && 1038 (name = fmd_alloc(namelen + 1, FMD_SLEEP)) != NULL) { 1039 if (fmd_fmri_nvl2str(nvl, name, namelen + 1) != -1) 1040 fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, 1041 name); 1042 fmd_free(name, namelen + 1); 1043 } else 1044 fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, ""); 1045 } 1046 1047 void 1048 fmd_asru_hash_delete_case(fmd_asru_hash_t *ahp, fmd_case_t *cp) 1049 { 1050 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1051 fmd_case_susp_t *cis; 1052 fmd_asru_link_t *alp, **plp, *alpnext; 1053 fmd_asru_t *ap; 1054 char path[PATH_MAX]; 1055 char *label; 1056 uint_t h; 1057 1058 /* 1059 * first delete hash entries for each suspect 1060 */ 1061 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) { 1062 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_fru_hash, 1063 offsetof(fmd_asru_link_t, al_fru_next), FM_FAULT_FRU); 1064 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_rsrc_hash, 1065 offsetof(fmd_asru_link_t, al_rsrc_next), FM_FAULT_RESOURCE); 1066 if (nvlist_lookup_string(cis->cis_nvl, FM_FAULT_LOCATION, 1067 &label) != 0) 1068 label = ""; 1069 fmd_asru_do_delete_entry(ahp, cp, ahp->ah_label_hash, 1070 offsetof(fmd_asru_link_t, al_label_next), label); 1071 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_asru_hash, 1072 offsetof(fmd_asru_link_t, al_asru_next), FM_FAULT_ASRU); 1073 } 1074 1075 /* 1076 * then delete associated case hash entries 1077 */ 1078 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 1079 h = fmd_asru_strhash(ahp, cip->ci_uuid); 1080 plp = &ahp->ah_case_hash[h]; 1081 for (alp = *plp; alp != NULL; alp = alpnext) { 1082 alpnext = alp->al_case_next; 1083 if (alp->al_case == cp) { 1084 *plp = alp->al_case_next; 1085 alp->al_case_next = NULL; 1086 ASSERT(ahp->ah_al_count != 0); 1087 ahp->ah_al_count--; 1088 1089 /* 1090 * decrement case ref. 1091 */ 1092 fmd_case_rele_locked(cp); 1093 alp->al_case = NULL; 1094 1095 /* 1096 * If we found a matching ASRU, unlink its log file and 1097 * then release the hash entry. Note that it may still 1098 * be referenced if another thread is manipulating it; 1099 * this is ok because once we unlink, the log file will 1100 * not be restored, and the log data will be freed when 1101 * all of the referencing threads release their 1102 * respective references. 1103 */ 1104 (void) snprintf(path, sizeof (path), "%s/%s", 1105 ahp->ah_dirpath, alp->al_uuid); 1106 if (unlink(path) != 0) 1107 fmd_error(EFMD_ASRU_UNLINK, 1108 "failed to unlink asru %s", path); 1109 1110 /* 1111 * Now unlink from the global per-resource cache 1112 * and if this is the last link then remove that from 1113 * it's own hash too. 1114 */ 1115 ap = alp->al_asru; 1116 (void) pthread_mutex_lock(&ap->asru_lock); 1117 fmd_list_delete(&ap->asru_list, alp); 1118 if (ap->asru_list.l_next == NULL) { 1119 uint_t h; 1120 fmd_asru_t *ap2, **pp; 1121 fmd_asru_t *apnext, **apnextp; 1122 1123 ASSERT(ahp->ah_count != 0); 1124 ahp->ah_count--; 1125 h = fmd_asru_strhash(ahp, ap->asru_name); 1126 pp = &ahp->ah_hash[h]; 1127 for (ap2 = *pp; ap2 != NULL; ap2 = apnext) { 1128 apnextp = &ap2->asru_next; 1129 apnext = *apnextp; 1130 if (ap2 == ap) { 1131 *pp = *apnextp; 1132 *apnextp = NULL; 1133 } else 1134 pp = apnextp; 1135 } 1136 } 1137 (void) pthread_mutex_unlock(&ap->asru_lock); 1138 fmd_asru_al_hash_release(ahp, alp); 1139 } else 1140 plp = &alp->al_case_next; 1141 } 1142 (void) pthread_rwlock_unlock(&ahp->ah_lock); 1143 } 1144 1145 static void 1146 fmd_asru_repair_containee(fmd_asru_link_t *alp, void *er) 1147 { 1148 if (er && (alp->al_asru->asru_flags & FMD_ASRU_INVISIBLE) && 1149 alp->al_asru_fmri && fmd_fmri_contains(er, 1150 alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1151 FMD_ASRU_REPAIRED)) 1152 fmd_case_update(alp->al_case); 1153 } 1154 1155 void 1156 fmd_asru_repaired(fmd_asru_link_t *alp, void *er) 1157 { 1158 int flags; 1159 int rval; 1160 1161 /* 1162 * repair this asru cache entry 1163 */ 1164 rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_REPAIRED); 1165 1166 /* 1167 * now check if all entries associated with this asru are repaired and 1168 * if so repair containees 1169 */ 1170 (void) pthread_mutex_lock(&alp->al_asru->asru_lock); 1171 flags = alp->al_asru->asru_flags; 1172 (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); 1173 if (!(flags & (FMD_ASRU_FAULTY | FMD_ASRU_INVISIBLE))) 1174 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_containee, 1175 alp->al_asru_fmri); 1176 1177 /* 1178 * if called from fmd_adm_repair() and we really did clear the bit then 1179 * we need to do a case update to see if the associated case can be 1180 * repaired. No need to do this if called from fmd_case_repair() (ie 1181 * when er is NULL) as the case will be explicitly repaired anyway. 1182 */ 1183 if (er) { 1184 *(int *)er = 0; 1185 if (rval) 1186 fmd_case_update(alp->al_case); 1187 } 1188 } 1189 1190 static void 1191 fmd_asru_acquit_containee(fmd_asru_link_t *alp, void *er) 1192 { 1193 if (er && (alp->al_asru->asru_flags & FMD_ASRU_INVISIBLE) && 1194 alp->al_asru_fmri && fmd_fmri_contains(er, 1195 alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1196 FMD_ASRU_ACQUITTED)) 1197 fmd_case_update(alp->al_case); 1198 } 1199 1200 void 1201 fmd_asru_acquit(fmd_asru_link_t *alp, void *er) 1202 { 1203 int flags; 1204 int rval; 1205 1206 /* 1207 * acquit this asru cache entry 1208 */ 1209 rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_ACQUITTED); 1210 1211 /* 1212 * now check if all entries associated with this asru are acquitted and 1213 * if so acquit containees 1214 */ 1215 (void) pthread_mutex_lock(&alp->al_asru->asru_lock); 1216 flags = alp->al_asru->asru_flags; 1217 (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); 1218 if (!(flags & (FMD_ASRU_FAULTY | FMD_ASRU_INVISIBLE))) 1219 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_acquit_containee, 1220 alp->al_asru_fmri); 1221 1222 /* 1223 * if called from fmd_adm_acquit() and we really did clear the bit then 1224 * we need to do a case update to see if the associated case can be 1225 * repaired. No need to do this if called from fmd_case_acquit() (ie 1226 * when er is NULL) as the case will be explicitly repaired anyway. 1227 */ 1228 if (er) { 1229 *(int *)er = 0; 1230 if (rval) 1231 fmd_case_update(alp->al_case); 1232 } 1233 } 1234 1235 static void 1236 fmd_asru_replaced_containee(fmd_asru_link_t *alp, void *er) 1237 { 1238 if (er && (alp->al_asru->asru_flags & FMD_ASRU_INVISIBLE) && 1239 alp->al_asru_fmri && fmd_fmri_contains(er, 1240 alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1241 FMD_ASRU_REPLACED)) 1242 fmd_case_update(alp->al_case); 1243 } 1244 1245 void 1246 fmd_asru_replaced(fmd_asru_link_t *alp, void *er) 1247 { 1248 int flags; 1249 int rval; 1250 int ps; 1251 1252 ps = fmd_asru_replacement_state(alp->al_event); 1253 if (ps == FMD_OBJ_STATE_STILL_PRESENT) 1254 return; 1255 1256 /* 1257 * mark this cache entry as replaced 1258 */ 1259 rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_REPLACED); 1260 1261 /* 1262 * now check if all entries associated with this asru are replaced and 1263 * if so replace containees 1264 */ 1265 (void) pthread_mutex_lock(&alp->al_asru->asru_lock); 1266 flags = alp->al_asru->asru_flags; 1267 (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); 1268 if (!(flags & (FMD_ASRU_FAULTY | FMD_ASRU_INVISIBLE))) 1269 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_replaced_containee, 1270 alp->al_asru_fmri); 1271 1272 *(int *)er = 0; 1273 if (rval) 1274 fmd_case_update(alp->al_case); 1275 } 1276 1277 static void 1278 fmd_asru_removed_containee(fmd_asru_link_t *alp, void *er) 1279 { 1280 if (er && (alp->al_asru->asru_flags & FMD_ASRU_INVISIBLE) && 1281 alp->al_asru_fmri && fmd_fmri_contains(er, 1282 alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1283 0)) 1284 fmd_case_update(alp->al_case); 1285 } 1286 1287 void 1288 fmd_asru_removed(fmd_asru_link_t *alp) 1289 { 1290 int flags; 1291 int rval; 1292 1293 /* 1294 * mark this cache entry as replacded 1295 */ 1296 rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 0); 1297 1298 /* 1299 * now check if all entries associated with this asru are removed and 1300 * if so replace containees 1301 */ 1302 (void) pthread_mutex_lock(&alp->al_asru->asru_lock); 1303 flags = alp->al_asru->asru_flags; 1304 (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); 1305 if (!(flags & (FMD_ASRU_FAULTY | FMD_ASRU_INVISIBLE))) 1306 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_removed_containee, 1307 alp->al_asru_fmri); 1308 if (rval) 1309 fmd_case_update(alp->al_case); 1310 } 1311 1312 static void 1313 fmd_asru_logevent(fmd_asru_link_t *alp) 1314 { 1315 fmd_asru_t *ap = alp->al_asru; 1316 boolean_t faulty = (alp->al_flags & FMD_ASRU_FAULTY) != 0; 1317 boolean_t unusable = (alp->al_flags & FMD_ASRU_UNUSABLE) != 0; 1318 boolean_t message = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0; 1319 boolean_t repaired = (alp->al_reason == FMD_ASRU_REPAIRED); 1320 boolean_t replaced = (alp->al_reason == FMD_ASRU_REPLACED); 1321 boolean_t acquitted = (alp->al_reason == FMD_ASRU_ACQUITTED); 1322 1323 fmd_case_impl_t *cip; 1324 fmd_event_t *e; 1325 fmd_log_t *lp; 1326 nvlist_t *nvl; 1327 char *class; 1328 1329 ASSERT(MUTEX_HELD(&ap->asru_lock)); 1330 cip = (fmd_case_impl_t *)alp->al_case; 1331 ASSERT(cip != NULL); 1332 1333 if ((lp = alp->al_log) == NULL) 1334 lp = fmd_log_open(ap->asru_root, alp->al_uuid, FMD_LOG_ASRU); 1335 1336 if (lp == NULL) 1337 return; /* can't log events if we can't open the log */ 1338 1339 nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[faulty | (unusable << 1)], 1340 alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, faulty, unusable, 1341 message, alp->al_event, &cip->ci_tv, repaired, replaced, acquitted); 1342 1343 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 1344 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 1345 1346 fmd_event_hold(e); 1347 fmd_log_append(lp, e, NULL); 1348 fmd_event_rele(e); 1349 1350 /* 1351 * For now, we close the log file after every update to conserve file 1352 * descriptors and daemon overhead. If this becomes a performance 1353 * issue this code can change to keep a fixed-size LRU cache of logs. 1354 */ 1355 fmd_log_rele(lp); 1356 alp->al_log = NULL; 1357 } 1358 1359 int 1360 fmd_asru_setflags(fmd_asru_link_t *alp, uint_t sflag) 1361 { 1362 fmd_asru_t *ap = alp->al_asru; 1363 uint_t nstate, ostate; 1364 1365 ASSERT(!(sflag & ~FMD_ASRU_STATE)); 1366 ASSERT(sflag != FMD_ASRU_STATE); 1367 1368 (void) pthread_mutex_lock(&ap->asru_lock); 1369 1370 ostate = alp->al_flags & FMD_ASRU_STATE; 1371 alp->al_flags |= sflag; 1372 nstate = alp->al_flags & FMD_ASRU_STATE; 1373 1374 if (nstate == ostate) { 1375 (void) pthread_mutex_unlock(&ap->asru_lock); 1376 return (0); 1377 } 1378 1379 ap->asru_flags |= sflag; 1380 TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid, 1381 _fmd_asru_snames[ostate], _fmd_asru_snames[nstate])); 1382 1383 fmd_asru_logevent(alp); 1384 1385 (void) pthread_cond_broadcast(&ap->asru_cv); 1386 (void) pthread_mutex_unlock(&ap->asru_lock); 1387 return (1); 1388 } 1389 1390 int 1391 fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag, uint8_t reason) 1392 { 1393 fmd_asru_t *ap = alp->al_asru; 1394 fmd_asru_link_t *nalp; 1395 uint_t nstate, ostate, flags = 0; 1396 1397 ASSERT(!(sflag & ~FMD_ASRU_STATE)); 1398 ASSERT(sflag != FMD_ASRU_STATE); 1399 1400 (void) pthread_mutex_lock(&ap->asru_lock); 1401 1402 ostate = alp->al_flags & FMD_ASRU_STATE; 1403 alp->al_flags &= ~sflag; 1404 nstate = alp->al_flags & FMD_ASRU_STATE; 1405 1406 if (nstate == ostate) { 1407 if (reason > alp->al_reason) { 1408 alp->al_reason = reason; 1409 fmd_asru_logevent(alp); 1410 (void) pthread_cond_broadcast(&ap->asru_cv); 1411 } 1412 (void) pthread_mutex_unlock(&ap->asru_lock); 1413 return (0); 1414 } 1415 if (reason > alp->al_reason) 1416 alp->al_reason = reason; 1417 1418 if (sflag == FMD_ASRU_UNUSABLE) 1419 ap->asru_flags &= ~sflag; 1420 else if (sflag == FMD_ASRU_FAULTY) { 1421 /* 1422 * only clear the faulty bit if all links are clear 1423 */ 1424 for (nalp = fmd_list_next(&ap->asru_list); nalp != NULL; 1425 nalp = fmd_list_next(nalp)) 1426 flags |= nalp->al_flags; 1427 if (!(flags & FMD_ASRU_FAULTY)) 1428 ap->asru_flags &= ~sflag; 1429 } 1430 1431 TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid, 1432 _fmd_asru_snames[ostate], _fmd_asru_snames[nstate])); 1433 1434 fmd_asru_logevent(alp); 1435 1436 (void) pthread_cond_broadcast(&ap->asru_cv); 1437 (void) pthread_mutex_unlock(&ap->asru_lock); 1438 1439 return (1); 1440 } 1441 1442 /* 1443 * Report the current known state of the link entry (ie this particular fault 1444 * affecting this particular ASRU). 1445 */ 1446 int 1447 fmd_asru_al_getstate(fmd_asru_link_t *alp) 1448 { 1449 int us, st; 1450 nvlist_t *asru; 1451 int ps; 1452 1453 ps = fmd_asru_replacement_state(alp->al_event); 1454 if (ps == FMD_OBJ_STATE_NOT_PRESENT) 1455 return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE); 1456 if (ps == FMD_OBJ_STATE_REPLACED) { 1457 if (alp->al_reason < FMD_ASRU_REPLACED) 1458 alp->al_reason = FMD_ASRU_REPLACED; 1459 return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE); 1460 } 1461 1462 st = (alp->al_flags & FMD_ASRU_STATE) | FMD_ASRU_PRESENT; 1463 if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0) { 1464 us = fmd_fmri_service_state(asru); 1465 if (us == -1 || us == FMD_SERVICE_STATE_UNKNOWN) { 1466 /* not supported by scheme - try fmd_fmri_unusable */ 1467 us = fmd_fmri_unusable(asru); 1468 } else if (us == FMD_SERVICE_STATE_UNUSABLE) { 1469 st |= FMD_ASRU_UNUSABLE; 1470 return (st); 1471 } else if (us == FMD_SERVICE_STATE_OK) { 1472 st &= ~FMD_ASRU_UNUSABLE; 1473 return (st); 1474 } else if (us == FMD_SERVICE_STATE_ISOLATE_PENDING) { 1475 st &= ~FMD_ASRU_UNUSABLE; 1476 return (st); 1477 } else if (us == FMD_SERVICE_STATE_DEGRADED) { 1478 st &= ~FMD_ASRU_UNUSABLE; 1479 st |= FMD_ASRU_DEGRADED; 1480 return (st); 1481 } 1482 } else 1483 us = (alp->al_flags & FMD_ASRU_UNUSABLE); 1484 if (us > 0) 1485 st |= FMD_ASRU_UNUSABLE; 1486 else if (us == 0) 1487 st &= ~FMD_ASRU_UNUSABLE; 1488 return (st); 1489 } 1490 1491 /* 1492 * Report the current known state of the ASRU by refreshing its unusable status 1493 * based upon the routines provided by the scheme module. If the unusable bit 1494 * is different, we do *not* generate a state change here because that change 1495 * may be unrelated to fmd activities and therefore we have no case or event. 1496 * The absence of the transition is harmless as this function is only provided 1497 * for RPC observability and fmd's clients are only concerned with ASRU_FAULTY. 1498 */ 1499 int 1500 fmd_asru_getstate(fmd_asru_t *ap) 1501 { 1502 int us, st; 1503 1504 if (!(ap->asru_flags & FMD_ASRU_INTERNAL) && 1505 (fmd_asru_fake_not_present >= FMD_OBJ_STATE_REPLACED || 1506 fmd_fmri_present(ap->asru_fmri) <= 0)) 1507 return (0); /* do not report non-fmd non-present resources */ 1508 1509 us = fmd_fmri_unusable(ap->asru_fmri); 1510 st = ap->asru_flags & FMD_ASRU_STATE; 1511 1512 if (us > 0) 1513 st |= FMD_ASRU_UNUSABLE; 1514 else if (us == 0) 1515 st &= ~FMD_ASRU_UNUSABLE; 1516 1517 return (st); 1518 } 1519