1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/fm/protocol.h> 28 #include <uuid/uuid.h> 29 30 #include <dirent.h> 31 #include <limits.h> 32 #include <unistd.h> 33 #include <alloca.h> 34 #include <stddef.h> 35 #include <fm/libtopo.h> 36 37 #include <fmd_alloc.h> 38 #include <fmd_string.h> 39 #include <fmd_error.h> 40 #include <fmd_subr.h> 41 #include <fmd_protocol.h> 42 #include <fmd_event.h> 43 #include <fmd_conf.h> 44 #include <fmd_fmri.h> 45 #include <fmd_dispq.h> 46 #include <fmd_case.h> 47 #include <fmd_module.h> 48 #include <fmd_asru.h> 49 50 #include <fmd.h> 51 52 static const char *const _fmd_asru_events[] = { 53 FMD_RSRC_CLASS "asru.ok", /* UNUSABLE=0 FAULTED=0 */ 54 FMD_RSRC_CLASS "asru.degraded", /* UNUSABLE=0 FAULTED=1 */ 55 FMD_RSRC_CLASS "asru.unknown", /* UNUSABLE=1 FAULTED=0 */ 56 FMD_RSRC_CLASS "asru.faulted" /* UNUSABLE=1 FAULTED=1 */ 57 }; 58 59 static const char *const _fmd_asru_snames[] = { 60 "uf", "uF", "Uf", "UF" /* same order as above */ 61 }; 62 63 volatile uint32_t fmd_asru_fake_not_present = 0; 64 65 static uint_t 66 fmd_asru_strhash(fmd_asru_hash_t *ahp, const char *val) 67 { 68 return (topo_fmri_strhash(ahp->ah_topo->ft_hdl, val) % ahp->ah_hashlen); 69 } 70 71 static boolean_t 72 fmd_asru_strcmp(fmd_asru_hash_t *ahp, const char *a, const char *b) 73 { 74 return (topo_fmri_strcmp(ahp->ah_topo->ft_hdl, a, b)); 75 } 76 77 static fmd_asru_t * 78 fmd_asru_create(fmd_asru_hash_t *ahp, const char *uuid, 79 const char *name, nvlist_t *fmri) 80 { 81 fmd_asru_t *ap = fmd_zalloc(sizeof (fmd_asru_t), FMD_SLEEP); 82 char *s; 83 84 (void) pthread_mutex_init(&ap->asru_lock, NULL); 85 (void) pthread_cond_init(&ap->asru_cv, NULL); 86 87 ap->asru_name = fmd_strdup(name, FMD_SLEEP); 88 if (fmri) 89 (void) nvlist_xdup(fmri, &ap->asru_fmri, &fmd.d_nva); 90 ap->asru_root = fmd_strdup(ahp->ah_dirpath, FMD_SLEEP); 91 ap->asru_uuid = fmd_strdup(uuid, FMD_SLEEP); 92 ap->asru_uuidlen = ap->asru_uuid ? strlen(ap->asru_uuid) : 0; 93 ap->asru_refs = 1; 94 95 if (fmri && nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &s) == 0 && 96 strcmp(s, FM_FMRI_SCHEME_FMD) == 0) 97 ap->asru_flags |= FMD_ASRU_INTERNAL; 98 99 return (ap); 100 } 101 102 static void 103 fmd_asru_destroy(fmd_asru_t *ap) 104 { 105 ASSERT(MUTEX_HELD(&ap->asru_lock)); 106 ASSERT(ap->asru_refs == 0); 107 108 nvlist_free(ap->asru_event); 109 fmd_strfree(ap->asru_name); 110 nvlist_free(ap->asru_fmri); 111 fmd_strfree(ap->asru_root); 112 fmd_free(ap->asru_uuid, ap->asru_uuidlen + 1); 113 fmd_free(ap, sizeof (fmd_asru_t)); 114 } 115 116 static void 117 fmd_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_t *ap) 118 { 119 uint_t h = fmd_asru_strhash(ahp, ap->asru_name); 120 121 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 122 ap->asru_next = ahp->ah_hash[h]; 123 ahp->ah_hash[h] = ap; 124 ahp->ah_count++; 125 } 126 127 static fmd_asru_t * 128 fmd_asru_hold(fmd_asru_t *ap) 129 { 130 (void) pthread_mutex_lock(&ap->asru_lock); 131 ap->asru_refs++; 132 ASSERT(ap->asru_refs != 0); 133 (void) pthread_mutex_unlock(&ap->asru_lock); 134 return (ap); 135 } 136 137 /* 138 * Lookup an asru in the hash by name and place a hold on it. If the asru is 139 * not found, no entry is created and NULL is returned. This internal function 140 * is for callers who have the ah_lock held and is used by lookup_name below. 141 */ 142 fmd_asru_t * 143 fmd_asru_hash_lookup(fmd_asru_hash_t *ahp, const char *name) 144 { 145 fmd_asru_t *ap; 146 uint_t h; 147 148 ASSERT(RW_LOCK_HELD(&ahp->ah_lock)); 149 h = fmd_asru_strhash(ahp, name); 150 151 for (ap = ahp->ah_hash[h]; ap != NULL; ap = ap->asru_next) { 152 if (fmd_asru_strcmp(ahp, ap->asru_name, name)) 153 break; 154 } 155 156 if (ap != NULL) 157 (void) fmd_asru_hold(ap); 158 else 159 (void) fmd_set_errno(EFMD_ASRU_NOENT); 160 161 return (ap); 162 } 163 164 static int 165 fmd_asru_replacement_state(nvlist_t *event) 166 { 167 int ps = -1; 168 nvlist_t *asru, *fru, *rsrc; 169 170 /* 171 * Check if there is evidence that this object is no longer present. 172 * In general fmd_fmri_present() should be supported on resources and/or 173 * frus, as those are the things that are physically present or not 174 * present - an asru can be spread over a number of frus some of which 175 * are present and some not, so fmd_fmri_present() is not generally 176 * meaningful. However retain a check for asru first for compatibility. 177 * If we have checked all three and we still get -1 then nothing knows 178 * whether it's present or not, so err on the safe side and treat it 179 * as still present. 180 */ 181 if (fmd_asru_fake_not_present) 182 return (fmd_asru_fake_not_present); 183 if (nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0) 184 ps = fmd_fmri_replaced(asru); 185 if (ps == -1) { 186 if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, &rsrc) == 0) 187 ps = fmd_fmri_replaced(rsrc); 188 } else if (ps == FMD_OBJ_STATE_UNKNOWN) { 189 /* see if we can improve on UNKNOWN */ 190 if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, 191 &rsrc) == 0) { 192 int ps2 = fmd_fmri_replaced(rsrc); 193 if (ps2 == FMD_OBJ_STATE_STILL_PRESENT || 194 ps2 == FMD_OBJ_STATE_REPLACED) 195 ps = ps2; 196 } 197 } 198 if (ps == -1) { 199 if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0) 200 ps = fmd_fmri_replaced(fru); 201 } else if (ps == FMD_OBJ_STATE_UNKNOWN) { 202 /* see if we can improve on UNKNOWN */ 203 if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0) { 204 int ps2 = fmd_fmri_replaced(fru); 205 if (ps2 == FMD_OBJ_STATE_STILL_PRESENT || 206 ps2 == FMD_OBJ_STATE_REPLACED) 207 ps = ps2; 208 } 209 } 210 if (ps == -1) 211 ps = FMD_OBJ_STATE_UNKNOWN; 212 return (ps); 213 } 214 215 static void 216 fmd_asru_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 217 char *name) 218 { 219 uint_t h = fmd_asru_strhash(ahp, name); 220 221 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 222 alp->al_asru_next = ahp->ah_asru_hash[h]; 223 ahp->ah_asru_hash[h] = alp; 224 ahp->ah_al_count++; 225 } 226 227 static void 228 fmd_asru_case_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 229 char *name) 230 { 231 uint_t h = fmd_asru_strhash(ahp, name); 232 233 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 234 alp->al_case_next = ahp->ah_case_hash[h]; 235 ahp->ah_case_hash[h] = alp; 236 } 237 238 static void 239 fmd_asru_fru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, char *name) 240 { 241 uint_t h = fmd_asru_strhash(ahp, name); 242 243 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 244 alp->al_fru_next = ahp->ah_fru_hash[h]; 245 ahp->ah_fru_hash[h] = alp; 246 } 247 248 static void 249 fmd_asru_label_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 250 char *name) 251 { 252 uint_t h = fmd_asru_strhash(ahp, name); 253 254 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 255 alp->al_label_next = ahp->ah_label_hash[h]; 256 ahp->ah_label_hash[h] = alp; 257 } 258 259 static void 260 fmd_asru_rsrc_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 261 char *name) 262 { 263 uint_t h = fmd_asru_strhash(ahp, name); 264 265 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 266 alp->al_rsrc_next = ahp->ah_rsrc_hash[h]; 267 ahp->ah_rsrc_hash[h] = alp; 268 } 269 270 static void 271 fmd_asru_al_destroy(fmd_asru_link_t *alp) 272 { 273 ASSERT(alp->al_refs == 0); 274 ASSERT(MUTEX_HELD(&alp->al_asru->asru_lock)); 275 276 if (alp->al_log != NULL) 277 fmd_log_rele(alp->al_log); 278 279 fmd_free(alp->al_uuid, alp->al_uuidlen + 1); 280 nvlist_free(alp->al_event); 281 fmd_strfree(alp->al_rsrc_name); 282 fmd_strfree(alp->al_case_uuid); 283 fmd_strfree(alp->al_fru_name); 284 fmd_strfree(alp->al_asru_name); 285 fmd_strfree(alp->al_label); 286 nvlist_free(alp->al_asru_fmri); 287 fmd_free(alp, sizeof (fmd_asru_link_t)); 288 } 289 290 static fmd_asru_link_t * 291 fmd_asru_al_hold(fmd_asru_link_t *alp) 292 { 293 fmd_asru_t *ap = alp->al_asru; 294 295 (void) pthread_mutex_lock(&ap->asru_lock); 296 ap->asru_refs++; 297 alp->al_refs++; 298 ASSERT(alp->al_refs != 0); 299 (void) pthread_mutex_unlock(&ap->asru_lock); 300 return (alp); 301 } 302 303 static void fmd_asru_destroy(fmd_asru_t *ap); 304 305 /*ARGSUSED*/ 306 static void 307 fmd_asru_al_hash_release(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp) 308 { 309 fmd_asru_t *ap = alp->al_asru; 310 311 (void) pthread_mutex_lock(&ap->asru_lock); 312 ASSERT(alp->al_refs != 0); 313 if (--alp->al_refs == 0) 314 fmd_asru_al_destroy(alp); 315 ASSERT(ap->asru_refs != 0); 316 if (--ap->asru_refs == 0) 317 fmd_asru_destroy(ap); 318 else 319 (void) pthread_mutex_unlock(&ap->asru_lock); 320 } 321 322 static int 323 fmd_asru_get_namestr(nvlist_t *nvl, char **name, ssize_t *namelen) 324 { 325 if ((*namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) == -1) 326 return (EFMD_ASRU_FMRI); 327 *name = fmd_alloc(*namelen + 1, FMD_SLEEP); 328 if (fmd_fmri_nvl2str(nvl, *name, *namelen + 1) == -1) { 329 if (*name != NULL) 330 fmd_free(*name, *namelen + 1); 331 return (EFMD_ASRU_FMRI); 332 } 333 return (0); 334 } 335 336 static fmd_asru_link_t * 337 fmd_asru_al_create(fmd_asru_hash_t *ahp, nvlist_t *nvl, fmd_case_t *cp, 338 const char *al_uuid) 339 { 340 nvlist_t *asru = NULL, *fru, *rsrc; 341 int got_rsrc = 0, got_asru = 0, got_fru = 0; 342 ssize_t fru_namelen, rsrc_namelen, asru_namelen; 343 char *asru_name, *rsrc_name, *fru_name, *name, *label; 344 fmd_asru_link_t *alp; 345 fmd_asru_t *ap; 346 boolean_t msg; 347 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 348 349 if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &asru) == 0 && 350 fmd_asru_get_namestr(asru, &asru_name, &asru_namelen) == 0) 351 got_asru = 1; 352 if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &fru) == 0 && 353 fmd_asru_get_namestr(fru, &fru_name, &fru_namelen) == 0) 354 got_fru = 1; 355 if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0 && 356 fmd_asru_get_namestr(rsrc, &rsrc_name, &rsrc_namelen) == 0) 357 got_rsrc = 1; 358 if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, &label) != 0) 359 label = ""; 360 361 /* 362 * Grab the rwlock as a writer; Then create and insert the asru with 363 * ahp->ah_lock held and hash it in. We'll then drop the rwlock and 364 * proceed to initializing the asru. 365 */ 366 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 367 368 /* 369 * Create and initialise the per-fault "link" structure. 370 */ 371 alp = fmd_zalloc(sizeof (fmd_asru_link_t), FMD_SLEEP); 372 if (got_asru) 373 (void) nvlist_xdup(asru, &alp->al_asru_fmri, &fmd.d_nva); 374 alp->al_uuid = fmd_strdup(al_uuid, FMD_SLEEP); 375 alp->al_uuidlen = strlen(alp->al_uuid); 376 alp->al_refs = 1; 377 378 /* 379 * If this is the first fault for this asru, then create the per-asru 380 * structure and link into the hash. 381 */ 382 name = got_asru ? asru_name : ""; 383 if ((ap = fmd_asru_hash_lookup(ahp, name)) == NULL) { 384 ap = fmd_asru_create(ahp, al_uuid, name, got_asru ? asru : 385 NULL); 386 fmd_asru_hash_insert(ahp, ap); 387 } else 388 nvlist_free(ap->asru_event); 389 (void) nvlist_xdup(nvl, &ap->asru_event, &fmd.d_nva); 390 391 /* 392 * Put the link structure on the list associated with the per-asru 393 * structure. Then put the link structure on the various hashes. 394 */ 395 fmd_list_append(&ap->asru_list, (fmd_list_t *)alp); 396 alp->al_asru = ap; 397 alp->al_asru_name = got_asru ? asru_name : fmd_strdup("", FMD_SLEEP); 398 fmd_asru_asru_hash_insert(ahp, alp, alp->al_asru_name); 399 alp->al_fru_name = got_fru ? fru_name : fmd_strdup("", FMD_SLEEP); 400 fmd_asru_fru_hash_insert(ahp, alp, alp->al_fru_name); 401 alp->al_rsrc_name = got_rsrc ? rsrc_name : fmd_strdup("", FMD_SLEEP); 402 fmd_asru_rsrc_hash_insert(ahp, alp, alp->al_rsrc_name); 403 alp->al_label = fmd_strdup(label, FMD_SLEEP); 404 fmd_asru_label_hash_insert(ahp, alp, label); 405 alp->al_case_uuid = fmd_strdup(cip->ci_uuid, FMD_SLEEP); 406 fmd_asru_case_hash_insert(ahp, alp, cip->ci_uuid); 407 (void) pthread_mutex_lock(&ap->asru_lock); 408 (void) pthread_rwlock_unlock(&ahp->ah_lock); 409 410 ap->asru_case = alp->al_case = cp; 411 if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE, &msg) == 0 && 412 msg == B_FALSE) 413 ap->asru_flags |= FMD_ASRU_INVISIBLE; 414 (void) nvlist_xdup(nvl, &alp->al_event, &fmd.d_nva); 415 ap->asru_flags |= FMD_ASRU_VALID; 416 (void) pthread_cond_broadcast(&ap->asru_cv); 417 (void) pthread_mutex_unlock(&ap->asru_lock); 418 return (alp); 419 } 420 421 static void 422 fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp) 423 { 424 nvlist_t *nvl = FMD_EVENT_NVL(ep); 425 boolean_t faulty = FMD_B_FALSE, unusable = FMD_B_FALSE; 426 int ps; 427 boolean_t repaired = FMD_B_FALSE, replaced = FMD_B_FALSE; 428 boolean_t acquitted = FMD_B_FALSE; 429 nvlist_t *flt, *flt_copy, *asru; 430 char *case_uuid = NULL, *case_code = NULL; 431 fmd_asru_t *ap; 432 fmd_asru_link_t *alp; 433 fmd_case_t *cp; 434 int64_t *diag_time; 435 uint_t nelem; 436 topo_hdl_t *thp; 437 char *class; 438 nvlist_t *rsrc; 439 int err; 440 441 /* 442 * Extract the most recent values of 'faulty' from the event log. 443 */ 444 if (nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY, 445 &faulty) != 0) { 446 fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: " 447 "invalid event log record\n", lp->log_name); 448 ahp->ah_error = EFMD_ASRU_EVENT; 449 return; 450 } 451 if (nvlist_lookup_nvlist(nvl, FM_RSRC_ASRU_EVENT, &flt) != 0) { 452 fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: " 453 "invalid event log record\n", lp->log_name); 454 ahp->ah_error = EFMD_ASRU_EVENT; 455 return; 456 } 457 (void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_UUID, &case_uuid); 458 (void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_CODE, &case_code); 459 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_UNUSABLE, 460 &unusable); 461 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPAIRED, 462 &repaired); 463 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPLACED, 464 &replaced); 465 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_ACQUITTED, 466 &acquitted); 467 468 /* 469 * Attempt to recreate the case in either the CLOSED or REPAIRED state 470 * (depending on whether the faulty bit is still set). 471 * If the case is already present, fmd_case_recreate() will return it. 472 * If not, we'll create a new orphaned case. Either way, we use the 473 * ASRU event to insert a suspect into the partially-restored case. 474 */ 475 fmd_module_lock(fmd.d_rmod); 476 cp = fmd_case_recreate(fmd.d_rmod, NULL, faulty ? FMD_CASE_CLOSED : 477 FMD_CASE_REPAIRED, case_uuid, case_code); 478 fmd_case_hold(cp); 479 fmd_module_unlock(fmd.d_rmod); 480 if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time, 481 &nelem) == 0 && nelem >= 2) 482 fmd_case_settime(cp, diag_time[0], diag_time[1]); 483 else 484 fmd_case_settime(cp, lp->log_stat.st_ctime, 0); 485 (void) nvlist_xdup(flt, &flt_copy, &fmd.d_nva); 486 487 /* 488 * For faults with a resource, re-evaluate the asru from the resource. 489 */ 490 thp = fmd_fmri_topo_hold(TOPO_VERSION); 491 if (nvlist_lookup_string(flt_copy, FM_CLASS, &class) == 0 && 492 strncmp(class, "fault", 5) == 0 && 493 nvlist_lookup_nvlist(flt_copy, FM_FAULT_RESOURCE, &rsrc) == 0 && 494 rsrc != NULL && topo_fmri_asru(thp, rsrc, &asru, &err) == 0) { 495 (void) nvlist_remove(flt_copy, FM_FAULT_ASRU, DATA_TYPE_NVLIST); 496 (void) nvlist_add_nvlist(flt_copy, FM_FAULT_ASRU, asru); 497 nvlist_free(asru); 498 } 499 fmd_fmri_topo_rele(thp); 500 501 (void) nvlist_xdup(flt_copy, &flt, &fmd.d_nva); 502 503 fmd_case_recreate_suspect(cp, flt_copy); 504 505 /* 506 * Now create the resource cache entries. 507 */ 508 alp = fmd_asru_al_create(ahp, flt, cp, fmd_strbasename(lp->log_name)); 509 ap = alp->al_asru; 510 511 /* 512 * Check to see if the resource is still present in the system. 513 */ 514 ps = fmd_asru_replacement_state(flt); 515 if (ps == FMD_OBJ_STATE_REPLACED) { 516 replaced = FMD_B_TRUE; 517 } else if (ps == FMD_OBJ_STATE_STILL_PRESENT || 518 ps == FMD_OBJ_STATE_UNKNOWN) { 519 ap->asru_flags |= FMD_ASRU_PRESENT; 520 if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, 521 &asru) == 0) { 522 int us; 523 524 switch (fmd_fmri_service_state(asru)) { 525 case FMD_SERVICE_STATE_UNUSABLE: 526 unusable = FMD_B_TRUE; 527 break; 528 case FMD_SERVICE_STATE_OK: 529 case FMD_SERVICE_STATE_DEGRADED: 530 unusable = FMD_B_FALSE; 531 break; 532 case FMD_SERVICE_STATE_UNKNOWN: 533 case -1: 534 /* not supported by scheme */ 535 us = fmd_fmri_unusable(asru); 536 if (us > 0) 537 unusable = FMD_B_TRUE; 538 else if (us == 0) 539 unusable = FMD_B_FALSE; 540 break; 541 } 542 } 543 } 544 545 nvlist_free(flt); 546 547 ap->asru_flags |= FMD_ASRU_RECREATED; 548 if (faulty) { 549 alp->al_flags |= FMD_ASRU_FAULTY; 550 ap->asru_flags |= FMD_ASRU_FAULTY; 551 } 552 if (unusable) { 553 alp->al_flags |= FMD_ASRU_UNUSABLE; 554 ap->asru_flags |= FMD_ASRU_UNUSABLE; 555 } 556 if (replaced) 557 alp->al_reason = FMD_ASRU_REPLACED; 558 else if (repaired) 559 alp->al_reason = FMD_ASRU_REPAIRED; 560 else if (acquitted) 561 alp->al_reason = FMD_ASRU_ACQUITTED; 562 563 TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", alp->al_uuid, 564 (void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE])); 565 } 566 567 static void 568 fmd_asru_hash_discard(fmd_asru_hash_t *ahp, const char *uuid, int err) 569 { 570 char src[PATH_MAX], dst[PATH_MAX]; 571 572 (void) snprintf(src, PATH_MAX, "%s/%s", ahp->ah_dirpath, uuid); 573 (void) snprintf(dst, PATH_MAX, "%s/%s-", ahp->ah_dirpath, uuid); 574 575 if (err != 0) 576 err = rename(src, dst); 577 else 578 err = unlink(src); 579 580 if (err != 0 && errno != ENOENT) 581 fmd_error(EFMD_ASRU_EVENT, "failed to rename log %s", src); 582 } 583 584 /* 585 * Open a saved log file and restore it into the ASRU hash. If we can't even 586 * open the log, rename the log file to <uuid>- to indicate it is corrupt. If 587 * fmd_log_replay() fails, we either delete the file (if it has reached the 588 * upper limit on cache age) or rename it for debugging if it was corrupted. 589 */ 590 static void 591 fmd_asru_hash_logopen(fmd_asru_hash_t *ahp, const char *uuid) 592 { 593 fmd_log_t *lp = fmd_log_tryopen(ahp->ah_dirpath, uuid, FMD_LOG_ASRU); 594 uint_t n; 595 596 if (lp == NULL) { 597 fmd_asru_hash_discard(ahp, uuid, errno); 598 return; 599 } 600 601 ahp->ah_error = 0; 602 n = ahp->ah_al_count; 603 604 fmd_log_replay(lp, (fmd_log_f *)fmd_asru_hash_recreate, ahp); 605 fmd_log_rele(lp); 606 607 if (ahp->ah_al_count == n) 608 fmd_asru_hash_discard(ahp, uuid, ahp->ah_error); 609 } 610 611 void 612 fmd_asru_hash_refresh(fmd_asru_hash_t *ahp) 613 { 614 struct dirent *dp; 615 DIR *dirp; 616 int zero; 617 618 if ((dirp = opendir(ahp->ah_dirpath)) == NULL) { 619 fmd_error(EFMD_ASRU_NODIR, 620 "failed to open asru cache directory %s", ahp->ah_dirpath); 621 return; 622 } 623 624 (void) fmd_conf_getprop(fmd.d_conf, "rsrc.zero", &zero); 625 626 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 627 628 while ((dp = readdir(dirp)) != NULL) { 629 if (dp->d_name[0] == '.') 630 continue; /* skip "." and ".." */ 631 632 if (zero) 633 fmd_asru_hash_discard(ahp, dp->d_name, 0); 634 else if (!fmd_strmatch(dp->d_name, "*-")) 635 fmd_asru_hash_logopen(ahp, dp->d_name); 636 } 637 638 (void) pthread_rwlock_unlock(&ahp->ah_lock); 639 (void) closedir(dirp); 640 } 641 642 /* 643 * If the resource is present and faulty but not unusable, replay the fault 644 * event that caused it be marked faulty. This will cause the agent 645 * subscribing to this fault class to again disable the resource. 646 */ 647 /*ARGSUSED*/ 648 static void 649 fmd_asru_hash_replay_asru(fmd_asru_t *ap, void *data) 650 { 651 fmd_event_t *e; 652 nvlist_t *nvl; 653 char *class; 654 655 if (ap->asru_event != NULL && (ap->asru_flags & (FMD_ASRU_STATE | 656 FMD_ASRU_PRESENT)) == (FMD_ASRU_FAULTY | FMD_ASRU_PRESENT)) { 657 658 fmd_dprintf(FMD_DBG_ASRU, 659 "replaying fault event for %s", ap->asru_name); 660 661 (void) nvlist_xdup(ap->asru_event, &nvl, &fmd.d_nva); 662 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 663 664 (void) nvlist_add_string(nvl, FMD_EVN_UUID, 665 ((fmd_case_impl_t *)ap->asru_case)->ci_uuid); 666 667 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 668 fmd_dispq_dispatch(fmd.d_disp, e, class); 669 } 670 } 671 672 void 673 fmd_asru_hash_replay(fmd_asru_hash_t *ahp) 674 { 675 fmd_asru_hash_apply(ahp, fmd_asru_hash_replay_asru, NULL); 676 } 677 678 /* 679 * Check if the resource is still present. If not, and if the rsrc.age time 680 * has expired, then do an implicit repair on the resource. 681 */ 682 /*ARGSUSED*/ 683 static void 684 fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *arg) 685 { 686 struct timeval tv; 687 fmd_log_t *lp; 688 hrtime_t hrt; 689 int ps; 690 int err; 691 692 ps = fmd_asru_replacement_state(alp->al_event); 693 if (ps == FMD_OBJ_STATE_REPLACED) { 694 fmd_asru_replaced(alp, &err); 695 } else if (ps == FMD_OBJ_STATE_NOT_PRESENT) { 696 fmd_time_gettimeofday(&tv); 697 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, 698 FMD_LOG_ASRU); 699 hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime); 700 fmd_log_rele(lp); 701 if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime) 702 fmd_asru_removed(alp); 703 } 704 } 705 706 void 707 fmd_asru_clear_aged_rsrcs() 708 { 709 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, NULL); 710 } 711 712 fmd_asru_hash_t * 713 fmd_asru_hash_create(const char *root, const char *dir) 714 { 715 fmd_asru_hash_t *ahp; 716 char path[PATH_MAX]; 717 718 ahp = fmd_alloc(sizeof (fmd_asru_hash_t), FMD_SLEEP); 719 (void) pthread_rwlock_init(&ahp->ah_lock, NULL); 720 ahp->ah_hashlen = fmd.d_str_buckets; 721 ahp->ah_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, FMD_SLEEP); 722 ahp->ah_asru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 723 FMD_SLEEP); 724 ahp->ah_case_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 725 FMD_SLEEP); 726 ahp->ah_fru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 727 FMD_SLEEP); 728 ahp->ah_label_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 729 FMD_SLEEP); 730 ahp->ah_rsrc_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 731 FMD_SLEEP); 732 (void) snprintf(path, sizeof (path), "%s/%s", root, dir); 733 ahp->ah_dirpath = fmd_strdup(path, FMD_SLEEP); 734 (void) fmd_conf_getprop(fmd.d_conf, "rsrc.age", &ahp->ah_lifetime); 735 (void) fmd_conf_getprop(fmd.d_conf, "fakenotpresent", 736 (uint32_t *)&fmd_asru_fake_not_present); 737 ahp->ah_al_count = 0; 738 ahp->ah_count = 0; 739 ahp->ah_error = 0; 740 ahp->ah_topo = fmd_topo_hold(); 741 742 return (ahp); 743 } 744 745 void 746 fmd_asru_hash_destroy(fmd_asru_hash_t *ahp) 747 { 748 fmd_asru_link_t *alp, *np; 749 uint_t i; 750 751 for (i = 0; i < ahp->ah_hashlen; i++) { 752 for (alp = ahp->ah_case_hash[i]; alp != NULL; alp = np) { 753 np = alp->al_case_next; 754 alp->al_case_next = NULL; 755 fmd_case_rele(alp->al_case); 756 alp->al_case = NULL; 757 fmd_asru_al_hash_release(ahp, alp); 758 } 759 } 760 761 fmd_strfree(ahp->ah_dirpath); 762 fmd_free(ahp->ah_hash, sizeof (void *) * ahp->ah_hashlen); 763 fmd_free(ahp->ah_asru_hash, sizeof (void *) * ahp->ah_hashlen); 764 fmd_free(ahp->ah_case_hash, sizeof (void *) * ahp->ah_hashlen); 765 fmd_free(ahp->ah_fru_hash, sizeof (void *) * ahp->ah_hashlen); 766 fmd_free(ahp->ah_label_hash, sizeof (void *) * ahp->ah_hashlen); 767 fmd_free(ahp->ah_rsrc_hash, sizeof (void *) * ahp->ah_hashlen); 768 fmd_topo_rele(ahp->ah_topo); 769 fmd_free(ahp, sizeof (fmd_asru_hash_t)); 770 } 771 772 /* 773 * Take a snapshot of the ASRU database by placing an additional hold on each 774 * member in an auxiliary array, and then call 'func' for each ASRU. 775 */ 776 void 777 fmd_asru_hash_apply(fmd_asru_hash_t *ahp, 778 void (*func)(fmd_asru_t *, void *), void *arg) 779 { 780 fmd_asru_t *ap, **aps, **app; 781 uint_t apc, i; 782 783 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 784 785 aps = app = fmd_alloc(ahp->ah_count * sizeof (fmd_asru_t *), FMD_SLEEP); 786 apc = ahp->ah_count; 787 788 for (i = 0; i < ahp->ah_hashlen; i++) { 789 for (ap = ahp->ah_hash[i]; ap != NULL; ap = ap->asru_next) 790 *app++ = fmd_asru_hold(ap); 791 } 792 793 ASSERT(app == aps + apc); 794 (void) pthread_rwlock_unlock(&ahp->ah_lock); 795 796 for (i = 0; i < apc; i++) { 797 if (aps[i]->asru_fmri != NULL) 798 func(aps[i], arg); 799 fmd_asru_hash_release(ahp, aps[i]); 800 } 801 802 fmd_free(aps, apc * sizeof (fmd_asru_t *)); 803 } 804 805 void 806 fmd_asru_al_hash_apply(fmd_asru_hash_t *ahp, 807 void (*func)(fmd_asru_link_t *, void *), void *arg) 808 { 809 fmd_asru_link_t *alp, **alps, **alpp; 810 uint_t alpc, i; 811 812 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 813 814 alps = alpp = fmd_alloc(ahp->ah_al_count * sizeof (fmd_asru_link_t *), 815 FMD_SLEEP); 816 alpc = ahp->ah_al_count; 817 818 for (i = 0; i < ahp->ah_hashlen; i++) { 819 for (alp = ahp->ah_case_hash[i]; alp != NULL; 820 alp = alp->al_case_next) 821 *alpp++ = fmd_asru_al_hold(alp); 822 } 823 824 ASSERT(alpp == alps + alpc); 825 (void) pthread_rwlock_unlock(&ahp->ah_lock); 826 827 for (i = 0; i < alpc; i++) { 828 func(alps[i], arg); 829 fmd_asru_al_hash_release(ahp, alps[i]); 830 } 831 832 fmd_free(alps, alpc * sizeof (fmd_asru_link_t *)); 833 } 834 835 static void 836 fmd_asru_do_hash_apply(fmd_asru_hash_t *ahp, char *name, 837 void (*func)(fmd_asru_link_t *, void *), void *arg, 838 fmd_asru_link_t **hash, size_t match_offset, size_t next_offset) 839 { 840 fmd_asru_link_t *alp, **alps, **alpp; 841 uint_t alpc = 0, i; 842 uint_t h; 843 844 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 845 846 h = fmd_asru_strhash(ahp, name); 847 848 for (alp = hash[h]; alp != NULL; alp = 849 /* LINTED pointer alignment */ 850 FMD_ASRU_AL_HASH_NEXT(alp, next_offset)) 851 if (fmd_asru_strcmp(ahp, 852 /* LINTED pointer alignment */ 853 FMD_ASRU_AL_HASH_NAME(alp, match_offset), name)) 854 alpc++; 855 856 alps = alpp = fmd_alloc(alpc * sizeof (fmd_asru_link_t *), FMD_SLEEP); 857 858 for (alp = hash[h]; alp != NULL; alp = 859 /* LINTED pointer alignment */ 860 FMD_ASRU_AL_HASH_NEXT(alp, next_offset)) 861 if (fmd_asru_strcmp(ahp, 862 /* LINTED pointer alignment */ 863 FMD_ASRU_AL_HASH_NAME(alp, match_offset), name)) 864 *alpp++ = fmd_asru_al_hold(alp); 865 866 ASSERT(alpp == alps + alpc); 867 (void) pthread_rwlock_unlock(&ahp->ah_lock); 868 869 for (i = 0; i < alpc; i++) { 870 func(alps[i], arg); 871 fmd_asru_al_hash_release(ahp, alps[i]); 872 } 873 874 fmd_free(alps, alpc * sizeof (fmd_asru_link_t *)); 875 } 876 877 void 878 fmd_asru_hash_apply_by_asru(fmd_asru_hash_t *ahp, char *name, 879 void (*func)(fmd_asru_link_t *, void *), void *arg) 880 { 881 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_asru_hash, 882 offsetof(fmd_asru_link_t, al_asru_name), 883 offsetof(fmd_asru_link_t, al_asru_next)); 884 } 885 886 void 887 fmd_asru_hash_apply_by_case(fmd_asru_hash_t *ahp, fmd_case_t *cp, 888 void (*func)(fmd_asru_link_t *, void *), void *arg) 889 { 890 fmd_asru_do_hash_apply(ahp, ((fmd_case_impl_t *)cp)->ci_uuid, func, arg, 891 ahp->ah_case_hash, offsetof(fmd_asru_link_t, al_case_uuid), 892 offsetof(fmd_asru_link_t, al_case_next)); 893 } 894 895 void 896 fmd_asru_hash_apply_by_fru(fmd_asru_hash_t *ahp, char *name, 897 void (*func)(fmd_asru_link_t *, void *), void *arg) 898 { 899 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_fru_hash, 900 offsetof(fmd_asru_link_t, al_fru_name), 901 offsetof(fmd_asru_link_t, al_fru_next)); 902 } 903 904 void 905 fmd_asru_hash_apply_by_rsrc(fmd_asru_hash_t *ahp, char *name, 906 void (*func)(fmd_asru_link_t *, void *), void *arg) 907 { 908 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_rsrc_hash, 909 offsetof(fmd_asru_link_t, al_rsrc_name), 910 offsetof(fmd_asru_link_t, al_rsrc_next)); 911 } 912 913 void 914 fmd_asru_hash_apply_by_label(fmd_asru_hash_t *ahp, char *name, 915 void (*func)(fmd_asru_link_t *, void *), void *arg) 916 { 917 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_label_hash, 918 offsetof(fmd_asru_link_t, al_label), 919 offsetof(fmd_asru_link_t, al_label_next)); 920 } 921 922 /* 923 * Lookup an asru in the hash by name and place a hold on it. If the asru is 924 * not found, no entry is created and NULL is returned. 925 */ 926 fmd_asru_t * 927 fmd_asru_hash_lookup_name(fmd_asru_hash_t *ahp, const char *name) 928 { 929 fmd_asru_t *ap; 930 931 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 932 ap = fmd_asru_hash_lookup(ahp, name); 933 (void) pthread_rwlock_unlock(&ahp->ah_lock); 934 935 return (ap); 936 } 937 938 /* 939 * Create a resource cache entry using the fault event "nvl" for one of the 940 * suspects from the case "cp". 941 * 942 * The fault event can have the following components : FM_FAULT_ASRU, 943 * FM_FAULT_FRU, FM_FAULT_RESOURCE. These should be set by the Diagnosis Engine 944 * when calling fmd_nvl_create_fault(). In the general case, these are all 945 * optional and an entry will always be added into the cache even if one or all 946 * of these fields is missing. 947 * 948 * However, for hardware faults the recommended practice is that the fault 949 * event should always have the FM_FAULT_RESOURCE field present and that this 950 * should be represented in hc-scheme. 951 * 952 * Currently the DE should also add the FM_FAULT_ASRU and FM_FAULT_FRU fields 953 * where known, though at some future stage fmd might be able to fill these 954 * in automatically from the topology. 955 */ 956 fmd_asru_link_t * 957 fmd_asru_hash_create_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, nvlist_t *nvl) 958 { 959 char *parsed_uuid; 960 uuid_t uuid; 961 int uuidlen; 962 fmd_asru_link_t *alp; 963 964 /* 965 * Generate a UUID for the ASRU. libuuid cleverly gives us no 966 * interface for specifying or learning the buffer size. Sigh. 967 * The spec says 36 bytes but we use a tunable just to be safe. 968 */ 969 (void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &uuidlen); 970 parsed_uuid = fmd_zalloc(uuidlen + 1, FMD_SLEEP); 971 uuid_generate(uuid); 972 uuid_unparse(uuid, parsed_uuid); 973 974 /* 975 * Now create the resource cache entries. 976 */ 977 fmd_case_hold_locked(cp); 978 alp = fmd_asru_al_create(ahp, nvl, cp, parsed_uuid); 979 TRACE((FMD_DBG_ASRU, "asru %s created as %p", 980 alp->al_uuid, (void *)alp->al_asru)); 981 982 fmd_free(parsed_uuid, uuidlen + 1); 983 return (alp); 984 985 } 986 987 /* 988 * Release the reference count on an asru obtained using fmd_asru_hash_lookup. 989 * We take 'ahp' for symmetry and in case we need to use it in future work. 990 */ 991 /*ARGSUSED*/ 992 void 993 fmd_asru_hash_release(fmd_asru_hash_t *ahp, fmd_asru_t *ap) 994 { 995 (void) pthread_mutex_lock(&ap->asru_lock); 996 997 ASSERT(ap->asru_refs != 0); 998 if (--ap->asru_refs == 0) 999 fmd_asru_destroy(ap); 1000 else 1001 (void) pthread_mutex_unlock(&ap->asru_lock); 1002 } 1003 1004 static void 1005 fmd_asru_do_delete_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, 1006 fmd_asru_link_t **hash, size_t next_offset, char *name) 1007 { 1008 uint_t h; 1009 fmd_asru_link_t *alp, **pp, *alpnext, **alpnextp; 1010 1011 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 1012 h = fmd_asru_strhash(ahp, name); 1013 pp = &hash[h]; 1014 for (alp = *pp; alp != NULL; alp = alpnext) { 1015 /* LINTED pointer alignment */ 1016 alpnextp = FMD_ASRU_AL_HASH_NEXTP(alp, next_offset); 1017 alpnext = *alpnextp; 1018 if (alp->al_case == cp) { 1019 *pp = *alpnextp; 1020 *alpnextp = NULL; 1021 } else 1022 pp = alpnextp; 1023 } 1024 (void) pthread_rwlock_unlock(&ahp->ah_lock); 1025 } 1026 1027 static void 1028 fmd_asru_do_hash_delete(fmd_asru_hash_t *ahp, fmd_case_susp_t *cis, 1029 fmd_case_t *cp, fmd_asru_link_t **hash, size_t next_offset, char *nvname) 1030 { 1031 nvlist_t *nvl; 1032 char *name = NULL; 1033 ssize_t namelen; 1034 1035 if (nvlist_lookup_nvlist(cis->cis_nvl, nvname, &nvl) == 0 && 1036 (namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) != -1 && 1037 (name = fmd_alloc(namelen + 1, FMD_SLEEP)) != NULL) { 1038 if (fmd_fmri_nvl2str(nvl, name, namelen + 1) != -1) 1039 fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, 1040 name); 1041 fmd_free(name, namelen + 1); 1042 } else 1043 fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, ""); 1044 } 1045 1046 void 1047 fmd_asru_hash_delete_case(fmd_asru_hash_t *ahp, fmd_case_t *cp) 1048 { 1049 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1050 fmd_case_susp_t *cis; 1051 fmd_asru_link_t *alp, **plp, *alpnext; 1052 fmd_asru_t *ap; 1053 char path[PATH_MAX]; 1054 char *label; 1055 uint_t h; 1056 1057 /* 1058 * first delete hash entries for each suspect 1059 */ 1060 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) { 1061 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_fru_hash, 1062 offsetof(fmd_asru_link_t, al_fru_next), FM_FAULT_FRU); 1063 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_rsrc_hash, 1064 offsetof(fmd_asru_link_t, al_rsrc_next), FM_FAULT_RESOURCE); 1065 if (nvlist_lookup_string(cis->cis_nvl, FM_FAULT_LOCATION, 1066 &label) != 0) 1067 label = ""; 1068 fmd_asru_do_delete_entry(ahp, cp, ahp->ah_label_hash, 1069 offsetof(fmd_asru_link_t, al_label_next), label); 1070 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_asru_hash, 1071 offsetof(fmd_asru_link_t, al_asru_next), FM_FAULT_ASRU); 1072 } 1073 1074 /* 1075 * then delete associated case hash entries 1076 */ 1077 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 1078 h = fmd_asru_strhash(ahp, cip->ci_uuid); 1079 plp = &ahp->ah_case_hash[h]; 1080 for (alp = *plp; alp != NULL; alp = alpnext) { 1081 alpnext = alp->al_case_next; 1082 if (alp->al_case == cp) { 1083 *plp = alp->al_case_next; 1084 alp->al_case_next = NULL; 1085 ASSERT(ahp->ah_al_count != 0); 1086 ahp->ah_al_count--; 1087 1088 /* 1089 * decrement case ref. 1090 */ 1091 fmd_case_rele_locked(cp); 1092 alp->al_case = NULL; 1093 1094 /* 1095 * If we found a matching ASRU, unlink its log file and 1096 * then release the hash entry. Note that it may still 1097 * be referenced if another thread is manipulating it; 1098 * this is ok because once we unlink, the log file will 1099 * not be restored, and the log data will be freed when 1100 * all of the referencing threads release their 1101 * respective references. 1102 */ 1103 (void) snprintf(path, sizeof (path), "%s/%s", 1104 ahp->ah_dirpath, alp->al_uuid); 1105 if (unlink(path) != 0) 1106 fmd_error(EFMD_ASRU_UNLINK, 1107 "failed to unlink asru %s", path); 1108 1109 /* 1110 * Now unlink from the global per-resource cache 1111 * and if this is the last link then remove that from 1112 * it's own hash too. 1113 */ 1114 ap = alp->al_asru; 1115 (void) pthread_mutex_lock(&ap->asru_lock); 1116 fmd_list_delete(&ap->asru_list, alp); 1117 if (ap->asru_list.l_next == NULL) { 1118 uint_t h; 1119 fmd_asru_t *ap2, **pp; 1120 fmd_asru_t *apnext, **apnextp; 1121 1122 ASSERT(ahp->ah_count != 0); 1123 ahp->ah_count--; 1124 h = fmd_asru_strhash(ahp, ap->asru_name); 1125 pp = &ahp->ah_hash[h]; 1126 for (ap2 = *pp; ap2 != NULL; ap2 = apnext) { 1127 apnextp = &ap2->asru_next; 1128 apnext = *apnextp; 1129 if (ap2 == ap) { 1130 *pp = *apnextp; 1131 *apnextp = NULL; 1132 } else 1133 pp = apnextp; 1134 } 1135 } 1136 (void) pthread_mutex_unlock(&ap->asru_lock); 1137 fmd_asru_al_hash_release(ahp, alp); 1138 } else 1139 plp = &alp->al_case_next; 1140 } 1141 (void) pthread_rwlock_unlock(&ahp->ah_lock); 1142 } 1143 1144 static void 1145 fmd_asru_repair_containee(fmd_asru_link_t *alp, void *er) 1146 { 1147 if (er && alp->al_asru_fmri && fmd_fmri_contains(er, 1148 alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1149 FMD_ASRU_REPAIRED)) 1150 fmd_case_update(alp->al_case); 1151 } 1152 1153 void 1154 fmd_asru_repaired(fmd_asru_link_t *alp, void *er) 1155 { 1156 int flags; 1157 int rval; 1158 1159 /* 1160 * repair this asru cache entry 1161 */ 1162 rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_REPAIRED); 1163 1164 /* 1165 * now check if all entries associated with this asru are repaired and 1166 * if so repair containees 1167 */ 1168 (void) pthread_mutex_lock(&alp->al_asru->asru_lock); 1169 flags = alp->al_asru->asru_flags; 1170 (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); 1171 if (!(flags & FMD_ASRU_FAULTY)) 1172 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_containee, 1173 alp->al_asru_fmri); 1174 1175 /* 1176 * if called from fmd_adm_repair() and we really did clear the bit then 1177 * we need to do a case update to see if the associated case can be 1178 * repaired. No need to do this if called from fmd_case_repair() (ie 1179 * when er is NULL) as the case will be explicitly repaired anyway. 1180 */ 1181 if (er) { 1182 *(int *)er = 0; 1183 if (rval) 1184 fmd_case_update(alp->al_case); 1185 } 1186 } 1187 1188 static void 1189 fmd_asru_acquit_containee(fmd_asru_link_t *alp, void *er) 1190 { 1191 if (er && alp->al_asru_fmri && fmd_fmri_contains(er, 1192 alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1193 FMD_ASRU_ACQUITTED)) 1194 fmd_case_update(alp->al_case); 1195 } 1196 1197 void 1198 fmd_asru_acquit(fmd_asru_link_t *alp, void *er) 1199 { 1200 int flags; 1201 int rval; 1202 1203 /* 1204 * acquit this asru cache entry 1205 */ 1206 rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_ACQUITTED); 1207 1208 /* 1209 * now check if all entries associated with this asru are acquitted and 1210 * if so acquit containees 1211 */ 1212 (void) pthread_mutex_lock(&alp->al_asru->asru_lock); 1213 flags = alp->al_asru->asru_flags; 1214 (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); 1215 if (!(flags & FMD_ASRU_FAULTY)) 1216 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_acquit_containee, 1217 alp->al_asru_fmri); 1218 1219 /* 1220 * if called from fmd_adm_acquit() and we really did clear the bit then 1221 * we need to do a case update to see if the associated case can be 1222 * repaired. No need to do this if called from fmd_case_acquit() (ie 1223 * when er is NULL) as the case will be explicitly repaired anyway. 1224 */ 1225 if (er) { 1226 *(int *)er = 0; 1227 if (rval) 1228 fmd_case_update(alp->al_case); 1229 } 1230 } 1231 1232 static void 1233 fmd_asru_replaced_containee(fmd_asru_link_t *alp, void *er) 1234 { 1235 if (er && alp->al_asru_fmri && fmd_fmri_contains(er, 1236 alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1237 FMD_ASRU_REPLACED)) 1238 fmd_case_update(alp->al_case); 1239 } 1240 1241 void 1242 fmd_asru_replaced(fmd_asru_link_t *alp, void *er) 1243 { 1244 int flags; 1245 int rval; 1246 int ps; 1247 1248 ps = fmd_asru_replacement_state(alp->al_event); 1249 if (ps == FMD_OBJ_STATE_STILL_PRESENT) 1250 return; 1251 1252 /* 1253 * mark this cache entry as replaced 1254 */ 1255 rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_REPLACED); 1256 1257 /* 1258 * now check if all entries associated with this asru are replaced and 1259 * if so replace containees 1260 */ 1261 (void) pthread_mutex_lock(&alp->al_asru->asru_lock); 1262 flags = alp->al_asru->asru_flags; 1263 (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); 1264 if (!(flags & FMD_ASRU_FAULTY)) 1265 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_replaced_containee, 1266 alp->al_asru_fmri); 1267 1268 *(int *)er = 0; 1269 if (rval) 1270 fmd_case_update(alp->al_case); 1271 } 1272 1273 static void 1274 fmd_asru_removed_containee(fmd_asru_link_t *alp, void *er) 1275 { 1276 if (er && alp->al_asru_fmri && fmd_fmri_contains(er, 1277 alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1278 0)) 1279 fmd_case_update(alp->al_case); 1280 } 1281 1282 void 1283 fmd_asru_removed(fmd_asru_link_t *alp) 1284 { 1285 int flags; 1286 int rval; 1287 1288 /* 1289 * mark this cache entry as replacded 1290 */ 1291 rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 0); 1292 1293 /* 1294 * now check if all entries associated with this asru are removed and 1295 * if so replace containees 1296 */ 1297 (void) pthread_mutex_lock(&alp->al_asru->asru_lock); 1298 flags = alp->al_asru->asru_flags; 1299 (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); 1300 if (!(flags & FMD_ASRU_FAULTY)) 1301 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_removed_containee, 1302 alp->al_asru_fmri); 1303 if (rval) 1304 fmd_case_update(alp->al_case); 1305 } 1306 1307 static void 1308 fmd_asru_logevent(fmd_asru_link_t *alp) 1309 { 1310 fmd_asru_t *ap = alp->al_asru; 1311 boolean_t faulty = (alp->al_flags & FMD_ASRU_FAULTY) != 0; 1312 boolean_t unusable = (alp->al_flags & FMD_ASRU_UNUSABLE) != 0; 1313 boolean_t message = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0; 1314 boolean_t repaired = (alp->al_reason == FMD_ASRU_REPAIRED); 1315 boolean_t replaced = (alp->al_reason == FMD_ASRU_REPLACED); 1316 boolean_t acquitted = (alp->al_reason == FMD_ASRU_ACQUITTED); 1317 1318 fmd_case_impl_t *cip; 1319 fmd_event_t *e; 1320 fmd_log_t *lp; 1321 nvlist_t *nvl; 1322 char *class; 1323 1324 ASSERT(MUTEX_HELD(&ap->asru_lock)); 1325 cip = (fmd_case_impl_t *)alp->al_case; 1326 ASSERT(cip != NULL); 1327 1328 if ((lp = alp->al_log) == NULL) 1329 lp = fmd_log_open(ap->asru_root, alp->al_uuid, FMD_LOG_ASRU); 1330 1331 if (lp == NULL) 1332 return; /* can't log events if we can't open the log */ 1333 1334 nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[faulty | (unusable << 1)], 1335 alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, faulty, unusable, 1336 message, alp->al_event, &cip->ci_tv, repaired, replaced, acquitted); 1337 1338 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 1339 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 1340 1341 fmd_event_hold(e); 1342 fmd_log_append(lp, e, NULL); 1343 fmd_event_rele(e); 1344 1345 /* 1346 * For now, we close the log file after every update to conserve file 1347 * descriptors and daemon overhead. If this becomes a performance 1348 * issue this code can change to keep a fixed-size LRU cache of logs. 1349 */ 1350 fmd_log_rele(lp); 1351 alp->al_log = NULL; 1352 } 1353 1354 int 1355 fmd_asru_setflags(fmd_asru_link_t *alp, uint_t sflag) 1356 { 1357 fmd_asru_t *ap = alp->al_asru; 1358 uint_t nstate, ostate; 1359 1360 ASSERT(!(sflag & ~FMD_ASRU_STATE)); 1361 ASSERT(sflag != FMD_ASRU_STATE); 1362 1363 (void) pthread_mutex_lock(&ap->asru_lock); 1364 1365 ostate = alp->al_flags & FMD_ASRU_STATE; 1366 alp->al_flags |= sflag; 1367 nstate = alp->al_flags & FMD_ASRU_STATE; 1368 1369 if (nstate == ostate) { 1370 (void) pthread_mutex_unlock(&ap->asru_lock); 1371 return (0); 1372 } 1373 1374 ap->asru_flags |= sflag; 1375 TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid, 1376 _fmd_asru_snames[ostate], _fmd_asru_snames[nstate])); 1377 1378 fmd_asru_logevent(alp); 1379 1380 (void) pthread_cond_broadcast(&ap->asru_cv); 1381 (void) pthread_mutex_unlock(&ap->asru_lock); 1382 return (1); 1383 } 1384 1385 int 1386 fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag, uint8_t reason) 1387 { 1388 fmd_asru_t *ap = alp->al_asru; 1389 fmd_asru_link_t *nalp; 1390 uint_t nstate, ostate, flags = 0; 1391 1392 ASSERT(!(sflag & ~FMD_ASRU_STATE)); 1393 ASSERT(sflag != FMD_ASRU_STATE); 1394 1395 (void) pthread_mutex_lock(&ap->asru_lock); 1396 1397 ostate = alp->al_flags & FMD_ASRU_STATE; 1398 alp->al_flags &= ~sflag; 1399 nstate = alp->al_flags & FMD_ASRU_STATE; 1400 1401 if (nstate == ostate) { 1402 if (reason > alp->al_reason) { 1403 alp->al_reason = reason; 1404 fmd_asru_logevent(alp); 1405 (void) pthread_cond_broadcast(&ap->asru_cv); 1406 } 1407 (void) pthread_mutex_unlock(&ap->asru_lock); 1408 return (0); 1409 } 1410 if (reason > alp->al_reason) 1411 alp->al_reason = reason; 1412 1413 if (sflag == FMD_ASRU_UNUSABLE) 1414 ap->asru_flags &= ~sflag; 1415 else if (sflag == FMD_ASRU_FAULTY) { 1416 /* 1417 * only clear the faulty bit if all links are clear 1418 */ 1419 for (nalp = fmd_list_next(&ap->asru_list); nalp != NULL; 1420 nalp = fmd_list_next(nalp)) 1421 flags |= nalp->al_flags; 1422 if (!(flags & FMD_ASRU_FAULTY)) 1423 ap->asru_flags &= ~sflag; 1424 } 1425 1426 TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid, 1427 _fmd_asru_snames[ostate], _fmd_asru_snames[nstate])); 1428 1429 fmd_asru_logevent(alp); 1430 1431 (void) pthread_cond_broadcast(&ap->asru_cv); 1432 (void) pthread_mutex_unlock(&ap->asru_lock); 1433 1434 return (1); 1435 } 1436 1437 /* 1438 * Report the current known state of the link entry (ie this particular fault 1439 * affecting this particular ASRU). 1440 */ 1441 int 1442 fmd_asru_al_getstate(fmd_asru_link_t *alp) 1443 { 1444 int us, st; 1445 nvlist_t *asru; 1446 int ps; 1447 1448 ps = fmd_asru_replacement_state(alp->al_event); 1449 if (ps == FMD_OBJ_STATE_NOT_PRESENT) 1450 return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE); 1451 if (ps == FMD_OBJ_STATE_REPLACED) { 1452 if (alp->al_reason < FMD_ASRU_REPLACED) 1453 alp->al_reason = FMD_ASRU_REPLACED; 1454 return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE); 1455 } 1456 1457 st = (alp->al_flags & FMD_ASRU_STATE) | FMD_ASRU_PRESENT; 1458 if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0) { 1459 us = fmd_fmri_service_state(asru); 1460 if (us == -1 || us == FMD_SERVICE_STATE_UNKNOWN) { 1461 /* not supported by scheme - try fmd_fmri_unusable */ 1462 us = fmd_fmri_unusable(asru); 1463 } else if (us == FMD_SERVICE_STATE_UNUSABLE) { 1464 st |= FMD_ASRU_UNUSABLE; 1465 return (st); 1466 } else if (us == FMD_SERVICE_STATE_OK) { 1467 st &= ~FMD_ASRU_UNUSABLE; 1468 return (st); 1469 } else if (us == FMD_SERVICE_STATE_DEGRADED) { 1470 st &= ~FMD_ASRU_UNUSABLE; 1471 st |= FMD_ASRU_DEGRADED; 1472 return (st); 1473 } 1474 } else 1475 us = (alp->al_flags & FMD_ASRU_UNUSABLE); 1476 if (us > 0) 1477 st |= FMD_ASRU_UNUSABLE; 1478 else if (us == 0) 1479 st &= ~FMD_ASRU_UNUSABLE; 1480 return (st); 1481 } 1482 1483 /* 1484 * Report the current known state of the ASRU by refreshing its unusable status 1485 * based upon the routines provided by the scheme module. If the unusable bit 1486 * is different, we do *not* generate a state change here because that change 1487 * may be unrelated to fmd activities and therefore we have no case or event. 1488 * The absence of the transition is harmless as this function is only provided 1489 * for RPC observability and fmd's clients are only concerned with ASRU_FAULTY. 1490 */ 1491 int 1492 fmd_asru_getstate(fmd_asru_t *ap) 1493 { 1494 int us, st; 1495 1496 if (!(ap->asru_flags & FMD_ASRU_INTERNAL) && 1497 (fmd_asru_fake_not_present >= FMD_OBJ_STATE_REPLACED || 1498 fmd_fmri_present(ap->asru_fmri) <= 0)) 1499 return (0); /* do not report non-fmd non-present resources */ 1500 1501 us = fmd_fmri_unusable(ap->asru_fmri); 1502 st = ap->asru_flags & FMD_ASRU_STATE; 1503 1504 if (us > 0) 1505 st |= FMD_ASRU_UNUSABLE; 1506 else if (us == 0) 1507 st &= ~FMD_ASRU_UNUSABLE; 1508 1509 return (st); 1510 } 1511