1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/fm/protocol.h> 30 #include <uuid/uuid.h> 31 32 #include <dirent.h> 33 #include <limits.h> 34 #include <unistd.h> 35 #include <alloca.h> 36 #include <stddef.h> 37 #include <fm/libtopo.h> 38 39 #include <fmd_alloc.h> 40 #include <fmd_string.h> 41 #include <fmd_error.h> 42 #include <fmd_subr.h> 43 #include <fmd_protocol.h> 44 #include <fmd_event.h> 45 #include <fmd_conf.h> 46 #include <fmd_fmri.h> 47 #include <fmd_dispq.h> 48 #include <fmd_case.h> 49 #include <fmd_module.h> 50 #include <fmd_asru.h> 51 52 #include <fmd.h> 53 54 static const char *const _fmd_asru_events[] = { 55 FMD_RSRC_CLASS "asru.ok", /* UNUSABLE=0 FAULTED=0 */ 56 FMD_RSRC_CLASS "asru.degraded", /* UNUSABLE=0 FAULTED=1 */ 57 FMD_RSRC_CLASS "asru.unknown", /* UNUSABLE=1 FAULTED=0 */ 58 FMD_RSRC_CLASS "asru.faulted" /* UNUSABLE=1 FAULTED=1 */ 59 }; 60 61 static const char *const _fmd_asru_snames[] = { 62 "uf", "uF", "Uf", "UF" /* same order as above */ 63 }; 64 65 volatile uint32_t 
fmd_asru_fake_not_present = 0; 66 67 static uint_t 68 fmd_asru_strhash(fmd_asru_hash_t *ahp, const char *val) 69 { 70 return (topo_fmri_strhash(ahp->ah_topo->ft_hdl, val) % ahp->ah_hashlen); 71 } 72 73 static boolean_t 74 fmd_asru_strcmp(fmd_asru_hash_t *ahp, const char *a, const char *b) 75 { 76 return (topo_fmri_strcmp(ahp->ah_topo->ft_hdl, a, b)); 77 } 78 79 static fmd_asru_t * 80 fmd_asru_create(fmd_asru_hash_t *ahp, const char *uuid, 81 const char *name, nvlist_t *fmri) 82 { 83 fmd_asru_t *ap = fmd_zalloc(sizeof (fmd_asru_t), FMD_SLEEP); 84 char *s; 85 86 (void) pthread_mutex_init(&ap->asru_lock, NULL); 87 (void) pthread_cond_init(&ap->asru_cv, NULL); 88 89 ap->asru_name = fmd_strdup(name, FMD_SLEEP); 90 if (fmri) 91 (void) nvlist_xdup(fmri, &ap->asru_fmri, &fmd.d_nva); 92 ap->asru_root = fmd_strdup(ahp->ah_dirpath, FMD_SLEEP); 93 ap->asru_uuid = fmd_strdup(uuid, FMD_SLEEP); 94 ap->asru_uuidlen = ap->asru_uuid ? strlen(ap->asru_uuid) : 0; 95 ap->asru_refs = 1; 96 97 if (fmri && nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &s) == 0 && 98 strcmp(s, FM_FMRI_SCHEME_FMD) == 0) 99 ap->asru_flags |= FMD_ASRU_INTERNAL; 100 101 return (ap); 102 } 103 104 static void 105 fmd_asru_destroy(fmd_asru_t *ap) 106 { 107 ASSERT(MUTEX_HELD(&ap->asru_lock)); 108 ASSERT(ap->asru_refs == 0); 109 110 nvlist_free(ap->asru_event); 111 fmd_strfree(ap->asru_name); 112 nvlist_free(ap->asru_fmri); 113 fmd_strfree(ap->asru_root); 114 fmd_free(ap->asru_uuid, ap->asru_uuidlen + 1); 115 fmd_free(ap, sizeof (fmd_asru_t)); 116 } 117 118 static void 119 fmd_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_t *ap) 120 { 121 uint_t h = fmd_asru_strhash(ahp, ap->asru_name); 122 123 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 124 ap->asru_next = ahp->ah_hash[h]; 125 ahp->ah_hash[h] = ap; 126 ahp->ah_count++; 127 } 128 129 static fmd_asru_t * 130 fmd_asru_hold(fmd_asru_t *ap) 131 { 132 (void) pthread_mutex_lock(&ap->asru_lock); 133 ap->asru_refs++; 134 ASSERT(ap->asru_refs != 0); 135 (void) 
pthread_mutex_unlock(&ap->asru_lock); 136 return (ap); 137 } 138 139 /* 140 * Lookup an asru in the hash by name and place a hold on it. If the asru is 141 * not found, no entry is created and NULL is returned. This internal function 142 * is for callers who have the ah_lock held and is used by lookup_name below. 143 */ 144 fmd_asru_t * 145 fmd_asru_hash_lookup(fmd_asru_hash_t *ahp, const char *name) 146 { 147 fmd_asru_t *ap; 148 uint_t h; 149 150 ASSERT(RW_LOCK_HELD(&ahp->ah_lock)); 151 h = fmd_asru_strhash(ahp, name); 152 153 for (ap = ahp->ah_hash[h]; ap != NULL; ap = ap->asru_next) { 154 if (fmd_asru_strcmp(ahp, ap->asru_name, name)) 155 break; 156 } 157 158 if (ap != NULL) 159 (void) fmd_asru_hold(ap); 160 else 161 (void) fmd_set_errno(EFMD_ASRU_NOENT); 162 163 return (ap); 164 } 165 166 static int 167 fmd_asru_replacement_state(nvlist_t *event) 168 { 169 int ps = -1; 170 nvlist_t *asru, *fru, *rsrc; 171 172 /* 173 * Check if there is evidence that this object is no longer present. 174 * In general fmd_fmri_present() should be supported on resources and/or 175 * frus, as those are the things that are physically present or not 176 * present - an asru can be spread over a number of frus some of which 177 * are present and some not, so fmd_fmri_present() is not generally 178 * meaningful. However retain a check for asru first for compatibility. 179 * If we have checked all three and we still get -1 then nothing knows 180 * whether it's present or not, so err on the safe side and treat it 181 * as still present. 
182 */ 183 if (fmd_asru_fake_not_present) 184 return (fmd_asru_fake_not_present); 185 if (nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0) 186 ps = fmd_fmri_replaced(asru); 187 if (ps == -1) { 188 if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, &rsrc) == 0) 189 ps = fmd_fmri_replaced(rsrc); 190 } else if (ps == FMD_OBJ_STATE_UNKNOWN) { 191 /* see if we can improve on UNKNOWN */ 192 if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, 193 &rsrc) == 0) { 194 int ps2 = fmd_fmri_replaced(rsrc); 195 if (ps2 == FMD_OBJ_STATE_STILL_PRESENT || 196 ps2 == FMD_OBJ_STATE_REPLACED) 197 ps = ps2; 198 } 199 } 200 if (ps == -1) { 201 if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0) 202 ps = fmd_fmri_replaced(fru); 203 } else if (ps == FMD_OBJ_STATE_UNKNOWN) { 204 /* see if we can improve on UNKNOWN */ 205 if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0) { 206 int ps2 = fmd_fmri_replaced(fru); 207 if (ps2 == FMD_OBJ_STATE_STILL_PRESENT || 208 ps2 == FMD_OBJ_STATE_REPLACED) 209 ps = ps2; 210 } 211 } 212 if (ps == -1) 213 ps = FMD_OBJ_STATE_UNKNOWN; 214 return (ps); 215 } 216 217 static void 218 fmd_asru_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 219 char *name) 220 { 221 uint_t h = fmd_asru_strhash(ahp, name); 222 223 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 224 alp->al_asru_next = ahp->ah_asru_hash[h]; 225 ahp->ah_asru_hash[h] = alp; 226 ahp->ah_al_count++; 227 } 228 229 static void 230 fmd_asru_case_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 231 char *name) 232 { 233 uint_t h = fmd_asru_strhash(ahp, name); 234 235 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 236 alp->al_case_next = ahp->ah_case_hash[h]; 237 ahp->ah_case_hash[h] = alp; 238 } 239 240 static void 241 fmd_asru_fru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, char *name) 242 { 243 uint_t h = fmd_asru_strhash(ahp, name); 244 245 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 246 alp->al_fru_next = ahp->ah_fru_hash[h]; 247 ahp->ah_fru_hash[h] = alp; 248 } 249 250 static 
void 251 fmd_asru_label_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 252 char *name) 253 { 254 uint_t h = fmd_asru_strhash(ahp, name); 255 256 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 257 alp->al_label_next = ahp->ah_label_hash[h]; 258 ahp->ah_label_hash[h] = alp; 259 } 260 261 static void 262 fmd_asru_rsrc_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 263 char *name) 264 { 265 uint_t h = fmd_asru_strhash(ahp, name); 266 267 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 268 alp->al_rsrc_next = ahp->ah_rsrc_hash[h]; 269 ahp->ah_rsrc_hash[h] = alp; 270 } 271 272 static void 273 fmd_asru_al_destroy(fmd_asru_link_t *alp) 274 { 275 ASSERT(alp->al_refs == 0); 276 ASSERT(MUTEX_HELD(&alp->al_asru->asru_lock)); 277 278 if (alp->al_log != NULL) 279 fmd_log_rele(alp->al_log); 280 281 fmd_free(alp->al_uuid, alp->al_uuidlen + 1); 282 nvlist_free(alp->al_event); 283 fmd_strfree(alp->al_rsrc_name); 284 fmd_strfree(alp->al_case_uuid); 285 fmd_strfree(alp->al_fru_name); 286 fmd_strfree(alp->al_asru_name); 287 fmd_strfree(alp->al_label); 288 nvlist_free(alp->al_asru_fmri); 289 fmd_free(alp, sizeof (fmd_asru_link_t)); 290 } 291 292 static fmd_asru_link_t * 293 fmd_asru_al_hold(fmd_asru_link_t *alp) 294 { 295 fmd_asru_t *ap = alp->al_asru; 296 297 (void) pthread_mutex_lock(&ap->asru_lock); 298 ap->asru_refs++; 299 alp->al_refs++; 300 ASSERT(alp->al_refs != 0); 301 (void) pthread_mutex_unlock(&ap->asru_lock); 302 return (alp); 303 } 304 305 static void fmd_asru_destroy(fmd_asru_t *ap); 306 307 /*ARGSUSED*/ 308 static void 309 fmd_asru_al_hash_release(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp) 310 { 311 fmd_asru_t *ap = alp->al_asru; 312 313 (void) pthread_mutex_lock(&ap->asru_lock); 314 ASSERT(alp->al_refs != 0); 315 if (--alp->al_refs == 0) 316 fmd_asru_al_destroy(alp); 317 ASSERT(ap->asru_refs != 0); 318 if (--ap->asru_refs == 0) 319 fmd_asru_destroy(ap); 320 else 321 (void) pthread_mutex_unlock(&ap->asru_lock); 322 } 323 324 static int 325 fmd_asru_get_namestr(nvlist_t 
*nvl, char **name, ssize_t *namelen) 326 { 327 if ((*namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) == -1) 328 return (EFMD_ASRU_FMRI); 329 *name = fmd_alloc(*namelen + 1, FMD_SLEEP); 330 if (fmd_fmri_nvl2str(nvl, *name, *namelen + 1) == -1) { 331 if (*name != NULL) 332 fmd_free(*name, *namelen + 1); 333 return (EFMD_ASRU_FMRI); 334 } 335 return (0); 336 } 337 338 static fmd_asru_link_t * 339 fmd_asru_al_create(fmd_asru_hash_t *ahp, nvlist_t *nvl, fmd_case_t *cp, 340 const char *al_uuid) 341 { 342 nvlist_t *asru = NULL, *fru, *rsrc; 343 int got_rsrc = 0, got_asru = 0, got_fru = 0; 344 ssize_t fru_namelen, rsrc_namelen, asru_namelen; 345 char *asru_name, *rsrc_name, *fru_name, *name, *label; 346 fmd_asru_link_t *alp; 347 fmd_asru_t *ap; 348 boolean_t msg; 349 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 350 351 if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &asru) == 0 && 352 fmd_asru_get_namestr(asru, &asru_name, &asru_namelen) == 0) 353 got_asru = 1; 354 if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &fru) == 0 && 355 fmd_asru_get_namestr(fru, &fru_name, &fru_namelen) == 0) 356 got_fru = 1; 357 if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0 && 358 fmd_asru_get_namestr(rsrc, &rsrc_name, &rsrc_namelen) == 0) 359 got_rsrc = 1; 360 if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, &label) != 0) 361 label = ""; 362 363 /* 364 * Grab the rwlock as a writer; Then create and insert the asru with 365 * ahp->ah_lock held and hash it in. We'll then drop the rwlock and 366 * proceed to initializing the asru. 367 */ 368 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 369 370 /* 371 * Create and initialise the per-fault "link" structure. 
372 */ 373 alp = fmd_zalloc(sizeof (fmd_asru_link_t), FMD_SLEEP); 374 if (got_asru) 375 (void) nvlist_xdup(asru, &alp->al_asru_fmri, &fmd.d_nva); 376 alp->al_uuid = fmd_strdup(al_uuid, FMD_SLEEP); 377 alp->al_uuidlen = strlen(alp->al_uuid); 378 alp->al_refs = 1; 379 380 /* 381 * If this is the first fault for this asru, then create the per-asru 382 * structure and link into the hash. 383 */ 384 name = got_asru ? asru_name : ""; 385 if ((ap = fmd_asru_hash_lookup(ahp, name)) == NULL) { 386 ap = fmd_asru_create(ahp, al_uuid, name, got_asru ? asru : 387 NULL); 388 fmd_asru_hash_insert(ahp, ap); 389 } else 390 nvlist_free(ap->asru_event); 391 (void) nvlist_xdup(nvl, &ap->asru_event, &fmd.d_nva); 392 393 /* 394 * Put the link structure on the list associated with the per-asru 395 * structure. Then put the link structure on the various hashes. 396 */ 397 fmd_list_append(&ap->asru_list, (fmd_list_t *)alp); 398 alp->al_asru = ap; 399 alp->al_asru_name = got_asru ? asru_name : fmd_strdup("", FMD_SLEEP); 400 fmd_asru_asru_hash_insert(ahp, alp, alp->al_asru_name); 401 alp->al_fru_name = got_fru ? fru_name : fmd_strdup("", FMD_SLEEP); 402 fmd_asru_fru_hash_insert(ahp, alp, alp->al_fru_name); 403 alp->al_rsrc_name = got_rsrc ? 
rsrc_name : fmd_strdup("", FMD_SLEEP); 404 fmd_asru_rsrc_hash_insert(ahp, alp, alp->al_rsrc_name); 405 alp->al_label = fmd_strdup(label, FMD_SLEEP); 406 fmd_asru_label_hash_insert(ahp, alp, label); 407 alp->al_case_uuid = fmd_strdup(cip->ci_uuid, FMD_SLEEP); 408 fmd_asru_case_hash_insert(ahp, alp, cip->ci_uuid); 409 (void) pthread_mutex_lock(&ap->asru_lock); 410 (void) pthread_rwlock_unlock(&ahp->ah_lock); 411 412 ap->asru_case = alp->al_case = cp; 413 if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE, &msg) == 0 && 414 msg == B_FALSE) 415 ap->asru_flags |= FMD_ASRU_INVISIBLE; 416 (void) nvlist_xdup(nvl, &alp->al_event, &fmd.d_nva); 417 ap->asru_flags |= FMD_ASRU_VALID; 418 (void) pthread_cond_broadcast(&ap->asru_cv); 419 (void) pthread_mutex_unlock(&ap->asru_lock); 420 return (alp); 421 } 422 423 static void 424 fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp) 425 { 426 nvlist_t *nvl = FMD_EVENT_NVL(ep); 427 boolean_t faulty = FMD_B_FALSE, unusable = FMD_B_FALSE; 428 int ps; 429 boolean_t repaired = FMD_B_FALSE, replaced = FMD_B_FALSE; 430 boolean_t acquitted = FMD_B_FALSE; 431 nvlist_t *flt, *flt_copy, *asru; 432 char *case_uuid = NULL, *case_code = NULL; 433 fmd_asru_t *ap; 434 fmd_asru_link_t *alp; 435 fmd_case_t *cp; 436 int64_t *diag_time; 437 uint_t nelem; 438 topo_hdl_t *thp; 439 char *class; 440 nvlist_t *rsrc; 441 int err; 442 443 /* 444 * Extract the most recent values of 'faulty' from the event log. 
445 */ 446 if (nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY, 447 &faulty) != 0) { 448 fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: " 449 "invalid event log record\n", lp->log_name); 450 ahp->ah_error = EFMD_ASRU_EVENT; 451 return; 452 } 453 if (nvlist_lookup_nvlist(nvl, FM_RSRC_ASRU_EVENT, &flt) != 0) { 454 fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: " 455 "invalid event log record\n", lp->log_name); 456 ahp->ah_error = EFMD_ASRU_EVENT; 457 return; 458 } 459 (void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_UUID, &case_uuid); 460 (void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_CODE, &case_code); 461 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_UNUSABLE, 462 &unusable); 463 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPAIRED, 464 &repaired); 465 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPLACED, 466 &replaced); 467 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_ACQUITTED, 468 &acquitted); 469 470 /* 471 * Attempt to recreate the case in either the CLOSED or REPAIRED state 472 * (depending on whether the faulty bit is still set). 473 * If the case is already present, fmd_case_recreate() will return it. 474 * If not, we'll create a new orphaned case. Either way, we use the 475 * ASRU event to insert a suspect into the partially-restored case. 476 */ 477 fmd_module_lock(fmd.d_rmod); 478 cp = fmd_case_recreate(fmd.d_rmod, NULL, faulty ? FMD_CASE_CLOSED : 479 FMD_CASE_REPAIRED, case_uuid, case_code); 480 fmd_case_hold(cp); 481 fmd_module_unlock(fmd.d_rmod); 482 if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time, 483 &nelem) == 0 && nelem >= 2) 484 fmd_case_settime(cp, diag_time[0], diag_time[1]); 485 else 486 fmd_case_settime(cp, lp->log_stat.st_ctime, 0); 487 (void) nvlist_xdup(flt, &flt_copy, &fmd.d_nva); 488 489 /* 490 * For faults with a resource, re-evaluate the asru from the resource. 
491 */ 492 thp = fmd_fmri_topo_hold(TOPO_VERSION); 493 if (nvlist_lookup_string(flt_copy, FM_CLASS, &class) == 0 && 494 strncmp(class, "fault", 5) == 0 && 495 nvlist_lookup_nvlist(flt_copy, FM_FAULT_RESOURCE, &rsrc) == 0 && 496 rsrc != NULL && topo_fmri_asru(thp, rsrc, &asru, &err) == 0) { 497 (void) nvlist_remove(flt_copy, FM_FAULT_ASRU, DATA_TYPE_NVLIST); 498 (void) nvlist_add_nvlist(flt_copy, FM_FAULT_ASRU, asru); 499 nvlist_free(asru); 500 } 501 fmd_fmri_topo_rele(thp); 502 503 (void) nvlist_xdup(flt_copy, &flt, &fmd.d_nva); 504 505 fmd_case_recreate_suspect(cp, flt_copy); 506 507 /* 508 * Now create the resource cache entries. 509 */ 510 alp = fmd_asru_al_create(ahp, flt, cp, fmd_strbasename(lp->log_name)); 511 ap = alp->al_asru; 512 513 /* 514 * Check to see if the resource is still present in the system. 515 */ 516 ps = fmd_asru_replacement_state(flt); 517 if (ps == FMD_OBJ_STATE_STILL_PRESENT || ps == FMD_OBJ_STATE_UNKNOWN) 518 ap->asru_flags |= FMD_ASRU_PRESENT; 519 else if (ps == FMD_OBJ_STATE_REPLACED) 520 replaced = FMD_B_TRUE; 521 522 nvlist_free(flt); 523 524 ap->asru_flags |= FMD_ASRU_RECREATED; 525 if (faulty) { 526 alp->al_flags |= FMD_ASRU_FAULTY; 527 ap->asru_flags |= FMD_ASRU_FAULTY; 528 } 529 if (unusable) { 530 alp->al_flags |= FMD_ASRU_UNUSABLE; 531 ap->asru_flags |= FMD_ASRU_UNUSABLE; 532 } 533 if (replaced) 534 alp->al_reason = FMD_ASRU_REPLACED; 535 else if (repaired) 536 alp->al_reason = FMD_ASRU_REPAIRED; 537 else if (acquitted) 538 alp->al_reason = FMD_ASRU_ACQUITTED; 539 540 TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", alp->al_uuid, 541 (void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE])); 542 } 543 544 static void 545 fmd_asru_hash_discard(fmd_asru_hash_t *ahp, const char *uuid, int err) 546 { 547 char src[PATH_MAX], dst[PATH_MAX]; 548 549 (void) snprintf(src, PATH_MAX, "%s/%s", ahp->ah_dirpath, uuid); 550 (void) snprintf(dst, PATH_MAX, "%s/%s-", ahp->ah_dirpath, uuid); 551 552 if (err != 0) 553 err = rename(src, 
dst); 554 else 555 err = unlink(src); 556 557 if (err != 0 && errno != ENOENT) 558 fmd_error(EFMD_ASRU_EVENT, "failed to rename log %s", src); 559 } 560 561 /* 562 * Open a saved log file and restore it into the ASRU hash. If we can't even 563 * open the log, rename the log file to <uuid>- to indicate it is corrupt. If 564 * fmd_log_replay() fails, we either delete the file (if it has reached the 565 * upper limit on cache age) or rename it for debugging if it was corrupted. 566 */ 567 static void 568 fmd_asru_hash_logopen(fmd_asru_hash_t *ahp, const char *uuid) 569 { 570 fmd_log_t *lp = fmd_log_tryopen(ahp->ah_dirpath, uuid, FMD_LOG_ASRU); 571 uint_t n; 572 573 if (lp == NULL) { 574 fmd_asru_hash_discard(ahp, uuid, errno); 575 return; 576 } 577 578 ahp->ah_error = 0; 579 n = ahp->ah_al_count; 580 581 fmd_log_replay(lp, (fmd_log_f *)fmd_asru_hash_recreate, ahp); 582 fmd_log_rele(lp); 583 584 if (ahp->ah_al_count == n) 585 fmd_asru_hash_discard(ahp, uuid, ahp->ah_error); 586 } 587 588 void 589 fmd_asru_hash_refresh(fmd_asru_hash_t *ahp) 590 { 591 struct dirent *dp; 592 DIR *dirp; 593 int zero; 594 595 if ((dirp = opendir(ahp->ah_dirpath)) == NULL) { 596 fmd_error(EFMD_ASRU_NODIR, 597 "failed to open asru cache directory %s", ahp->ah_dirpath); 598 return; 599 } 600 601 (void) fmd_conf_getprop(fmd.d_conf, "rsrc.zero", &zero); 602 603 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 604 605 while ((dp = readdir(dirp)) != NULL) { 606 if (dp->d_name[0] == '.') 607 continue; /* skip "." and ".." */ 608 609 if (zero) 610 fmd_asru_hash_discard(ahp, dp->d_name, 0); 611 else if (!fmd_strmatch(dp->d_name, "*-")) 612 fmd_asru_hash_logopen(ahp, dp->d_name); 613 } 614 615 (void) pthread_rwlock_unlock(&ahp->ah_lock); 616 (void) closedir(dirp); 617 } 618 619 /* 620 * If the resource is present and faulty but not unusable, replay the fault 621 * event that caused it be marked faulty. This will cause the agent 622 * subscribing to this fault class to again disable the resource. 
623 */ 624 /*ARGSUSED*/ 625 static void 626 fmd_asru_hash_replay_asru(fmd_asru_t *ap, void *data) 627 { 628 fmd_event_t *e; 629 nvlist_t *nvl; 630 char *class; 631 632 if (ap->asru_event != NULL && (ap->asru_flags & (FMD_ASRU_STATE | 633 FMD_ASRU_PRESENT)) == (FMD_ASRU_FAULTY | FMD_ASRU_PRESENT)) { 634 635 fmd_dprintf(FMD_DBG_ASRU, 636 "replaying fault event for %s", ap->asru_name); 637 638 (void) nvlist_xdup(ap->asru_event, &nvl, &fmd.d_nva); 639 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 640 641 (void) nvlist_add_string(nvl, FMD_EVN_UUID, 642 ((fmd_case_impl_t *)ap->asru_case)->ci_uuid); 643 644 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 645 fmd_dispq_dispatch(fmd.d_disp, e, class); 646 } 647 } 648 649 void 650 fmd_asru_hash_replay(fmd_asru_hash_t *ahp) 651 { 652 fmd_asru_hash_apply(ahp, fmd_asru_hash_replay_asru, NULL); 653 } 654 655 /* 656 * Check if the resource is still present. If not, and if the rsrc.age time 657 * has expired, then do an implicit repair on the resource. 
658 */ 659 /*ARGSUSED*/ 660 static void 661 fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *arg) 662 { 663 struct timeval tv; 664 fmd_log_t *lp; 665 hrtime_t hrt; 666 int ps; 667 int err; 668 669 ps = fmd_asru_replacement_state(alp->al_event); 670 if (ps == FMD_OBJ_STATE_REPLACED) { 671 fmd_asru_replaced(alp, &err); 672 } else if (ps == FMD_OBJ_STATE_NOT_PRESENT) { 673 fmd_time_gettimeofday(&tv); 674 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, 675 FMD_LOG_ASRU); 676 hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime); 677 fmd_log_rele(lp); 678 if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime) 679 fmd_asru_removed(alp); 680 } 681 } 682 683 void 684 fmd_asru_clear_aged_rsrcs() 685 { 686 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, NULL); 687 } 688 689 fmd_asru_hash_t * 690 fmd_asru_hash_create(const char *root, const char *dir) 691 { 692 fmd_asru_hash_t *ahp; 693 char path[PATH_MAX]; 694 695 ahp = fmd_alloc(sizeof (fmd_asru_hash_t), FMD_SLEEP); 696 (void) pthread_rwlock_init(&ahp->ah_lock, NULL); 697 ahp->ah_hashlen = fmd.d_str_buckets; 698 ahp->ah_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, FMD_SLEEP); 699 ahp->ah_asru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 700 FMD_SLEEP); 701 ahp->ah_case_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 702 FMD_SLEEP); 703 ahp->ah_fru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 704 FMD_SLEEP); 705 ahp->ah_label_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 706 FMD_SLEEP); 707 ahp->ah_rsrc_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 708 FMD_SLEEP); 709 (void) snprintf(path, sizeof (path), "%s/%s", root, dir); 710 ahp->ah_dirpath = fmd_strdup(path, FMD_SLEEP); 711 (void) fmd_conf_getprop(fmd.d_conf, "rsrc.age", &ahp->ah_lifetime); 712 (void) fmd_conf_getprop(fmd.d_conf, "fakenotpresent", 713 (uint32_t *)&fmd_asru_fake_not_present); 714 ahp->ah_al_count = 0; 715 ahp->ah_count = 0; 716 ahp->ah_error = 0; 717 ahp->ah_topo = fmd_topo_hold(); 718 719 return 
(ahp); 720 } 721 722 void 723 fmd_asru_hash_destroy(fmd_asru_hash_t *ahp) 724 { 725 fmd_asru_link_t *alp, *np; 726 uint_t i; 727 728 for (i = 0; i < ahp->ah_hashlen; i++) { 729 for (alp = ahp->ah_case_hash[i]; alp != NULL; alp = np) { 730 np = alp->al_case_next; 731 alp->al_case_next = NULL; 732 fmd_case_rele(alp->al_case); 733 alp->al_case = NULL; 734 fmd_asru_al_hash_release(ahp, alp); 735 } 736 } 737 738 fmd_strfree(ahp->ah_dirpath); 739 fmd_free(ahp->ah_hash, sizeof (void *) * ahp->ah_hashlen); 740 fmd_free(ahp->ah_asru_hash, sizeof (void *) * ahp->ah_hashlen); 741 fmd_free(ahp->ah_case_hash, sizeof (void *) * ahp->ah_hashlen); 742 fmd_free(ahp->ah_fru_hash, sizeof (void *) * ahp->ah_hashlen); 743 fmd_free(ahp->ah_label_hash, sizeof (void *) * ahp->ah_hashlen); 744 fmd_free(ahp->ah_rsrc_hash, sizeof (void *) * ahp->ah_hashlen); 745 fmd_topo_rele(ahp->ah_topo); 746 fmd_free(ahp, sizeof (fmd_asru_hash_t)); 747 } 748 749 /* 750 * Take a snapshot of the ASRU database by placing an additional hold on each 751 * member in an auxiliary array, and then call 'func' for each ASRU. 
752 */ 753 void 754 fmd_asru_hash_apply(fmd_asru_hash_t *ahp, 755 void (*func)(fmd_asru_t *, void *), void *arg) 756 { 757 fmd_asru_t *ap, **aps, **app; 758 uint_t apc, i; 759 760 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 761 762 aps = app = fmd_alloc(ahp->ah_count * sizeof (fmd_asru_t *), FMD_SLEEP); 763 apc = ahp->ah_count; 764 765 for (i = 0; i < ahp->ah_hashlen; i++) { 766 for (ap = ahp->ah_hash[i]; ap != NULL; ap = ap->asru_next) 767 *app++ = fmd_asru_hold(ap); 768 } 769 770 ASSERT(app == aps + apc); 771 (void) pthread_rwlock_unlock(&ahp->ah_lock); 772 773 for (i = 0; i < apc; i++) { 774 if (aps[i]->asru_fmri != NULL) 775 func(aps[i], arg); 776 fmd_asru_hash_release(ahp, aps[i]); 777 } 778 779 fmd_free(aps, apc * sizeof (fmd_asru_t *)); 780 } 781 782 void 783 fmd_asru_al_hash_apply(fmd_asru_hash_t *ahp, 784 void (*func)(fmd_asru_link_t *, void *), void *arg) 785 { 786 fmd_asru_link_t *alp, **alps, **alpp; 787 uint_t alpc, i; 788 789 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 790 791 alps = alpp = fmd_alloc(ahp->ah_al_count * sizeof (fmd_asru_link_t *), 792 FMD_SLEEP); 793 alpc = ahp->ah_al_count; 794 795 for (i = 0; i < ahp->ah_hashlen; i++) { 796 for (alp = ahp->ah_case_hash[i]; alp != NULL; 797 alp = alp->al_case_next) 798 *alpp++ = fmd_asru_al_hold(alp); 799 } 800 801 ASSERT(alpp == alps + alpc); 802 (void) pthread_rwlock_unlock(&ahp->ah_lock); 803 804 for (i = 0; i < alpc; i++) { 805 func(alps[i], arg); 806 fmd_asru_al_hash_release(ahp, alps[i]); 807 } 808 809 fmd_free(alps, alpc * sizeof (fmd_asru_link_t *)); 810 } 811 812 static void 813 fmd_asru_do_hash_apply(fmd_asru_hash_t *ahp, char *name, 814 void (*func)(fmd_asru_link_t *, void *), void *arg, 815 fmd_asru_link_t **hash, size_t match_offset, size_t next_offset) 816 { 817 fmd_asru_link_t *alp, **alps, **alpp; 818 uint_t alpc = 0, i; 819 uint_t h; 820 821 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 822 823 h = fmd_asru_strhash(ahp, name); 824 825 for (alp = hash[h]; alp != NULL; alp = 826 /* 
LINTED pointer alignment */ 827 FMD_ASRU_AL_HASH_NEXT(alp, next_offset)) 828 if (fmd_asru_strcmp(ahp, 829 /* LINTED pointer alignment */ 830 FMD_ASRU_AL_HASH_NAME(alp, match_offset), name)) 831 alpc++; 832 833 alps = alpp = fmd_alloc(alpc * sizeof (fmd_asru_link_t *), FMD_SLEEP); 834 835 for (alp = hash[h]; alp != NULL; alp = 836 /* LINTED pointer alignment */ 837 FMD_ASRU_AL_HASH_NEXT(alp, next_offset)) 838 if (fmd_asru_strcmp(ahp, 839 /* LINTED pointer alignment */ 840 FMD_ASRU_AL_HASH_NAME(alp, match_offset), name)) 841 *alpp++ = fmd_asru_al_hold(alp); 842 843 ASSERT(alpp == alps + alpc); 844 (void) pthread_rwlock_unlock(&ahp->ah_lock); 845 846 for (i = 0; i < alpc; i++) { 847 func(alps[i], arg); 848 fmd_asru_al_hash_release(ahp, alps[i]); 849 } 850 851 fmd_free(alps, alpc * sizeof (fmd_asru_link_t *)); 852 } 853 854 void 855 fmd_asru_hash_apply_by_asru(fmd_asru_hash_t *ahp, char *name, 856 void (*func)(fmd_asru_link_t *, void *), void *arg) 857 { 858 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_asru_hash, 859 offsetof(fmd_asru_link_t, al_asru_name), 860 offsetof(fmd_asru_link_t, al_asru_next)); 861 } 862 863 void 864 fmd_asru_hash_apply_by_case(fmd_asru_hash_t *ahp, fmd_case_t *cp, 865 void (*func)(fmd_asru_link_t *, void *), void *arg) 866 { 867 fmd_asru_do_hash_apply(ahp, ((fmd_case_impl_t *)cp)->ci_uuid, func, arg, 868 ahp->ah_case_hash, offsetof(fmd_asru_link_t, al_case_uuid), 869 offsetof(fmd_asru_link_t, al_case_next)); 870 } 871 872 void 873 fmd_asru_hash_apply_by_fru(fmd_asru_hash_t *ahp, char *name, 874 void (*func)(fmd_asru_link_t *, void *), void *arg) 875 { 876 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_fru_hash, 877 offsetof(fmd_asru_link_t, al_fru_name), 878 offsetof(fmd_asru_link_t, al_fru_next)); 879 } 880 881 void 882 fmd_asru_hash_apply_by_rsrc(fmd_asru_hash_t *ahp, char *name, 883 void (*func)(fmd_asru_link_t *, void *), void *arg) 884 { 885 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_rsrc_hash, 886 
offsetof(fmd_asru_link_t, al_rsrc_name), 887 offsetof(fmd_asru_link_t, al_rsrc_next)); 888 } 889 890 void 891 fmd_asru_hash_apply_by_label(fmd_asru_hash_t *ahp, char *name, 892 void (*func)(fmd_asru_link_t *, void *), void *arg) 893 { 894 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_label_hash, 895 offsetof(fmd_asru_link_t, al_label), 896 offsetof(fmd_asru_link_t, al_label_next)); 897 } 898 899 /* 900 * Lookup an asru in the hash by name and place a hold on it. If the asru is 901 * not found, no entry is created and NULL is returned. 902 */ 903 fmd_asru_t * 904 fmd_asru_hash_lookup_name(fmd_asru_hash_t *ahp, const char *name) 905 { 906 fmd_asru_t *ap; 907 908 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 909 ap = fmd_asru_hash_lookup(ahp, name); 910 (void) pthread_rwlock_unlock(&ahp->ah_lock); 911 912 return (ap); 913 } 914 915 /* 916 * Create a resource cache entry using the fault event "nvl" for one of the 917 * suspects from the case "cp". 918 * 919 * The fault event can have the following components : FM_FAULT_ASRU, 920 * FM_FAULT_FRU, FM_FAULT_RESOURCE. These should be set by the Diagnosis Engine 921 * when calling fmd_nvl_create_fault(). In the general case, these are all 922 * optional and an entry will always be added into the cache even if one or all 923 * of these fields is missing. 924 * 925 * However, for hardware faults the recommended practice is that the fault 926 * event should always have the FM_FAULT_RESOURCE field present and that this 927 * should be represented in hc-scheme. 928 * 929 * Currently the DE should also add the FM_FAULT_ASRU and FM_FAULT_FRU fields 930 * where known, though at some future stage fmd might be able to fill these 931 * in automatically from the topology. 932 */ 933 fmd_asru_link_t * 934 fmd_asru_hash_create_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, nvlist_t *nvl) 935 { 936 char *parsed_uuid; 937 uuid_t uuid; 938 int uuidlen; 939 fmd_asru_link_t *alp; 940 941 /* 942 * Generate a UUID for the ASRU. 
libuuid cleverly gives us no 943 * interface for specifying or learning the buffer size. Sigh. 944 * The spec says 36 bytes but we use a tunable just to be safe. 945 */ 946 (void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &uuidlen); 947 parsed_uuid = fmd_zalloc(uuidlen + 1, FMD_SLEEP); 948 uuid_generate(uuid); 949 uuid_unparse(uuid, parsed_uuid); 950 951 /* 952 * Now create the resource cache entries. 953 */ 954 fmd_case_hold_locked(cp); 955 alp = fmd_asru_al_create(ahp, nvl, cp, parsed_uuid); 956 TRACE((FMD_DBG_ASRU, "asru %s created as %p", 957 alp->al_uuid, (void *)alp->al_asru)); 958 959 fmd_free(parsed_uuid, uuidlen + 1); 960 return (alp); 961 962 } 963 964 /* 965 * Release the reference count on an asru obtained using fmd_asru_hash_lookup. 966 * We take 'ahp' for symmetry and in case we need to use it in future work. 967 */ 968 /*ARGSUSED*/ 969 void 970 fmd_asru_hash_release(fmd_asru_hash_t *ahp, fmd_asru_t *ap) 971 { 972 (void) pthread_mutex_lock(&ap->asru_lock); 973 974 ASSERT(ap->asru_refs != 0); 975 if (--ap->asru_refs == 0) 976 fmd_asru_destroy(ap); 977 else 978 (void) pthread_mutex_unlock(&ap->asru_lock); 979 } 980 981 static void 982 fmd_asru_do_delete_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, 983 fmd_asru_link_t **hash, size_t next_offset, char *name) 984 { 985 uint_t h; 986 fmd_asru_link_t *alp, **pp, *alpnext, **alpnextp; 987 988 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 989 h = fmd_asru_strhash(ahp, name); 990 pp = &hash[h]; 991 for (alp = *pp; alp != NULL; alp = alpnext) { 992 /* LINTED pointer alignment */ 993 alpnextp = FMD_ASRU_AL_HASH_NEXTP(alp, next_offset); 994 alpnext = *alpnextp; 995 if (alp->al_case == cp) { 996 *pp = *alpnextp; 997 *alpnextp = NULL; 998 } else 999 pp = alpnextp; 1000 } 1001 (void) pthread_rwlock_unlock(&ahp->ah_lock); 1002 } 1003 1004 static void 1005 fmd_asru_do_hash_delete(fmd_asru_hash_t *ahp, fmd_case_susp_t *cis, 1006 fmd_case_t *cp, fmd_asru_link_t **hash, size_t next_offset, char *nvname) 1007 { 1008 nvlist_t 
*nvl; 1009 char *name = NULL; 1010 ssize_t namelen; 1011 1012 if (nvlist_lookup_nvlist(cis->cis_nvl, nvname, &nvl) == 0 && 1013 (namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) != -1 && 1014 (name = fmd_alloc(namelen + 1, FMD_SLEEP)) != NULL) { 1015 if (fmd_fmri_nvl2str(nvl, name, namelen + 1) != -1) 1016 fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, 1017 name); 1018 fmd_free(name, namelen + 1); 1019 } else 1020 fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, ""); 1021 } 1022 1023 void 1024 fmd_asru_hash_delete_case(fmd_asru_hash_t *ahp, fmd_case_t *cp) 1025 { 1026 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1027 fmd_case_susp_t *cis; 1028 fmd_asru_link_t *alp, **plp, *alpnext; 1029 fmd_asru_t *ap; 1030 char path[PATH_MAX]; 1031 char *label; 1032 uint_t h; 1033 1034 /* 1035 * first delete hash entries for each suspect 1036 */ 1037 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) { 1038 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_fru_hash, 1039 offsetof(fmd_asru_link_t, al_fru_next), FM_FAULT_FRU); 1040 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_rsrc_hash, 1041 offsetof(fmd_asru_link_t, al_rsrc_next), FM_FAULT_RESOURCE); 1042 if (nvlist_lookup_string(cis->cis_nvl, FM_FAULT_LOCATION, 1043 &label) != 0) 1044 label = ""; 1045 fmd_asru_do_delete_entry(ahp, cp, ahp->ah_label_hash, 1046 offsetof(fmd_asru_link_t, al_label_next), label); 1047 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_asru_hash, 1048 offsetof(fmd_asru_link_t, al_asru_next), FM_FAULT_ASRU); 1049 } 1050 1051 /* 1052 * then delete associated case hash entries 1053 */ 1054 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 1055 h = fmd_asru_strhash(ahp, cip->ci_uuid); 1056 plp = &ahp->ah_case_hash[h]; 1057 for (alp = *plp; alp != NULL; alp = alpnext) { 1058 alpnext = alp->al_case_next; 1059 if (alp->al_case == cp) { 1060 *plp = alp->al_case_next; 1061 alp->al_case_next = NULL; 1062 ASSERT(ahp->ah_al_count != 0); 1063 ahp->ah_al_count--; 1064 1065 /* 1066 * decrement case ref. 
1067 */ 1068 fmd_case_rele_locked(cp); 1069 alp->al_case = NULL; 1070 1071 /* 1072 * If we found a matching ASRU, unlink its log file and 1073 * then release the hash entry. Note that it may still 1074 * be referenced if another thread is manipulating it; 1075 * this is ok because once we unlink, the log file will 1076 * not be restored, and the log data will be freed when 1077 * all of the referencing threads release their 1078 * respective references. 1079 */ 1080 (void) snprintf(path, sizeof (path), "%s/%s", 1081 ahp->ah_dirpath, alp->al_uuid); 1082 if (unlink(path) != 0) 1083 fmd_error(EFMD_ASRU_UNLINK, 1084 "failed to unlink asru %s", path); 1085 1086 /* 1087 * Now unlink from the global per-resource cache 1088 * and if this is the last link then remove that from 1089 * it's own hash too. 1090 */ 1091 ap = alp->al_asru; 1092 (void) pthread_mutex_lock(&ap->asru_lock); 1093 fmd_list_delete(&ap->asru_list, alp); 1094 if (ap->asru_list.l_next == NULL) { 1095 uint_t h; 1096 fmd_asru_t *ap2, **pp; 1097 fmd_asru_t *apnext, **apnextp; 1098 1099 ASSERT(ahp->ah_count != 0); 1100 ahp->ah_count--; 1101 h = fmd_asru_strhash(ahp, ap->asru_name); 1102 pp = &ahp->ah_hash[h]; 1103 for (ap2 = *pp; ap2 != NULL; ap2 = apnext) { 1104 apnextp = &ap2->asru_next; 1105 apnext = *apnextp; 1106 if (ap2 == ap) { 1107 *pp = *apnextp; 1108 *apnextp = NULL; 1109 } else 1110 pp = apnextp; 1111 } 1112 } 1113 (void) pthread_mutex_unlock(&ap->asru_lock); 1114 fmd_asru_al_hash_release(ahp, alp); 1115 } else 1116 plp = &alp->al_case_next; 1117 } 1118 (void) pthread_rwlock_unlock(&ahp->ah_lock); 1119 } 1120 1121 static void 1122 fmd_asru_repair_containee(fmd_asru_link_t *alp, void *er) 1123 { 1124 if (er && alp->al_asru_fmri && fmd_fmri_contains(er, 1125 alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1126 FMD_ASRU_REPAIRED)) 1127 fmd_case_update(alp->al_case); 1128 } 1129 1130 void 1131 fmd_asru_repaired(fmd_asru_link_t *alp, void *er) 1132 { 1133 int flags; 1134 int rval; 1135 
1136 /* 1137 * repair this asru cache entry 1138 */ 1139 rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_REPAIRED); 1140 1141 /* 1142 * now check if all entries associated with this asru are repaired and 1143 * if so repair containees 1144 */ 1145 (void) pthread_mutex_lock(&alp->al_asru->asru_lock); 1146 flags = alp->al_asru->asru_flags; 1147 (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); 1148 if (!(flags & FMD_ASRU_FAULTY)) 1149 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_containee, 1150 alp->al_asru_fmri); 1151 1152 /* 1153 * if called from fmd_adm_repair() and we really did clear the bit then 1154 * we need to do a case update to see if the associated case can be 1155 * repaired. No need to do this if called from fmd_case_repair() (ie 1156 * when er is NULL) as the case will be explicitly repaired anyway. 1157 */ 1158 if (er) { 1159 *(int *)er = 0; 1160 if (rval) 1161 fmd_case_update(alp->al_case); 1162 } 1163 } 1164 1165 static void 1166 fmd_asru_acquit_containee(fmd_asru_link_t *alp, void *er) 1167 { 1168 if (er && alp->al_asru_fmri && fmd_fmri_contains(er, 1169 alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1170 FMD_ASRU_ACQUITTED)) 1171 fmd_case_update(alp->al_case); 1172 } 1173 1174 void 1175 fmd_asru_acquit(fmd_asru_link_t *alp, void *er) 1176 { 1177 int flags; 1178 int rval; 1179 1180 /* 1181 * acquit this asru cache entry 1182 */ 1183 rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_ACQUITTED); 1184 1185 /* 1186 * now check if all entries associated with this asru are acquitted and 1187 * if so acquit containees 1188 */ 1189 (void) pthread_mutex_lock(&alp->al_asru->asru_lock); 1190 flags = alp->al_asru->asru_flags; 1191 (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); 1192 if (!(flags & FMD_ASRU_FAULTY)) 1193 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_acquit_containee, 1194 alp->al_asru_fmri); 1195 1196 /* 1197 * if called from fmd_adm_acquit() and we really did clear the bit then 1198 * we need 
to do a case update to see if the associated case can be 1199 * repaired. No need to do this if called from fmd_case_acquit() (ie 1200 * when er is NULL) as the case will be explicitly repaired anyway. 1201 */ 1202 if (er) { 1203 *(int *)er = 0; 1204 if (rval) 1205 fmd_case_update(alp->al_case); 1206 } 1207 } 1208 1209 static void 1210 fmd_asru_replaced_containee(fmd_asru_link_t *alp, void *er) 1211 { 1212 if (er && alp->al_asru_fmri && fmd_fmri_contains(er, 1213 alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1214 FMD_ASRU_REPLACED)) 1215 fmd_case_update(alp->al_case); 1216 } 1217 1218 void 1219 fmd_asru_replaced(fmd_asru_link_t *alp, void *er) 1220 { 1221 int flags; 1222 int rval; 1223 int ps; 1224 1225 ps = fmd_asru_replacement_state(alp->al_event); 1226 if (ps == FMD_OBJ_STATE_STILL_PRESENT) 1227 return; 1228 1229 /* 1230 * mark this cache entry as replaced 1231 */ 1232 rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_REPLACED); 1233 1234 /* 1235 * now check if all entries associated with this asru are replaced and 1236 * if so replace containees 1237 */ 1238 (void) pthread_mutex_lock(&alp->al_asru->asru_lock); 1239 flags = alp->al_asru->asru_flags; 1240 (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); 1241 if (!(flags & FMD_ASRU_FAULTY)) 1242 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_replaced_containee, 1243 alp->al_asru_fmri); 1244 1245 *(int *)er = 0; 1246 if (rval) 1247 fmd_case_update(alp->al_case); 1248 } 1249 1250 static void 1251 fmd_asru_removed_containee(fmd_asru_link_t *alp, void *er) 1252 { 1253 if (er && alp->al_asru_fmri && fmd_fmri_contains(er, 1254 alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1255 0)) 1256 fmd_case_update(alp->al_case); 1257 } 1258 1259 void 1260 fmd_asru_removed(fmd_asru_link_t *alp) 1261 { 1262 int flags; 1263 int rval; 1264 1265 /* 1266 * mark this cache entry as replacded 1267 */ 1268 rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 0); 1269 1270 /* 1271 * now check if all 
entries associated with this asru are removed and 1272 * if so replace containees 1273 */ 1274 (void) pthread_mutex_lock(&alp->al_asru->asru_lock); 1275 flags = alp->al_asru->asru_flags; 1276 (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); 1277 if (!(flags & FMD_ASRU_FAULTY)) 1278 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_removed_containee, 1279 alp->al_asru_fmri); 1280 if (rval) 1281 fmd_case_update(alp->al_case); 1282 } 1283 1284 static void 1285 fmd_asru_logevent(fmd_asru_link_t *alp) 1286 { 1287 fmd_asru_t *ap = alp->al_asru; 1288 boolean_t faulty = (alp->al_flags & FMD_ASRU_FAULTY) != 0; 1289 boolean_t unusable = (alp->al_flags & FMD_ASRU_UNUSABLE) != 0; 1290 boolean_t message = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0; 1291 boolean_t repaired = (alp->al_reason == FMD_ASRU_REPAIRED); 1292 boolean_t replaced = (alp->al_reason == FMD_ASRU_REPLACED); 1293 boolean_t acquitted = (alp->al_reason == FMD_ASRU_ACQUITTED); 1294 1295 fmd_case_impl_t *cip; 1296 fmd_event_t *e; 1297 fmd_log_t *lp; 1298 nvlist_t *nvl; 1299 char *class; 1300 1301 ASSERT(MUTEX_HELD(&ap->asru_lock)); 1302 cip = (fmd_case_impl_t *)alp->al_case; 1303 ASSERT(cip != NULL); 1304 1305 if ((lp = alp->al_log) == NULL) 1306 lp = fmd_log_open(ap->asru_root, alp->al_uuid, FMD_LOG_ASRU); 1307 1308 if (lp == NULL) 1309 return; /* can't log events if we can't open the log */ 1310 1311 nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[faulty | (unusable << 1)], 1312 alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, faulty, unusable, 1313 message, alp->al_event, &cip->ci_tv, repaired, replaced, acquitted); 1314 1315 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 1316 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 1317 1318 fmd_event_hold(e); 1319 fmd_log_append(lp, e, NULL); 1320 fmd_event_rele(e); 1321 1322 /* 1323 * For now, we close the log file after every update to conserve file 1324 * descriptors and daemon overhead. 
If this becomes a performance 1325 * issue this code can change to keep a fixed-size LRU cache of logs. 1326 */ 1327 fmd_log_rele(lp); 1328 alp->al_log = NULL; 1329 } 1330 1331 int 1332 fmd_asru_setflags(fmd_asru_link_t *alp, uint_t sflag) 1333 { 1334 fmd_asru_t *ap = alp->al_asru; 1335 uint_t nstate, ostate; 1336 1337 ASSERT(!(sflag & ~FMD_ASRU_STATE)); 1338 ASSERT(sflag != FMD_ASRU_STATE); 1339 1340 (void) pthread_mutex_lock(&ap->asru_lock); 1341 1342 ostate = alp->al_flags & FMD_ASRU_STATE; 1343 alp->al_flags |= sflag; 1344 nstate = alp->al_flags & FMD_ASRU_STATE; 1345 1346 if (nstate == ostate) { 1347 (void) pthread_mutex_unlock(&ap->asru_lock); 1348 return (0); 1349 } 1350 1351 ap->asru_flags |= sflag; 1352 TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid, 1353 _fmd_asru_snames[ostate], _fmd_asru_snames[nstate])); 1354 1355 fmd_asru_logevent(alp); 1356 1357 (void) pthread_cond_broadcast(&ap->asru_cv); 1358 (void) pthread_mutex_unlock(&ap->asru_lock); 1359 return (1); 1360 } 1361 1362 int 1363 fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag, uint8_t reason) 1364 { 1365 fmd_asru_t *ap = alp->al_asru; 1366 fmd_asru_link_t *nalp; 1367 uint_t nstate, ostate, flags = 0; 1368 1369 ASSERT(!(sflag & ~FMD_ASRU_STATE)); 1370 ASSERT(sflag != FMD_ASRU_STATE); 1371 1372 (void) pthread_mutex_lock(&ap->asru_lock); 1373 1374 ostate = alp->al_flags & FMD_ASRU_STATE; 1375 alp->al_flags &= ~sflag; 1376 nstate = alp->al_flags & FMD_ASRU_STATE; 1377 1378 if (nstate == ostate) { 1379 if (reason > alp->al_reason) { 1380 alp->al_reason = reason; 1381 fmd_asru_logevent(alp); 1382 (void) pthread_cond_broadcast(&ap->asru_cv); 1383 } 1384 (void) pthread_mutex_unlock(&ap->asru_lock); 1385 return (0); 1386 } 1387 if (reason > alp->al_reason) 1388 alp->al_reason = reason; 1389 1390 if (sflag == FMD_ASRU_UNUSABLE) 1391 ap->asru_flags &= ~sflag; 1392 else if (sflag == FMD_ASRU_FAULTY) { 1393 /* 1394 * only clear the faulty bit if all links are clear 1395 */ 1396 for (nalp = 
fmd_list_next(&ap->asru_list); nalp != NULL; 1397 nalp = fmd_list_next(nalp)) 1398 flags |= nalp->al_flags; 1399 if (!(flags & FMD_ASRU_FAULTY)) 1400 ap->asru_flags &= ~sflag; 1401 } 1402 1403 TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid, 1404 _fmd_asru_snames[ostate], _fmd_asru_snames[nstate])); 1405 1406 fmd_asru_logevent(alp); 1407 1408 (void) pthread_cond_broadcast(&ap->asru_cv); 1409 (void) pthread_mutex_unlock(&ap->asru_lock); 1410 1411 return (1); 1412 } 1413 1414 /* 1415 * Report the current known state of the link entry (ie this particular fault 1416 * affecting this particular ASRU). 1417 */ 1418 int 1419 fmd_asru_al_getstate(fmd_asru_link_t *alp) 1420 { 1421 int us, st; 1422 nvlist_t *asru; 1423 int ps; 1424 1425 ps = fmd_asru_replacement_state(alp->al_event); 1426 if (ps == FMD_OBJ_STATE_NOT_PRESENT) 1427 return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE); 1428 if (ps == FMD_OBJ_STATE_REPLACED) { 1429 if (alp->al_reason < FMD_ASRU_REPLACED) 1430 alp->al_reason = FMD_ASRU_REPLACED; 1431 return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE); 1432 } 1433 1434 st = (alp->al_flags & FMD_ASRU_STATE) | FMD_ASRU_PRESENT; 1435 if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0) { 1436 us = fmd_fmri_service_state(asru); 1437 if (us == -1 || us == FMD_SERVICE_STATE_UNKNOWN) { 1438 /* not supported by scheme - try fmd_fmri_unusable */ 1439 us = fmd_fmri_unusable(asru); 1440 } else if (us == FMD_SERVICE_STATE_UNUSABLE) { 1441 st |= FMD_ASRU_UNUSABLE; 1442 return (st); 1443 } else if (us == FMD_SERVICE_STATE_OK) { 1444 st &= ~FMD_ASRU_UNUSABLE; 1445 return (st); 1446 } else if (us == FMD_SERVICE_STATE_DEGRADED) { 1447 st &= ~FMD_ASRU_UNUSABLE; 1448 st |= FMD_ASRU_DEGRADED; 1449 return (st); 1450 } 1451 } else 1452 us = (alp->al_flags & FMD_ASRU_UNUSABLE); 1453 if (us > 0) 1454 st |= FMD_ASRU_UNUSABLE; 1455 else if (us == 0) 1456 st &= ~FMD_ASRU_UNUSABLE; 1457 return (st); 1458 } 1459 1460 /* 1461 * Report the current known 
state of the ASRU by refreshing its unusable status
 * based upon the routines provided by the scheme module.  If the unusable bit
 * is different, we do *not* generate a state change here because that change
 * may be unrelated to fmd activities and therefore we have no case or event.
 * The absence of the transition is harmless as this function is only provided
 * for RPC observability and fmd's clients are only concerned with ASRU_FAULTY.
 */
int
fmd_asru_getstate(fmd_asru_t *ap)
{
	int unusable, state;

	/*
	 * Only entries flagged FMD_ASRU_INTERNAL are reported regardless of
	 * presence; anything else must be present (and not be subject to the
	 * fmd_asru_fake_not_present override) or we report nothing at all.
	 */
	if ((ap->asru_flags & FMD_ASRU_INTERNAL) == 0) {
		if (fmd_asru_fake_not_present >= FMD_OBJ_STATE_REPLACED)
			return (0);
		if (fmd_fmri_present(ap->asru_fmri) <= 0)
			return (0);
	}

	unusable = fmd_fmri_unusable(ap->asru_fmri);
	state = ap->asru_flags & FMD_ASRU_STATE;

	/*
	 * A positive answer forces the unusable bit on, zero forces it off,
	 * and a negative answer leaves the cached bit untouched.
	 */
	if (unusable > 0)
		return (state | FMD_ASRU_UNUSABLE);
	if (unusable == 0)
		return (state & ~FMD_ASRU_UNUSABLE);

	return (state);
}