1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/fm/protocol.h> 30 #include <uuid/uuid.h> 31 32 #include <dirent.h> 33 #include <limits.h> 34 #include <unistd.h> 35 #include <alloca.h> 36 #include <stddef.h> 37 #include <fm/libtopo.h> 38 39 #include <fmd_alloc.h> 40 #include <fmd_string.h> 41 #include <fmd_error.h> 42 #include <fmd_subr.h> 43 #include <fmd_protocol.h> 44 #include <fmd_event.h> 45 #include <fmd_conf.h> 46 #include <fmd_fmri.h> 47 #include <fmd_dispq.h> 48 #include <fmd_case.h> 49 #include <fmd_module.h> 50 #include <fmd_asru.h> 51 52 #include <fmd.h> 53 54 static const char *const _fmd_asru_events[] = { 55 FMD_RSRC_CLASS "asru.ok", /* UNUSABLE=0 FAULTED=0 */ 56 FMD_RSRC_CLASS "asru.degraded", /* UNUSABLE=0 FAULTED=1 */ 57 FMD_RSRC_CLASS "asru.unknown", /* UNUSABLE=1 FAULTED=0 */ 58 FMD_RSRC_CLASS "asru.faulted" /* UNUSABLE=1 FAULTED=1 */ 59 }; 60 61 static const char *const _fmd_asru_snames[] = { 62 "uf", "uF", "Uf", "UF" /* same order as above */ 63 }; 64 65 volatile uint32_t 
fmd_asru_fake_not_present = 0; 66 67 static uint_t 68 fmd_asru_strhash(fmd_asru_hash_t *ahp, const char *val) 69 { 70 return (topo_fmri_strhash(ahp->ah_topo->ft_hdl, val) % ahp->ah_hashlen); 71 } 72 73 static boolean_t 74 fmd_asru_strcmp(fmd_asru_hash_t *ahp, const char *a, const char *b) 75 { 76 return (topo_fmri_strcmp(ahp->ah_topo->ft_hdl, a, b)); 77 } 78 79 static fmd_asru_t * 80 fmd_asru_create(fmd_asru_hash_t *ahp, const char *uuid, 81 const char *name, nvlist_t *fmri) 82 { 83 fmd_asru_t *ap = fmd_zalloc(sizeof (fmd_asru_t), FMD_SLEEP); 84 char *s; 85 86 (void) pthread_mutex_init(&ap->asru_lock, NULL); 87 (void) pthread_cond_init(&ap->asru_cv, NULL); 88 89 ap->asru_name = fmd_strdup(name, FMD_SLEEP); 90 if (fmri) 91 (void) nvlist_xdup(fmri, &ap->asru_fmri, &fmd.d_nva); 92 ap->asru_root = fmd_strdup(ahp->ah_dirpath, FMD_SLEEP); 93 ap->asru_uuid = fmd_strdup(uuid, FMD_SLEEP); 94 ap->asru_uuidlen = ap->asru_uuid ? strlen(ap->asru_uuid) : 0; 95 ap->asru_refs = 1; 96 97 if (fmri && nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &s) == 0 && 98 strcmp(s, FM_FMRI_SCHEME_FMD) == 0) 99 ap->asru_flags |= FMD_ASRU_INTERNAL; 100 101 return (ap); 102 } 103 104 static void 105 fmd_asru_destroy(fmd_asru_t *ap) 106 { 107 ASSERT(MUTEX_HELD(&ap->asru_lock)); 108 ASSERT(ap->asru_refs == 0); 109 110 nvlist_free(ap->asru_event); 111 fmd_strfree(ap->asru_name); 112 nvlist_free(ap->asru_fmri); 113 fmd_strfree(ap->asru_root); 114 fmd_free(ap->asru_uuid, ap->asru_uuidlen + 1); 115 fmd_free(ap, sizeof (fmd_asru_t)); 116 } 117 118 static void 119 fmd_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_t *ap) 120 { 121 uint_t h = fmd_asru_strhash(ahp, ap->asru_name); 122 123 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 124 ap->asru_next = ahp->ah_hash[h]; 125 ahp->ah_hash[h] = ap; 126 ahp->ah_count++; 127 } 128 129 static fmd_asru_t * 130 fmd_asru_hold(fmd_asru_t *ap) 131 { 132 (void) pthread_mutex_lock(&ap->asru_lock); 133 ap->asru_refs++; 134 ASSERT(ap->asru_refs != 0); 135 (void) 
pthread_mutex_unlock(&ap->asru_lock); 136 return (ap); 137 } 138 139 /* 140 * Lookup an asru in the hash by name and place a hold on it. If the asru is 141 * not found, no entry is created and NULL is returned. This internal function 142 * is for callers who have the ah_lock held and is used by lookup_name below. 143 */ 144 fmd_asru_t * 145 fmd_asru_hash_lookup(fmd_asru_hash_t *ahp, const char *name) 146 { 147 fmd_asru_t *ap; 148 uint_t h; 149 150 ASSERT(RW_LOCK_HELD(&ahp->ah_lock)); 151 h = fmd_asru_strhash(ahp, name); 152 153 for (ap = ahp->ah_hash[h]; ap != NULL; ap = ap->asru_next) { 154 if (fmd_asru_strcmp(ahp, ap->asru_name, name)) 155 break; 156 } 157 158 if (ap != NULL) 159 (void) fmd_asru_hold(ap); 160 else 161 (void) fmd_set_errno(EFMD_ASRU_NOENT); 162 163 return (ap); 164 } 165 166 static int 167 fmd_asru_is_present(nvlist_t *event) 168 { 169 int ps = -1; 170 nvlist_t *asru, *fru, *rsrc; 171 172 /* 173 * Check if there is evidence that this object is no longer present. 174 * In general fmd_fmri_present() should be supported on resources and/or 175 * frus, as those are the things that are physically present or not 176 * present - an asru can be spread over a number of frus some of which 177 * are present and some not, so fmd_fmri_present() is not generally 178 * meaningful. However retain a check for asru first for compatibility. 179 * If we have checked all three and we still get -1 then nothing knows 180 * whether it's present or not, so err on the safe side and treat it 181 * as still present. 
182 */ 183 if (fmd_asru_fake_not_present) 184 ps = 0; 185 if (ps == -1 && nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0) 186 ps = fmd_fmri_present(asru); 187 if (ps == -1 && nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, 188 &rsrc) == 0) 189 ps = fmd_fmri_present(rsrc); 190 if (ps == -1 && nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0) 191 ps = fmd_fmri_present(fru); 192 if (ps == -1) 193 ps = 1; 194 return (ps); 195 } 196 197 static void 198 fmd_asru_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 199 char *name) 200 { 201 uint_t h = fmd_asru_strhash(ahp, name); 202 203 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 204 alp->al_asru_next = ahp->ah_asru_hash[h]; 205 ahp->ah_asru_hash[h] = alp; 206 ahp->ah_al_count++; 207 } 208 209 static void 210 fmd_asru_case_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 211 char *name) 212 { 213 uint_t h = fmd_asru_strhash(ahp, name); 214 215 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 216 alp->al_case_next = ahp->ah_case_hash[h]; 217 ahp->ah_case_hash[h] = alp; 218 } 219 220 static void 221 fmd_asru_fru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, char *name) 222 { 223 uint_t h = fmd_asru_strhash(ahp, name); 224 225 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 226 alp->al_fru_next = ahp->ah_fru_hash[h]; 227 ahp->ah_fru_hash[h] = alp; 228 } 229 230 static void 231 fmd_asru_label_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 232 char *name) 233 { 234 uint_t h = fmd_asru_strhash(ahp, name); 235 236 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 237 alp->al_label_next = ahp->ah_label_hash[h]; 238 ahp->ah_label_hash[h] = alp; 239 } 240 241 static void 242 fmd_asru_rsrc_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 243 char *name) 244 { 245 uint_t h = fmd_asru_strhash(ahp, name); 246 247 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 248 alp->al_rsrc_next = ahp->ah_rsrc_hash[h]; 249 ahp->ah_rsrc_hash[h] = alp; 250 } 251 252 static void 253 fmd_asru_al_destroy(fmd_asru_link_t *alp) 254 { 255 
ASSERT(alp->al_refs == 0); 256 ASSERT(MUTEX_HELD(&alp->al_asru->asru_lock)); 257 258 if (alp->al_log != NULL) 259 fmd_log_rele(alp->al_log); 260 261 fmd_free(alp->al_uuid, alp->al_uuidlen + 1); 262 nvlist_free(alp->al_event); 263 fmd_strfree(alp->al_rsrc_name); 264 fmd_strfree(alp->al_case_uuid); 265 fmd_strfree(alp->al_fru_name); 266 fmd_strfree(alp->al_asru_name); 267 fmd_strfree(alp->al_label); 268 nvlist_free(alp->al_asru_fmri); 269 fmd_free(alp, sizeof (fmd_asru_link_t)); 270 } 271 272 static fmd_asru_link_t * 273 fmd_asru_al_hold(fmd_asru_link_t *alp) 274 { 275 fmd_asru_t *ap = alp->al_asru; 276 277 (void) pthread_mutex_lock(&ap->asru_lock); 278 ap->asru_refs++; 279 alp->al_refs++; 280 ASSERT(alp->al_refs != 0); 281 (void) pthread_mutex_unlock(&ap->asru_lock); 282 return (alp); 283 } 284 285 static void fmd_asru_destroy(fmd_asru_t *ap); 286 287 /*ARGSUSED*/ 288 static void 289 fmd_asru_al_hash_release(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp) 290 { 291 fmd_asru_t *ap = alp->al_asru; 292 293 (void) pthread_mutex_lock(&ap->asru_lock); 294 ASSERT(alp->al_refs != 0); 295 if (--alp->al_refs == 0) 296 fmd_asru_al_destroy(alp); 297 ASSERT(ap->asru_refs != 0); 298 if (--ap->asru_refs == 0) 299 fmd_asru_destroy(ap); 300 else 301 (void) pthread_mutex_unlock(&ap->asru_lock); 302 } 303 304 static int 305 fmd_asru_get_namestr(nvlist_t *nvl, char **name, ssize_t *namelen) 306 { 307 if ((*namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) == -1) 308 return (EFMD_ASRU_FMRI); 309 *name = fmd_alloc(*namelen + 1, FMD_SLEEP); 310 if (fmd_fmri_nvl2str(nvl, *name, *namelen + 1) == -1) { 311 if (*name != NULL) 312 fmd_free(*name, *namelen + 1); 313 return (EFMD_ASRU_FMRI); 314 } 315 return (0); 316 } 317 318 static fmd_asru_link_t * 319 fmd_asru_al_create(fmd_asru_hash_t *ahp, nvlist_t *nvl, fmd_case_t *cp, 320 const char *al_uuid) 321 { 322 nvlist_t *asru = NULL, *fru, *rsrc; 323 int got_rsrc = 0, got_asru = 0, got_fru = 0; 324 ssize_t fru_namelen, rsrc_namelen, asru_namelen; 325 char 
*asru_name, *rsrc_name, *fru_name, *name, *label; 326 fmd_asru_link_t *alp; 327 fmd_asru_t *ap; 328 boolean_t msg; 329 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 330 331 if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &asru) == 0 && 332 fmd_asru_get_namestr(asru, &asru_name, &asru_namelen) == 0) 333 got_asru = 1; 334 if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &fru) == 0 && 335 fmd_asru_get_namestr(fru, &fru_name, &fru_namelen) == 0) 336 got_fru = 1; 337 if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0 && 338 fmd_asru_get_namestr(rsrc, &rsrc_name, &rsrc_namelen) == 0) 339 got_rsrc = 1; 340 if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, &label) != 0) 341 label = ""; 342 343 /* 344 * Grab the rwlock as a writer; Then create and insert the asru with 345 * ahp->ah_lock held and hash it in. We'll then drop the rwlock and 346 * proceed to initializing the asru. 347 */ 348 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 349 350 /* 351 * Create and initialise the per-fault "link" structure. 352 */ 353 alp = fmd_zalloc(sizeof (fmd_asru_link_t), FMD_SLEEP); 354 if (got_asru) 355 (void) nvlist_xdup(asru, &alp->al_asru_fmri, &fmd.d_nva); 356 alp->al_uuid = fmd_strdup(al_uuid, FMD_SLEEP); 357 alp->al_uuidlen = strlen(alp->al_uuid); 358 alp->al_refs = 1; 359 360 /* 361 * If this is the first fault for this asru, then create the per-asru 362 * structure and link into the hash. 363 */ 364 name = got_asru ? asru_name : ""; 365 if ((ap = fmd_asru_hash_lookup(ahp, name)) == NULL) { 366 ap = fmd_asru_create(ahp, al_uuid, name, got_asru ? asru : 367 NULL); 368 fmd_asru_hash_insert(ahp, ap); 369 } else 370 nvlist_free(ap->asru_event); 371 (void) nvlist_xdup(nvl, &ap->asru_event, &fmd.d_nva); 372 373 /* 374 * Put the link structure on the list associated with the per-asru 375 * structure. Then put the link structure on the various hashes. 376 */ 377 fmd_list_append(&ap->asru_list, (fmd_list_t *)alp); 378 alp->al_asru = ap; 379 alp->al_asru_name = got_asru ? 
asru_name : fmd_strdup("", FMD_SLEEP); 380 fmd_asru_asru_hash_insert(ahp, alp, alp->al_asru_name); 381 alp->al_fru_name = got_fru ? fru_name : fmd_strdup("", FMD_SLEEP); 382 fmd_asru_fru_hash_insert(ahp, alp, alp->al_fru_name); 383 alp->al_rsrc_name = got_rsrc ? rsrc_name : fmd_strdup("", FMD_SLEEP); 384 fmd_asru_rsrc_hash_insert(ahp, alp, alp->al_rsrc_name); 385 alp->al_label = fmd_strdup(label, FMD_SLEEP); 386 fmd_asru_label_hash_insert(ahp, alp, label); 387 alp->al_case_uuid = fmd_strdup(cip->ci_uuid, FMD_SLEEP); 388 fmd_asru_case_hash_insert(ahp, alp, cip->ci_uuid); 389 (void) pthread_mutex_lock(&ap->asru_lock); 390 (void) pthread_rwlock_unlock(&ahp->ah_lock); 391 392 ap->asru_case = alp->al_case = cp; 393 if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE, &msg) == 0 && 394 msg == B_FALSE) 395 ap->asru_flags |= FMD_ASRU_INVISIBLE; 396 (void) nvlist_xdup(nvl, &alp->al_event, &fmd.d_nva); 397 ap->asru_flags |= FMD_ASRU_VALID; 398 (void) pthread_cond_broadcast(&ap->asru_cv); 399 (void) pthread_mutex_unlock(&ap->asru_lock); 400 return (alp); 401 } 402 403 static void 404 fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp) 405 { 406 nvlist_t *nvl = FMD_EVENT_NVL(ep); 407 boolean_t f, u, ps, us; 408 nvlist_t *flt, *flt_copy, *asru; 409 char *case_uuid = NULL, *case_code = NULL; 410 fmd_asru_t *ap; 411 fmd_asru_link_t *alp; 412 fmd_case_t *cp; 413 int64_t *diag_time; 414 uint_t nelem; 415 topo_hdl_t *thp; 416 char *class; 417 nvlist_t *rsrc; 418 int err; 419 420 /* 421 * Extract the most recent values of 'faulty' from the event log. 
422 */ 423 if (nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY, &f) != 0) { 424 fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: " 425 "invalid event log record\n", lp->log_name); 426 ahp->ah_error = EFMD_ASRU_EVENT; 427 return; 428 } 429 if (nvlist_lookup_nvlist(nvl, FM_RSRC_ASRU_EVENT, &flt) != 0) { 430 fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: " 431 "invalid event log record\n", lp->log_name); 432 ahp->ah_error = EFMD_ASRU_EVENT; 433 return; 434 } 435 (void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_UUID, &case_uuid); 436 (void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_CODE, &case_code); 437 438 /* 439 * Attempt to recreate the case in the CLOSED state. 440 * If the case is already present, fmd_case_recreate() will return it. 441 * If not, we'll create a new orphaned case. Either way, we use the 442 * ASRU event to insert a suspect into the partially-restored case. 443 */ 444 fmd_module_lock(fmd.d_rmod); 445 cp = fmd_case_recreate(fmd.d_rmod, NULL, FMD_CASE_CLOSED, case_uuid, 446 case_code); 447 fmd_case_hold(cp); 448 fmd_module_unlock(fmd.d_rmod); 449 if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time, 450 &nelem) == 0 && nelem >= 2) 451 fmd_case_settime(cp, diag_time[0], diag_time[1]); 452 else 453 fmd_case_settime(cp, lp->log_stat.st_ctime, 0); 454 (void) nvlist_xdup(flt, &flt_copy, &fmd.d_nva); 455 456 /* 457 * For faults with a resource, re-evaluate the asru from the resource. 
458 */ 459 thp = fmd_fmri_topo_hold(TOPO_VERSION); 460 if (nvlist_lookup_string(flt_copy, FM_CLASS, &class) == 0 && 461 strncmp(class, "fault", 5) == 0 && 462 nvlist_lookup_nvlist(flt_copy, FM_FAULT_RESOURCE, &rsrc) == 0 && 463 rsrc != NULL && topo_fmri_asru(thp, rsrc, &asru, &err) == 0) { 464 (void) nvlist_remove(flt_copy, FM_FAULT_ASRU, DATA_TYPE_NVLIST); 465 (void) nvlist_add_nvlist(flt_copy, FM_FAULT_ASRU, asru); 466 nvlist_free(asru); 467 } 468 fmd_fmri_topo_rele(thp); 469 470 (void) nvlist_xdup(flt_copy, &flt, &fmd.d_nva); 471 472 fmd_case_recreate_suspect(cp, flt_copy); 473 474 /* 475 * Now create the resource cache entries. 476 */ 477 alp = fmd_asru_al_create(ahp, flt, cp, fmd_strbasename(lp->log_name)); 478 ap = alp->al_asru; 479 480 /* 481 * Check to see if the resource is still present in the system. If 482 * so, then update the value of the unusable bit based on the current 483 * system configuration. If not, then consider unusable. 484 */ 485 ps = fmd_asru_is_present(flt); 486 if (ps) { 487 if (nvlist_lookup_nvlist(flt, FM_FAULT_ASRU, &asru) != 0) 488 u = FMD_B_FALSE; 489 else if ((us = fmd_fmri_unusable(asru)) == -1) { 490 fmd_error(EFMD_ASRU_FMRI, "failed to update " 491 "status of asru %s", lp->log_name); 492 u = FMD_B_FALSE; 493 } else 494 u = us != 0; 495 496 } else 497 u = FMD_B_TRUE; /* not present; set unusable */ 498 499 nvlist_free(flt); 500 501 ap->asru_flags |= FMD_ASRU_RECREATED; 502 if (ps) 503 ap->asru_flags |= FMD_ASRU_PRESENT; 504 if (f) { 505 alp->al_flags |= FMD_ASRU_FAULTY; 506 ap->asru_flags |= FMD_ASRU_FAULTY; 507 } 508 if (u) { 509 alp->al_flags |= FMD_ASRU_UNUSABLE; 510 ap->asru_flags |= FMD_ASRU_UNUSABLE; 511 } 512 513 TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", alp->al_uuid, 514 (void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE])); 515 } 516 517 static void 518 fmd_asru_hash_discard(fmd_asru_hash_t *ahp, const char *uuid, int err) 519 { 520 char src[PATH_MAX], dst[PATH_MAX]; 521 522 (void) snprintf(src, 
PATH_MAX, "%s/%s", ahp->ah_dirpath, uuid); 523 (void) snprintf(dst, PATH_MAX, "%s/%s-", ahp->ah_dirpath, uuid); 524 525 if (err != 0) 526 err = rename(src, dst); 527 else 528 err = unlink(src); 529 530 if (err != 0 && errno != ENOENT) 531 fmd_error(EFMD_ASRU_EVENT, "failed to rename log %s", src); 532 } 533 534 /* 535 * Open a saved log file and restore it into the ASRU hash. If we can't even 536 * open the log, rename the log file to <uuid>- to indicate it is corrupt. If 537 * fmd_log_replay() fails, we either delete the file (if it has reached the 538 * upper limit on cache age) or rename it for debugging if it was corrupted. 539 */ 540 static void 541 fmd_asru_hash_logopen(fmd_asru_hash_t *ahp, const char *uuid) 542 { 543 fmd_log_t *lp = fmd_log_tryopen(ahp->ah_dirpath, uuid, FMD_LOG_ASRU); 544 uint_t n; 545 546 if (lp == NULL) { 547 fmd_asru_hash_discard(ahp, uuid, errno); 548 return; 549 } 550 551 ahp->ah_error = 0; 552 n = ahp->ah_al_count; 553 554 fmd_log_replay(lp, (fmd_log_f *)fmd_asru_hash_recreate, ahp); 555 fmd_log_rele(lp); 556 557 if (ahp->ah_al_count == n) 558 fmd_asru_hash_discard(ahp, uuid, ahp->ah_error); 559 } 560 561 void 562 fmd_asru_hash_refresh(fmd_asru_hash_t *ahp) 563 { 564 struct dirent *dp; 565 DIR *dirp; 566 int zero; 567 568 if ((dirp = opendir(ahp->ah_dirpath)) == NULL) { 569 fmd_error(EFMD_ASRU_NODIR, 570 "failed to open asru cache directory %s", ahp->ah_dirpath); 571 return; 572 } 573 574 (void) fmd_conf_getprop(fmd.d_conf, "rsrc.zero", &zero); 575 576 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 577 578 while ((dp = readdir(dirp)) != NULL) { 579 if (dp->d_name[0] == '.') 580 continue; /* skip "." and ".." 
*/ 581 582 if (zero) 583 fmd_asru_hash_discard(ahp, dp->d_name, 0); 584 else if (!fmd_strmatch(dp->d_name, "*-")) 585 fmd_asru_hash_logopen(ahp, dp->d_name); 586 } 587 588 (void) pthread_rwlock_unlock(&ahp->ah_lock); 589 (void) closedir(dirp); 590 } 591 592 /* 593 * If the resource is present and faulty but not unusable, replay the fault 594 * event that caused it be marked faulty. This will cause the agent 595 * subscribing to this fault class to again disable the resource. 596 */ 597 /*ARGSUSED*/ 598 static void 599 fmd_asru_hash_replay_asru(fmd_asru_t *ap, void *data) 600 { 601 fmd_event_t *e; 602 nvlist_t *nvl; 603 char *class; 604 605 if (ap->asru_event != NULL && (ap->asru_flags & (FMD_ASRU_STATE | 606 FMD_ASRU_PRESENT)) == (FMD_ASRU_FAULTY | FMD_ASRU_PRESENT)) { 607 608 fmd_dprintf(FMD_DBG_ASRU, 609 "replaying fault event for %s", ap->asru_name); 610 611 (void) nvlist_xdup(ap->asru_event, &nvl, &fmd.d_nva); 612 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 613 614 (void) nvlist_add_string(nvl, FMD_EVN_UUID, 615 ((fmd_case_impl_t *)ap->asru_case)->ci_uuid); 616 617 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 618 fmd_dispq_dispatch(fmd.d_disp, e, class); 619 } 620 } 621 622 void 623 fmd_asru_hash_replay(fmd_asru_hash_t *ahp) 624 { 625 fmd_asru_hash_apply(ahp, fmd_asru_hash_replay_asru, NULL); 626 } 627 628 /* 629 * Check if the resource is still present. If not, and if the rsrc.age time 630 * has expired, then do an implicit repair on the resource. 
631 */ 632 static void 633 fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *er) 634 { 635 struct timeval tv; 636 fmd_log_t *lp; 637 hrtime_t hrt; 638 639 if (fmd_asru_is_present(alp->al_event)) 640 return; 641 fmd_time_gettimeofday(&tv); 642 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, FMD_LOG_ASRU); 643 hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime); 644 fmd_log_rele(lp); 645 if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime) 646 fmd_asru_repair(alp, er); 647 } 648 649 void 650 fmd_asru_clear_aged_rsrcs() 651 { 652 int err; 653 654 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, &err); 655 } 656 657 fmd_asru_hash_t * 658 fmd_asru_hash_create(const char *root, const char *dir) 659 { 660 fmd_asru_hash_t *ahp; 661 char path[PATH_MAX]; 662 663 ahp = fmd_alloc(sizeof (fmd_asru_hash_t), FMD_SLEEP); 664 (void) pthread_rwlock_init(&ahp->ah_lock, NULL); 665 ahp->ah_hashlen = fmd.d_str_buckets; 666 ahp->ah_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, FMD_SLEEP); 667 ahp->ah_asru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 668 FMD_SLEEP); 669 ahp->ah_case_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 670 FMD_SLEEP); 671 ahp->ah_fru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 672 FMD_SLEEP); 673 ahp->ah_label_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 674 FMD_SLEEP); 675 ahp->ah_rsrc_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 676 FMD_SLEEP); 677 (void) snprintf(path, sizeof (path), "%s/%s", root, dir); 678 ahp->ah_dirpath = fmd_strdup(path, FMD_SLEEP); 679 (void) fmd_conf_getprop(fmd.d_conf, "rsrc.age", &ahp->ah_lifetime); 680 (void) fmd_conf_getprop(fmd.d_conf, "fakenotpresent", 681 (uint32_t *)&fmd_asru_fake_not_present); 682 ahp->ah_al_count = 0; 683 ahp->ah_count = 0; 684 ahp->ah_error = 0; 685 ahp->ah_topo = fmd_topo_hold(); 686 687 return (ahp); 688 } 689 690 void 691 fmd_asru_hash_destroy(fmd_asru_hash_t *ahp) 692 { 693 fmd_asru_link_t *alp, *np; 694 uint_t i; 695 696 for (i = 0; i < 
ahp->ah_hashlen; i++) { 697 for (alp = ahp->ah_case_hash[i]; alp != NULL; alp = np) { 698 np = alp->al_case_next; 699 alp->al_case_next = NULL; 700 fmd_case_rele(alp->al_case); 701 alp->al_case = NULL; 702 fmd_asru_al_hash_release(ahp, alp); 703 } 704 } 705 706 fmd_strfree(ahp->ah_dirpath); 707 fmd_free(ahp->ah_hash, sizeof (void *) * ahp->ah_hashlen); 708 fmd_free(ahp->ah_asru_hash, sizeof (void *) * ahp->ah_hashlen); 709 fmd_free(ahp->ah_case_hash, sizeof (void *) * ahp->ah_hashlen); 710 fmd_free(ahp->ah_fru_hash, sizeof (void *) * ahp->ah_hashlen); 711 fmd_free(ahp->ah_label_hash, sizeof (void *) * ahp->ah_hashlen); 712 fmd_free(ahp->ah_rsrc_hash, sizeof (void *) * ahp->ah_hashlen); 713 fmd_topo_rele(ahp->ah_topo); 714 fmd_free(ahp, sizeof (fmd_asru_hash_t)); 715 } 716 717 /* 718 * Take a snapshot of the ASRU database by placing an additional hold on each 719 * member in an auxiliary array, and then call 'func' for each ASRU. 720 */ 721 void 722 fmd_asru_hash_apply(fmd_asru_hash_t *ahp, 723 void (*func)(fmd_asru_t *, void *), void *arg) 724 { 725 fmd_asru_t *ap, **aps, **app; 726 uint_t apc, i; 727 728 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 729 730 aps = app = fmd_alloc(ahp->ah_count * sizeof (fmd_asru_t *), FMD_SLEEP); 731 apc = ahp->ah_count; 732 733 for (i = 0; i < ahp->ah_hashlen; i++) { 734 for (ap = ahp->ah_hash[i]; ap != NULL; ap = ap->asru_next) 735 *app++ = fmd_asru_hold(ap); 736 } 737 738 ASSERT(app == aps + apc); 739 (void) pthread_rwlock_unlock(&ahp->ah_lock); 740 741 for (i = 0; i < apc; i++) { 742 if (aps[i]->asru_fmri != NULL) 743 func(aps[i], arg); 744 fmd_asru_hash_release(ahp, aps[i]); 745 } 746 747 fmd_free(aps, apc * sizeof (fmd_asru_t *)); 748 } 749 750 void 751 fmd_asru_al_hash_apply(fmd_asru_hash_t *ahp, 752 void (*func)(fmd_asru_link_t *, void *), void *arg) 753 { 754 fmd_asru_link_t *alp, **alps, **alpp; 755 uint_t alpc, i; 756 757 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 758 759 alps = alpp = fmd_alloc(ahp->ah_al_count * 
sizeof (fmd_asru_link_t *), 760 FMD_SLEEP); 761 alpc = ahp->ah_al_count; 762 763 for (i = 0; i < ahp->ah_hashlen; i++) { 764 for (alp = ahp->ah_case_hash[i]; alp != NULL; 765 alp = alp->al_case_next) 766 *alpp++ = fmd_asru_al_hold(alp); 767 } 768 769 ASSERT(alpp == alps + alpc); 770 (void) pthread_rwlock_unlock(&ahp->ah_lock); 771 772 for (i = 0; i < alpc; i++) { 773 func(alps[i], arg); 774 fmd_asru_al_hash_release(ahp, alps[i]); 775 } 776 777 fmd_free(alps, alpc * sizeof (fmd_asru_link_t *)); 778 } 779 780 static void 781 fmd_asru_do_hash_apply(fmd_asru_hash_t *ahp, char *name, 782 void (*func)(fmd_asru_link_t *, void *), void *arg, 783 fmd_asru_link_t **hash, size_t match_offset, size_t next_offset) 784 { 785 fmd_asru_link_t *alp, **alps, **alpp; 786 uint_t alpc = 0, i; 787 uint_t h; 788 789 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 790 791 h = fmd_asru_strhash(ahp, name); 792 793 for (alp = hash[h]; alp != NULL; alp = 794 /* LINTED pointer alignment */ 795 FMD_ASRU_AL_HASH_NEXT(alp, next_offset)) 796 if (fmd_asru_strcmp(ahp, 797 /* LINTED pointer alignment */ 798 FMD_ASRU_AL_HASH_NAME(alp, match_offset), name)) 799 alpc++; 800 801 alps = alpp = fmd_alloc(alpc * sizeof (fmd_asru_link_t *), FMD_SLEEP); 802 803 for (alp = hash[h]; alp != NULL; alp = 804 /* LINTED pointer alignment */ 805 FMD_ASRU_AL_HASH_NEXT(alp, next_offset)) 806 if (fmd_asru_strcmp(ahp, 807 /* LINTED pointer alignment */ 808 FMD_ASRU_AL_HASH_NAME(alp, match_offset), name)) 809 *alpp++ = fmd_asru_al_hold(alp); 810 811 ASSERT(alpp == alps + alpc); 812 (void) pthread_rwlock_unlock(&ahp->ah_lock); 813 814 for (i = 0; i < alpc; i++) { 815 func(alps[i], arg); 816 fmd_asru_al_hash_release(ahp, alps[i]); 817 } 818 819 fmd_free(alps, alpc * sizeof (fmd_asru_link_t *)); 820 } 821 822 void 823 fmd_asru_hash_apply_by_asru(fmd_asru_hash_t *ahp, char *name, 824 void (*func)(fmd_asru_link_t *, void *), void *arg) 825 { 826 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_asru_hash, 827 
offsetof(fmd_asru_link_t, al_asru_name), 828 offsetof(fmd_asru_link_t, al_asru_next)); 829 } 830 831 void 832 fmd_asru_hash_apply_by_case(fmd_asru_hash_t *ahp, fmd_case_t *cp, 833 void (*func)(fmd_asru_link_t *, void *), void *arg) 834 { 835 fmd_asru_do_hash_apply(ahp, ((fmd_case_impl_t *)cp)->ci_uuid, func, arg, 836 ahp->ah_case_hash, offsetof(fmd_asru_link_t, al_case_uuid), 837 offsetof(fmd_asru_link_t, al_case_next)); 838 } 839 840 void 841 fmd_asru_hash_apply_by_fru(fmd_asru_hash_t *ahp, char *name, 842 void (*func)(fmd_asru_link_t *, void *), void *arg) 843 { 844 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_fru_hash, 845 offsetof(fmd_asru_link_t, al_fru_name), 846 offsetof(fmd_asru_link_t, al_fru_next)); 847 } 848 849 void 850 fmd_asru_hash_apply_by_rsrc(fmd_asru_hash_t *ahp, char *name, 851 void (*func)(fmd_asru_link_t *, void *), void *arg) 852 { 853 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_rsrc_hash, 854 offsetof(fmd_asru_link_t, al_rsrc_name), 855 offsetof(fmd_asru_link_t, al_rsrc_next)); 856 } 857 858 void 859 fmd_asru_hash_apply_by_label(fmd_asru_hash_t *ahp, char *name, 860 void (*func)(fmd_asru_link_t *, void *), void *arg) 861 { 862 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_label_hash, 863 offsetof(fmd_asru_link_t, al_label), 864 offsetof(fmd_asru_link_t, al_label_next)); 865 } 866 867 /* 868 * Lookup an asru in the hash by name and place a hold on it. If the asru is 869 * not found, no entry is created and NULL is returned. 870 */ 871 fmd_asru_t * 872 fmd_asru_hash_lookup_name(fmd_asru_hash_t *ahp, const char *name) 873 { 874 fmd_asru_t *ap; 875 876 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 877 ap = fmd_asru_hash_lookup(ahp, name); 878 (void) pthread_rwlock_unlock(&ahp->ah_lock); 879 880 return (ap); 881 } 882 883 /* 884 * Lookup an asru in the hash and place a hold on it. 
885 */ 886 fmd_asru_t * 887 fmd_asru_hash_lookup_nvl(fmd_asru_hash_t *ahp, nvlist_t *fmri) 888 { 889 fmd_asru_t *ap; 890 char *name = NULL; 891 ssize_t namelen; 892 893 if (fmd_asru_get_namestr(fmri, &name, &namelen) != 0) 894 return (NULL); 895 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 896 ap = fmd_asru_hash_lookup(ahp, name); 897 (void) pthread_rwlock_unlock(&ahp->ah_lock); 898 fmd_free(name, namelen + 1); 899 return (ap); 900 } 901 902 /* 903 * Create a resource cache entry using the fault event "nvl" for one of the 904 * suspects from the case "cp". 905 * 906 * The fault event can have the following components : FM_FAULT_ASRU, 907 * FM_FAULT_FRU, FM_FAULT_RESOURCE. These should be set by the Diagnosis Engine 908 * when calling fmd_nvl_create_fault(). In the general case, these are all 909 * optional and an entry will always be added into the cache even if one or all 910 * of these fields is missing. 911 * 912 * However, for hardware faults the recommended practice is that the fault 913 * event should always have the FM_FAULT_RESOURCE field present and that this 914 * should be represented in hc-scheme. 915 * 916 * Currently the DE should also add the FM_FAULT_ASRU and FM_FAULT_FRU fields 917 * where known, though at some future stage fmd might be able to fill these 918 * in automatically from the topology. 919 */ 920 fmd_asru_link_t * 921 fmd_asru_hash_create_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, nvlist_t *nvl) 922 { 923 char *parsed_uuid; 924 uuid_t uuid; 925 int uuidlen; 926 fmd_asru_link_t *alp; 927 928 /* 929 * Generate a UUID for the ASRU. libuuid cleverly gives us no 930 * interface for specifying or learning the buffer size. Sigh. 931 * The spec says 36 bytes but we use a tunable just to be safe. 932 */ 933 (void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &uuidlen); 934 parsed_uuid = fmd_zalloc(uuidlen + 1, FMD_SLEEP); 935 uuid_generate(uuid); 936 uuid_unparse(uuid, parsed_uuid); 937 938 /* 939 * Now create the resource cache entries. 
*/
	fmd_case_hold_locked(cp);
	alp = fmd_asru_al_create(ahp, nvl, cp, parsed_uuid);
	TRACE((FMD_DBG_ASRU, "asru %s created as %p",
	    alp->al_uuid, (void *)alp->al_asru));

	fmd_free(parsed_uuid, uuidlen + 1);
	return (alp);

}

/*
 * Release the reference count on an asru obtained using fmd_asru_hash_lookup.
 * We take 'ahp' for symmetry and in case we need to use it in future work.
 *
 * The count is decremented under ap->asru_lock.  If it drops to zero the asru
 * is handed to fmd_asru_destroy() with the lock still held (that routine
 * asserts the lock is held and that asru_refs is zero); otherwise the lock is
 * simply dropped.
 */
/*ARGSUSED*/
void
fmd_asru_hash_release(fmd_asru_hash_t *ahp, fmd_asru_t *ap)
{
	(void) pthread_mutex_lock(&ap->asru_lock);

	ASSERT(ap->asru_refs != 0);
	if (--ap->asru_refs == 0)
		fmd_asru_destroy(ap);
	else
		(void) pthread_mutex_unlock(&ap->asru_lock);
}

/*
 * Unlink, from one of the per-link hash tables, every link entry in the bucket
 * selected by 'name' whose al_case is 'cp'.
 *
 *	ahp		hash whose ah_lock is taken for writing during the walk
 *	cp		case whose link entries are to be unlinked
 *	hash		bucket array to operate on
 *	next_offset	byte offset within fmd_asru_link_t of the "next"
 *			pointer that chains entries in this particular table
 *	name		string that is hashed to pick the bucket
 *
 * Entries are only unlinked from the chain here; nothing is freed and no
 * counters are adjusted.
 */
static void
fmd_asru_do_delete_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp,
    fmd_asru_link_t **hash, size_t next_offset, char *name)
{
	uint_t h;
	fmd_asru_link_t *alp, **pp, *alpnext, **alpnextp;

	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
	h = fmd_asru_strhash(ahp, name);
	/* pp always addresses the pointer that refers to the current entry */
	pp = &hash[h];
	for (alp = *pp; alp != NULL; alp = alpnext) {
		/*
		 * Fetch the successor before any unlinking so the walk can
		 * continue after this entry's next pointer is cleared.
		 * NOTE(review): FMD_ASRU_AL_HASH_NEXTP presumably yields the
		 * address of the next-pointer field at next_offset - confirm
		 * against its definition.
		 */
		/* LINTED pointer alignment */
		alpnextp = FMD_ASRU_AL_HASH_NEXTP(alp, next_offset);
		alpnext = *alpnextp;
		if (alp->al_case == cp) {
			/* splice this entry out; pp stays where it is */
			*pp = *alpnextp;
			*alpnextp = NULL;
		} else
			pp = alpnextp;
	}
	(void) pthread_rwlock_unlock(&ahp->ah_lock);
}

/*
 * Unlink the entries for case 'cp' from 'hash', using as the bucket key the
 * string form of the FMRI stored under 'nvname' in the suspect's nvlist.
 *
 * The FMRI is converted with fmd_fmri_nvl2str(): a first call with a NULL
 * buffer obtains the length, then a buffer of namelen + 1 bytes is filled by a
 * second call.  If the nvlist member is missing, the sizing call fails, or the
 * allocation returns NULL, the empty string is used as the key instead.  If
 * only the second conversion fails, no deletion is attempted for this table
 * and the buffer is still freed.
 */
static void
fmd_asru_do_hash_delete(fmd_asru_hash_t *ahp, fmd_case_susp_t *cis,
    fmd_case_t *cp, fmd_asru_link_t **hash, size_t next_offset, char *nvname)
{
	nvlist_t *nvl;
	char *name = NULL;
	ssize_t namelen;

	if (nvlist_lookup_nvlist(cis->cis_nvl, nvname, &nvl) == 0 &&
	    (namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) != -1 &&
	    (name = fmd_alloc(namelen + 1, FMD_SLEEP)) != NULL) {
		if (fmd_fmri_nvl2str(nvl, name, namelen + 1) != -1)
			fmd_asru_do_delete_entry(ahp, cp, hash, next_offset,
			    name);
		fmd_free(name, namelen + 1);
	} else
		fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, "");
}

/*
 * Remove every link entry belonging to case 'cp' from the ASRU cache.
 *
 * For each suspect of the case the entries are first unlinked from the FRU,
 * resource, label and ASRU hash tables.  The case hash bucket for the case
 * uuid is then walked under the ah_lock write lock and, for each entry that
 * belongs to 'cp': the entry is unlinked from the case hash, ah_al_count is
 * decremented, the case reference is dropped, the on-disk log named
 * <ah_dirpath>/<al_uuid> is unlinked, the entry is removed from its asru's
 * list (removing the asru from ah_hash too if that list becomes empty), and
 * finally the link entry itself is released.
 */
void
fmd_asru_hash_delete_case(fmd_asru_hash_t *ahp, fmd_case_t *cp)
{
	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
	fmd_case_susp_t *cis;
	fmd_asru_link_t *alp, **plp, *alpnext;
	fmd_asru_t *ap;
	char path[PATH_MAX];
	char *label;
	uint_t h;

	/*
	 * first delete hash entries for each suspect
	 */
	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
		fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_fru_hash,
		    offsetof(fmd_asru_link_t, al_fru_next), FM_FAULT_FRU);
		fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_rsrc_hash,
		    offsetof(fmd_asru_link_t, al_rsrc_next), FM_FAULT_RESOURCE);
		/* the label table is keyed by a plain string, "" if absent */
		if (nvlist_lookup_string(cis->cis_nvl, FM_FAULT_LOCATION,
		    &label) != 0)
			label = "";
		fmd_asru_do_delete_entry(ahp, cp, ahp->ah_label_hash,
		    offsetof(fmd_asru_link_t, al_label_next), label);
		fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_asru_hash,
		    offsetof(fmd_asru_link_t, al_asru_next), FM_FAULT_ASRU);
	}

	/*
	 * then delete associated case hash entries
	 */
	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
	h = fmd_asru_strhash(ahp, cip->ci_uuid);
	/* plp always addresses the pointer that refers to the current entry */
	plp = &ahp->ah_case_hash[h];
	for (alp = *plp; alp != NULL; alp = alpnext) {
		/* save the successor: al_case_next is cleared on a match */
		alpnext = alp->al_case_next;
		if (alp->al_case == cp) {
			*plp = alp->al_case_next;
			alp->al_case_next = NULL;
			ASSERT(ahp->ah_al_count != 0);
			ahp->ah_al_count--;

			/*
			 * decrement case ref.
			 */
			fmd_case_rele_locked(cp);
			alp->al_case = NULL;

			/*
			 * If we found a matching ASRU, unlink its log file and
			 * then release the hash entry.  Note that it may still
			 * be referenced if another thread is manipulating it;
			 * this is ok because once we unlink, the log file will
			 * not be restored, and the log data will be freed when
			 * all of the referencing threads release their
			 * respective references.
			 */
			(void) snprintf(path, sizeof (path), "%s/%s",
			    ahp->ah_dirpath, alp->al_uuid);
			if (unlink(path) != 0)
				fmd_error(EFMD_ASRU_UNLINK,
				    "failed to unlink asru %s", path);

			/*
			 * Now unlink from the global per-resource cache
			 * and if this is the last link then remove that from
			 * its own hash too.
			 */
			ap = alp->al_asru;
			(void) pthread_mutex_lock(&ap->asru_lock);
			fmd_list_delete(&ap->asru_list, alp);
			if (ap->asru_list.l_next == NULL) {
				/*
				 * This h shadows the function-scope h, which
				 * is only used before the outer loop starts.
				 */
				uint_t h;
				fmd_asru_t *ap2, **pp;
				fmd_asru_t *apnext, **apnextp;

				ASSERT(ahp->ah_count != 0);
				ahp->ah_count--;
				h = fmd_asru_strhash(ahp, ap->asru_name);
				pp = &ahp->ah_hash[h];
				/* walk the whole bucket, splicing out ap */
				for (ap2 = *pp; ap2 != NULL; ap2 = apnext) {
					apnextp = &ap2->asru_next;
					apnext = *apnextp;
					if (ap2 == ap) {
						*pp = *apnextp;
						*apnextp = NULL;
					} else
						pp = apnextp;
				}
			}
			(void) pthread_mutex_unlock(&ap->asru_lock);
			fmd_asru_al_hash_release(ahp, alp);
		} else
			plp = &alp->al_case_next;
	}
	(void) pthread_rwlock_unlock(&ahp->ah_lock);
}

/*
 * Callback applied to each link entry by fmd_asru_repair(): 'er' is the FMRI
 * of an asru that has just been fully repaired.  If this entry has an ASRU
 * FMRI that fmd_fmri_contains() reports as contained by 'er', clear its FAULTY
 * flag and, only if the flag really changed, update the associated case.  The
 * tests rely on short-circuit evaluation, so fmd_asru_clrflags() is called
 * only for contained entries.
 */
static void
fmd_asru_repair_containee(fmd_asru_link_t *alp, void *er)
{
	if (er && alp->al_asru_fmri && fmd_fmri_contains(er,
	    alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY))
		fmd_case_update(alp->al_case);
}

/*
 * Mark the fault described by link entry 'alp' as repaired.
 *
 * 'er' is either NULL or points to an int that is set to zero here; see the
 * comment at the end of the function for how the two cases differ.
 * NOTE(review): the meaning of the zero stored through 'er' is defined by the
 * caller (presumably "no error") - confirm against fmd_adm_repair().
 */
void
fmd_asru_repair(fmd_asru_link_t *alp, void *er)
{
	int flags;
	int rval;

	/*
	 * repair this asru cache entry
	 */
	rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY);

	/*
	 * now check if all entries associated with this asru are repaired and
	 * if so repair containees
	 */
	(void) pthread_mutex_lock(&alp->al_asru->asru_lock);
	flags = alp->al_asru->asru_flags;
	(void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
	if (!(flags & FMD_ASRU_FAULTY))
		fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_containee,
		    alp->al_asru_fmri);

	/*
	 * if called from fmd_adm_repair() and we really did clear the bit then
	 * we need to do a case update to see if the associated case can be
	 * repaired.  No need to do this if called from fmd_case_repair() (ie
	 * when er is NULL) as the case will be explicitly repaired anyway.
	 */
	if (er) {
		*(int *)er = 0;
		if (rval)
			fmd_case_update(alp->al_case);
	}
}

/*
 * Append an event describing the current state of the asru to the log file of
 * link entry 'alp'.  The caller must hold the asru's asru_lock and the entry
 * must still have a case attached (both are asserted).
 *
 * The event class is picked from _fmd_asru_events[] using the asru's FAULTY
 * flag as bit 0 and its UNUSABLE flag as bit 1.  If the log cannot be opened
 * the function returns without logging anything.
 */
static void
fmd_asru_logevent(fmd_asru_link_t *alp)
{
	fmd_asru_t *ap = alp->al_asru;
	boolean_t f = (ap->asru_flags & FMD_ASRU_FAULTY) != 0;
	boolean_t u = (ap->asru_flags & FMD_ASRU_UNUSABLE) != 0;
	/* m is true when the INVISIBLE flag is clear */
	boolean_t m = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0;

	fmd_case_impl_t *cip;
	fmd_event_t *e;
	fmd_log_t *lp;
	nvlist_t *nvl;
	char *class;

	ASSERT(MUTEX_HELD(&ap->asru_lock));
	cip = (fmd_case_impl_t *)alp->al_case;
	ASSERT(cip != NULL);

	/* use the log already attached to the entry, else open it now */
	if ((lp = alp->al_log) == NULL)
		lp = fmd_log_open(ap->asru_root, alp->al_uuid, FMD_LOG_ASRU);

	if (lp == NULL)
		return; /* can't log events if we can't open the log */

	nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[f | (u << 1)],
	    alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, f, u, m,
	    alp->al_event, &cip->ci_tv);

	/*
	 * NOTE(review): the lookup result is ignored, so 'class' is only set
	 * if the protocol event carries FM_CLASS - presumably always true for
	 * events built by fmd_protocol_rsrc_asru(); confirm.
	 */
	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);

	/* hold the event across the append, then drop our hold */
	fmd_event_hold(e);
	fmd_log_append(lp, e, NULL);
	fmd_event_rele(e);

	/*
	 * For now, we close the log file after every update to conserve file
	 * descriptors and daemon overhead.  If this becomes a performance
	 * issue this code can change to keep a fixed-size LRU cache of logs.
	 */
	fmd_log_rele(lp);
	alp->al_log = NULL;
}

/*
 * Set state flag 'sflag' on link entry 'alp' and on its asru.
 *
 * 'sflag' must consist only of FMD_ASRU_STATE bits and must not be the full
 * FMD_ASRU_STATE mask (both asserted).  Returns 0 if the entry's state bits
 * were already set as requested, in which case nothing else happens.
 * Otherwise the flag is also set on the asru, the new state is logged, waiters
 * on asru_cv are woken, and 1 is returned.  All of this is done under the
 * asru's asru_lock.
 */
int
fmd_asru_setflags(fmd_asru_link_t *alp, uint_t sflag)
{
	fmd_asru_t *ap = alp->al_asru;
	uint_t nstate, ostate;

	ASSERT(!(sflag & ~FMD_ASRU_STATE));
	ASSERT(sflag != FMD_ASRU_STATE);

	(void) pthread_mutex_lock(&ap->asru_lock);

	ostate = alp->al_flags & FMD_ASRU_STATE;
	alp->al_flags |= sflag;
	nstate = alp->al_flags & FMD_ASRU_STATE;

	/* no state transition on this entry: nothing to log or signal */
	if (nstate == ostate) {
		(void) pthread_mutex_unlock(&ap->asru_lock);
		return (0);
	}

	ap->asru_flags |= sflag;
	TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid,
	    _fmd_asru_snames[ostate], _fmd_asru_snames[nstate]));

	fmd_asru_logevent(alp);

	(void) pthread_cond_broadcast(&ap->asru_cv);
	(void) pthread_mutex_unlock(&ap->asru_lock);
	return (1);
}

/*
 * Clear state flag 'sflag' on link entry 'alp' and, where appropriate, on its
 * asru.
 *
 * 'sflag' must consist only of FMD_ASRU_STATE bits and must not be the full
 * FMD_ASRU_STATE mask (both asserted).  Returns 0 if the entry's state bits
 * were already clear as requested, in which case nothing else happens.
 * Otherwise FMD_ASRU_UNUSABLE is cleared on the asru unconditionally, while
 * FMD_ASRU_FAULTY is cleared on the asru only when no entry on its asru_list
 * still has the flag set; the new state is then logged, waiters on asru_cv are
 * woken, and 1 is returned.  All of this is done under the asru's asru_lock.
 */
int
fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag)
{
	fmd_asru_t *ap = alp->al_asru;
	fmd_asru_link_t *nalp;
	uint_t nstate, ostate, flags = 0;

	ASSERT(!(sflag & ~FMD_ASRU_STATE));
	ASSERT(sflag != FMD_ASRU_STATE);

	(void) pthread_mutex_lock(&ap->asru_lock);

	ostate = alp->al_flags & FMD_ASRU_STATE;
	alp->al_flags &= ~sflag;
	nstate = alp->al_flags & FMD_ASRU_STATE;

	/* no state transition on this entry: nothing to log or signal */
	if (nstate == ostate) {
		(void) pthread_mutex_unlock(&ap->asru_lock);
		return (0);
	}

	if (sflag == FMD_ASRU_UNUSABLE)
		ap->asru_flags &= ~sflag;
	else if (sflag == FMD_ASRU_FAULTY) {
		/*
		 * only clear the faulty bit if all links are clear
		 */
		for (nalp = fmd_list_next(&ap->asru_list); nalp != NULL;
		    nalp = fmd_list_next(nalp))
			flags |= nalp->al_flags;
		if (!(flags & FMD_ASRU_FAULTY))
			ap->asru_flags &= ~sflag;
	}

	TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid,
	    _fmd_asru_snames[ostate], _fmd_asru_snames[nstate]));

	fmd_asru_logevent(alp);

	(void) pthread_cond_broadcast(&ap->asru_cv);
	(void) pthread_mutex_unlock(&ap->asru_lock);

	return (1);
}

/*
 * Report the current known state of the link entry (ie this particular fault
 * affecting this particular ASRU).
 *
 * If fmd_asru_is_present() returns 0 for the entry's event, the result is the
 * entry's FAULTY bit plus FMD_ASRU_UNUSABLE, without FMD_ASRU_PRESENT.
 * Otherwise the result is the entry's state bits plus FMD_ASRU_PRESENT, with
 * the UNUSABLE bit refreshed from fmd_fmri_unusable() on the event's ASRU
 * member when there is one, or taken from the entry's own flag when there is
 * not.  A negative result from fmd_fmri_unusable() leaves the recorded
 * UNUSABLE bit as it was.
 */
int
fmd_asru_al_getstate(fmd_asru_link_t *alp)
{
	int us, st;
	nvlist_t *asru;

	if (fmd_asru_is_present(alp->al_event) == 0)
		return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE);

	if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0)
		us = fmd_fmri_unusable(asru);
	else
		us = (alp->al_flags & FMD_ASRU_UNUSABLE);
	st = (alp->al_flags & FMD_ASRU_STATE) | FMD_ASRU_PRESENT;
	if (us > 0)
		st |= FMD_ASRU_UNUSABLE;
	else if (us == 0)
		st &= ~FMD_ASRU_UNUSABLE;
	return (st);
}

/*
 * Report the current known state of the ASRU by refreshing its unusable status
 * based upon the routines provided by the scheme module.  If the unusable bit
 * is different, we do *not* generate a state change here because that change
 * may be unrelated to fmd activities and therefore we have no case or event.
 * The absence of the transition is harmless as this function is only provided
 * for RPC observability and fmd's clients are only concerned with ASRU_FAULTY.
 *
 * Returns 0 for an asru without FMD_ASRU_INTERNAL set when either
 * fmd_asru_fake_not_present is non-zero or fmd_fmri_present() returns a value
 * <= 0.  Otherwise returns the asru's state bits with the UNUSABLE bit set
 * when fmd_fmri_unusable() returns > 0, cleared when it returns 0, and left
 * as recorded when it returns a negative value.
 */
int
fmd_asru_getstate(fmd_asru_t *ap)
{
	int us, st;

	if (!(ap->asru_flags & FMD_ASRU_INTERNAL) &&
	    (fmd_asru_fake_not_present || fmd_fmri_present(ap->asru_fmri) <= 0))
		return (0); /* do not report non-fmd non-present resources */

	us = fmd_fmri_unusable(ap->asru_fmri);
	st = ap->asru_flags & FMD_ASRU_STATE;

	if (us > 0)
		st |= FMD_ASRU_UNUSABLE;
	else if (us == 0)
		st &= ~FMD_ASRU_UNUSABLE;

	return (st);
}