1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/fm/protocol.h> 28 #include <uuid/uuid.h> 29 30 #include <dirent.h> 31 #include <limits.h> 32 #include <unistd.h> 33 #include <alloca.h> 34 #include <stddef.h> 35 #include <fm/libtopo.h> 36 37 #include <fmd_alloc.h> 38 #include <fmd_string.h> 39 #include <fmd_error.h> 40 #include <fmd_subr.h> 41 #include <fmd_protocol.h> 42 #include <fmd_event.h> 43 #include <fmd_conf.h> 44 #include <fmd_fmri.h> 45 #include <fmd_dispq.h> 46 #include <fmd_case.h> 47 #include <fmd_module.h> 48 #include <fmd_asru.h> 49 50 #include <fmd.h> 51 52 static const char *const _fmd_asru_events[] = { 53 FMD_RSRC_CLASS "asru.ok", /* UNUSABLE=0 FAULTED=0 */ 54 FMD_RSRC_CLASS "asru.degraded", /* UNUSABLE=0 FAULTED=1 */ 55 FMD_RSRC_CLASS "asru.unknown", /* UNUSABLE=1 FAULTED=0 */ 56 FMD_RSRC_CLASS "asru.faulted" /* UNUSABLE=1 FAULTED=1 */ 57 }; 58 59 static const char *const _fmd_asru_snames[] = { 60 "uf", "uF", "Uf", "UF" /* same order as above */ 61 }; 62 63 volatile uint32_t fmd_asru_fake_not_present = 0; 64 65 static uint_t 
66 fmd_asru_strhash(fmd_asru_hash_t *ahp, const char *val) 67 { 68 return (topo_fmri_strhash(ahp->ah_topo->ft_hdl, val) % ahp->ah_hashlen); 69 } 70 71 static boolean_t 72 fmd_asru_strcmp(fmd_asru_hash_t *ahp, const char *a, const char *b) 73 { 74 return (topo_fmri_strcmp(ahp->ah_topo->ft_hdl, a, b)); 75 } 76 77 static fmd_asru_t * 78 fmd_asru_create(fmd_asru_hash_t *ahp, const char *uuid, 79 const char *name, nvlist_t *fmri) 80 { 81 fmd_asru_t *ap = fmd_zalloc(sizeof (fmd_asru_t), FMD_SLEEP); 82 char *s; 83 84 (void) pthread_mutex_init(&ap->asru_lock, NULL); 85 (void) pthread_cond_init(&ap->asru_cv, NULL); 86 87 ap->asru_name = fmd_strdup(name, FMD_SLEEP); 88 if (fmri) 89 (void) nvlist_xdup(fmri, &ap->asru_fmri, &fmd.d_nva); 90 ap->asru_root = fmd_strdup(ahp->ah_dirpath, FMD_SLEEP); 91 ap->asru_uuid = fmd_strdup(uuid, FMD_SLEEP); 92 ap->asru_uuidlen = ap->asru_uuid ? strlen(ap->asru_uuid) : 0; 93 ap->asru_refs = 1; 94 95 if (fmri && nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &s) == 0 && 96 strcmp(s, FM_FMRI_SCHEME_FMD) == 0) 97 ap->asru_flags |= FMD_ASRU_INTERNAL; 98 99 return (ap); 100 } 101 102 static void 103 fmd_asru_destroy(fmd_asru_t *ap) 104 { 105 ASSERT(MUTEX_HELD(&ap->asru_lock)); 106 ASSERT(ap->asru_refs == 0); 107 108 nvlist_free(ap->asru_event); 109 fmd_strfree(ap->asru_name); 110 nvlist_free(ap->asru_fmri); 111 fmd_strfree(ap->asru_root); 112 fmd_free(ap->asru_uuid, ap->asru_uuidlen + 1); 113 fmd_free(ap, sizeof (fmd_asru_t)); 114 } 115 116 static void 117 fmd_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_t *ap) 118 { 119 uint_t h = fmd_asru_strhash(ahp, ap->asru_name); 120 121 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 122 ap->asru_next = ahp->ah_hash[h]; 123 ahp->ah_hash[h] = ap; 124 ahp->ah_count++; 125 } 126 127 static fmd_asru_t * 128 fmd_asru_hold(fmd_asru_t *ap) 129 { 130 (void) pthread_mutex_lock(&ap->asru_lock); 131 ap->asru_refs++; 132 ASSERT(ap->asru_refs != 0); 133 (void) pthread_mutex_unlock(&ap->asru_lock); 134 return (ap); 135 } 136 137 /* 
138 * Lookup an asru in the hash by name and place a hold on it. If the asru is 139 * not found, no entry is created and NULL is returned. This internal function 140 * is for callers who have the ah_lock held and is used by lookup_name below. 141 */ 142 fmd_asru_t * 143 fmd_asru_hash_lookup(fmd_asru_hash_t *ahp, const char *name) 144 { 145 fmd_asru_t *ap; 146 uint_t h; 147 148 ASSERT(RW_LOCK_HELD(&ahp->ah_lock)); 149 h = fmd_asru_strhash(ahp, name); 150 151 for (ap = ahp->ah_hash[h]; ap != NULL; ap = ap->asru_next) { 152 if (fmd_asru_strcmp(ahp, ap->asru_name, name)) 153 break; 154 } 155 156 if (ap != NULL) 157 (void) fmd_asru_hold(ap); 158 else 159 (void) fmd_set_errno(EFMD_ASRU_NOENT); 160 161 return (ap); 162 } 163 164 #define HC_ONLY_FALSE 0 165 #define HC_ONLY_TRUE 1 166 167 static int 168 fmd_asru_replacement_state(nvlist_t *event, int hc_only) 169 { 170 int ps = -1; 171 nvlist_t *asru, *fru, *rsrc; 172 char *s; 173 174 /* 175 * Check if there is evidence that this object is no longer present. 176 * In general fmd_fmri_present() should be supported on resources and/or 177 * frus, as those are the things that are physically present or not 178 * present - an asru can be spread over a number of frus some of which 179 * are present and some not, so fmd_fmri_present() is not generally 180 * meaningful. However retain a check for asru first for compatibility. 181 * If we have checked all three and we still get -1 then nothing knows 182 * whether it's present or not, so err on the safe side and treat it 183 * as still present. 184 * 185 * Note that if hc_only is set, then we only check status using fmris 186 * that are in hc-scheme. 
187 */ 188 if (fmd_asru_fake_not_present) 189 return (fmd_asru_fake_not_present); 190 if (nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0 && 191 (hc_only == HC_ONLY_FALSE || (nvlist_lookup_string(asru, 192 FM_FMRI_SCHEME, &s) == 0 && strcmp(s, FM_FMRI_SCHEME_HC) == 0))) 193 ps = fmd_fmri_replaced(asru); 194 if (ps == -1 || ps == FMD_OBJ_STATE_UNKNOWN) { 195 if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, 196 &rsrc) == 0 && (hc_only == HC_ONLY_FALSE || 197 (nvlist_lookup_string(rsrc, FM_FMRI_SCHEME, &s) == 0 && 198 strcmp(s, FM_FMRI_SCHEME_HC) == 0))) { 199 if (ps == -1) { 200 ps = fmd_fmri_replaced(rsrc); 201 } else { 202 /* see if we can improve on UNKNOWN */ 203 int ps2 = fmd_fmri_replaced(rsrc); 204 if (ps2 == FMD_OBJ_STATE_STILL_PRESENT || 205 ps2 == FMD_OBJ_STATE_REPLACED) 206 ps = ps2; 207 } 208 } 209 } 210 if (ps == -1 || ps == FMD_OBJ_STATE_UNKNOWN) { 211 if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0 && 212 (hc_only == HC_ONLY_FALSE || (nvlist_lookup_string(fru, 213 FM_FMRI_SCHEME, &s) == 0 && 214 strcmp(s, FM_FMRI_SCHEME_HC) == 0))) { 215 if (ps == -1) { 216 ps = fmd_fmri_replaced(fru); 217 } else { 218 /* see if we can improve on UNKNOWN */ 219 int ps2 = fmd_fmri_replaced(fru); 220 if (ps2 == FMD_OBJ_STATE_STILL_PRESENT || 221 ps2 == FMD_OBJ_STATE_REPLACED) 222 ps = ps2; 223 } 224 } 225 } 226 if (ps == -1) 227 ps = FMD_OBJ_STATE_UNKNOWN; 228 return (ps); 229 } 230 231 static void 232 fmd_asru_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 233 char *name) 234 { 235 uint_t h = fmd_asru_strhash(ahp, name); 236 237 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 238 alp->al_asru_next = ahp->ah_asru_hash[h]; 239 ahp->ah_asru_hash[h] = alp; 240 ahp->ah_al_count++; 241 } 242 243 static void 244 fmd_asru_case_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 245 char *name) 246 { 247 uint_t h = fmd_asru_strhash(ahp, name); 248 249 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 250 alp->al_case_next = ahp->ah_case_hash[h]; 251 
ahp->ah_case_hash[h] = alp; 252 } 253 254 static void 255 fmd_asru_fru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, char *name) 256 { 257 uint_t h = fmd_asru_strhash(ahp, name); 258 259 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 260 alp->al_fru_next = ahp->ah_fru_hash[h]; 261 ahp->ah_fru_hash[h] = alp; 262 } 263 264 static void 265 fmd_asru_label_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 266 char *name) 267 { 268 uint_t h = fmd_asru_strhash(ahp, name); 269 270 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 271 alp->al_label_next = ahp->ah_label_hash[h]; 272 ahp->ah_label_hash[h] = alp; 273 } 274 275 static void 276 fmd_asru_rsrc_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 277 char *name) 278 { 279 uint_t h = fmd_asru_strhash(ahp, name); 280 281 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 282 alp->al_rsrc_next = ahp->ah_rsrc_hash[h]; 283 ahp->ah_rsrc_hash[h] = alp; 284 } 285 286 static void 287 fmd_asru_al_destroy(fmd_asru_link_t *alp) 288 { 289 ASSERT(alp->al_refs == 0); 290 ASSERT(MUTEX_HELD(&alp->al_asru->asru_lock)); 291 292 if (alp->al_log != NULL) 293 fmd_log_rele(alp->al_log); 294 295 fmd_free(alp->al_uuid, alp->al_uuidlen + 1); 296 nvlist_free(alp->al_event); 297 fmd_strfree(alp->al_rsrc_name); 298 fmd_strfree(alp->al_case_uuid); 299 fmd_strfree(alp->al_fru_name); 300 fmd_strfree(alp->al_asru_name); 301 fmd_strfree(alp->al_label); 302 nvlist_free(alp->al_asru_fmri); 303 fmd_free(alp, sizeof (fmd_asru_link_t)); 304 } 305 306 static fmd_asru_link_t * 307 fmd_asru_al_hold(fmd_asru_link_t *alp) 308 { 309 fmd_asru_t *ap = alp->al_asru; 310 311 (void) pthread_mutex_lock(&ap->asru_lock); 312 ap->asru_refs++; 313 alp->al_refs++; 314 ASSERT(alp->al_refs != 0); 315 (void) pthread_mutex_unlock(&ap->asru_lock); 316 return (alp); 317 } 318 319 static void fmd_asru_destroy(fmd_asru_t *ap); 320 321 /*ARGSUSED*/ 322 static void 323 fmd_asru_al_hash_release(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp) 324 { 325 fmd_asru_t *ap = alp->al_asru; 326 327 (void) 
pthread_mutex_lock(&ap->asru_lock); 328 ASSERT(alp->al_refs != 0); 329 if (--alp->al_refs == 0) 330 fmd_asru_al_destroy(alp); 331 ASSERT(ap->asru_refs != 0); 332 if (--ap->asru_refs == 0) 333 fmd_asru_destroy(ap); 334 else 335 (void) pthread_mutex_unlock(&ap->asru_lock); 336 } 337 338 static int 339 fmd_asru_get_namestr(nvlist_t *nvl, char **name, ssize_t *namelen) 340 { 341 if ((*namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) == -1) 342 return (EFMD_ASRU_FMRI); 343 *name = fmd_alloc(*namelen + 1, FMD_SLEEP); 344 if (fmd_fmri_nvl2str(nvl, *name, *namelen + 1) == -1) { 345 if (*name != NULL) 346 fmd_free(*name, *namelen + 1); 347 return (EFMD_ASRU_FMRI); 348 } 349 return (0); 350 } 351 352 static fmd_asru_link_t * 353 fmd_asru_al_create(fmd_asru_hash_t *ahp, nvlist_t *nvl, fmd_case_t *cp, 354 const char *al_uuid) 355 { 356 nvlist_t *asru = NULL, *fru, *rsrc; 357 int got_rsrc = 0, got_asru = 0, got_fru = 0; 358 ssize_t fru_namelen, rsrc_namelen, asru_namelen; 359 char *asru_name, *rsrc_name, *fru_name, *name, *label; 360 fmd_asru_link_t *alp; 361 fmd_asru_t *ap; 362 boolean_t msg; 363 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 364 365 if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &asru) == 0 && 366 fmd_asru_get_namestr(asru, &asru_name, &asru_namelen) == 0) 367 got_asru = 1; 368 if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &fru) == 0 && 369 fmd_asru_get_namestr(fru, &fru_name, &fru_namelen) == 0) 370 got_fru = 1; 371 if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0 && 372 fmd_asru_get_namestr(rsrc, &rsrc_name, &rsrc_namelen) == 0) 373 got_rsrc = 1; 374 if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, &label) != 0) 375 label = ""; 376 377 /* 378 * Grab the rwlock as a writer; Then create and insert the asru with 379 * ahp->ah_lock held and hash it in. We'll then drop the rwlock and 380 * proceed to initializing the asru. 381 */ 382 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 383 384 /* 385 * Create and initialise the per-fault "link" structure. 
386 */ 387 alp = fmd_zalloc(sizeof (fmd_asru_link_t), FMD_SLEEP); 388 if (got_asru) 389 (void) nvlist_xdup(asru, &alp->al_asru_fmri, &fmd.d_nva); 390 alp->al_uuid = fmd_strdup(al_uuid, FMD_SLEEP); 391 alp->al_uuidlen = strlen(alp->al_uuid); 392 alp->al_refs = 1; 393 394 /* 395 * If this is the first fault for this asru, then create the per-asru 396 * structure and link into the hash. 397 */ 398 name = got_asru ? asru_name : ""; 399 if ((ap = fmd_asru_hash_lookup(ahp, name)) == NULL) { 400 ap = fmd_asru_create(ahp, al_uuid, name, got_asru ? asru : 401 NULL); 402 fmd_asru_hash_insert(ahp, ap); 403 } else 404 nvlist_free(ap->asru_event); 405 (void) nvlist_xdup(nvl, &ap->asru_event, &fmd.d_nva); 406 407 /* 408 * Put the link structure on the list associated with the per-asru 409 * structure. Then put the link structure on the various hashes. 410 */ 411 fmd_list_append(&ap->asru_list, (fmd_list_t *)alp); 412 alp->al_asru = ap; 413 alp->al_asru_name = got_asru ? asru_name : fmd_strdup("", FMD_SLEEP); 414 fmd_asru_asru_hash_insert(ahp, alp, alp->al_asru_name); 415 alp->al_fru_name = got_fru ? fru_name : fmd_strdup("", FMD_SLEEP); 416 fmd_asru_fru_hash_insert(ahp, alp, alp->al_fru_name); 417 alp->al_rsrc_name = got_rsrc ? 
rsrc_name : fmd_strdup("", FMD_SLEEP); 418 fmd_asru_rsrc_hash_insert(ahp, alp, alp->al_rsrc_name); 419 alp->al_label = fmd_strdup(label, FMD_SLEEP); 420 fmd_asru_label_hash_insert(ahp, alp, label); 421 alp->al_case_uuid = fmd_strdup(cip->ci_uuid, FMD_SLEEP); 422 fmd_asru_case_hash_insert(ahp, alp, cip->ci_uuid); 423 (void) pthread_mutex_lock(&ap->asru_lock); 424 (void) pthread_rwlock_unlock(&ahp->ah_lock); 425 426 ap->asru_case = alp->al_case = cp; 427 if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE, &msg) == 0 && 428 msg == B_FALSE) 429 ap->asru_flags |= FMD_ASRU_INVISIBLE; 430 (void) nvlist_xdup(nvl, &alp->al_event, &fmd.d_nva); 431 ap->asru_flags |= FMD_ASRU_VALID; 432 (void) pthread_cond_broadcast(&ap->asru_cv); 433 (void) pthread_mutex_unlock(&ap->asru_lock); 434 return (alp); 435 } 436 437 static void 438 fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp) 439 { 440 nvlist_t *nvl = FMD_EVENT_NVL(ep); 441 boolean_t faulty = FMD_B_FALSE, unusable = FMD_B_FALSE; 442 int ps; 443 boolean_t repaired = FMD_B_FALSE, replaced = FMD_B_FALSE; 444 boolean_t acquitted = FMD_B_FALSE, resolved = FMD_B_FALSE; 445 nvlist_t *flt, *flt_copy, *asru; 446 char *case_uuid = NULL, *case_code = NULL; 447 fmd_asru_t *ap; 448 fmd_asru_link_t *alp; 449 fmd_case_t *cp; 450 int64_t *diag_time; 451 nvlist_t *de_fmri, *de_fmri_dup; 452 uint_t nelem; 453 topo_hdl_t *thp; 454 char *class; 455 nvlist_t *rsrc; 456 int err; 457 458 /* 459 * Extract the most recent values of 'faulty' from the event log. 
460 */ 461 if (nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY, 462 &faulty) != 0) { 463 fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: " 464 "invalid event log record\n", lp->log_name); 465 ahp->ah_error = EFMD_ASRU_EVENT; 466 return; 467 } 468 if (nvlist_lookup_nvlist(nvl, FM_RSRC_ASRU_EVENT, &flt) != 0) { 469 fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: " 470 "invalid event log record\n", lp->log_name); 471 ahp->ah_error = EFMD_ASRU_EVENT; 472 return; 473 } 474 (void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_UUID, &case_uuid); 475 (void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_CODE, &case_code); 476 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_UNUSABLE, 477 &unusable); 478 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPAIRED, 479 &repaired); 480 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPLACED, 481 &replaced); 482 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_ACQUITTED, 483 &acquitted); 484 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_RESOLVED, 485 &resolved); 486 487 /* 488 * Attempt to recreate the case in CLOSED, REPAIRED or RESOLVED state 489 * (depending on whether the faulty/resolved bits are set). 490 * If the case is already present, fmd_case_recreate() will return it. 491 * If not, we'll create a new orphaned case. Either way, we use the 492 * ASRU event to insert a suspect into the partially-restored case. 493 */ 494 fmd_module_lock(fmd.d_rmod); 495 cp = fmd_case_recreate(fmd.d_rmod, NULL, faulty ? FMD_CASE_CLOSED : 496 resolved ? 
FMD_CASE_RESOLVED : FMD_CASE_REPAIRED, case_uuid, 497 case_code); 498 fmd_case_hold(cp); 499 fmd_module_unlock(fmd.d_rmod); 500 if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time, 501 &nelem) == 0 && nelem >= 2) 502 fmd_case_settime(cp, diag_time[0], diag_time[1]); 503 else 504 fmd_case_settime(cp, lp->log_stat.st_ctime, 0); 505 if (nvlist_lookup_nvlist(nvl, FM_SUSPECT_DE, &de_fmri) == 0) { 506 (void) nvlist_xdup(de_fmri, &de_fmri_dup, &fmd.d_nva); 507 fmd_case_set_de_fmri(cp, de_fmri_dup); 508 } 509 (void) nvlist_xdup(flt, &flt_copy, &fmd.d_nva); 510 511 /* 512 * For faults with a resource, re-evaluate the asru from the resource. 513 */ 514 thp = fmd_fmri_topo_hold(TOPO_VERSION); 515 if (nvlist_lookup_string(flt_copy, FM_CLASS, &class) == 0 && 516 strncmp(class, "fault", 5) == 0 && 517 nvlist_lookup_nvlist(flt_copy, FM_FAULT_RESOURCE, &rsrc) == 0 && 518 rsrc != NULL && topo_fmri_asru(thp, rsrc, &asru, &err) == 0) { 519 (void) nvlist_remove(flt_copy, FM_FAULT_ASRU, DATA_TYPE_NVLIST); 520 (void) nvlist_add_nvlist(flt_copy, FM_FAULT_ASRU, asru); 521 nvlist_free(asru); 522 } 523 fmd_fmri_topo_rele(thp); 524 525 (void) nvlist_xdup(flt_copy, &flt, &fmd.d_nva); 526 527 fmd_case_recreate_suspect(cp, flt_copy); 528 529 /* 530 * Now create the resource cache entries. 531 */ 532 alp = fmd_asru_al_create(ahp, flt, cp, fmd_strbasename(lp->log_name)); 533 ap = alp->al_asru; 534 535 /* 536 * Check to see if the resource is still present in the system. 
537 */ 538 ps = fmd_asru_replacement_state(flt, HC_ONLY_FALSE); 539 if (ps == FMD_OBJ_STATE_REPLACED) { 540 replaced = FMD_B_TRUE; 541 } else if (ps == FMD_OBJ_STATE_STILL_PRESENT || 542 ps == FMD_OBJ_STATE_UNKNOWN) { 543 ap->asru_flags |= FMD_ASRU_PRESENT; 544 if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, 545 &asru) == 0) { 546 int us; 547 548 switch (fmd_fmri_service_state(asru)) { 549 case FMD_SERVICE_STATE_UNUSABLE: 550 unusable = FMD_B_TRUE; 551 break; 552 case FMD_SERVICE_STATE_OK: 553 case FMD_SERVICE_STATE_ISOLATE_PENDING: 554 case FMD_SERVICE_STATE_DEGRADED: 555 unusable = FMD_B_FALSE; 556 break; 557 case FMD_SERVICE_STATE_UNKNOWN: 558 case -1: 559 /* not supported by scheme */ 560 us = fmd_fmri_unusable(asru); 561 if (us > 0) 562 unusable = FMD_B_TRUE; 563 else if (us == 0) 564 unusable = FMD_B_FALSE; 565 break; 566 } 567 } 568 } 569 570 nvlist_free(flt); 571 572 ap->asru_flags |= FMD_ASRU_RECREATED; 573 if (faulty) { 574 alp->al_flags |= FMD_ASRU_FAULTY; 575 ap->asru_flags |= FMD_ASRU_FAULTY; 576 } 577 if (unusable) { 578 alp->al_flags |= FMD_ASRU_UNUSABLE; 579 ap->asru_flags |= FMD_ASRU_UNUSABLE; 580 } 581 if (replaced) 582 alp->al_reason = FMD_ASRU_REPLACED; 583 else if (repaired) 584 alp->al_reason = FMD_ASRU_REPAIRED; 585 else if (acquitted) 586 alp->al_reason = FMD_ASRU_ACQUITTED; 587 else 588 alp->al_reason = FMD_ASRU_REMOVED; 589 590 TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", alp->al_uuid, 591 (void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE])); 592 } 593 594 static void 595 fmd_asru_hash_discard(fmd_asru_hash_t *ahp, const char *uuid, int err) 596 { 597 char src[PATH_MAX], dst[PATH_MAX]; 598 599 (void) snprintf(src, PATH_MAX, "%s/%s", ahp->ah_dirpath, uuid); 600 (void) snprintf(dst, PATH_MAX, "%s/%s-", ahp->ah_dirpath, uuid); 601 602 if (err != 0) 603 err = rename(src, dst); 604 else 605 err = unlink(src); 606 607 if (err != 0 && errno != ENOENT) 608 fmd_error(EFMD_ASRU_EVENT, "failed to rename log %s", src); 609 } 
610 611 /* 612 * Open a saved log file and restore it into the ASRU hash. If we can't even 613 * open the log, rename the log file to <uuid>- to indicate it is corrupt. If 614 * fmd_log_replay() fails, we either delete the file (if it has reached the 615 * upper limit on cache age) or rename it for debugging if it was corrupted. 616 */ 617 static void 618 fmd_asru_hash_logopen(fmd_asru_hash_t *ahp, const char *uuid) 619 { 620 fmd_log_t *lp = fmd_log_tryopen(ahp->ah_dirpath, uuid, FMD_LOG_ASRU); 621 uint_t n; 622 623 if (lp == NULL) { 624 fmd_asru_hash_discard(ahp, uuid, errno); 625 return; 626 } 627 628 ahp->ah_error = 0; 629 n = ahp->ah_al_count; 630 631 fmd_log_replay(lp, (fmd_log_f *)fmd_asru_hash_recreate, ahp); 632 fmd_log_rele(lp); 633 634 if (ahp->ah_al_count == n) 635 fmd_asru_hash_discard(ahp, uuid, ahp->ah_error); 636 } 637 638 void 639 fmd_asru_hash_refresh(fmd_asru_hash_t *ahp) 640 { 641 struct dirent *dp; 642 DIR *dirp; 643 int zero; 644 645 if ((dirp = opendir(ahp->ah_dirpath)) == NULL) { 646 fmd_error(EFMD_ASRU_NODIR, 647 "failed to open asru cache directory %s", ahp->ah_dirpath); 648 return; 649 } 650 651 (void) fmd_conf_getprop(fmd.d_conf, "rsrc.zero", &zero); 652 653 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 654 655 while ((dp = readdir(dirp)) != NULL) { 656 if (dp->d_name[0] == '.') 657 continue; /* skip "." and ".." */ 658 659 if (zero) 660 fmd_asru_hash_discard(ahp, dp->d_name, 0); 661 else if (!fmd_strmatch(dp->d_name, "*-")) 662 fmd_asru_hash_logopen(ahp, dp->d_name); 663 } 664 665 (void) pthread_rwlock_unlock(&ahp->ah_lock); 666 (void) closedir(dirp); 667 } 668 669 /* 670 * If the resource is present and faulty but not unusable, replay the fault 671 * event that caused it be marked faulty. This will cause the agent 672 * subscribing to this fault class to again disable the resource. 
673 */ 674 /*ARGSUSED*/ 675 static void 676 fmd_asru_hash_replay_asru(fmd_asru_t *ap, void *data) 677 { 678 fmd_event_t *e; 679 nvlist_t *nvl; 680 char *class; 681 682 if (ap->asru_event != NULL && (ap->asru_flags & (FMD_ASRU_STATE | 683 FMD_ASRU_PRESENT)) == (FMD_ASRU_FAULTY | FMD_ASRU_PRESENT)) { 684 685 fmd_dprintf(FMD_DBG_ASRU, 686 "replaying fault event for %s", ap->asru_name); 687 688 (void) nvlist_xdup(ap->asru_event, &nvl, &fmd.d_nva); 689 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 690 691 (void) nvlist_add_string(nvl, FMD_EVN_UUID, 692 ((fmd_case_impl_t *)ap->asru_case)->ci_uuid); 693 694 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 695 fmd_dispq_dispatch(fmd.d_disp, e, class); 696 } 697 } 698 699 void 700 fmd_asru_hash_replay(fmd_asru_hash_t *ahp) 701 { 702 fmd_asru_hash_apply(ahp, fmd_asru_hash_replay_asru, NULL); 703 } 704 705 /* 706 * Check if the resource is still present. If not, and if the rsrc.age time 707 * has expired, then do an implicit repair on the resource. 708 */ 709 /*ARGSUSED*/ 710 static void 711 fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *arg) 712 { 713 struct timeval tv; 714 fmd_log_t *lp; 715 hrtime_t hrt; 716 int ps; 717 int err; 718 fmd_asru_rep_arg_t fara; 719 720 if (!(alp->al_flags & FMD_ASRU_FAULTY)) 721 return; 722 723 /* 724 * Checking for aged resources only happens on the diagnosing side 725 * not on a proxy. 
726 */ 727 if (alp->al_flags & FMD_ASRU_PROXY) 728 return; 729 730 ps = fmd_asru_replacement_state(alp->al_event, HC_ONLY_FALSE); 731 if (ps == FMD_OBJ_STATE_REPLACED) { 732 fara.fara_reason = FMD_ASRU_REPLACED; 733 fara.fara_bywhat = FARA_ALL; 734 fara.fara_rval = &err; 735 fmd_asru_repaired(alp, &fara); 736 } else if (ps == FMD_OBJ_STATE_NOT_PRESENT) { 737 fmd_time_gettimeofday(&tv); 738 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, 739 FMD_LOG_ASRU); 740 hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime); 741 fmd_log_rele(lp); 742 if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime) { 743 fara.fara_reason = FMD_ASRU_REMOVED; 744 fara.fara_bywhat = FARA_ALL; 745 fara.fara_rval = &err; 746 fmd_asru_repaired(alp, &fara); 747 } 748 } 749 } 750 751 /*ARGSUSED*/ 752 void 753 fmd_asru_check_if_aged(fmd_asru_link_t *alp, void *arg) 754 { 755 struct timeval tv; 756 fmd_log_t *lp; 757 hrtime_t hrt; 758 759 /* 760 * Case must be in resolved state for this to be called. So modified 761 * time on resource cache entry should be the time the resolve occurred. 762 * Return 0 if not yet hit rsrc.aged. 763 */ 764 fmd_time_gettimeofday(&tv); 765 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, FMD_LOG_ASRU); 766 if (lp == NULL) 767 return; 768 hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime); 769 fmd_log_rele(lp); 770 if (hrt * NANOSEC < fmd.d_asrus->ah_lifetime) 771 *(int *)arg = 0; 772 } 773 774 /*ARGSUSED*/ 775 void 776 fmd_asru_most_recent(fmd_asru_link_t *alp, void *arg) 777 { 778 fmd_log_t *lp; 779 uint64_t hrt; 780 781 /* 782 * Find most recent modified time of a set of resource cache entries. 
783 */ 784 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, FMD_LOG_ASRU); 785 if (lp == NULL) 786 return; 787 hrt = lp->log_stat.st_mtime; 788 fmd_log_rele(lp); 789 if (*(uint64_t *)arg < hrt) 790 *(uint64_t *)arg = hrt; 791 } 792 793 void 794 fmd_asru_clear_aged_rsrcs() 795 { 796 int check_if_aged = 1; 797 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, NULL); 798 fmd_case_hash_apply(fmd.d_cases, fmd_case_discard_resolved, 799 &check_if_aged); 800 } 801 802 fmd_asru_hash_t * 803 fmd_asru_hash_create(const char *root, const char *dir) 804 { 805 fmd_asru_hash_t *ahp; 806 char path[PATH_MAX]; 807 808 ahp = fmd_alloc(sizeof (fmd_asru_hash_t), FMD_SLEEP); 809 (void) pthread_rwlock_init(&ahp->ah_lock, NULL); 810 ahp->ah_hashlen = fmd.d_str_buckets; 811 ahp->ah_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, FMD_SLEEP); 812 ahp->ah_asru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 813 FMD_SLEEP); 814 ahp->ah_case_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 815 FMD_SLEEP); 816 ahp->ah_fru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 817 FMD_SLEEP); 818 ahp->ah_label_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 819 FMD_SLEEP); 820 ahp->ah_rsrc_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 821 FMD_SLEEP); 822 (void) snprintf(path, sizeof (path), "%s/%s", root, dir); 823 ahp->ah_dirpath = fmd_strdup(path, FMD_SLEEP); 824 (void) fmd_conf_getprop(fmd.d_conf, "rsrc.age", &ahp->ah_lifetime); 825 (void) fmd_conf_getprop(fmd.d_conf, "fakenotpresent", 826 (uint32_t *)&fmd_asru_fake_not_present); 827 ahp->ah_al_count = 0; 828 ahp->ah_count = 0; 829 ahp->ah_error = 0; 830 ahp->ah_topo = fmd_topo_hold(); 831 832 return (ahp); 833 } 834 835 void 836 fmd_asru_hash_destroy(fmd_asru_hash_t *ahp) 837 { 838 fmd_asru_link_t *alp, *np; 839 uint_t i; 840 841 for (i = 0; i < ahp->ah_hashlen; i++) { 842 for (alp = ahp->ah_case_hash[i]; alp != NULL; alp = np) { 843 np = alp->al_case_next; 844 alp->al_case_next = NULL; 845 
fmd_case_rele(alp->al_case); 846 alp->al_case = NULL; 847 fmd_asru_al_hash_release(ahp, alp); 848 } 849 } 850 851 fmd_strfree(ahp->ah_dirpath); 852 fmd_free(ahp->ah_hash, sizeof (void *) * ahp->ah_hashlen); 853 fmd_free(ahp->ah_asru_hash, sizeof (void *) * ahp->ah_hashlen); 854 fmd_free(ahp->ah_case_hash, sizeof (void *) * ahp->ah_hashlen); 855 fmd_free(ahp->ah_fru_hash, sizeof (void *) * ahp->ah_hashlen); 856 fmd_free(ahp->ah_label_hash, sizeof (void *) * ahp->ah_hashlen); 857 fmd_free(ahp->ah_rsrc_hash, sizeof (void *) * ahp->ah_hashlen); 858 fmd_topo_rele(ahp->ah_topo); 859 fmd_free(ahp, sizeof (fmd_asru_hash_t)); 860 } 861 862 /* 863 * Take a snapshot of the ASRU database by placing an additional hold on each 864 * member in an auxiliary array, and then call 'func' for each ASRU. 865 */ 866 void 867 fmd_asru_hash_apply(fmd_asru_hash_t *ahp, 868 void (*func)(fmd_asru_t *, void *), void *arg) 869 { 870 fmd_asru_t *ap, **aps, **app; 871 uint_t apc, i; 872 873 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 874 875 aps = app = fmd_alloc(ahp->ah_count * sizeof (fmd_asru_t *), FMD_SLEEP); 876 apc = ahp->ah_count; 877 878 for (i = 0; i < ahp->ah_hashlen; i++) { 879 for (ap = ahp->ah_hash[i]; ap != NULL; ap = ap->asru_next) 880 *app++ = fmd_asru_hold(ap); 881 } 882 883 ASSERT(app == aps + apc); 884 (void) pthread_rwlock_unlock(&ahp->ah_lock); 885 886 for (i = 0; i < apc; i++) { 887 if (aps[i]->asru_fmri != NULL) 888 func(aps[i], arg); 889 fmd_asru_hash_release(ahp, aps[i]); 890 } 891 892 fmd_free(aps, apc * sizeof (fmd_asru_t *)); 893 } 894 895 void 896 fmd_asru_al_hash_apply(fmd_asru_hash_t *ahp, 897 void (*func)(fmd_asru_link_t *, void *), void *arg) 898 { 899 fmd_asru_link_t *alp, **alps, **alpp; 900 uint_t alpc, i; 901 902 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 903 904 alps = alpp = fmd_alloc(ahp->ah_al_count * sizeof (fmd_asru_link_t *), 905 FMD_SLEEP); 906 alpc = ahp->ah_al_count; 907 908 for (i = 0; i < ahp->ah_hashlen; i++) { 909 for (alp = 
ahp->ah_case_hash[i]; alp != NULL; 910 alp = alp->al_case_next) 911 *alpp++ = fmd_asru_al_hold(alp); 912 } 913 914 ASSERT(alpp == alps + alpc); 915 (void) pthread_rwlock_unlock(&ahp->ah_lock); 916 917 for (i = 0; i < alpc; i++) { 918 func(alps[i], arg); 919 fmd_asru_al_hash_release(ahp, alps[i]); 920 } 921 922 fmd_free(alps, alpc * sizeof (fmd_asru_link_t *)); 923 } 924 925 static void 926 fmd_asru_do_hash_apply(fmd_asru_hash_t *ahp, const char *name, 927 void (*func)(fmd_asru_link_t *, void *), void *arg, 928 fmd_asru_link_t **hash, size_t match_offset, size_t next_offset) 929 { 930 fmd_asru_link_t *alp, **alps, **alpp; 931 uint_t alpc = 0, i; 932 uint_t h; 933 934 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 935 936 h = fmd_asru_strhash(ahp, name); 937 938 for (alp = hash[h]; alp != NULL; alp = 939 /* LINTED pointer alignment */ 940 FMD_ASRU_AL_HASH_NEXT(alp, next_offset)) 941 if (fmd_asru_strcmp(ahp, 942 /* LINTED pointer alignment */ 943 FMD_ASRU_AL_HASH_NAME(alp, match_offset), name)) 944 alpc++; 945 946 alps = alpp = fmd_alloc(alpc * sizeof (fmd_asru_link_t *), FMD_SLEEP); 947 948 for (alp = hash[h]; alp != NULL; alp = 949 /* LINTED pointer alignment */ 950 FMD_ASRU_AL_HASH_NEXT(alp, next_offset)) 951 if (fmd_asru_strcmp(ahp, 952 /* LINTED pointer alignment */ 953 FMD_ASRU_AL_HASH_NAME(alp, match_offset), name)) 954 *alpp++ = fmd_asru_al_hold(alp); 955 956 ASSERT(alpp == alps + alpc); 957 (void) pthread_rwlock_unlock(&ahp->ah_lock); 958 959 for (i = 0; i < alpc; i++) { 960 func(alps[i], arg); 961 fmd_asru_al_hash_release(ahp, alps[i]); 962 } 963 964 fmd_free(alps, alpc * sizeof (fmd_asru_link_t *)); 965 } 966 967 void 968 fmd_asru_hash_apply_by_asru(fmd_asru_hash_t *ahp, const char *name, 969 void (*func)(fmd_asru_link_t *, void *), void *arg) 970 { 971 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_asru_hash, 972 offsetof(fmd_asru_link_t, al_asru_name), 973 offsetof(fmd_asru_link_t, al_asru_next)); 974 } 975 976 void 977 
fmd_asru_hash_apply_by_case(fmd_asru_hash_t *ahp, fmd_case_t *cp, 978 void (*func)(fmd_asru_link_t *, void *), void *arg) 979 { 980 fmd_asru_do_hash_apply(ahp, ((fmd_case_impl_t *)cp)->ci_uuid, func, arg, 981 ahp->ah_case_hash, offsetof(fmd_asru_link_t, al_case_uuid), 982 offsetof(fmd_asru_link_t, al_case_next)); 983 } 984 985 void 986 fmd_asru_hash_apply_by_fru(fmd_asru_hash_t *ahp, const char *name, 987 void (*func)(fmd_asru_link_t *, void *), void *arg) 988 { 989 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_fru_hash, 990 offsetof(fmd_asru_link_t, al_fru_name), 991 offsetof(fmd_asru_link_t, al_fru_next)); 992 } 993 994 void 995 fmd_asru_hash_apply_by_rsrc(fmd_asru_hash_t *ahp, const char *name, 996 void (*func)(fmd_asru_link_t *, void *), void *arg) 997 { 998 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_rsrc_hash, 999 offsetof(fmd_asru_link_t, al_rsrc_name), 1000 offsetof(fmd_asru_link_t, al_rsrc_next)); 1001 } 1002 1003 void 1004 fmd_asru_hash_apply_by_label(fmd_asru_hash_t *ahp, const char *name, 1005 void (*func)(fmd_asru_link_t *, void *), void *arg) 1006 { 1007 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_label_hash, 1008 offsetof(fmd_asru_link_t, al_label), 1009 offsetof(fmd_asru_link_t, al_label_next)); 1010 } 1011 1012 /* 1013 * Lookup an asru in the hash by name and place a hold on it. If the asru is 1014 * not found, no entry is created and NULL is returned. 1015 */ 1016 fmd_asru_t * 1017 fmd_asru_hash_lookup_name(fmd_asru_hash_t *ahp, const char *name) 1018 { 1019 fmd_asru_t *ap; 1020 1021 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 1022 ap = fmd_asru_hash_lookup(ahp, name); 1023 (void) pthread_rwlock_unlock(&ahp->ah_lock); 1024 1025 return (ap); 1026 } 1027 1028 /* 1029 * Create a resource cache entry using the fault event "nvl" for one of the 1030 * suspects from the case "cp". 1031 * 1032 * The fault event can have the following components : FM_FAULT_ASRU, 1033 * FM_FAULT_FRU, FM_FAULT_RESOURCE. 
These should be set by the Diagnosis Engine
 * when calling fmd_nvl_create_fault().  In the general case, these are all
 * optional and an entry will always be added into the cache even if one or all
 * of these fields is missing.
 *
 * However, for hardware faults the recommended practice is that the fault
 * event should always have the FM_FAULT_RESOURCE field present and that this
 * should be represented in hc-scheme.
 *
 * Currently the DE should also add the FM_FAULT_ASRU and FM_FAULT_FRU fields
 * where known, though at some future stage fmd might be able to fill these
 * in automatically from the topology.
 *
 * A fresh UUID is generated for the entry, a hold is taken on 'cp' through
 * fmd_case_hold_locked(), and the link returned by fmd_asru_al_create() is
 * handed back to the caller.
 * NOTE(review): the "_locked" suffix suggests the caller must already hold
 * the case lock -- confirm against fmd_case.c.
 */
fmd_asru_link_t *
fmd_asru_hash_create_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, nvlist_t *nvl)
{
	/*
	 * uuid_unparse() always produces a 36-character string followed by a
	 * terminating NUL, whatever the "uuidlen" tunable says.
	 */
	const int min_uuidlen = 36;

	char *parsed_uuid;
	uuid_t uuid;
	int uuidlen = 0;
	size_t bufsz;
	fmd_asru_link_t *alp;

	/*
	 * Generate a UUID for the ASRU.  libuuid gives us no interface for
	 * specifying or learning the buffer size, so the size is taken from a
	 * tunable.  The tunable is only allowed to enlarge the buffer: if the
	 * property cannot be read (uuidlen stays 0) or is set too small, fall
	 * back to the size uuid_unparse() needs so that it cannot write past
	 * the end of the allocation.
	 */
	(void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &uuidlen);
	if (uuidlen < min_uuidlen)
		uuidlen = min_uuidlen;
	bufsz = (size_t)uuidlen + 1;

	parsed_uuid = fmd_zalloc(bufsz, FMD_SLEEP);
	uuid_generate(uuid);
	uuid_unparse(uuid, parsed_uuid);

	/*
	 * Now create the resource cache entries.
	 */
	fmd_case_hold_locked(cp);
	alp = fmd_asru_al_create(ahp, nvl, cp, parsed_uuid);
	TRACE((FMD_DBG_ASRU, "asru %s created as %p",
	    alp->al_uuid, (void *)alp->al_asru));

	/* The size passed here must match the size that was allocated. */
	fmd_free(parsed_uuid, bufsz);
	return (alp);
}

/*
 * Release the reference count on an asru obtained using fmd_asru_hash_lookup.
 * We take 'ahp' for symmetry and in case we need to use it in future work.
1080 */ 1081 /*ARGSUSED*/ 1082 void 1083 fmd_asru_hash_release(fmd_asru_hash_t *ahp, fmd_asru_t *ap) 1084 { 1085 (void) pthread_mutex_lock(&ap->asru_lock); 1086 1087 ASSERT(ap->asru_refs != 0); 1088 if (--ap->asru_refs == 0) 1089 fmd_asru_destroy(ap); 1090 else 1091 (void) pthread_mutex_unlock(&ap->asru_lock); 1092 } 1093 1094 static void 1095 fmd_asru_do_delete_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, 1096 fmd_asru_link_t **hash, size_t next_offset, char *name) 1097 { 1098 uint_t h; 1099 fmd_asru_link_t *alp, **pp, *alpnext, **alpnextp; 1100 1101 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 1102 h = fmd_asru_strhash(ahp, name); 1103 pp = &hash[h]; 1104 for (alp = *pp; alp != NULL; alp = alpnext) { 1105 /* LINTED pointer alignment */ 1106 alpnextp = FMD_ASRU_AL_HASH_NEXTP(alp, next_offset); 1107 alpnext = *alpnextp; 1108 if (alp->al_case == cp) { 1109 *pp = *alpnextp; 1110 *alpnextp = NULL; 1111 } else 1112 pp = alpnextp; 1113 } 1114 (void) pthread_rwlock_unlock(&ahp->ah_lock); 1115 } 1116 1117 static void 1118 fmd_asru_do_hash_delete(fmd_asru_hash_t *ahp, fmd_case_susp_t *cis, 1119 fmd_case_t *cp, fmd_asru_link_t **hash, size_t next_offset, char *nvname) 1120 { 1121 nvlist_t *nvl; 1122 char *name = NULL; 1123 ssize_t namelen; 1124 1125 if (nvlist_lookup_nvlist(cis->cis_nvl, nvname, &nvl) == 0 && 1126 (namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) != -1 && 1127 (name = fmd_alloc(namelen + 1, FMD_SLEEP)) != NULL) { 1128 if (fmd_fmri_nvl2str(nvl, name, namelen + 1) != -1) 1129 fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, 1130 name); 1131 fmd_free(name, namelen + 1); 1132 } else 1133 fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, ""); 1134 } 1135 1136 void 1137 fmd_asru_hash_delete_case(fmd_asru_hash_t *ahp, fmd_case_t *cp) 1138 { 1139 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1140 fmd_case_susp_t *cis; 1141 fmd_asru_link_t *alp, **plp, *alpnext; 1142 fmd_asru_t *ap; 1143 char path[PATH_MAX]; 1144 char *label; 1145 uint_t h; 1146 1147 /* 1148 * first 
delete hash entries for each suspect 1149 */ 1150 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) { 1151 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_fru_hash, 1152 offsetof(fmd_asru_link_t, al_fru_next), FM_FAULT_FRU); 1153 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_rsrc_hash, 1154 offsetof(fmd_asru_link_t, al_rsrc_next), FM_FAULT_RESOURCE); 1155 if (nvlist_lookup_string(cis->cis_nvl, FM_FAULT_LOCATION, 1156 &label) != 0) 1157 label = ""; 1158 fmd_asru_do_delete_entry(ahp, cp, ahp->ah_label_hash, 1159 offsetof(fmd_asru_link_t, al_label_next), label); 1160 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_asru_hash, 1161 offsetof(fmd_asru_link_t, al_asru_next), FM_FAULT_ASRU); 1162 } 1163 1164 /* 1165 * then delete associated case hash entries 1166 */ 1167 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 1168 h = fmd_asru_strhash(ahp, cip->ci_uuid); 1169 plp = &ahp->ah_case_hash[h]; 1170 for (alp = *plp; alp != NULL; alp = alpnext) { 1171 alpnext = alp->al_case_next; 1172 if (alp->al_case == cp) { 1173 *plp = alp->al_case_next; 1174 alp->al_case_next = NULL; 1175 ASSERT(ahp->ah_al_count != 0); 1176 ahp->ah_al_count--; 1177 1178 /* 1179 * decrement case ref. 1180 */ 1181 fmd_case_rele_locked(cp); 1182 alp->al_case = NULL; 1183 1184 /* 1185 * If we found a matching ASRU, unlink its log file and 1186 * then release the hash entry. Note that it may still 1187 * be referenced if another thread is manipulating it; 1188 * this is ok because once we unlink, the log file will 1189 * not be restored, and the log data will be freed when 1190 * all of the referencing threads release their 1191 * respective references. 
1192 */ 1193 (void) snprintf(path, sizeof (path), "%s/%s", 1194 ahp->ah_dirpath, alp->al_uuid); 1195 if (cip->ci_xprt == NULL && unlink(path) != 0) 1196 fmd_error(EFMD_ASRU_UNLINK, 1197 "failed to unlink asru %s", path); 1198 1199 /* 1200 * Now unlink from the global per-resource cache 1201 * and if this is the last link then remove that from 1202 * it's own hash too. 1203 */ 1204 ap = alp->al_asru; 1205 (void) pthread_mutex_lock(&ap->asru_lock); 1206 fmd_list_delete(&ap->asru_list, alp); 1207 if (ap->asru_list.l_next == NULL) { 1208 uint_t h; 1209 fmd_asru_t *ap2, **pp; 1210 fmd_asru_t *apnext, **apnextp; 1211 1212 ASSERT(ahp->ah_count != 0); 1213 ahp->ah_count--; 1214 h = fmd_asru_strhash(ahp, ap->asru_name); 1215 pp = &ahp->ah_hash[h]; 1216 for (ap2 = *pp; ap2 != NULL; ap2 = apnext) { 1217 apnextp = &ap2->asru_next; 1218 apnext = *apnextp; 1219 if (ap2 == ap) { 1220 *pp = *apnextp; 1221 *apnextp = NULL; 1222 } else 1223 pp = apnextp; 1224 } 1225 } 1226 (void) pthread_mutex_unlock(&ap->asru_lock); 1227 fmd_asru_al_hash_release(ahp, alp); 1228 } else 1229 plp = &alp->al_case_next; 1230 } 1231 (void) pthread_rwlock_unlock(&ahp->ah_lock); 1232 } 1233 1234 typedef struct { 1235 nvlist_t *farc_parent_fmri; 1236 uint8_t farc_reason; 1237 } fmd_asru_farc_t; 1238 1239 static void 1240 fmd_asru_repair_containee(fmd_asru_link_t *alp, void *arg) 1241 { 1242 fmd_asru_farc_t *farcp = (fmd_asru_farc_t *)arg; 1243 1244 if ((alp->al_asru->asru_flags & FMD_ASRU_INVISIBLE) && 1245 alp->al_asru_fmri && 1246 fmd_fmri_contains(farcp->farc_parent_fmri, alp->al_asru_fmri) > 0) { 1247 if (fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1248 farcp->farc_reason)) { 1249 if (alp->al_flags & FMD_ASRU_PROXY) 1250 fmd_case_xprt_updated(alp->al_case); 1251 else 1252 fmd_case_update(alp->al_case); 1253 } 1254 } 1255 } 1256 1257 static void 1258 fmd_asru_do_repair_containees(fmd_asru_link_t *alp, uint8_t reason) 1259 { 1260 int flags; 1261 1262 /* 1263 * Check if all entries associated with this asru 
are acquitted and 1264 * if so acquit containees. Don't try to repair containees on proxy 1265 * side unless we have local asru. 1266 */ 1267 if (alp->al_asru_fmri != NULL && (!(alp->al_flags & FMD_ASRU_PROXY) || 1268 (alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU))) { 1269 (void) pthread_mutex_lock(&alp->al_asru->asru_lock); 1270 flags = alp->al_asru->asru_flags; 1271 (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); 1272 if (!(flags & (FMD_ASRU_FAULTY | FMD_ASRU_INVISIBLE))) { 1273 fmd_asru_farc_t farc; 1274 1275 farc.farc_parent_fmri = alp->al_asru_fmri; 1276 farc.farc_reason = reason; 1277 fmd_asru_al_hash_apply(fmd.d_asrus, 1278 fmd_asru_repair_containee, &farc); 1279 } 1280 } 1281 } 1282 1283 void 1284 fmd_asru_repaired(fmd_asru_link_t *alp, void *arg) 1285 { 1286 int cleared; 1287 fmd_asru_rep_arg_t *farap = (fmd_asru_rep_arg_t *)arg; 1288 1289 /* 1290 * don't allow remote repair over readonly transport 1291 */ 1292 if (alp->al_flags & FMD_ASRU_PROXY_RDONLY) 1293 return; 1294 1295 /* 1296 * don't allow repair etc by asru on proxy unless asru is local 1297 */ 1298 if (farap->fara_bywhat == FARA_BY_ASRU && 1299 (alp->al_flags & FMD_ASRU_PROXY) && 1300 !(alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU)) 1301 return; 1302 /* 1303 * For acquit, need to check both name and uuid if specified 1304 */ 1305 if (farap->fara_reason == FMD_ASRU_ACQUITTED && 1306 farap->fara_rval != NULL && strcmp(farap->fara_uuid, "") != 0 && 1307 strcmp(farap->fara_uuid, alp->al_case_uuid) != 0) 1308 return; 1309 1310 /* 1311 * For replaced, verify it has been replaced if we have serial number. 1312 * If not set *farap->fara_rval to FARA_ERR_RSRCNOTR. 1313 */ 1314 if (farap->fara_reason == FMD_ASRU_REPLACED && 1315 !(alp->al_flags & FMD_ASRU_PROXY_EXTERNAL) && 1316 fmd_asru_replacement_state(alp->al_event, 1317 (alp->al_flags & FMD_ASRU_PROXY) ? 
HC_ONLY_TRUE : HC_ONLY_FALSE) == 1318 FMD_OBJ_STATE_STILL_PRESENT) { 1319 if (farap->fara_rval) 1320 *farap->fara_rval = FARA_ERR_RSRCNOTR; 1321 return; 1322 } 1323 1324 cleared = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, farap->fara_reason); 1325 fmd_asru_do_repair_containees(alp, farap->fara_reason); 1326 1327 /* 1328 * if called from fmd_adm_*() and we really did clear the bit then 1329 * we need to do a case update to see if the associated case can be 1330 * repaired. No need to do this if called from fmd_case_*() (ie 1331 * when arg is NULL) as the case will be explicitly repaired anyway. 1332 */ 1333 if (farap->fara_rval) { 1334 /* 1335 * *farap->fara_rval defaults to FARA_ERR_RSRCNOTF (not found). 1336 * If we find a valid cache entry which we repair then we 1337 * set it to FARA_OK. However we don't want to do this if 1338 * we have already set it to FARA_ERR_RSRCNOTR (not replaced) 1339 * in a previous iteration (see above). So only set it to 1340 * FARA_OK if the current value is still FARA_ERR_RSRCNOTF. 1341 */ 1342 if (*farap->fara_rval == FARA_ERR_RSRCNOTF) 1343 *farap->fara_rval = FARA_OK; 1344 if (cleared) { 1345 if (alp->al_flags & FMD_ASRU_PROXY) 1346 fmd_case_xprt_updated(alp->al_case); 1347 else 1348 fmd_case_update(alp->al_case); 1349 } 1350 } 1351 } 1352 1353 /* 1354 * Discard the case associated with this alp if it is in resolved state. 1355 * Called on "fmadm flush". 1356 */ 1357 /*ARGSUSED*/ 1358 void 1359 fmd_asru_flush(fmd_asru_link_t *alp, void *arg) 1360 { 1361 int check_if_aged = 0; 1362 int *rval = (int *)arg; 1363 1364 if (alp->al_case) 1365 fmd_case_discard_resolved(alp->al_case, &check_if_aged); 1366 *rval = 0; 1367 } 1368 1369 /* 1370 * This is only called for proxied faults. Set various flags so we can 1371 * find the nature of the transport from the resource cache code. 
1372 */ 1373 /*ARGSUSED*/ 1374 void 1375 fmd_asru_set_on_proxy(fmd_asru_link_t *alp, void *arg) 1376 { 1377 fmd_asru_set_on_proxy_t *entryp = (fmd_asru_set_on_proxy_t *)arg; 1378 1379 if (*entryp->fasp_countp >= entryp->fasp_maxcount) 1380 return; 1381 1382 /* 1383 * Note that this is a proxy fault and save whetehr transport is 1384 * RDONLY or EXTERNAL. 1385 */ 1386 alp->al_flags |= FMD_ASRU_PROXY; 1387 alp->al_asru->asru_flags |= FMD_ASRU_PROXY; 1388 1389 if (entryp->fasp_proxy_external) { 1390 alp->al_flags |= FMD_ASRU_PROXY_EXTERNAL; 1391 alp->al_asru->asru_flags |= FMD_ASRU_PROXY_EXTERNAL; 1392 } 1393 1394 if (entryp->fasp_proxy_rdonly) 1395 alp->al_flags |= FMD_ASRU_PROXY_RDONLY; 1396 1397 /* 1398 * Save whether asru is accessible in local domain 1399 */ 1400 if (entryp->fasp_proxy_asru[*entryp->fasp_countp]) { 1401 alp->al_flags |= FMD_ASRU_PROXY_WITH_ASRU; 1402 alp->al_asru->asru_flags |= FMD_ASRU_PROXY_WITH_ASRU; 1403 } 1404 (*entryp->fasp_countp)++; 1405 } 1406 1407 /*ARGSUSED*/ 1408 void 1409 fmd_asru_update_containees(fmd_asru_link_t *alp, void *arg) 1410 { 1411 fmd_asru_do_repair_containees(alp, alp->al_reason); 1412 } 1413 1414 /* 1415 * This function is used for fault proxying. It updates the resource status in 1416 * the resource cache based on information that has come from the other side of 1417 * the transport. This can be called on either the proxy side or the 1418 * diagnosing side. 1419 */ 1420 void 1421 fmd_asru_update_status(fmd_asru_link_t *alp, void *arg) 1422 { 1423 fmd_asru_update_status_t *entryp = (fmd_asru_update_status_t *)arg; 1424 uint8_t status; 1425 1426 if (*entryp->faus_countp >= entryp->faus_maxcount) 1427 return; 1428 1429 status = entryp->faus_ba[*entryp->faus_countp]; 1430 1431 /* 1432 * For proxy, if there is no asru on the proxy side, but there is on 1433 * the diag side, then take the diag side asru status. 1434 * For diag, if there is an asru on the proxy side, then take the proxy 1435 * side asru status. 
1436 */ 1437 if (entryp->faus_is_proxy ? 1438 (entryp->faus_diag_asru[*entryp->faus_countp] && 1439 !entryp->faus_proxy_asru[*entryp->faus_countp]) : 1440 entryp->faus_proxy_asru[*entryp->faus_countp]) { 1441 if (status & FM_SUSPECT_DEGRADED) 1442 alp->al_flags |= FMD_ASRU_DEGRADED; 1443 else 1444 alp->al_flags &= ~FMD_ASRU_DEGRADED; 1445 if (status & FM_SUSPECT_UNUSABLE) 1446 (void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE); 1447 else 1448 (void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE, 0); 1449 } 1450 1451 /* 1452 * Update the faulty status too. 1453 */ 1454 if (!(status & FM_SUSPECT_FAULTY)) 1455 (void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1456 (status & FM_SUSPECT_REPAIRED) ? FMD_ASRU_REPAIRED : 1457 (status & FM_SUSPECT_REPLACED) ? FMD_ASRU_REPLACED : 1458 (status & FM_SUSPECT_ACQUITTED) ? FMD_ASRU_ACQUITTED : 1459 FMD_ASRU_REMOVED); 1460 else if (entryp->faus_is_proxy) 1461 (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY); 1462 1463 /* 1464 * for proxy only, update the present status too. 1465 */ 1466 if (entryp->faus_is_proxy) { 1467 if (!(status & FM_SUSPECT_NOT_PRESENT)) { 1468 alp->al_flags |= FMD_ASRU_PRESENT; 1469 alp->al_asru->asru_flags |= FMD_ASRU_PRESENT; 1470 } else { 1471 alp->al_flags &= ~FMD_ASRU_PRESENT; 1472 alp->al_asru->asru_flags &= ~FMD_ASRU_PRESENT; 1473 } 1474 } 1475 (*entryp->faus_countp)++; 1476 } 1477 1478 /* 1479 * This function is called on the diagnosing side when fault proxying is 1480 * in use and the proxy has sent a uuclose. It updates the status of the 1481 * resource cache entries. 
1482 */ 1483 void 1484 fmd_asru_close_status(fmd_asru_link_t *alp, void *arg) 1485 { 1486 fmd_asru_close_status_t *entryp = (fmd_asru_close_status_t *)arg; 1487 1488 if (*entryp->facs_countp >= entryp->facs_maxcount) 1489 return; 1490 alp->al_flags &= ~FMD_ASRU_DEGRADED; 1491 (void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE); 1492 (*entryp->facs_countp)++; 1493 } 1494 1495 static void 1496 fmd_asru_logevent(fmd_asru_link_t *alp) 1497 { 1498 fmd_asru_t *ap = alp->al_asru; 1499 boolean_t faulty = (alp->al_flags & FMD_ASRU_FAULTY) != 0; 1500 boolean_t unusable = (alp->al_flags & FMD_ASRU_UNUSABLE) != 0; 1501 boolean_t message = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0; 1502 boolean_t repaired = (alp->al_reason == FMD_ASRU_REPAIRED); 1503 boolean_t replaced = (alp->al_reason == FMD_ASRU_REPLACED); 1504 boolean_t acquitted = (alp->al_reason == FMD_ASRU_ACQUITTED); 1505 1506 fmd_case_impl_t *cip; 1507 fmd_event_t *e; 1508 fmd_log_t *lp; 1509 nvlist_t *nvl; 1510 char *class; 1511 1512 ASSERT(MUTEX_HELD(&ap->asru_lock)); 1513 cip = (fmd_case_impl_t *)alp->al_case; 1514 ASSERT(cip != NULL); 1515 1516 /* 1517 * Don't log to disk on proxy side 1518 */ 1519 if (cip->ci_xprt != NULL) 1520 return; 1521 1522 if ((lp = alp->al_log) == NULL) 1523 lp = fmd_log_open(ap->asru_root, alp->al_uuid, FMD_LOG_ASRU); 1524 1525 if (lp == NULL) 1526 return; /* can't log events if we can't open the log */ 1527 1528 nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[faulty | (unusable << 1)], 1529 alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, faulty, unusable, 1530 message, alp->al_event, &cip->ci_tv, repaired, replaced, acquitted, 1531 cip->ci_state == FMD_CASE_RESOLVED, cip->ci_diag_de == NULL ? 
1532 cip->ci_mod->mod_fmri : cip->ci_diag_de); 1533 1534 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 1535 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 1536 1537 fmd_event_hold(e); 1538 fmd_log_append(lp, e, NULL); 1539 fmd_event_rele(e); 1540 1541 /* 1542 * For now, we close the log file after every update to conserve file 1543 * descriptors and daemon overhead. If this becomes a performance 1544 * issue this code can change to keep a fixed-size LRU cache of logs. 1545 */ 1546 fmd_log_rele(lp); 1547 alp->al_log = NULL; 1548 } 1549 1550 int 1551 fmd_asru_setflags(fmd_asru_link_t *alp, uint_t sflag) 1552 { 1553 fmd_asru_t *ap = alp->al_asru; 1554 uint_t nstate, ostate; 1555 1556 ASSERT(!(sflag & ~FMD_ASRU_STATE)); 1557 ASSERT(sflag != FMD_ASRU_STATE); 1558 1559 (void) pthread_mutex_lock(&ap->asru_lock); 1560 1561 ostate = alp->al_flags & FMD_ASRU_STATE; 1562 alp->al_flags |= sflag; 1563 nstate = alp->al_flags & FMD_ASRU_STATE; 1564 1565 if (nstate == ostate) { 1566 (void) pthread_mutex_unlock(&ap->asru_lock); 1567 return (0); 1568 } 1569 1570 ap->asru_flags |= sflag; 1571 TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid, 1572 _fmd_asru_snames[ostate], _fmd_asru_snames[nstate])); 1573 1574 fmd_asru_logevent(alp); 1575 1576 (void) pthread_cond_broadcast(&ap->asru_cv); 1577 (void) pthread_mutex_unlock(&ap->asru_lock); 1578 return (1); 1579 } 1580 1581 int 1582 fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag, uint8_t reason) 1583 { 1584 fmd_asru_t *ap = alp->al_asru; 1585 fmd_asru_link_t *nalp; 1586 uint_t nstate, ostate, flags = 0; 1587 1588 ASSERT(!(sflag & ~FMD_ASRU_STATE)); 1589 ASSERT(sflag != FMD_ASRU_STATE); 1590 1591 (void) pthread_mutex_lock(&ap->asru_lock); 1592 1593 ostate = alp->al_flags & FMD_ASRU_STATE; 1594 alp->al_flags &= ~sflag; 1595 nstate = alp->al_flags & FMD_ASRU_STATE; 1596 1597 if (nstate == ostate) { 1598 if (reason > alp->al_reason && 1599 ((fmd_case_impl_t *)alp->al_case)->ci_state < 1600 FMD_CASE_REPAIRED) { 
1601 alp->al_reason = reason; 1602 fmd_asru_logevent(alp); 1603 (void) pthread_cond_broadcast(&ap->asru_cv); 1604 } 1605 (void) pthread_mutex_unlock(&ap->asru_lock); 1606 return (0); 1607 } 1608 if (reason > alp->al_reason) 1609 alp->al_reason = reason; 1610 1611 if (sflag == FMD_ASRU_UNUSABLE) 1612 ap->asru_flags &= ~sflag; 1613 else if (sflag == FMD_ASRU_FAULTY) { 1614 /* 1615 * only clear the faulty bit if all links are clear 1616 */ 1617 for (nalp = fmd_list_next(&ap->asru_list); nalp != NULL; 1618 nalp = fmd_list_next(nalp)) 1619 flags |= nalp->al_flags; 1620 if (!(flags & FMD_ASRU_FAULTY)) 1621 ap->asru_flags &= ~sflag; 1622 } 1623 1624 TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid, 1625 _fmd_asru_snames[ostate], _fmd_asru_snames[nstate])); 1626 1627 fmd_asru_logevent(alp); 1628 1629 (void) pthread_cond_broadcast(&ap->asru_cv); 1630 (void) pthread_mutex_unlock(&ap->asru_lock); 1631 1632 return (1); 1633 } 1634 1635 /*ARGSUSED*/ 1636 void 1637 fmd_asru_log_resolved(fmd_asru_link_t *alp, void *unused) 1638 { 1639 fmd_asru_t *ap = alp->al_asru; 1640 1641 (void) pthread_mutex_lock(&ap->asru_lock); 1642 fmd_asru_logevent(alp); 1643 (void) pthread_cond_broadcast(&ap->asru_cv); 1644 (void) pthread_mutex_unlock(&ap->asru_lock); 1645 } 1646 1647 /* 1648 * Report the current known state of the link entry (ie this particular fault 1649 * affecting this particular ASRU). 1650 */ 1651 int 1652 fmd_asru_al_getstate(fmd_asru_link_t *alp) 1653 { 1654 int us, st = (alp->al_flags & (FMD_ASRU_FAULTY | FMD_ASRU_UNUSABLE)); 1655 nvlist_t *asru; 1656 int ps = FMD_OBJ_STATE_UNKNOWN; 1657 1658 /* 1659 * For fault proxying with an EXTERNAL transport, believe the presence 1660 * state as sent by the diagnosing side. Otherwise find the presence 1661 * state here. 
Note that if fault proxying with an INTERNAL transport 1662 * we can only trust the presence state where we are using hc-scheme 1663 * fmris which should be consistant across domains in the same system - 1664 * other schemes can refer to different devices in different domains. 1665 */ 1666 if (!(alp->al_flags & FMD_ASRU_PROXY_EXTERNAL)) { 1667 ps = fmd_asru_replacement_state(alp->al_event, (alp->al_flags & 1668 FMD_ASRU_PROXY)? HC_ONLY_TRUE : HC_ONLY_FALSE); 1669 if (ps == FMD_OBJ_STATE_NOT_PRESENT) 1670 return (st | FMD_ASRU_UNUSABLE); 1671 if (ps == FMD_OBJ_STATE_REPLACED) { 1672 if (alp->al_reason < FMD_ASRU_REPLACED) 1673 alp->al_reason = FMD_ASRU_REPLACED; 1674 return (st | FMD_ASRU_UNUSABLE); 1675 } 1676 } 1677 if (ps == FMD_OBJ_STATE_UNKNOWN && (alp->al_flags & FMD_ASRU_PROXY)) 1678 st |= (alp->al_flags & (FMD_ASRU_DEGRADED | FMD_ASRU_PRESENT)); 1679 else 1680 st |= (alp->al_flags & (FMD_ASRU_DEGRADED)) | FMD_ASRU_PRESENT; 1681 1682 /* 1683 * For fault proxying, unless we have a local ASRU, then believe the 1684 * service state sent by the diagnosing side. Otherwise find the service 1685 * state here. Try fmd_fmri_service_state() first, but if that's not 1686 * supported by the scheme then fall back to fmd_fmri_unusable(). 
1687 */ 1688 if ((!(alp->al_flags & FMD_ASRU_PROXY) || 1689 (alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU)) && 1690 nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0) { 1691 us = fmd_fmri_service_state(asru); 1692 if (us == -1 || us == FMD_SERVICE_STATE_UNKNOWN) { 1693 /* not supported by scheme - try fmd_fmri_unusable */ 1694 us = fmd_fmri_unusable(asru); 1695 if (us > 0) 1696 st |= FMD_ASRU_UNUSABLE; 1697 else if (us == 0) 1698 st &= ~FMD_ASRU_UNUSABLE; 1699 } else { 1700 if (us == FMD_SERVICE_STATE_UNUSABLE) { 1701 st &= ~FMD_ASRU_DEGRADED; 1702 st |= FMD_ASRU_UNUSABLE; 1703 } else if (us == FMD_SERVICE_STATE_OK) { 1704 st &= ~(FMD_ASRU_DEGRADED | FMD_ASRU_UNUSABLE); 1705 } else if (us == FMD_SERVICE_STATE_ISOLATE_PENDING) { 1706 st &= ~(FMD_ASRU_DEGRADED | FMD_ASRU_UNUSABLE); 1707 } else if (us == FMD_SERVICE_STATE_DEGRADED) { 1708 st &= ~FMD_ASRU_UNUSABLE; 1709 st |= FMD_ASRU_DEGRADED; 1710 } 1711 } 1712 } 1713 return (st); 1714 } 1715 1716 /* 1717 * Report the current known state of the ASRU by refreshing its unusable status 1718 * based upon the routines provided by the scheme module. If the unusable bit 1719 * is different, we do *not* generate a state change here because that change 1720 * may be unrelated to fmd activities and therefore we have no case or event. 1721 * The absence of the transition is harmless as this function is only provided 1722 * for RPC observability and fmd's clients are only concerned with ASRU_FAULTY. 1723 */ 1724 int 1725 fmd_asru_getstate(fmd_asru_t *ap) 1726 { 1727 int us, st, p = -1; 1728 char *s; 1729 1730 /* do not report non-fmd non-present resources */ 1731 if (!(ap->asru_flags & FMD_ASRU_INTERNAL)) { 1732 /* 1733 * As with fmd_asru_al_getstate(), we can only trust the 1734 * local presence state on a proxy if the transport is 1735 * internal and the scheme is hc. Otherwise we believe the 1736 * state as sent by the diagnosing side. 
1737 */ 1738 if (!(ap->asru_flags & FMD_ASRU_PROXY) || 1739 (!(ap->asru_flags & FMD_ASRU_PROXY_EXTERNAL) && 1740 (nvlist_lookup_string(ap->asru_fmri, FM_FMRI_SCHEME, 1741 &s) == 0 && strcmp(s, FM_FMRI_SCHEME_HC) == 0))) { 1742 if (fmd_asru_fake_not_present >= 1743 FMD_OBJ_STATE_REPLACED) 1744 return (0); 1745 p = fmd_fmri_present(ap->asru_fmri); 1746 } 1747 if (p == 0 || (p < 0 && !(ap->asru_flags & FMD_ASRU_PROXY) || 1748 !(ap->asru_flags & FMD_ASRU_PRESENT))) 1749 return (0); 1750 } 1751 1752 /* 1753 * As with fmd_asru_al_getstate(), we can only trust the local unusable 1754 * state on a proxy if there is a local ASRU. 1755 */ 1756 st = ap->asru_flags & (FMD_ASRU_FAULTY | FMD_ASRU_UNUSABLE); 1757 if (!(ap->asru_flags & FMD_ASRU_PROXY) || 1758 (ap->asru_flags & FMD_ASRU_PROXY_WITH_ASRU)) { 1759 us = fmd_fmri_unusable(ap->asru_fmri); 1760 if (us > 0) 1761 st |= FMD_ASRU_UNUSABLE; 1762 else if (us == 0) 1763 st &= ~FMD_ASRU_UNUSABLE; 1764 } 1765 return (st); 1766 } 1767