1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/fm/protocol.h> 28 #include <uuid/uuid.h> 29 30 #include <dirent.h> 31 #include <limits.h> 32 #include <unistd.h> 33 #include <alloca.h> 34 #include <stddef.h> 35 #include <fm/libtopo.h> 36 37 #include <fmd_alloc.h> 38 #include <fmd_string.h> 39 #include <fmd_error.h> 40 #include <fmd_subr.h> 41 #include <fmd_protocol.h> 42 #include <fmd_event.h> 43 #include <fmd_conf.h> 44 #include <fmd_fmri.h> 45 #include <fmd_dispq.h> 46 #include <fmd_case.h> 47 #include <fmd_module.h> 48 #include <fmd_asru.h> 49 50 #include <fmd.h> 51 52 static const char *const _fmd_asru_events[] = { 53 FMD_RSRC_CLASS "asru.ok", /* UNUSABLE=0 FAULTED=0 */ 54 FMD_RSRC_CLASS "asru.degraded", /* UNUSABLE=0 FAULTED=1 */ 55 FMD_RSRC_CLASS "asru.unknown", /* UNUSABLE=1 FAULTED=0 */ 56 FMD_RSRC_CLASS "asru.faulted" /* UNUSABLE=1 FAULTED=1 */ 57 }; 58 59 static const char *const _fmd_asru_snames[] = { 60 "uf", "uF", "Uf", "UF" /* same order as above */ 61 }; 62 63 volatile uint32_t fmd_asru_fake_not_present = 0; 64 65 static uint_t 
66 fmd_asru_strhash(fmd_asru_hash_t *ahp, const char *val) 67 { 68 return (topo_fmri_strhash(ahp->ah_topo->ft_hdl, val) % ahp->ah_hashlen); 69 } 70 71 static boolean_t 72 fmd_asru_strcmp(fmd_asru_hash_t *ahp, const char *a, const char *b) 73 { 74 return (topo_fmri_strcmp(ahp->ah_topo->ft_hdl, a, b)); 75 } 76 77 static fmd_asru_t * 78 fmd_asru_create(fmd_asru_hash_t *ahp, const char *uuid, 79 const char *name, nvlist_t *fmri) 80 { 81 fmd_asru_t *ap = fmd_zalloc(sizeof (fmd_asru_t), FMD_SLEEP); 82 char *s; 83 84 (void) pthread_mutex_init(&ap->asru_lock, NULL); 85 (void) pthread_cond_init(&ap->asru_cv, NULL); 86 87 ap->asru_name = fmd_strdup(name, FMD_SLEEP); 88 if (fmri) 89 (void) nvlist_xdup(fmri, &ap->asru_fmri, &fmd.d_nva); 90 ap->asru_root = fmd_strdup(ahp->ah_dirpath, FMD_SLEEP); 91 ap->asru_uuid = fmd_strdup(uuid, FMD_SLEEP); 92 ap->asru_uuidlen = ap->asru_uuid ? strlen(ap->asru_uuid) : 0; 93 ap->asru_refs = 1; 94 95 if (fmri && nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &s) == 0 && 96 strcmp(s, FM_FMRI_SCHEME_FMD) == 0) 97 ap->asru_flags |= FMD_ASRU_INTERNAL; 98 99 return (ap); 100 } 101 102 static void 103 fmd_asru_destroy(fmd_asru_t *ap) 104 { 105 ASSERT(MUTEX_HELD(&ap->asru_lock)); 106 ASSERT(ap->asru_refs == 0); 107 108 nvlist_free(ap->asru_event); 109 fmd_strfree(ap->asru_name); 110 nvlist_free(ap->asru_fmri); 111 fmd_strfree(ap->asru_root); 112 fmd_free(ap->asru_uuid, ap->asru_uuidlen + 1); 113 fmd_free(ap, sizeof (fmd_asru_t)); 114 } 115 116 static void 117 fmd_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_t *ap) 118 { 119 uint_t h = fmd_asru_strhash(ahp, ap->asru_name); 120 121 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 122 ap->asru_next = ahp->ah_hash[h]; 123 ahp->ah_hash[h] = ap; 124 ahp->ah_count++; 125 } 126 127 static fmd_asru_t * 128 fmd_asru_hold(fmd_asru_t *ap) 129 { 130 (void) pthread_mutex_lock(&ap->asru_lock); 131 ap->asru_refs++; 132 ASSERT(ap->asru_refs != 0); 133 (void) pthread_mutex_unlock(&ap->asru_lock); 134 return (ap); 135 } 136 137 /* 
138 * Lookup an asru in the hash by name and place a hold on it. If the asru is 139 * not found, no entry is created and NULL is returned. This internal function 140 * is for callers who have the ah_lock held and is used by lookup_name below. 141 */ 142 fmd_asru_t * 143 fmd_asru_hash_lookup(fmd_asru_hash_t *ahp, const char *name) 144 { 145 fmd_asru_t *ap; 146 uint_t h; 147 148 ASSERT(RW_LOCK_HELD(&ahp->ah_lock)); 149 h = fmd_asru_strhash(ahp, name); 150 151 for (ap = ahp->ah_hash[h]; ap != NULL; ap = ap->asru_next) { 152 if (fmd_asru_strcmp(ahp, ap->asru_name, name)) 153 break; 154 } 155 156 if (ap != NULL) 157 (void) fmd_asru_hold(ap); 158 else 159 (void) fmd_set_errno(EFMD_ASRU_NOENT); 160 161 return (ap); 162 } 163 164 #define HC_ONLY_FALSE 0 165 #define HC_ONLY_TRUE 1 166 167 static int 168 fmd_asru_replacement_state(nvlist_t *event, int hc_only) 169 { 170 int ps = -1; 171 nvlist_t *asru, *fru, *rsrc; 172 char *s; 173 174 /* 175 * Check if there is evidence that this object is no longer present. 176 * In general fmd_fmri_present() should be supported on resources and/or 177 * frus, as those are the things that are physically present or not 178 * present - an asru can be spread over a number of frus some of which 179 * are present and some not, so fmd_fmri_present() is not generally 180 * meaningful. However retain a check for asru first for compatibility. 181 * If we have checked all three and we still get -1 then nothing knows 182 * whether it's present or not, so err on the safe side and treat it 183 * as still present. 184 * 185 * Note that if hc_only is set, then we only check status using fmris 186 * that are in hc-scheme. 
187 */ 188 if (fmd_asru_fake_not_present) 189 return (fmd_asru_fake_not_present); 190 if (nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0 && 191 (hc_only == HC_ONLY_FALSE || (nvlist_lookup_string(asru, 192 FM_FMRI_SCHEME, &s) == 0 && strcmp(s, FM_FMRI_SCHEME_HC) == 0))) 193 ps = fmd_fmri_replaced(asru); 194 if (ps == -1 || ps == FMD_OBJ_STATE_UNKNOWN) { 195 if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, 196 &rsrc) == 0 && (hc_only == HC_ONLY_FALSE || 197 (nvlist_lookup_string(rsrc, FM_FMRI_SCHEME, &s) == 0 && 198 strcmp(s, FM_FMRI_SCHEME_HC) == 0))) { 199 if (ps == -1) { 200 ps = fmd_fmri_replaced(rsrc); 201 } else { 202 /* see if we can improve on UNKNOWN */ 203 int ps2 = fmd_fmri_replaced(rsrc); 204 if (ps2 == FMD_OBJ_STATE_STILL_PRESENT || 205 ps2 == FMD_OBJ_STATE_REPLACED) 206 ps = ps2; 207 } 208 } 209 } 210 if (ps == -1 || ps == FMD_OBJ_STATE_UNKNOWN) { 211 if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0 && 212 (hc_only == HC_ONLY_FALSE || (nvlist_lookup_string(fru, 213 FM_FMRI_SCHEME, &s) == 0 && 214 strcmp(s, FM_FMRI_SCHEME_HC) == 0))) { 215 if (ps == -1) { 216 ps = fmd_fmri_replaced(fru); 217 } else { 218 /* see if we can improve on UNKNOWN */ 219 int ps2 = fmd_fmri_replaced(fru); 220 if (ps2 == FMD_OBJ_STATE_STILL_PRESENT || 221 ps2 == FMD_OBJ_STATE_REPLACED) 222 ps = ps2; 223 } 224 } 225 } 226 if (ps == -1) 227 ps = FMD_OBJ_STATE_UNKNOWN; 228 return (ps); 229 } 230 231 static void 232 fmd_asru_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 233 char *name) 234 { 235 uint_t h = fmd_asru_strhash(ahp, name); 236 237 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 238 alp->al_asru_next = ahp->ah_asru_hash[h]; 239 ahp->ah_asru_hash[h] = alp; 240 ahp->ah_al_count++; 241 } 242 243 static void 244 fmd_asru_case_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 245 char *name) 246 { 247 uint_t h = fmd_asru_strhash(ahp, name); 248 249 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 250 alp->al_case_next = ahp->ah_case_hash[h]; 251 
ahp->ah_case_hash[h] = alp; 252 } 253 254 static void 255 fmd_asru_fru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, char *name) 256 { 257 uint_t h = fmd_asru_strhash(ahp, name); 258 259 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 260 alp->al_fru_next = ahp->ah_fru_hash[h]; 261 ahp->ah_fru_hash[h] = alp; 262 } 263 264 static void 265 fmd_asru_label_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 266 char *name) 267 { 268 uint_t h = fmd_asru_strhash(ahp, name); 269 270 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 271 alp->al_label_next = ahp->ah_label_hash[h]; 272 ahp->ah_label_hash[h] = alp; 273 } 274 275 static void 276 fmd_asru_rsrc_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 277 char *name) 278 { 279 uint_t h = fmd_asru_strhash(ahp, name); 280 281 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 282 alp->al_rsrc_next = ahp->ah_rsrc_hash[h]; 283 ahp->ah_rsrc_hash[h] = alp; 284 } 285 286 static void 287 fmd_asru_al_destroy(fmd_asru_link_t *alp) 288 { 289 ASSERT(alp->al_refs == 0); 290 ASSERT(MUTEX_HELD(&alp->al_asru->asru_lock)); 291 292 if (alp->al_log != NULL) 293 fmd_log_rele(alp->al_log); 294 295 fmd_free(alp->al_uuid, alp->al_uuidlen + 1); 296 nvlist_free(alp->al_event); 297 fmd_strfree(alp->al_rsrc_name); 298 fmd_strfree(alp->al_case_uuid); 299 fmd_strfree(alp->al_fru_name); 300 fmd_strfree(alp->al_asru_name); 301 fmd_strfree(alp->al_label); 302 nvlist_free(alp->al_asru_fmri); 303 fmd_free(alp, sizeof (fmd_asru_link_t)); 304 } 305 306 static fmd_asru_link_t * 307 fmd_asru_al_hold(fmd_asru_link_t *alp) 308 { 309 fmd_asru_t *ap = alp->al_asru; 310 311 (void) pthread_mutex_lock(&ap->asru_lock); 312 ap->asru_refs++; 313 alp->al_refs++; 314 ASSERT(alp->al_refs != 0); 315 (void) pthread_mutex_unlock(&ap->asru_lock); 316 return (alp); 317 } 318 319 static void fmd_asru_destroy(fmd_asru_t *ap); 320 321 /*ARGSUSED*/ 322 static void 323 fmd_asru_al_hash_release(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp) 324 { 325 fmd_asru_t *ap = alp->al_asru; 326 327 (void) 
pthread_mutex_lock(&ap->asru_lock); 328 ASSERT(alp->al_refs != 0); 329 if (--alp->al_refs == 0) 330 fmd_asru_al_destroy(alp); 331 ASSERT(ap->asru_refs != 0); 332 if (--ap->asru_refs == 0) 333 fmd_asru_destroy(ap); 334 else 335 (void) pthread_mutex_unlock(&ap->asru_lock); 336 } 337 338 static int 339 fmd_asru_get_namestr(nvlist_t *nvl, char **name, ssize_t *namelen) 340 { 341 if ((*namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) == -1) 342 return (EFMD_ASRU_FMRI); 343 *name = fmd_alloc(*namelen + 1, FMD_SLEEP); 344 if (fmd_fmri_nvl2str(nvl, *name, *namelen + 1) == -1) { 345 if (*name != NULL) 346 fmd_free(*name, *namelen + 1); 347 return (EFMD_ASRU_FMRI); 348 } 349 return (0); 350 } 351 352 static fmd_asru_link_t * 353 fmd_asru_al_create(fmd_asru_hash_t *ahp, nvlist_t *nvl, fmd_case_t *cp, 354 const char *al_uuid) 355 { 356 nvlist_t *asru = NULL, *fru, *rsrc; 357 int got_rsrc = 0, got_asru = 0, got_fru = 0; 358 ssize_t fru_namelen, rsrc_namelen, asru_namelen; 359 char *asru_name, *rsrc_name, *fru_name, *name, *label; 360 fmd_asru_link_t *alp; 361 fmd_asru_t *ap; 362 boolean_t msg; 363 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 364 365 if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &asru) == 0 && 366 fmd_asru_get_namestr(asru, &asru_name, &asru_namelen) == 0) 367 got_asru = 1; 368 if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &fru) == 0 && 369 fmd_asru_get_namestr(fru, &fru_name, &fru_namelen) == 0) 370 got_fru = 1; 371 if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0 && 372 fmd_asru_get_namestr(rsrc, &rsrc_name, &rsrc_namelen) == 0) 373 got_rsrc = 1; 374 if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, &label) != 0) 375 label = ""; 376 377 /* 378 * Grab the rwlock as a writer; Then create and insert the asru with 379 * ahp->ah_lock held and hash it in. We'll then drop the rwlock and 380 * proceed to initializing the asru. 381 */ 382 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 383 384 /* 385 * Create and initialise the per-fault "link" structure. 
386 */ 387 alp = fmd_zalloc(sizeof (fmd_asru_link_t), FMD_SLEEP); 388 if (got_asru) 389 (void) nvlist_xdup(asru, &alp->al_asru_fmri, &fmd.d_nva); 390 alp->al_uuid = fmd_strdup(al_uuid, FMD_SLEEP); 391 alp->al_uuidlen = strlen(alp->al_uuid); 392 alp->al_refs = 1; 393 394 /* 395 * If this is the first fault for this asru, then create the per-asru 396 * structure and link into the hash. 397 */ 398 name = got_asru ? asru_name : ""; 399 if ((ap = fmd_asru_hash_lookup(ahp, name)) == NULL) { 400 ap = fmd_asru_create(ahp, al_uuid, name, got_asru ? asru : 401 NULL); 402 fmd_asru_hash_insert(ahp, ap); 403 } else 404 nvlist_free(ap->asru_event); 405 (void) nvlist_xdup(nvl, &ap->asru_event, &fmd.d_nva); 406 407 /* 408 * Put the link structure on the list associated with the per-asru 409 * structure. Then put the link structure on the various hashes. 410 */ 411 fmd_list_append(&ap->asru_list, (fmd_list_t *)alp); 412 alp->al_asru = ap; 413 alp->al_asru_name = got_asru ? asru_name : fmd_strdup("", FMD_SLEEP); 414 fmd_asru_asru_hash_insert(ahp, alp, alp->al_asru_name); 415 alp->al_fru_name = got_fru ? fru_name : fmd_strdup("", FMD_SLEEP); 416 fmd_asru_fru_hash_insert(ahp, alp, alp->al_fru_name); 417 alp->al_rsrc_name = got_rsrc ? 
rsrc_name : fmd_strdup("", FMD_SLEEP); 418 fmd_asru_rsrc_hash_insert(ahp, alp, alp->al_rsrc_name); 419 alp->al_label = fmd_strdup(label, FMD_SLEEP); 420 fmd_asru_label_hash_insert(ahp, alp, label); 421 alp->al_case_uuid = fmd_strdup(cip->ci_uuid, FMD_SLEEP); 422 fmd_asru_case_hash_insert(ahp, alp, cip->ci_uuid); 423 (void) pthread_mutex_lock(&ap->asru_lock); 424 (void) pthread_rwlock_unlock(&ahp->ah_lock); 425 426 ap->asru_case = alp->al_case = cp; 427 if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE, &msg) == 0 && 428 msg == B_FALSE) 429 ap->asru_flags |= FMD_ASRU_INVISIBLE; 430 (void) nvlist_xdup(nvl, &alp->al_event, &fmd.d_nva); 431 ap->asru_flags |= FMD_ASRU_VALID; 432 (void) pthread_cond_broadcast(&ap->asru_cv); 433 (void) pthread_mutex_unlock(&ap->asru_lock); 434 return (alp); 435 } 436 437 static void 438 fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp) 439 { 440 nvlist_t *nvl = FMD_EVENT_NVL(ep); 441 boolean_t faulty = FMD_B_FALSE, unusable = FMD_B_FALSE; 442 int ps; 443 boolean_t repaired = FMD_B_FALSE, replaced = FMD_B_FALSE; 444 boolean_t acquitted = FMD_B_FALSE, resolved = FMD_B_FALSE; 445 nvlist_t *flt, *flt_copy, *asru; 446 char *case_uuid = NULL, *case_code = NULL; 447 fmd_asru_t *ap; 448 fmd_asru_link_t *alp; 449 fmd_case_t *cp; 450 int64_t *diag_time; 451 nvlist_t *de_fmri, *de_fmri_dup; 452 uint_t nelem; 453 topo_hdl_t *thp; 454 char *class; 455 nvlist_t *rsrc; 456 int err; 457 boolean_t injected; 458 459 /* 460 * Extract the most recent values of 'faulty' from the event log. 
461 */ 462 if (nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY, 463 &faulty) != 0) { 464 fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: " 465 "invalid event log record\n", lp->log_name); 466 ahp->ah_error = EFMD_ASRU_EVENT; 467 return; 468 } 469 if (nvlist_lookup_nvlist(nvl, FM_RSRC_ASRU_EVENT, &flt) != 0) { 470 fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: " 471 "invalid event log record\n", lp->log_name); 472 ahp->ah_error = EFMD_ASRU_EVENT; 473 return; 474 } 475 (void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_UUID, &case_uuid); 476 (void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_CODE, &case_code); 477 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_UNUSABLE, 478 &unusable); 479 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPAIRED, 480 &repaired); 481 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPLACED, 482 &replaced); 483 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_ACQUITTED, 484 &acquitted); 485 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_RESOLVED, 486 &resolved); 487 488 /* 489 * Attempt to recreate the case in CLOSED, REPAIRED or RESOLVED state 490 * (depending on whether the faulty/resolved bits are set). 491 * If the case is already present, fmd_case_recreate() will return it. 492 * If not, we'll create a new orphaned case. Either way, we use the 493 * ASRU event to insert a suspect into the partially-restored case. 494 */ 495 fmd_module_lock(fmd.d_rmod); 496 cp = fmd_case_recreate(fmd.d_rmod, NULL, faulty ? FMD_CASE_CLOSED : 497 resolved ? 
FMD_CASE_RESOLVED : FMD_CASE_REPAIRED, case_uuid, 498 case_code); 499 fmd_case_hold(cp); 500 fmd_module_unlock(fmd.d_rmod); 501 if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_INJECTED, 502 &injected) == 0 && injected) 503 fmd_case_set_injected(cp); 504 if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time, 505 &nelem) == 0 && nelem >= 2) 506 fmd_case_settime(cp, diag_time[0], diag_time[1]); 507 else 508 fmd_case_settime(cp, lp->log_stat.st_ctime, 0); 509 if (nvlist_lookup_nvlist(nvl, FM_SUSPECT_DE, &de_fmri) == 0) { 510 (void) nvlist_xdup(de_fmri, &de_fmri_dup, &fmd.d_nva); 511 fmd_case_set_de_fmri(cp, de_fmri_dup); 512 } 513 (void) nvlist_xdup(flt, &flt_copy, &fmd.d_nva); 514 515 /* 516 * For faults with a resource, re-evaluate the asru from the resource. 517 */ 518 thp = fmd_fmri_topo_hold(TOPO_VERSION); 519 if (nvlist_lookup_string(flt_copy, FM_CLASS, &class) == 0 && 520 strncmp(class, "fault", 5) == 0 && 521 nvlist_lookup_nvlist(flt_copy, FM_FAULT_RESOURCE, &rsrc) == 0 && 522 rsrc != NULL && 523 (fmd_fmri_replaced(rsrc) != FMD_OBJ_STATE_REPLACED) && 524 topo_fmri_asru(thp, rsrc, &asru, &err) == 0) { 525 (void) nvlist_remove(flt_copy, FM_FAULT_ASRU, DATA_TYPE_NVLIST); 526 (void) nvlist_add_nvlist(flt_copy, FM_FAULT_ASRU, asru); 527 nvlist_free(asru); 528 } 529 fmd_fmri_topo_rele(thp); 530 531 (void) nvlist_xdup(flt_copy, &flt, &fmd.d_nva); 532 533 fmd_case_recreate_suspect(cp, flt_copy); 534 535 /* 536 * Now create the resource cache entries. 537 */ 538 alp = fmd_asru_al_create(ahp, flt, cp, fmd_strbasename(lp->log_name)); 539 ap = alp->al_asru; 540 541 /* 542 * Check to see if the resource is still present in the system. 
543 */ 544 ps = fmd_asru_replacement_state(flt, HC_ONLY_FALSE); 545 if (ps == FMD_OBJ_STATE_REPLACED) { 546 replaced = FMD_B_TRUE; 547 } else if (ps == FMD_OBJ_STATE_STILL_PRESENT || 548 ps == FMD_OBJ_STATE_UNKNOWN) { 549 ap->asru_flags |= FMD_ASRU_PRESENT; 550 if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, 551 &asru) == 0) { 552 int us; 553 554 switch (fmd_fmri_service_state(asru)) { 555 case FMD_SERVICE_STATE_UNUSABLE: 556 unusable = FMD_B_TRUE; 557 break; 558 case FMD_SERVICE_STATE_OK: 559 case FMD_SERVICE_STATE_ISOLATE_PENDING: 560 case FMD_SERVICE_STATE_DEGRADED: 561 unusable = FMD_B_FALSE; 562 break; 563 case FMD_SERVICE_STATE_UNKNOWN: 564 case -1: 565 /* not supported by scheme */ 566 us = fmd_fmri_unusable(asru); 567 if (us > 0) 568 unusable = FMD_B_TRUE; 569 else if (us == 0) 570 unusable = FMD_B_FALSE; 571 break; 572 } 573 } 574 } 575 576 nvlist_free(flt); 577 578 ap->asru_flags |= FMD_ASRU_RECREATED; 579 if (faulty) { 580 alp->al_flags |= FMD_ASRU_FAULTY; 581 ap->asru_flags |= FMD_ASRU_FAULTY; 582 } 583 if (unusable) { 584 alp->al_flags |= FMD_ASRU_UNUSABLE; 585 ap->asru_flags |= FMD_ASRU_UNUSABLE; 586 } 587 if (replaced) 588 alp->al_reason = FMD_ASRU_REPLACED; 589 else if (repaired) 590 alp->al_reason = FMD_ASRU_REPAIRED; 591 else if (acquitted) 592 alp->al_reason = FMD_ASRU_ACQUITTED; 593 else 594 alp->al_reason = FMD_ASRU_REMOVED; 595 596 TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", alp->al_uuid, 597 (void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE])); 598 } 599 600 static void 601 fmd_asru_hash_discard(fmd_asru_hash_t *ahp, const char *uuid, int err) 602 { 603 char src[PATH_MAX], dst[PATH_MAX]; 604 605 (void) snprintf(src, PATH_MAX, "%s/%s", ahp->ah_dirpath, uuid); 606 (void) snprintf(dst, PATH_MAX, "%s/%s-", ahp->ah_dirpath, uuid); 607 608 if (err != 0) 609 err = rename(src, dst); 610 else 611 err = unlink(src); 612 613 if (err != 0 && errno != ENOENT) 614 fmd_error(EFMD_ASRU_EVENT, "failed to rename log %s", src); 615 } 
616 617 /* 618 * Open a saved log file and restore it into the ASRU hash. If we can't even 619 * open the log, rename the log file to <uuid>- to indicate it is corrupt. If 620 * fmd_log_replay() fails, we either delete the file (if it has reached the 621 * upper limit on cache age) or rename it for debugging if it was corrupted. 622 */ 623 static void 624 fmd_asru_hash_logopen(fmd_asru_hash_t *ahp, const char *uuid) 625 { 626 fmd_log_t *lp = fmd_log_tryopen(ahp->ah_dirpath, uuid, FMD_LOG_ASRU); 627 uint_t n; 628 629 if (lp == NULL) { 630 fmd_asru_hash_discard(ahp, uuid, errno); 631 return; 632 } 633 634 ahp->ah_error = 0; 635 n = ahp->ah_al_count; 636 637 fmd_log_replay(lp, (fmd_log_f *)fmd_asru_hash_recreate, ahp); 638 fmd_log_rele(lp); 639 640 if (ahp->ah_al_count == n) 641 fmd_asru_hash_discard(ahp, uuid, ahp->ah_error); 642 } 643 644 void 645 fmd_asru_hash_refresh(fmd_asru_hash_t *ahp) 646 { 647 struct dirent *dp; 648 DIR *dirp; 649 int zero; 650 651 if ((dirp = opendir(ahp->ah_dirpath)) == NULL) { 652 fmd_error(EFMD_ASRU_NODIR, 653 "failed to open asru cache directory %s", ahp->ah_dirpath); 654 return; 655 } 656 657 (void) fmd_conf_getprop(fmd.d_conf, "rsrc.zero", &zero); 658 659 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 660 661 while ((dp = readdir(dirp)) != NULL) { 662 if (dp->d_name[0] == '.') 663 continue; /* skip "." and ".." */ 664 665 if (zero) 666 fmd_asru_hash_discard(ahp, dp->d_name, 0); 667 else if (!fmd_strmatch(dp->d_name, "*-")) 668 fmd_asru_hash_logopen(ahp, dp->d_name); 669 } 670 671 (void) pthread_rwlock_unlock(&ahp->ah_lock); 672 (void) closedir(dirp); 673 } 674 675 /* 676 * If the resource is present and faulty but not unusable, replay the fault 677 * event that caused it be marked faulty. This will cause the agent 678 * subscribing to this fault class to again disable the resource. 
679 */ 680 /*ARGSUSED*/ 681 static void 682 fmd_asru_hash_replay_asru(fmd_asru_t *ap, void *data) 683 { 684 fmd_event_t *e; 685 nvlist_t *nvl; 686 char *class; 687 688 if (ap->asru_event != NULL && (ap->asru_flags & (FMD_ASRU_STATE | 689 FMD_ASRU_PRESENT)) == (FMD_ASRU_FAULTY | FMD_ASRU_PRESENT)) { 690 691 fmd_dprintf(FMD_DBG_ASRU, 692 "replaying fault event for %s", ap->asru_name); 693 694 (void) nvlist_xdup(ap->asru_event, &nvl, &fmd.d_nva); 695 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 696 697 (void) nvlist_add_string(nvl, FMD_EVN_UUID, 698 ((fmd_case_impl_t *)ap->asru_case)->ci_uuid); 699 700 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 701 fmd_dispq_dispatch(fmd.d_disp, e, class); 702 } 703 } 704 705 void 706 fmd_asru_hash_replay(fmd_asru_hash_t *ahp) 707 { 708 fmd_asru_hash_apply(ahp, fmd_asru_hash_replay_asru, NULL); 709 } 710 711 /* 712 * Check if the resource is still present. If not, and if the rsrc.age time 713 * has expired, then do an implicit repair on the resource. 714 */ 715 /*ARGSUSED*/ 716 static void 717 fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *arg) 718 { 719 struct timeval tv; 720 fmd_log_t *lp; 721 hrtime_t hrt; 722 int ps; 723 int err; 724 fmd_asru_rep_arg_t fara; 725 726 if (!(alp->al_flags & FMD_ASRU_FAULTY)) 727 return; 728 729 /* 730 * Checking for aged resources only happens on the diagnosing side 731 * not on a proxy. 
732 */ 733 if (alp->al_flags & FMD_ASRU_PROXY) 734 return; 735 736 ps = fmd_asru_replacement_state(alp->al_event, HC_ONLY_FALSE); 737 if (ps == FMD_OBJ_STATE_REPLACED) { 738 fara.fara_reason = FMD_ASRU_REPLACED; 739 fara.fara_bywhat = FARA_ALL; 740 fara.fara_rval = &err; 741 fmd_asru_repaired(alp, &fara); 742 } else if (ps == FMD_OBJ_STATE_NOT_PRESENT) { 743 fmd_time_gettimeofday(&tv); 744 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, 745 FMD_LOG_ASRU); 746 if (lp == NULL) 747 return; 748 hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime); 749 fmd_log_rele(lp); 750 if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime) { 751 fara.fara_reason = FMD_ASRU_REMOVED; 752 fara.fara_bywhat = FARA_ALL; 753 fara.fara_rval = &err; 754 fmd_asru_repaired(alp, &fara); 755 } 756 } 757 } 758 759 /*ARGSUSED*/ 760 void 761 fmd_asru_check_if_aged(fmd_asru_link_t *alp, void *arg) 762 { 763 struct timeval tv; 764 fmd_log_t *lp; 765 hrtime_t hrt; 766 767 /* 768 * Case must be in resolved state for this to be called. So modified 769 * time on resource cache entry should be the time the resolve occurred. 770 * Return 0 if not yet hit rsrc.aged. 771 */ 772 fmd_time_gettimeofday(&tv); 773 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, FMD_LOG_ASRU); 774 if (lp == NULL) 775 return; 776 hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime); 777 fmd_log_rele(lp); 778 if (hrt * NANOSEC < fmd.d_asrus->ah_lifetime) 779 *(int *)arg = 0; 780 } 781 782 /*ARGSUSED*/ 783 void 784 fmd_asru_most_recent(fmd_asru_link_t *alp, void *arg) 785 { 786 fmd_log_t *lp; 787 uint64_t hrt; 788 789 /* 790 * Find most recent modified time of a set of resource cache entries. 
791 */ 792 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, FMD_LOG_ASRU); 793 if (lp == NULL) 794 return; 795 hrt = lp->log_stat.st_mtime; 796 fmd_log_rele(lp); 797 if (*(uint64_t *)arg < hrt) 798 *(uint64_t *)arg = hrt; 799 } 800 801 void 802 fmd_asru_clear_aged_rsrcs() 803 { 804 int check_if_aged = 1; 805 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, NULL); 806 fmd_case_hash_apply(fmd.d_cases, fmd_case_discard_resolved, 807 &check_if_aged); 808 } 809 810 fmd_asru_hash_t * 811 fmd_asru_hash_create(const char *root, const char *dir) 812 { 813 fmd_asru_hash_t *ahp; 814 char path[PATH_MAX]; 815 816 ahp = fmd_alloc(sizeof (fmd_asru_hash_t), FMD_SLEEP); 817 (void) pthread_rwlock_init(&ahp->ah_lock, NULL); 818 ahp->ah_hashlen = fmd.d_str_buckets; 819 ahp->ah_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, FMD_SLEEP); 820 ahp->ah_asru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 821 FMD_SLEEP); 822 ahp->ah_case_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 823 FMD_SLEEP); 824 ahp->ah_fru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 825 FMD_SLEEP); 826 ahp->ah_label_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 827 FMD_SLEEP); 828 ahp->ah_rsrc_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 829 FMD_SLEEP); 830 (void) snprintf(path, sizeof (path), "%s/%s", root, dir); 831 ahp->ah_dirpath = fmd_strdup(path, FMD_SLEEP); 832 (void) fmd_conf_getprop(fmd.d_conf, "rsrc.age", &ahp->ah_lifetime); 833 (void) fmd_conf_getprop(fmd.d_conf, "fakenotpresent", 834 (uint32_t *)&fmd_asru_fake_not_present); 835 ahp->ah_al_count = 0; 836 ahp->ah_count = 0; 837 ahp->ah_error = 0; 838 ahp->ah_topo = fmd_topo_hold(); 839 840 return (ahp); 841 } 842 843 void 844 fmd_asru_hash_destroy(fmd_asru_hash_t *ahp) 845 { 846 fmd_asru_link_t *alp, *np; 847 uint_t i; 848 849 for (i = 0; i < ahp->ah_hashlen; i++) { 850 for (alp = ahp->ah_case_hash[i]; alp != NULL; alp = np) { 851 np = alp->al_case_next; 852 alp->al_case_next = NULL; 853 
fmd_case_rele(alp->al_case); 854 alp->al_case = NULL; 855 fmd_asru_al_hash_release(ahp, alp); 856 } 857 } 858 859 fmd_strfree(ahp->ah_dirpath); 860 fmd_free(ahp->ah_hash, sizeof (void *) * ahp->ah_hashlen); 861 fmd_free(ahp->ah_asru_hash, sizeof (void *) * ahp->ah_hashlen); 862 fmd_free(ahp->ah_case_hash, sizeof (void *) * ahp->ah_hashlen); 863 fmd_free(ahp->ah_fru_hash, sizeof (void *) * ahp->ah_hashlen); 864 fmd_free(ahp->ah_label_hash, sizeof (void *) * ahp->ah_hashlen); 865 fmd_free(ahp->ah_rsrc_hash, sizeof (void *) * ahp->ah_hashlen); 866 fmd_topo_rele(ahp->ah_topo); 867 fmd_free(ahp, sizeof (fmd_asru_hash_t)); 868 } 869 870 /* 871 * Take a snapshot of the ASRU database by placing an additional hold on each 872 * member in an auxiliary array, and then call 'func' for each ASRU. 873 */ 874 void 875 fmd_asru_hash_apply(fmd_asru_hash_t *ahp, 876 void (*func)(fmd_asru_t *, void *), void *arg) 877 { 878 fmd_asru_t *ap, **aps, **app; 879 uint_t apc, i; 880 881 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 882 883 aps = app = fmd_alloc(ahp->ah_count * sizeof (fmd_asru_t *), FMD_SLEEP); 884 apc = ahp->ah_count; 885 886 for (i = 0; i < ahp->ah_hashlen; i++) { 887 for (ap = ahp->ah_hash[i]; ap != NULL; ap = ap->asru_next) 888 *app++ = fmd_asru_hold(ap); 889 } 890 891 ASSERT(app == aps + apc); 892 (void) pthread_rwlock_unlock(&ahp->ah_lock); 893 894 for (i = 0; i < apc; i++) { 895 if (aps[i]->asru_fmri != NULL) 896 func(aps[i], arg); 897 fmd_asru_hash_release(ahp, aps[i]); 898 } 899 900 fmd_free(aps, apc * sizeof (fmd_asru_t *)); 901 } 902 903 void 904 fmd_asru_al_hash_apply(fmd_asru_hash_t *ahp, 905 void (*func)(fmd_asru_link_t *, void *), void *arg) 906 { 907 fmd_asru_link_t *alp, **alps, **alpp; 908 uint_t alpc, i; 909 910 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 911 912 alps = alpp = fmd_alloc(ahp->ah_al_count * sizeof (fmd_asru_link_t *), 913 FMD_SLEEP); 914 alpc = ahp->ah_al_count; 915 916 for (i = 0; i < ahp->ah_hashlen; i++) { 917 for (alp = 
ahp->ah_case_hash[i]; alp != NULL; 918 alp = alp->al_case_next) 919 *alpp++ = fmd_asru_al_hold(alp); 920 } 921 922 ASSERT(alpp == alps + alpc); 923 (void) pthread_rwlock_unlock(&ahp->ah_lock); 924 925 for (i = 0; i < alpc; i++) { 926 func(alps[i], arg); 927 fmd_asru_al_hash_release(ahp, alps[i]); 928 } 929 930 fmd_free(alps, alpc * sizeof (fmd_asru_link_t *)); 931 } 932 933 static void 934 fmd_asru_do_hash_apply(fmd_asru_hash_t *ahp, const char *name, 935 void (*func)(fmd_asru_link_t *, void *), void *arg, 936 fmd_asru_link_t **hash, size_t match_offset, size_t next_offset) 937 { 938 fmd_asru_link_t *alp, **alps, **alpp; 939 uint_t alpc = 0, i; 940 uint_t h; 941 942 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 943 944 h = fmd_asru_strhash(ahp, name); 945 946 for (alp = hash[h]; alp != NULL; alp = 947 /* LINTED pointer alignment */ 948 FMD_ASRU_AL_HASH_NEXT(alp, next_offset)) 949 if (fmd_asru_strcmp(ahp, 950 /* LINTED pointer alignment */ 951 FMD_ASRU_AL_HASH_NAME(alp, match_offset), name)) 952 alpc++; 953 954 alps = alpp = fmd_alloc(alpc * sizeof (fmd_asru_link_t *), FMD_SLEEP); 955 956 for (alp = hash[h]; alp != NULL; alp = 957 /* LINTED pointer alignment */ 958 FMD_ASRU_AL_HASH_NEXT(alp, next_offset)) 959 if (fmd_asru_strcmp(ahp, 960 /* LINTED pointer alignment */ 961 FMD_ASRU_AL_HASH_NAME(alp, match_offset), name)) 962 *alpp++ = fmd_asru_al_hold(alp); 963 964 ASSERT(alpp == alps + alpc); 965 (void) pthread_rwlock_unlock(&ahp->ah_lock); 966 967 for (i = 0; i < alpc; i++) { 968 func(alps[i], arg); 969 fmd_asru_al_hash_release(ahp, alps[i]); 970 } 971 972 fmd_free(alps, alpc * sizeof (fmd_asru_link_t *)); 973 } 974 975 void 976 fmd_asru_hash_apply_by_asru(fmd_asru_hash_t *ahp, const char *name, 977 void (*func)(fmd_asru_link_t *, void *), void *arg) 978 { 979 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_asru_hash, 980 offsetof(fmd_asru_link_t, al_asru_name), 981 offsetof(fmd_asru_link_t, al_asru_next)); 982 } 983 984 void 985 
fmd_asru_hash_apply_by_case(fmd_asru_hash_t *ahp, fmd_case_t *cp, 986 void (*func)(fmd_asru_link_t *, void *), void *arg) 987 { 988 fmd_asru_do_hash_apply(ahp, ((fmd_case_impl_t *)cp)->ci_uuid, func, arg, 989 ahp->ah_case_hash, offsetof(fmd_asru_link_t, al_case_uuid), 990 offsetof(fmd_asru_link_t, al_case_next)); 991 } 992 993 void 994 fmd_asru_hash_apply_by_fru(fmd_asru_hash_t *ahp, const char *name, 995 void (*func)(fmd_asru_link_t *, void *), void *arg) 996 { 997 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_fru_hash, 998 offsetof(fmd_asru_link_t, al_fru_name), 999 offsetof(fmd_asru_link_t, al_fru_next)); 1000 } 1001 1002 void 1003 fmd_asru_hash_apply_by_rsrc(fmd_asru_hash_t *ahp, const char *name, 1004 void (*func)(fmd_asru_link_t *, void *), void *arg) 1005 { 1006 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_rsrc_hash, 1007 offsetof(fmd_asru_link_t, al_rsrc_name), 1008 offsetof(fmd_asru_link_t, al_rsrc_next)); 1009 } 1010 1011 void 1012 fmd_asru_hash_apply_by_label(fmd_asru_hash_t *ahp, const char *name, 1013 void (*func)(fmd_asru_link_t *, void *), void *arg) 1014 { 1015 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_label_hash, 1016 offsetof(fmd_asru_link_t, al_label), 1017 offsetof(fmd_asru_link_t, al_label_next)); 1018 } 1019 1020 /* 1021 * Lookup an asru in the hash by name and place a hold on it. If the asru is 1022 * not found, no entry is created and NULL is returned. 1023 */ 1024 fmd_asru_t * 1025 fmd_asru_hash_lookup_name(fmd_asru_hash_t *ahp, const char *name) 1026 { 1027 fmd_asru_t *ap; 1028 1029 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 1030 ap = fmd_asru_hash_lookup(ahp, name); 1031 (void) pthread_rwlock_unlock(&ahp->ah_lock); 1032 1033 return (ap); 1034 } 1035 1036 /* 1037 * Create a resource cache entry using the fault event "nvl" for one of the 1038 * suspects from the case "cp". 1039 * 1040 * The fault event can have the following components : FM_FAULT_ASRU, 1041 * FM_FAULT_FRU, FM_FAULT_RESOURCE. 
These should be set by the Diagnosis Engine
 * when calling fmd_nvl_create_fault().  In the general case, these are all
 * optional and an entry will always be added into the cache even if one or all
 * of these fields is missing.
 *
 * However, for hardware faults the recommended practice is that the fault
 * event should always have the FM_FAULT_RESOURCE field present and that this
 * should be represented in hc-scheme.
 *
 * Currently the DE should also add the FM_FAULT_ASRU and FM_FAULT_FRU fields
 * where known, though at some future stage fmd might be able to fill these
 * in automatically from the topology.
 *
 * A hold is placed on the case with fmd_case_hold_locked() and the link
 * returned by fmd_asru_al_create() is handed back to the caller.
 */
fmd_asru_link_t *
fmd_asru_hash_create_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, nvlist_t *nvl)
{
	/* characters in the text form of a UUID, excluding the terminator */
	enum { ASRU_UUID_MINLEN = 36 };

	char *parsed_uuid;
	uuid_t uuid;
	int uuidlen = 0;
	size_t bufsize;
	fmd_asru_link_t *alp;

	/*
	 * Generate a UUID for the ASRU.  libuuid cleverly gives us no
	 * interface for specifying or learning the buffer size.  Sigh.
	 * The spec says 36 bytes but we use a tunable just to be safe.
	 *
	 * uuid_unparse() always writes the full 36 characters plus a
	 * terminating NUL, so the tunable may enlarge the buffer but must
	 * never shrink it.  Starting from zero also keeps the size defined
	 * if the property lookup fails.
	 */
	(void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &uuidlen);
	if (uuidlen < ASRU_UUID_MINLEN)
		uuidlen = ASRU_UUID_MINLEN;
	bufsize = (size_t)uuidlen + 1;

	parsed_uuid = fmd_zalloc(bufsize, FMD_SLEEP);
	uuid_generate(uuid);
	uuid_unparse(uuid, parsed_uuid);

	/*
	 * Now create the resource cache entries.
	 */
	fmd_case_hold_locked(cp);
	alp = fmd_asru_al_create(ahp, nvl, cp, parsed_uuid);
	TRACE((FMD_DBG_ASRU, "asru %s created as %p",
	    alp->al_uuid, (void *)alp->al_asru));

	/* the free size must equal the size that was allocated above */
	fmd_free(parsed_uuid, bufsize);
	return (alp);
}

/*
 * Release the reference count on an asru obtained using fmd_asru_hash_lookup.
 * We take 'ahp' for symmetry and in case we need to use it in future work.
1088 */ 1089 /*ARGSUSED*/ 1090 void 1091 fmd_asru_hash_release(fmd_asru_hash_t *ahp, fmd_asru_t *ap) 1092 { 1093 (void) pthread_mutex_lock(&ap->asru_lock); 1094 1095 ASSERT(ap->asru_refs != 0); 1096 if (--ap->asru_refs == 0) 1097 fmd_asru_destroy(ap); 1098 else 1099 (void) pthread_mutex_unlock(&ap->asru_lock); 1100 } 1101 1102 static void 1103 fmd_asru_do_delete_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, 1104 fmd_asru_link_t **hash, size_t next_offset, char *name) 1105 { 1106 uint_t h; 1107 fmd_asru_link_t *alp, **pp, *alpnext, **alpnextp; 1108 1109 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 1110 h = fmd_asru_strhash(ahp, name); 1111 pp = &hash[h]; 1112 for (alp = *pp; alp != NULL; alp = alpnext) { 1113 /* LINTED pointer alignment */ 1114 alpnextp = FMD_ASRU_AL_HASH_NEXTP(alp, next_offset); 1115 alpnext = *alpnextp; 1116 if (alp->al_case == cp) { 1117 *pp = *alpnextp; 1118 *alpnextp = NULL; 1119 } else 1120 pp = alpnextp; 1121 } 1122 (void) pthread_rwlock_unlock(&ahp->ah_lock); 1123 } 1124 1125 static void 1126 fmd_asru_do_hash_delete(fmd_asru_hash_t *ahp, fmd_case_susp_t *cis, 1127 fmd_case_t *cp, fmd_asru_link_t **hash, size_t next_offset, char *nvname) 1128 { 1129 nvlist_t *nvl; 1130 char *name = NULL; 1131 ssize_t namelen; 1132 1133 if (nvlist_lookup_nvlist(cis->cis_nvl, nvname, &nvl) == 0 && 1134 (namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) != -1 && 1135 (name = fmd_alloc(namelen + 1, FMD_SLEEP)) != NULL) { 1136 if (fmd_fmri_nvl2str(nvl, name, namelen + 1) != -1) 1137 fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, 1138 name); 1139 fmd_free(name, namelen + 1); 1140 } else 1141 fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, ""); 1142 } 1143 1144 void 1145 fmd_asru_hash_delete_case(fmd_asru_hash_t *ahp, fmd_case_t *cp) 1146 { 1147 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1148 fmd_case_susp_t *cis; 1149 fmd_asru_link_t *alp, **plp, *alpnext; 1150 fmd_asru_t *ap; 1151 char path[PATH_MAX]; 1152 char *label; 1153 uint_t h; 1154 1155 /* 1156 * first 
delete hash entries for each suspect 1157 */ 1158 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) { 1159 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_fru_hash, 1160 offsetof(fmd_asru_link_t, al_fru_next), FM_FAULT_FRU); 1161 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_rsrc_hash, 1162 offsetof(fmd_asru_link_t, al_rsrc_next), FM_FAULT_RESOURCE); 1163 if (nvlist_lookup_string(cis->cis_nvl, FM_FAULT_LOCATION, 1164 &label) != 0) 1165 label = ""; 1166 fmd_asru_do_delete_entry(ahp, cp, ahp->ah_label_hash, 1167 offsetof(fmd_asru_link_t, al_label_next), label); 1168 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_asru_hash, 1169 offsetof(fmd_asru_link_t, al_asru_next), FM_FAULT_ASRU); 1170 } 1171 1172 /* 1173 * then delete associated case hash entries 1174 */ 1175 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 1176 h = fmd_asru_strhash(ahp, cip->ci_uuid); 1177 plp = &ahp->ah_case_hash[h]; 1178 for (alp = *plp; alp != NULL; alp = alpnext) { 1179 alpnext = alp->al_case_next; 1180 if (alp->al_case == cp) { 1181 *plp = alp->al_case_next; 1182 alp->al_case_next = NULL; 1183 ASSERT(ahp->ah_al_count != 0); 1184 ahp->ah_al_count--; 1185 1186 /* 1187 * decrement case ref. 1188 */ 1189 fmd_case_rele_locked(cp); 1190 alp->al_case = NULL; 1191 1192 /* 1193 * If we found a matching ASRU, unlink its log file and 1194 * then release the hash entry. Note that it may still 1195 * be referenced if another thread is manipulating it; 1196 * this is ok because once we unlink, the log file will 1197 * not be restored, and the log data will be freed when 1198 * all of the referencing threads release their 1199 * respective references. 
1200 */ 1201 (void) snprintf(path, sizeof (path), "%s/%s", 1202 ahp->ah_dirpath, alp->al_uuid); 1203 if (cip->ci_xprt == NULL && unlink(path) != 0) 1204 fmd_error(EFMD_ASRU_UNLINK, 1205 "failed to unlink asru %s", path); 1206 1207 /* 1208 * Now unlink from the global per-resource cache 1209 * and if this is the last link then remove that from 1210 * it's own hash too. 1211 */ 1212 ap = alp->al_asru; 1213 (void) pthread_mutex_lock(&ap->asru_lock); 1214 fmd_list_delete(&ap->asru_list, alp); 1215 if (ap->asru_list.l_next == NULL) { 1216 uint_t h; 1217 fmd_asru_t *ap2, **pp; 1218 fmd_asru_t *apnext, **apnextp; 1219 1220 ASSERT(ahp->ah_count != 0); 1221 ahp->ah_count--; 1222 h = fmd_asru_strhash(ahp, ap->asru_name); 1223 pp = &ahp->ah_hash[h]; 1224 for (ap2 = *pp; ap2 != NULL; ap2 = apnext) { 1225 apnextp = &ap2->asru_next; 1226 apnext = *apnextp; 1227 if (ap2 == ap) { 1228 *pp = *apnextp; 1229 *apnextp = NULL; 1230 } else 1231 pp = apnextp; 1232 } 1233 } 1234 (void) pthread_mutex_unlock(&ap->asru_lock); 1235 fmd_asru_al_hash_release(ahp, alp); 1236 } else 1237 plp = &alp->al_case_next; 1238 } 1239 (void) pthread_rwlock_unlock(&ahp->ah_lock); 1240 } 1241 1242 typedef struct { 1243 nvlist_t *farc_parent_fmri; 1244 uint8_t farc_reason; 1245 } fmd_asru_farc_t; 1246 1247 static void 1248 fmd_asru_repair_containee(fmd_asru_link_t *alp, void *arg) 1249 { 1250 fmd_asru_farc_t *farcp = (fmd_asru_farc_t *)arg; 1251 1252 if ((alp->al_asru->asru_flags & FMD_ASRU_INVISIBLE) && 1253 alp->al_asru_fmri && 1254 fmd_fmri_contains(farcp->farc_parent_fmri, alp->al_asru_fmri) > 0) { 1255 if (fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1256 farcp->farc_reason)) { 1257 if (alp->al_flags & FMD_ASRU_PROXY) 1258 fmd_case_xprt_updated(alp->al_case); 1259 else 1260 fmd_case_update(alp->al_case); 1261 } 1262 } 1263 } 1264 1265 static void 1266 fmd_asru_do_repair_containees(fmd_asru_link_t *alp, uint8_t reason) 1267 { 1268 int flags; 1269 1270 /* 1271 * Check if all entries associated with this asru 
are acquitted and 1272 * if so acquit containees. Don't try to repair containees on proxy 1273 * side unless we have local asru. 1274 */ 1275 if (alp->al_asru_fmri != NULL && (!(alp->al_flags & FMD_ASRU_PROXY) || 1276 (alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU))) { 1277 (void) pthread_mutex_lock(&alp->al_asru->asru_lock); 1278 flags = alp->al_asru->asru_flags; 1279 (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); 1280 if (!(flags & (FMD_ASRU_FAULTY | FMD_ASRU_INVISIBLE))) { 1281 fmd_asru_farc_t farc; 1282 1283 farc.farc_parent_fmri = alp->al_asru_fmri; 1284 farc.farc_reason = reason; 1285 fmd_asru_al_hash_apply(fmd.d_asrus, 1286 fmd_asru_repair_containee, &farc); 1287 } 1288 } 1289 } 1290 1291 void 1292 fmd_asru_repaired(fmd_asru_link_t *alp, void *arg) 1293 { 1294 int cleared; 1295 fmd_asru_rep_arg_t *farap = (fmd_asru_rep_arg_t *)arg; 1296 1297 /* 1298 * don't allow remote repair over readonly transport 1299 */ 1300 if (alp->al_flags & FMD_ASRU_PROXY_RDONLY) 1301 return; 1302 1303 /* 1304 * don't allow repair etc by asru on proxy unless asru is local 1305 */ 1306 if (farap->fara_bywhat == FARA_BY_ASRU && 1307 (alp->al_flags & FMD_ASRU_PROXY) && 1308 !(alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU)) 1309 return; 1310 /* 1311 * For acquit, need to check both name and uuid if specified 1312 */ 1313 if (farap->fara_reason == FMD_ASRU_ACQUITTED && 1314 farap->fara_rval != NULL && strcmp(farap->fara_uuid, "") != 0 && 1315 strcmp(farap->fara_uuid, alp->al_case_uuid) != 0) 1316 return; 1317 1318 /* 1319 * For replaced, verify it has been replaced if we have serial number. 1320 * If not set *farap->fara_rval to FARA_ERR_RSRCNOTR. 1321 */ 1322 if (farap->fara_reason == FMD_ASRU_REPLACED && 1323 !(alp->al_flags & FMD_ASRU_PROXY_EXTERNAL) && 1324 fmd_asru_replacement_state(alp->al_event, 1325 (alp->al_flags & FMD_ASRU_PROXY) ? 
HC_ONLY_TRUE : HC_ONLY_FALSE) == 1326 FMD_OBJ_STATE_STILL_PRESENT) { 1327 if (farap->fara_rval) 1328 *farap->fara_rval = FARA_ERR_RSRCNOTR; 1329 return; 1330 } 1331 1332 cleared = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, farap->fara_reason); 1333 fmd_asru_do_repair_containees(alp, farap->fara_reason); 1334 1335 /* 1336 * if called from fmd_adm_*() and we really did clear the bit then 1337 * we need to do a case update to see if the associated case can be 1338 * repaired. No need to do this if called from fmd_case_*() (ie 1339 * when arg is NULL) as the case will be explicitly repaired anyway. 1340 */ 1341 if (farap->fara_rval) { 1342 /* 1343 * *farap->fara_rval defaults to FARA_ERR_RSRCNOTF (not found). 1344 * If we find a valid cache entry which we repair then we 1345 * set it to FARA_OK. However we don't want to do this if 1346 * we have already set it to FARA_ERR_RSRCNOTR (not replaced) 1347 * in a previous iteration (see above). So only set it to 1348 * FARA_OK if the current value is still FARA_ERR_RSRCNOTF. 1349 */ 1350 if (*farap->fara_rval == FARA_ERR_RSRCNOTF) 1351 *farap->fara_rval = FARA_OK; 1352 if (cleared) { 1353 if (alp->al_flags & FMD_ASRU_PROXY) 1354 fmd_case_xprt_updated(alp->al_case); 1355 else 1356 fmd_case_update(alp->al_case); 1357 } 1358 } 1359 } 1360 1361 /* 1362 * Discard the case associated with this alp if it is in resolved state. 1363 * Called on "fmadm flush". 1364 */ 1365 /*ARGSUSED*/ 1366 void 1367 fmd_asru_flush(fmd_asru_link_t *alp, void *arg) 1368 { 1369 int check_if_aged = 0; 1370 int *rval = (int *)arg; 1371 1372 if (alp->al_case) 1373 fmd_case_discard_resolved(alp->al_case, &check_if_aged); 1374 *rval = 0; 1375 } 1376 1377 /* 1378 * This is only called for proxied faults. Set various flags so we can 1379 * find the nature of the transport from the resource cache code. 
1380 */ 1381 /*ARGSUSED*/ 1382 void 1383 fmd_asru_set_on_proxy(fmd_asru_link_t *alp, void *arg) 1384 { 1385 fmd_asru_set_on_proxy_t *entryp = (fmd_asru_set_on_proxy_t *)arg; 1386 1387 if (*entryp->fasp_countp >= entryp->fasp_maxcount) 1388 return; 1389 1390 /* 1391 * Note that this is a proxy fault and save whetehr transport is 1392 * RDONLY or EXTERNAL. 1393 */ 1394 alp->al_flags |= FMD_ASRU_PROXY; 1395 alp->al_asru->asru_flags |= FMD_ASRU_PROXY; 1396 1397 if (entryp->fasp_proxy_external) { 1398 alp->al_flags |= FMD_ASRU_PROXY_EXTERNAL; 1399 alp->al_asru->asru_flags |= FMD_ASRU_PROXY_EXTERNAL; 1400 } 1401 1402 if (entryp->fasp_proxy_rdonly) 1403 alp->al_flags |= FMD_ASRU_PROXY_RDONLY; 1404 1405 /* 1406 * Save whether asru is accessible in local domain 1407 */ 1408 if (entryp->fasp_proxy_asru[*entryp->fasp_countp]) { 1409 alp->al_flags |= FMD_ASRU_PROXY_WITH_ASRU; 1410 alp->al_asru->asru_flags |= FMD_ASRU_PROXY_WITH_ASRU; 1411 } 1412 (*entryp->fasp_countp)++; 1413 } 1414 1415 /*ARGSUSED*/ 1416 void 1417 fmd_asru_update_containees(fmd_asru_link_t *alp, void *arg) 1418 { 1419 fmd_asru_do_repair_containees(alp, alp->al_reason); 1420 } 1421 1422 /* 1423 * This function is used for fault proxying. It updates the resource status in 1424 * the resource cache based on information that has come from the other side of 1425 * the transport. This can be called on either the proxy side or the 1426 * diagnosing side. 1427 */ 1428 void 1429 fmd_asru_update_status(fmd_asru_link_t *alp, void *arg) 1430 { 1431 fmd_asru_update_status_t *entryp = (fmd_asru_update_status_t *)arg; 1432 uint8_t status; 1433 1434 if (*entryp->faus_countp >= entryp->faus_maxcount) 1435 return; 1436 1437 status = entryp->faus_ba[*entryp->faus_countp]; 1438 1439 /* 1440 * For proxy, if there is no asru on the proxy side, but there is on 1441 * the diag side, then take the diag side asru status. 1442 * For diag, if there is an asru on the proxy side, then take the proxy 1443 * side asru status. 
1444 */ 1445 if (entryp->faus_is_proxy ? 1446 (entryp->faus_diag_asru[*entryp->faus_countp] && 1447 !entryp->faus_proxy_asru[*entryp->faus_countp]) : 1448 entryp->faus_proxy_asru[*entryp->faus_countp]) { 1449 if (status & FM_SUSPECT_DEGRADED) 1450 alp->al_flags |= FMD_ASRU_DEGRADED; 1451 else 1452 alp->al_flags &= ~FMD_ASRU_DEGRADED; 1453 if (status & FM_SUSPECT_UNUSABLE) 1454 (void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE); 1455 else 1456 (void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE, 0); 1457 } 1458 1459 /* 1460 * Update the faulty status too. 1461 */ 1462 if (!(status & FM_SUSPECT_FAULTY)) 1463 (void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1464 (status & FM_SUSPECT_REPAIRED) ? FMD_ASRU_REPAIRED : 1465 (status & FM_SUSPECT_REPLACED) ? FMD_ASRU_REPLACED : 1466 (status & FM_SUSPECT_ACQUITTED) ? FMD_ASRU_ACQUITTED : 1467 FMD_ASRU_REMOVED); 1468 else if (entryp->faus_is_proxy) 1469 (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY); 1470 1471 /* 1472 * for proxy only, update the present status too. 1473 */ 1474 if (entryp->faus_is_proxy) { 1475 if (!(status & FM_SUSPECT_NOT_PRESENT)) { 1476 alp->al_flags |= FMD_ASRU_PRESENT; 1477 alp->al_asru->asru_flags |= FMD_ASRU_PRESENT; 1478 } else { 1479 alp->al_flags &= ~FMD_ASRU_PRESENT; 1480 alp->al_asru->asru_flags &= ~FMD_ASRU_PRESENT; 1481 } 1482 } 1483 (*entryp->faus_countp)++; 1484 } 1485 1486 /* 1487 * This function is called on the diagnosing side when fault proxying is 1488 * in use and the proxy has sent a uuclose. It updates the status of the 1489 * resource cache entries. 
1490 */ 1491 void 1492 fmd_asru_close_status(fmd_asru_link_t *alp, void *arg) 1493 { 1494 fmd_asru_close_status_t *entryp = (fmd_asru_close_status_t *)arg; 1495 1496 if (*entryp->facs_countp >= entryp->facs_maxcount) 1497 return; 1498 alp->al_flags &= ~FMD_ASRU_DEGRADED; 1499 (void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE); 1500 (*entryp->facs_countp)++; 1501 } 1502 1503 static void 1504 fmd_asru_logevent(fmd_asru_link_t *alp) 1505 { 1506 fmd_asru_t *ap = alp->al_asru; 1507 boolean_t faulty = (alp->al_flags & FMD_ASRU_FAULTY) != 0; 1508 boolean_t unusable = (alp->al_flags & FMD_ASRU_UNUSABLE) != 0; 1509 boolean_t message = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0; 1510 boolean_t repaired = (alp->al_reason == FMD_ASRU_REPAIRED); 1511 boolean_t replaced = (alp->al_reason == FMD_ASRU_REPLACED); 1512 boolean_t acquitted = (alp->al_reason == FMD_ASRU_ACQUITTED); 1513 1514 fmd_case_impl_t *cip; 1515 fmd_event_t *e; 1516 fmd_log_t *lp; 1517 nvlist_t *nvl; 1518 char *class; 1519 1520 ASSERT(MUTEX_HELD(&ap->asru_lock)); 1521 cip = (fmd_case_impl_t *)alp->al_case; 1522 ASSERT(cip != NULL); 1523 1524 /* 1525 * Don't log to disk on proxy side 1526 */ 1527 if (cip->ci_xprt != NULL) 1528 return; 1529 1530 if ((lp = alp->al_log) == NULL) 1531 lp = fmd_log_open(ap->asru_root, alp->al_uuid, FMD_LOG_ASRU); 1532 1533 if (lp == NULL) 1534 return; /* can't log events if we can't open the log */ 1535 1536 nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[faulty | (unusable << 1)], 1537 alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, faulty, unusable, 1538 message, alp->al_event, &cip->ci_tv, repaired, replaced, acquitted, 1539 cip->ci_state == FMD_CASE_RESOLVED, cip->ci_diag_de == NULL ? 
1540 cip->ci_mod->mod_fmri : cip->ci_diag_de, cip->ci_injected == 1); 1541 1542 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 1543 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 1544 1545 fmd_event_hold(e); 1546 fmd_log_append(lp, e, NULL); 1547 fmd_event_rele(e); 1548 1549 /* 1550 * For now, we close the log file after every update to conserve file 1551 * descriptors and daemon overhead. If this becomes a performance 1552 * issue this code can change to keep a fixed-size LRU cache of logs. 1553 */ 1554 fmd_log_rele(lp); 1555 alp->al_log = NULL; 1556 } 1557 1558 int 1559 fmd_asru_setflags(fmd_asru_link_t *alp, uint_t sflag) 1560 { 1561 fmd_asru_t *ap = alp->al_asru; 1562 uint_t nstate, ostate; 1563 1564 ASSERT(!(sflag & ~FMD_ASRU_STATE)); 1565 ASSERT(sflag != FMD_ASRU_STATE); 1566 1567 (void) pthread_mutex_lock(&ap->asru_lock); 1568 1569 ostate = alp->al_flags & FMD_ASRU_STATE; 1570 alp->al_flags |= sflag; 1571 nstate = alp->al_flags & FMD_ASRU_STATE; 1572 1573 if (nstate == ostate) { 1574 (void) pthread_mutex_unlock(&ap->asru_lock); 1575 return (0); 1576 } 1577 1578 ap->asru_flags |= sflag; 1579 TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid, 1580 _fmd_asru_snames[ostate], _fmd_asru_snames[nstate])); 1581 1582 fmd_asru_logevent(alp); 1583 1584 (void) pthread_cond_broadcast(&ap->asru_cv); 1585 (void) pthread_mutex_unlock(&ap->asru_lock); 1586 return (1); 1587 } 1588 1589 int 1590 fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag, uint8_t reason) 1591 { 1592 fmd_asru_t *ap = alp->al_asru; 1593 fmd_asru_link_t *nalp; 1594 uint_t nstate, ostate, flags = 0; 1595 1596 ASSERT(!(sflag & ~FMD_ASRU_STATE)); 1597 ASSERT(sflag != FMD_ASRU_STATE); 1598 1599 (void) pthread_mutex_lock(&ap->asru_lock); 1600 1601 ostate = alp->al_flags & FMD_ASRU_STATE; 1602 alp->al_flags &= ~sflag; 1603 nstate = alp->al_flags & FMD_ASRU_STATE; 1604 1605 if (nstate == ostate) { 1606 if (reason > alp->al_reason && 1607 ((fmd_case_impl_t *)alp->al_case)->ci_state < 
1608 FMD_CASE_REPAIRED) { 1609 alp->al_reason = reason; 1610 fmd_asru_logevent(alp); 1611 (void) pthread_cond_broadcast(&ap->asru_cv); 1612 } 1613 (void) pthread_mutex_unlock(&ap->asru_lock); 1614 return (0); 1615 } 1616 if (reason > alp->al_reason) 1617 alp->al_reason = reason; 1618 1619 if (sflag == FMD_ASRU_UNUSABLE) 1620 ap->asru_flags &= ~sflag; 1621 else if (sflag == FMD_ASRU_FAULTY) { 1622 /* 1623 * only clear the faulty bit if all links are clear 1624 */ 1625 for (nalp = fmd_list_next(&ap->asru_list); nalp != NULL; 1626 nalp = fmd_list_next(nalp)) 1627 flags |= nalp->al_flags; 1628 if (!(flags & FMD_ASRU_FAULTY)) 1629 ap->asru_flags &= ~sflag; 1630 } 1631 1632 TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid, 1633 _fmd_asru_snames[ostate], _fmd_asru_snames[nstate])); 1634 1635 fmd_asru_logevent(alp); 1636 1637 (void) pthread_cond_broadcast(&ap->asru_cv); 1638 (void) pthread_mutex_unlock(&ap->asru_lock); 1639 1640 return (1); 1641 } 1642 1643 /*ARGSUSED*/ 1644 void 1645 fmd_asru_log_resolved(fmd_asru_link_t *alp, void *unused) 1646 { 1647 fmd_asru_t *ap = alp->al_asru; 1648 1649 (void) pthread_mutex_lock(&ap->asru_lock); 1650 fmd_asru_logevent(alp); 1651 (void) pthread_cond_broadcast(&ap->asru_cv); 1652 (void) pthread_mutex_unlock(&ap->asru_lock); 1653 } 1654 1655 /* 1656 * Report the current known state of the link entry (ie this particular fault 1657 * affecting this particular ASRU). 1658 */ 1659 int 1660 fmd_asru_al_getstate(fmd_asru_link_t *alp) 1661 { 1662 int us, st = (alp->al_flags & (FMD_ASRU_FAULTY | FMD_ASRU_UNUSABLE)); 1663 nvlist_t *asru; 1664 int ps = FMD_OBJ_STATE_UNKNOWN; 1665 1666 /* 1667 * For fault proxying with an EXTERNAL transport, believe the presence 1668 * state as sent by the diagnosing side. Otherwise find the presence 1669 * state here. 
Note that if fault proxying with an INTERNAL transport 1670 * we can only trust the presence state where we are using hc-scheme 1671 * fmris which should be consistant across domains in the same system - 1672 * other schemes can refer to different devices in different domains. 1673 */ 1674 if (!(alp->al_flags & FMD_ASRU_PROXY_EXTERNAL)) { 1675 ps = fmd_asru_replacement_state(alp->al_event, (alp->al_flags & 1676 FMD_ASRU_PROXY)? HC_ONLY_TRUE : HC_ONLY_FALSE); 1677 if (ps == FMD_OBJ_STATE_NOT_PRESENT) 1678 return (st | FMD_ASRU_UNUSABLE); 1679 if (ps == FMD_OBJ_STATE_REPLACED) { 1680 if (alp->al_reason < FMD_ASRU_REPLACED) 1681 alp->al_reason = FMD_ASRU_REPLACED; 1682 return (st | FMD_ASRU_UNUSABLE); 1683 } 1684 } 1685 if (ps == FMD_OBJ_STATE_UNKNOWN && (alp->al_flags & FMD_ASRU_PROXY)) 1686 st |= (alp->al_flags & (FMD_ASRU_DEGRADED | FMD_ASRU_PRESENT)); 1687 else 1688 st |= (alp->al_flags & (FMD_ASRU_DEGRADED)) | FMD_ASRU_PRESENT; 1689 1690 /* 1691 * For fault proxying, unless we have a local ASRU, then believe the 1692 * service state sent by the diagnosing side. Otherwise find the service 1693 * state here. Try fmd_fmri_service_state() first, but if that's not 1694 * supported by the scheme then fall back to fmd_fmri_unusable(). 
1695 */ 1696 if ((!(alp->al_flags & FMD_ASRU_PROXY) || 1697 (alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU)) && 1698 nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0) { 1699 us = fmd_fmri_service_state(asru); 1700 if (us == -1 || us == FMD_SERVICE_STATE_UNKNOWN) { 1701 /* not supported by scheme - try fmd_fmri_unusable */ 1702 us = fmd_fmri_unusable(asru); 1703 if (us > 0) 1704 st |= FMD_ASRU_UNUSABLE; 1705 else if (us == 0) 1706 st &= ~FMD_ASRU_UNUSABLE; 1707 } else { 1708 if (us == FMD_SERVICE_STATE_UNUSABLE) { 1709 st &= ~FMD_ASRU_DEGRADED; 1710 st |= FMD_ASRU_UNUSABLE; 1711 } else if (us == FMD_SERVICE_STATE_OK) { 1712 st &= ~(FMD_ASRU_DEGRADED | FMD_ASRU_UNUSABLE); 1713 } else if (us == FMD_SERVICE_STATE_ISOLATE_PENDING) { 1714 st &= ~(FMD_ASRU_DEGRADED | FMD_ASRU_UNUSABLE); 1715 } else if (us == FMD_SERVICE_STATE_DEGRADED) { 1716 st &= ~FMD_ASRU_UNUSABLE; 1717 st |= FMD_ASRU_DEGRADED; 1718 } 1719 } 1720 } 1721 return (st); 1722 } 1723 1724 /* 1725 * Report the current known state of the ASRU by refreshing its unusable status 1726 * based upon the routines provided by the scheme module. If the unusable bit 1727 * is different, we do *not* generate a state change here because that change 1728 * may be unrelated to fmd activities and therefore we have no case or event. 1729 * The absence of the transition is harmless as this function is only provided 1730 * for RPC observability and fmd's clients are only concerned with ASRU_FAULTY. 1731 */ 1732 int 1733 fmd_asru_getstate(fmd_asru_t *ap) 1734 { 1735 int us, st, p = -1; 1736 char *s; 1737 1738 /* do not report non-fmd non-present resources */ 1739 if (!(ap->asru_flags & FMD_ASRU_INTERNAL)) { 1740 /* 1741 * As with fmd_asru_al_getstate(), we can only trust the 1742 * local presence state on a proxy if the transport is 1743 * internal and the scheme is hc. Otherwise we believe the 1744 * state as sent by the diagnosing side. 
1745 */ 1746 if (!(ap->asru_flags & FMD_ASRU_PROXY) || 1747 (!(ap->asru_flags & FMD_ASRU_PROXY_EXTERNAL) && 1748 (nvlist_lookup_string(ap->asru_fmri, FM_FMRI_SCHEME, 1749 &s) == 0 && strcmp(s, FM_FMRI_SCHEME_HC) == 0))) { 1750 if (fmd_asru_fake_not_present >= 1751 FMD_OBJ_STATE_REPLACED) 1752 return (0); 1753 p = fmd_fmri_present(ap->asru_fmri); 1754 } 1755 if (p == 0 || (p < 0 && !(ap->asru_flags & FMD_ASRU_PROXY) || 1756 !(ap->asru_flags & FMD_ASRU_PRESENT))) 1757 return (0); 1758 } 1759 1760 /* 1761 * As with fmd_asru_al_getstate(), we can only trust the local unusable 1762 * state on a proxy if there is a local ASRU. 1763 */ 1764 st = ap->asru_flags & (FMD_ASRU_FAULTY | FMD_ASRU_UNUSABLE); 1765 if (!(ap->asru_flags & FMD_ASRU_PROXY) || 1766 (ap->asru_flags & FMD_ASRU_PROXY_WITH_ASRU)) { 1767 us = fmd_fmri_unusable(ap->asru_fmri); 1768 if (us > 0) 1769 st |= FMD_ASRU_UNUSABLE; 1770 else if (us == 0) 1771 st &= ~FMD_ASRU_UNUSABLE; 1772 } 1773 return (st); 1774 } 1775