1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/fm/protocol.h> 28 #include <uuid/uuid.h> 29 30 #include <dirent.h> 31 #include <limits.h> 32 #include <unistd.h> 33 #include <alloca.h> 34 #include <stddef.h> 35 #include <fm/libtopo.h> 36 37 #include <fmd_alloc.h> 38 #include <fmd_string.h> 39 #include <fmd_error.h> 40 #include <fmd_subr.h> 41 #include <fmd_protocol.h> 42 #include <fmd_event.h> 43 #include <fmd_conf.h> 44 #include <fmd_fmri.h> 45 #include <fmd_dispq.h> 46 #include <fmd_case.h> 47 #include <fmd_module.h> 48 #include <fmd_asru.h> 49 50 #include <fmd.h> 51 52 static const char *const _fmd_asru_events[] = { 53 FMD_RSRC_CLASS "asru.ok", /* UNUSABLE=0 FAULTED=0 */ 54 FMD_RSRC_CLASS "asru.degraded", /* UNUSABLE=0 FAULTED=1 */ 55 FMD_RSRC_CLASS "asru.unknown", /* UNUSABLE=1 FAULTED=0 */ 56 FMD_RSRC_CLASS "asru.faulted" /* UNUSABLE=1 FAULTED=1 */ 57 }; 58 59 static const char *const _fmd_asru_snames[] = { 60 "uf", "uF", "Uf", "UF" /* same order as above */ 61 }; 62 63 volatile uint32_t fmd_asru_fake_not_present = 0; 64 65 static uint_t 
66 fmd_asru_strhash(fmd_asru_hash_t *ahp, const char *val) 67 { 68 return (topo_fmri_strhash(ahp->ah_topo->ft_hdl, val) % ahp->ah_hashlen); 69 } 70 71 static boolean_t 72 fmd_asru_strcmp(fmd_asru_hash_t *ahp, const char *a, const char *b) 73 { 74 return (topo_fmri_strcmp(ahp->ah_topo->ft_hdl, a, b)); 75 } 76 77 static fmd_asru_t * 78 fmd_asru_create(fmd_asru_hash_t *ahp, const char *uuid, 79 const char *name, nvlist_t *fmri) 80 { 81 fmd_asru_t *ap = fmd_zalloc(sizeof (fmd_asru_t), FMD_SLEEP); 82 char *s; 83 84 (void) pthread_mutex_init(&ap->asru_lock, NULL); 85 (void) pthread_cond_init(&ap->asru_cv, NULL); 86 87 ap->asru_name = fmd_strdup(name, FMD_SLEEP); 88 if (fmri) 89 (void) nvlist_xdup(fmri, &ap->asru_fmri, &fmd.d_nva); 90 ap->asru_root = fmd_strdup(ahp->ah_dirpath, FMD_SLEEP); 91 ap->asru_uuid = fmd_strdup(uuid, FMD_SLEEP); 92 ap->asru_uuidlen = ap->asru_uuid ? strlen(ap->asru_uuid) : 0; 93 ap->asru_refs = 1; 94 95 if (fmri && nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &s) == 0 && 96 strcmp(s, FM_FMRI_SCHEME_FMD) == 0) 97 ap->asru_flags |= FMD_ASRU_INTERNAL; 98 99 return (ap); 100 } 101 102 static void 103 fmd_asru_destroy(fmd_asru_t *ap) 104 { 105 ASSERT(MUTEX_HELD(&ap->asru_lock)); 106 ASSERT(ap->asru_refs == 0); 107 108 nvlist_free(ap->asru_event); 109 fmd_strfree(ap->asru_name); 110 nvlist_free(ap->asru_fmri); 111 fmd_strfree(ap->asru_root); 112 fmd_free(ap->asru_uuid, ap->asru_uuidlen + 1); 113 fmd_free(ap, sizeof (fmd_asru_t)); 114 } 115 116 static void 117 fmd_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_t *ap) 118 { 119 uint_t h = fmd_asru_strhash(ahp, ap->asru_name); 120 121 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 122 ap->asru_next = ahp->ah_hash[h]; 123 ahp->ah_hash[h] = ap; 124 ahp->ah_count++; 125 } 126 127 static fmd_asru_t * 128 fmd_asru_hold(fmd_asru_t *ap) 129 { 130 (void) pthread_mutex_lock(&ap->asru_lock); 131 ap->asru_refs++; 132 ASSERT(ap->asru_refs != 0); 133 (void) pthread_mutex_unlock(&ap->asru_lock); 134 return (ap); 135 } 136 137 /* 
138 * Lookup an asru in the hash by name and place a hold on it. If the asru is 139 * not found, no entry is created and NULL is returned. This internal function 140 * is for callers who have the ah_lock held and is used by lookup_name below. 141 */ 142 fmd_asru_t * 143 fmd_asru_hash_lookup(fmd_asru_hash_t *ahp, const char *name) 144 { 145 fmd_asru_t *ap; 146 uint_t h; 147 148 ASSERT(RW_LOCK_HELD(&ahp->ah_lock)); 149 h = fmd_asru_strhash(ahp, name); 150 151 for (ap = ahp->ah_hash[h]; ap != NULL; ap = ap->asru_next) { 152 if (fmd_asru_strcmp(ahp, ap->asru_name, name)) 153 break; 154 } 155 156 if (ap != NULL) 157 (void) fmd_asru_hold(ap); 158 else 159 (void) fmd_set_errno(EFMD_ASRU_NOENT); 160 161 return (ap); 162 } 163 164 #define HC_ONLY_FALSE 0 165 #define HC_ONLY_TRUE 1 166 167 static int 168 fmd_asru_replacement_state(nvlist_t *event, int hc_only) 169 { 170 int ps = -1; 171 nvlist_t *asru, *fru, *rsrc; 172 char *s; 173 174 /* 175 * Check if there is evidence that this object is no longer present. 176 * In general fmd_fmri_present() should be supported on resources and/or 177 * frus, as those are the things that are physically present or not 178 * present - an asru can be spread over a number of frus some of which 179 * are present and some not, so fmd_fmri_present() is not generally 180 * meaningful. However retain a check for asru first for compatibility. 181 * If we have checked all three and we still get -1 then nothing knows 182 * whether it's present or not, so err on the safe side and treat it 183 * as still present. 184 * 185 * Note that if hc_only is set, then we only check status using fmris 186 * that are in hc-scheme. 
187 */ 188 if (fmd_asru_fake_not_present) 189 return (fmd_asru_fake_not_present); 190 if (nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0 && 191 (hc_only == HC_ONLY_FALSE || (nvlist_lookup_string(asru, 192 FM_FMRI_SCHEME, &s) == 0 && strcmp(s, FM_FMRI_SCHEME_HC) == 0))) 193 ps = fmd_fmri_replaced(asru); 194 if (ps == -1 || ps == FMD_OBJ_STATE_UNKNOWN) { 195 if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, 196 &rsrc) == 0 && (hc_only == HC_ONLY_FALSE || 197 (nvlist_lookup_string(rsrc, FM_FMRI_SCHEME, &s) == 0 && 198 strcmp(s, FM_FMRI_SCHEME_HC) == 0))) { 199 if (ps == -1) { 200 ps = fmd_fmri_replaced(rsrc); 201 } else { 202 /* see if we can improve on UNKNOWN */ 203 int ps2 = fmd_fmri_replaced(rsrc); 204 if (ps2 == FMD_OBJ_STATE_STILL_PRESENT || 205 ps2 == FMD_OBJ_STATE_REPLACED) 206 ps = ps2; 207 } 208 } 209 } 210 if (ps == -1 || ps == FMD_OBJ_STATE_UNKNOWN) { 211 if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0 && 212 (hc_only == HC_ONLY_FALSE || (nvlist_lookup_string(fru, 213 FM_FMRI_SCHEME, &s) == 0 && 214 strcmp(s, FM_FMRI_SCHEME_HC) == 0))) { 215 if (ps == -1) { 216 ps = fmd_fmri_replaced(fru); 217 } else { 218 /* see if we can improve on UNKNOWN */ 219 int ps2 = fmd_fmri_replaced(fru); 220 if (ps2 == FMD_OBJ_STATE_STILL_PRESENT || 221 ps2 == FMD_OBJ_STATE_REPLACED) 222 ps = ps2; 223 } 224 } 225 } 226 if (ps == -1) 227 ps = FMD_OBJ_STATE_UNKNOWN; 228 return (ps); 229 } 230 231 static void 232 fmd_asru_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 233 char *name) 234 { 235 uint_t h = fmd_asru_strhash(ahp, name); 236 237 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 238 alp->al_asru_next = ahp->ah_asru_hash[h]; 239 ahp->ah_asru_hash[h] = alp; 240 ahp->ah_al_count++; 241 } 242 243 static void 244 fmd_asru_case_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 245 char *name) 246 { 247 uint_t h = fmd_asru_strhash(ahp, name); 248 249 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 250 alp->al_case_next = ahp->ah_case_hash[h]; 251 
ahp->ah_case_hash[h] = alp; 252 } 253 254 static void 255 fmd_asru_fru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, char *name) 256 { 257 uint_t h = fmd_asru_strhash(ahp, name); 258 259 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 260 alp->al_fru_next = ahp->ah_fru_hash[h]; 261 ahp->ah_fru_hash[h] = alp; 262 } 263 264 static void 265 fmd_asru_label_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 266 char *name) 267 { 268 uint_t h = fmd_asru_strhash(ahp, name); 269 270 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 271 alp->al_label_next = ahp->ah_label_hash[h]; 272 ahp->ah_label_hash[h] = alp; 273 } 274 275 static void 276 fmd_asru_rsrc_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 277 char *name) 278 { 279 uint_t h = fmd_asru_strhash(ahp, name); 280 281 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 282 alp->al_rsrc_next = ahp->ah_rsrc_hash[h]; 283 ahp->ah_rsrc_hash[h] = alp; 284 } 285 286 static void 287 fmd_asru_al_destroy(fmd_asru_link_t *alp) 288 { 289 ASSERT(alp->al_refs == 0); 290 ASSERT(MUTEX_HELD(&alp->al_asru->asru_lock)); 291 292 if (alp->al_log != NULL) 293 fmd_log_rele(alp->al_log); 294 295 fmd_free(alp->al_uuid, alp->al_uuidlen + 1); 296 nvlist_free(alp->al_event); 297 fmd_strfree(alp->al_rsrc_name); 298 fmd_strfree(alp->al_case_uuid); 299 fmd_strfree(alp->al_fru_name); 300 fmd_strfree(alp->al_asru_name); 301 fmd_strfree(alp->al_label); 302 nvlist_free(alp->al_asru_fmri); 303 fmd_free(alp, sizeof (fmd_asru_link_t)); 304 } 305 306 static fmd_asru_link_t * 307 fmd_asru_al_hold(fmd_asru_link_t *alp) 308 { 309 fmd_asru_t *ap = alp->al_asru; 310 311 (void) pthread_mutex_lock(&ap->asru_lock); 312 ap->asru_refs++; 313 alp->al_refs++; 314 ASSERT(alp->al_refs != 0); 315 (void) pthread_mutex_unlock(&ap->asru_lock); 316 return (alp); 317 } 318 319 static void fmd_asru_destroy(fmd_asru_t *ap); 320 321 /*ARGSUSED*/ 322 static void 323 fmd_asru_al_hash_release(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp) 324 { 325 fmd_asru_t *ap = alp->al_asru; 326 327 (void) 
pthread_mutex_lock(&ap->asru_lock); 328 ASSERT(alp->al_refs != 0); 329 if (--alp->al_refs == 0) 330 fmd_asru_al_destroy(alp); 331 ASSERT(ap->asru_refs != 0); 332 if (--ap->asru_refs == 0) 333 fmd_asru_destroy(ap); 334 else 335 (void) pthread_mutex_unlock(&ap->asru_lock); 336 } 337 338 static int 339 fmd_asru_get_namestr(nvlist_t *nvl, char **name, ssize_t *namelen) 340 { 341 if ((*namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) == -1) 342 return (EFMD_ASRU_FMRI); 343 *name = fmd_alloc(*namelen + 1, FMD_SLEEP); 344 if (fmd_fmri_nvl2str(nvl, *name, *namelen + 1) == -1) { 345 if (*name != NULL) 346 fmd_free(*name, *namelen + 1); 347 return (EFMD_ASRU_FMRI); 348 } 349 return (0); 350 } 351 352 static fmd_asru_link_t * 353 fmd_asru_al_create(fmd_asru_hash_t *ahp, nvlist_t *nvl, fmd_case_t *cp, 354 const char *al_uuid) 355 { 356 nvlist_t *asru = NULL, *fru, *rsrc; 357 int got_rsrc = 0, got_asru = 0, got_fru = 0; 358 ssize_t fru_namelen, rsrc_namelen, asru_namelen; 359 char *asru_name, *rsrc_name, *fru_name, *name, *label; 360 fmd_asru_link_t *alp; 361 fmd_asru_t *ap; 362 boolean_t msg; 363 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 364 365 if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &asru) == 0 && 366 fmd_asru_get_namestr(asru, &asru_name, &asru_namelen) == 0) 367 got_asru = 1; 368 if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &fru) == 0 && 369 fmd_asru_get_namestr(fru, &fru_name, &fru_namelen) == 0) 370 got_fru = 1; 371 if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0 && 372 fmd_asru_get_namestr(rsrc, &rsrc_name, &rsrc_namelen) == 0) 373 got_rsrc = 1; 374 if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, &label) != 0) 375 label = ""; 376 377 /* 378 * Grab the rwlock as a writer; Then create and insert the asru with 379 * ahp->ah_lock held and hash it in. We'll then drop the rwlock and 380 * proceed to initializing the asru. 381 */ 382 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 383 384 /* 385 * Create and initialise the per-fault "link" structure. 
386 */ 387 alp = fmd_zalloc(sizeof (fmd_asru_link_t), FMD_SLEEP); 388 if (got_asru) 389 (void) nvlist_xdup(asru, &alp->al_asru_fmri, &fmd.d_nva); 390 alp->al_uuid = fmd_strdup(al_uuid, FMD_SLEEP); 391 alp->al_uuidlen = strlen(alp->al_uuid); 392 alp->al_refs = 1; 393 394 /* 395 * If this is the first fault for this asru, then create the per-asru 396 * structure and link into the hash. 397 */ 398 name = got_asru ? asru_name : ""; 399 if ((ap = fmd_asru_hash_lookup(ahp, name)) == NULL) { 400 ap = fmd_asru_create(ahp, al_uuid, name, got_asru ? asru : 401 NULL); 402 fmd_asru_hash_insert(ahp, ap); 403 } else 404 nvlist_free(ap->asru_event); 405 (void) nvlist_xdup(nvl, &ap->asru_event, &fmd.d_nva); 406 407 /* 408 * Put the link structure on the list associated with the per-asru 409 * structure. Then put the link structure on the various hashes. 410 */ 411 fmd_list_append(&ap->asru_list, (fmd_list_t *)alp); 412 alp->al_asru = ap; 413 alp->al_asru_name = got_asru ? asru_name : fmd_strdup("", FMD_SLEEP); 414 fmd_asru_asru_hash_insert(ahp, alp, alp->al_asru_name); 415 alp->al_fru_name = got_fru ? fru_name : fmd_strdup("", FMD_SLEEP); 416 fmd_asru_fru_hash_insert(ahp, alp, alp->al_fru_name); 417 alp->al_rsrc_name = got_rsrc ? 
rsrc_name : fmd_strdup("", FMD_SLEEP); 418 fmd_asru_rsrc_hash_insert(ahp, alp, alp->al_rsrc_name); 419 alp->al_label = fmd_strdup(label, FMD_SLEEP); 420 fmd_asru_label_hash_insert(ahp, alp, label); 421 alp->al_case_uuid = fmd_strdup(cip->ci_uuid, FMD_SLEEP); 422 fmd_asru_case_hash_insert(ahp, alp, cip->ci_uuid); 423 (void) pthread_mutex_lock(&ap->asru_lock); 424 (void) pthread_rwlock_unlock(&ahp->ah_lock); 425 426 ap->asru_case = alp->al_case = cp; 427 if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE, &msg) == 0 && 428 msg == B_FALSE) 429 ap->asru_flags |= FMD_ASRU_INVISIBLE; 430 (void) nvlist_xdup(nvl, &alp->al_event, &fmd.d_nva); 431 ap->asru_flags |= FMD_ASRU_VALID; 432 (void) pthread_cond_broadcast(&ap->asru_cv); 433 (void) pthread_mutex_unlock(&ap->asru_lock); 434 return (alp); 435 } 436 437 static void 438 fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp) 439 { 440 nvlist_t *nvl = FMD_EVENT_NVL(ep); 441 boolean_t faulty = FMD_B_FALSE, unusable = FMD_B_FALSE; 442 int ps; 443 boolean_t repaired = FMD_B_FALSE, replaced = FMD_B_FALSE; 444 boolean_t acquitted = FMD_B_FALSE, resolved = FMD_B_FALSE; 445 nvlist_t *flt, *flt_copy, *asru; 446 char *case_uuid = NULL, *case_code = NULL; 447 fmd_asru_t *ap; 448 fmd_asru_link_t *alp; 449 fmd_case_t *cp; 450 int64_t *diag_time; 451 nvlist_t *de_fmri, *de_fmri_dup; 452 uint_t nelem; 453 topo_hdl_t *thp; 454 char *class; 455 nvlist_t *rsrc; 456 int err; 457 boolean_t injected; 458 459 /* 460 * Extract the most recent values of 'faulty' from the event log. 
461 */ 462 if (nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY, 463 &faulty) != 0) { 464 fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: " 465 "invalid event log record\n", lp->log_name); 466 ahp->ah_error = EFMD_ASRU_EVENT; 467 return; 468 } 469 if (nvlist_lookup_nvlist(nvl, FM_RSRC_ASRU_EVENT, &flt) != 0) { 470 fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: " 471 "invalid event log record\n", lp->log_name); 472 ahp->ah_error = EFMD_ASRU_EVENT; 473 return; 474 } 475 (void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_UUID, &case_uuid); 476 (void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_CODE, &case_code); 477 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_UNUSABLE, 478 &unusable); 479 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPAIRED, 480 &repaired); 481 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPLACED, 482 &replaced); 483 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_ACQUITTED, 484 &acquitted); 485 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_RESOLVED, 486 &resolved); 487 488 /* 489 * Attempt to recreate the case in CLOSED, REPAIRED or RESOLVED state 490 * (depending on whether the faulty/resolved bits are set). 491 * If the case is already present, fmd_case_recreate() will return it. 492 * If not, we'll create a new orphaned case. Either way, we use the 493 * ASRU event to insert a suspect into the partially-restored case. 494 */ 495 fmd_module_lock(fmd.d_rmod); 496 cp = fmd_case_recreate(fmd.d_rmod, NULL, faulty ? FMD_CASE_CLOSED : 497 resolved ? 
FMD_CASE_RESOLVED : FMD_CASE_REPAIRED, case_uuid, 498 case_code); 499 fmd_case_hold(cp); 500 fmd_module_unlock(fmd.d_rmod); 501 if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_INJECTED, 502 &injected) == 0 && injected) 503 fmd_case_set_injected(cp); 504 if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time, 505 &nelem) == 0 && nelem >= 2) 506 fmd_case_settime(cp, diag_time[0], diag_time[1]); 507 else 508 fmd_case_settime(cp, lp->log_stat.st_ctime, 0); 509 if (nvlist_lookup_nvlist(nvl, FM_SUSPECT_DE, &de_fmri) == 0) { 510 (void) nvlist_xdup(de_fmri, &de_fmri_dup, &fmd.d_nva); 511 fmd_case_set_de_fmri(cp, de_fmri_dup); 512 } 513 (void) nvlist_xdup(flt, &flt_copy, &fmd.d_nva); 514 515 /* 516 * For faults with a resource, re-evaluate the asru from the resource. 517 */ 518 thp = fmd_fmri_topo_hold(TOPO_VERSION); 519 if (nvlist_lookup_string(flt_copy, FM_CLASS, &class) == 0 && 520 strncmp(class, "fault", 5) == 0 && 521 nvlist_lookup_nvlist(flt_copy, FM_FAULT_RESOURCE, &rsrc) == 0 && 522 rsrc != NULL && topo_fmri_asru(thp, rsrc, &asru, &err) == 0) { 523 (void) nvlist_remove(flt_copy, FM_FAULT_ASRU, DATA_TYPE_NVLIST); 524 (void) nvlist_add_nvlist(flt_copy, FM_FAULT_ASRU, asru); 525 nvlist_free(asru); 526 } 527 fmd_fmri_topo_rele(thp); 528 529 (void) nvlist_xdup(flt_copy, &flt, &fmd.d_nva); 530 531 fmd_case_recreate_suspect(cp, flt_copy); 532 533 /* 534 * Now create the resource cache entries. 535 */ 536 alp = fmd_asru_al_create(ahp, flt, cp, fmd_strbasename(lp->log_name)); 537 ap = alp->al_asru; 538 539 /* 540 * Check to see if the resource is still present in the system. 
541 */ 542 ps = fmd_asru_replacement_state(flt, HC_ONLY_FALSE); 543 if (ps == FMD_OBJ_STATE_REPLACED) { 544 replaced = FMD_B_TRUE; 545 } else if (ps == FMD_OBJ_STATE_STILL_PRESENT || 546 ps == FMD_OBJ_STATE_UNKNOWN) { 547 ap->asru_flags |= FMD_ASRU_PRESENT; 548 if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, 549 &asru) == 0) { 550 int us; 551 552 switch (fmd_fmri_service_state(asru)) { 553 case FMD_SERVICE_STATE_UNUSABLE: 554 unusable = FMD_B_TRUE; 555 break; 556 case FMD_SERVICE_STATE_OK: 557 case FMD_SERVICE_STATE_ISOLATE_PENDING: 558 case FMD_SERVICE_STATE_DEGRADED: 559 unusable = FMD_B_FALSE; 560 break; 561 case FMD_SERVICE_STATE_UNKNOWN: 562 case -1: 563 /* not supported by scheme */ 564 us = fmd_fmri_unusable(asru); 565 if (us > 0) 566 unusable = FMD_B_TRUE; 567 else if (us == 0) 568 unusable = FMD_B_FALSE; 569 break; 570 } 571 } 572 } 573 574 nvlist_free(flt); 575 576 ap->asru_flags |= FMD_ASRU_RECREATED; 577 if (faulty) { 578 alp->al_flags |= FMD_ASRU_FAULTY; 579 ap->asru_flags |= FMD_ASRU_FAULTY; 580 } 581 if (unusable) { 582 alp->al_flags |= FMD_ASRU_UNUSABLE; 583 ap->asru_flags |= FMD_ASRU_UNUSABLE; 584 } 585 if (replaced) 586 alp->al_reason = FMD_ASRU_REPLACED; 587 else if (repaired) 588 alp->al_reason = FMD_ASRU_REPAIRED; 589 else if (acquitted) 590 alp->al_reason = FMD_ASRU_ACQUITTED; 591 else 592 alp->al_reason = FMD_ASRU_REMOVED; 593 594 TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", alp->al_uuid, 595 (void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE])); 596 } 597 598 static void 599 fmd_asru_hash_discard(fmd_asru_hash_t *ahp, const char *uuid, int err) 600 { 601 char src[PATH_MAX], dst[PATH_MAX]; 602 603 (void) snprintf(src, PATH_MAX, "%s/%s", ahp->ah_dirpath, uuid); 604 (void) snprintf(dst, PATH_MAX, "%s/%s-", ahp->ah_dirpath, uuid); 605 606 if (err != 0) 607 err = rename(src, dst); 608 else 609 err = unlink(src); 610 611 if (err != 0 && errno != ENOENT) 612 fmd_error(EFMD_ASRU_EVENT, "failed to rename log %s", src); 613 } 
614 615 /* 616 * Open a saved log file and restore it into the ASRU hash. If we can't even 617 * open the log, rename the log file to <uuid>- to indicate it is corrupt. If 618 * fmd_log_replay() fails, we either delete the file (if it has reached the 619 * upper limit on cache age) or rename it for debugging if it was corrupted. 620 */ 621 static void 622 fmd_asru_hash_logopen(fmd_asru_hash_t *ahp, const char *uuid) 623 { 624 fmd_log_t *lp = fmd_log_tryopen(ahp->ah_dirpath, uuid, FMD_LOG_ASRU); 625 uint_t n; 626 627 if (lp == NULL) { 628 fmd_asru_hash_discard(ahp, uuid, errno); 629 return; 630 } 631 632 ahp->ah_error = 0; 633 n = ahp->ah_al_count; 634 635 fmd_log_replay(lp, (fmd_log_f *)fmd_asru_hash_recreate, ahp); 636 fmd_log_rele(lp); 637 638 if (ahp->ah_al_count == n) 639 fmd_asru_hash_discard(ahp, uuid, ahp->ah_error); 640 } 641 642 void 643 fmd_asru_hash_refresh(fmd_asru_hash_t *ahp) 644 { 645 struct dirent *dp; 646 DIR *dirp; 647 int zero; 648 649 if ((dirp = opendir(ahp->ah_dirpath)) == NULL) { 650 fmd_error(EFMD_ASRU_NODIR, 651 "failed to open asru cache directory %s", ahp->ah_dirpath); 652 return; 653 } 654 655 (void) fmd_conf_getprop(fmd.d_conf, "rsrc.zero", &zero); 656 657 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 658 659 while ((dp = readdir(dirp)) != NULL) { 660 if (dp->d_name[0] == '.') 661 continue; /* skip "." and ".." */ 662 663 if (zero) 664 fmd_asru_hash_discard(ahp, dp->d_name, 0); 665 else if (!fmd_strmatch(dp->d_name, "*-")) 666 fmd_asru_hash_logopen(ahp, dp->d_name); 667 } 668 669 (void) pthread_rwlock_unlock(&ahp->ah_lock); 670 (void) closedir(dirp); 671 } 672 673 /* 674 * If the resource is present and faulty but not unusable, replay the fault 675 * event that caused it be marked faulty. This will cause the agent 676 * subscribing to this fault class to again disable the resource. 
677 */ 678 /*ARGSUSED*/ 679 static void 680 fmd_asru_hash_replay_asru(fmd_asru_t *ap, void *data) 681 { 682 fmd_event_t *e; 683 nvlist_t *nvl; 684 char *class; 685 686 if (ap->asru_event != NULL && (ap->asru_flags & (FMD_ASRU_STATE | 687 FMD_ASRU_PRESENT)) == (FMD_ASRU_FAULTY | FMD_ASRU_PRESENT)) { 688 689 fmd_dprintf(FMD_DBG_ASRU, 690 "replaying fault event for %s", ap->asru_name); 691 692 (void) nvlist_xdup(ap->asru_event, &nvl, &fmd.d_nva); 693 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 694 695 (void) nvlist_add_string(nvl, FMD_EVN_UUID, 696 ((fmd_case_impl_t *)ap->asru_case)->ci_uuid); 697 698 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 699 fmd_dispq_dispatch(fmd.d_disp, e, class); 700 } 701 } 702 703 void 704 fmd_asru_hash_replay(fmd_asru_hash_t *ahp) 705 { 706 fmd_asru_hash_apply(ahp, fmd_asru_hash_replay_asru, NULL); 707 } 708 709 /* 710 * Check if the resource is still present. If not, and if the rsrc.age time 711 * has expired, then do an implicit repair on the resource. 712 */ 713 /*ARGSUSED*/ 714 static void 715 fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *arg) 716 { 717 struct timeval tv; 718 fmd_log_t *lp; 719 hrtime_t hrt; 720 int ps; 721 int err; 722 fmd_asru_rep_arg_t fara; 723 724 if (!(alp->al_flags & FMD_ASRU_FAULTY)) 725 return; 726 727 /* 728 * Checking for aged resources only happens on the diagnosing side 729 * not on a proxy. 
730 */ 731 if (alp->al_flags & FMD_ASRU_PROXY) 732 return; 733 734 ps = fmd_asru_replacement_state(alp->al_event, HC_ONLY_FALSE); 735 if (ps == FMD_OBJ_STATE_REPLACED) { 736 fara.fara_reason = FMD_ASRU_REPLACED; 737 fara.fara_bywhat = FARA_ALL; 738 fara.fara_rval = &err; 739 fmd_asru_repaired(alp, &fara); 740 } else if (ps == FMD_OBJ_STATE_NOT_PRESENT) { 741 fmd_time_gettimeofday(&tv); 742 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, 743 FMD_LOG_ASRU); 744 if (lp == NULL) 745 return; 746 hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime); 747 fmd_log_rele(lp); 748 if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime) { 749 fara.fara_reason = FMD_ASRU_REMOVED; 750 fara.fara_bywhat = FARA_ALL; 751 fara.fara_rval = &err; 752 fmd_asru_repaired(alp, &fara); 753 } 754 } 755 } 756 757 /*ARGSUSED*/ 758 void 759 fmd_asru_check_if_aged(fmd_asru_link_t *alp, void *arg) 760 { 761 struct timeval tv; 762 fmd_log_t *lp; 763 hrtime_t hrt; 764 765 /* 766 * Case must be in resolved state for this to be called. So modified 767 * time on resource cache entry should be the time the resolve occurred. 768 * Return 0 if not yet hit rsrc.aged. 769 */ 770 fmd_time_gettimeofday(&tv); 771 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, FMD_LOG_ASRU); 772 if (lp == NULL) 773 return; 774 hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime); 775 fmd_log_rele(lp); 776 if (hrt * NANOSEC < fmd.d_asrus->ah_lifetime) 777 *(int *)arg = 0; 778 } 779 780 /*ARGSUSED*/ 781 void 782 fmd_asru_most_recent(fmd_asru_link_t *alp, void *arg) 783 { 784 fmd_log_t *lp; 785 uint64_t hrt; 786 787 /* 788 * Find most recent modified time of a set of resource cache entries. 
789 */ 790 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, FMD_LOG_ASRU); 791 if (lp == NULL) 792 return; 793 hrt = lp->log_stat.st_mtime; 794 fmd_log_rele(lp); 795 if (*(uint64_t *)arg < hrt) 796 *(uint64_t *)arg = hrt; 797 } 798 799 void 800 fmd_asru_clear_aged_rsrcs() 801 { 802 int check_if_aged = 1; 803 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, NULL); 804 fmd_case_hash_apply(fmd.d_cases, fmd_case_discard_resolved, 805 &check_if_aged); 806 } 807 808 fmd_asru_hash_t * 809 fmd_asru_hash_create(const char *root, const char *dir) 810 { 811 fmd_asru_hash_t *ahp; 812 char path[PATH_MAX]; 813 814 ahp = fmd_alloc(sizeof (fmd_asru_hash_t), FMD_SLEEP); 815 (void) pthread_rwlock_init(&ahp->ah_lock, NULL); 816 ahp->ah_hashlen = fmd.d_str_buckets; 817 ahp->ah_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, FMD_SLEEP); 818 ahp->ah_asru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 819 FMD_SLEEP); 820 ahp->ah_case_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 821 FMD_SLEEP); 822 ahp->ah_fru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 823 FMD_SLEEP); 824 ahp->ah_label_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 825 FMD_SLEEP); 826 ahp->ah_rsrc_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 827 FMD_SLEEP); 828 (void) snprintf(path, sizeof (path), "%s/%s", root, dir); 829 ahp->ah_dirpath = fmd_strdup(path, FMD_SLEEP); 830 (void) fmd_conf_getprop(fmd.d_conf, "rsrc.age", &ahp->ah_lifetime); 831 (void) fmd_conf_getprop(fmd.d_conf, "fakenotpresent", 832 (uint32_t *)&fmd_asru_fake_not_present); 833 ahp->ah_al_count = 0; 834 ahp->ah_count = 0; 835 ahp->ah_error = 0; 836 ahp->ah_topo = fmd_topo_hold(); 837 838 return (ahp); 839 } 840 841 void 842 fmd_asru_hash_destroy(fmd_asru_hash_t *ahp) 843 { 844 fmd_asru_link_t *alp, *np; 845 uint_t i; 846 847 for (i = 0; i < ahp->ah_hashlen; i++) { 848 for (alp = ahp->ah_case_hash[i]; alp != NULL; alp = np) { 849 np = alp->al_case_next; 850 alp->al_case_next = NULL; 851 
fmd_case_rele(alp->al_case); 852 alp->al_case = NULL; 853 fmd_asru_al_hash_release(ahp, alp); 854 } 855 } 856 857 fmd_strfree(ahp->ah_dirpath); 858 fmd_free(ahp->ah_hash, sizeof (void *) * ahp->ah_hashlen); 859 fmd_free(ahp->ah_asru_hash, sizeof (void *) * ahp->ah_hashlen); 860 fmd_free(ahp->ah_case_hash, sizeof (void *) * ahp->ah_hashlen); 861 fmd_free(ahp->ah_fru_hash, sizeof (void *) * ahp->ah_hashlen); 862 fmd_free(ahp->ah_label_hash, sizeof (void *) * ahp->ah_hashlen); 863 fmd_free(ahp->ah_rsrc_hash, sizeof (void *) * ahp->ah_hashlen); 864 fmd_topo_rele(ahp->ah_topo); 865 fmd_free(ahp, sizeof (fmd_asru_hash_t)); 866 } 867 868 /* 869 * Take a snapshot of the ASRU database by placing an additional hold on each 870 * member in an auxiliary array, and then call 'func' for each ASRU. 871 */ 872 void 873 fmd_asru_hash_apply(fmd_asru_hash_t *ahp, 874 void (*func)(fmd_asru_t *, void *), void *arg) 875 { 876 fmd_asru_t *ap, **aps, **app; 877 uint_t apc, i; 878 879 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 880 881 aps = app = fmd_alloc(ahp->ah_count * sizeof (fmd_asru_t *), FMD_SLEEP); 882 apc = ahp->ah_count; 883 884 for (i = 0; i < ahp->ah_hashlen; i++) { 885 for (ap = ahp->ah_hash[i]; ap != NULL; ap = ap->asru_next) 886 *app++ = fmd_asru_hold(ap); 887 } 888 889 ASSERT(app == aps + apc); 890 (void) pthread_rwlock_unlock(&ahp->ah_lock); 891 892 for (i = 0; i < apc; i++) { 893 if (aps[i]->asru_fmri != NULL) 894 func(aps[i], arg); 895 fmd_asru_hash_release(ahp, aps[i]); 896 } 897 898 fmd_free(aps, apc * sizeof (fmd_asru_t *)); 899 } 900 901 void 902 fmd_asru_al_hash_apply(fmd_asru_hash_t *ahp, 903 void (*func)(fmd_asru_link_t *, void *), void *arg) 904 { 905 fmd_asru_link_t *alp, **alps, **alpp; 906 uint_t alpc, i; 907 908 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 909 910 alps = alpp = fmd_alloc(ahp->ah_al_count * sizeof (fmd_asru_link_t *), 911 FMD_SLEEP); 912 alpc = ahp->ah_al_count; 913 914 for (i = 0; i < ahp->ah_hashlen; i++) { 915 for (alp = 
ahp->ah_case_hash[i]; alp != NULL; 916 alp = alp->al_case_next) 917 *alpp++ = fmd_asru_al_hold(alp); 918 } 919 920 ASSERT(alpp == alps + alpc); 921 (void) pthread_rwlock_unlock(&ahp->ah_lock); 922 923 for (i = 0; i < alpc; i++) { 924 func(alps[i], arg); 925 fmd_asru_al_hash_release(ahp, alps[i]); 926 } 927 928 fmd_free(alps, alpc * sizeof (fmd_asru_link_t *)); 929 } 930 931 static void 932 fmd_asru_do_hash_apply(fmd_asru_hash_t *ahp, const char *name, 933 void (*func)(fmd_asru_link_t *, void *), void *arg, 934 fmd_asru_link_t **hash, size_t match_offset, size_t next_offset) 935 { 936 fmd_asru_link_t *alp, **alps, **alpp; 937 uint_t alpc = 0, i; 938 uint_t h; 939 940 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 941 942 h = fmd_asru_strhash(ahp, name); 943 944 for (alp = hash[h]; alp != NULL; alp = 945 /* LINTED pointer alignment */ 946 FMD_ASRU_AL_HASH_NEXT(alp, next_offset)) 947 if (fmd_asru_strcmp(ahp, 948 /* LINTED pointer alignment */ 949 FMD_ASRU_AL_HASH_NAME(alp, match_offset), name)) 950 alpc++; 951 952 alps = alpp = fmd_alloc(alpc * sizeof (fmd_asru_link_t *), FMD_SLEEP); 953 954 for (alp = hash[h]; alp != NULL; alp = 955 /* LINTED pointer alignment */ 956 FMD_ASRU_AL_HASH_NEXT(alp, next_offset)) 957 if (fmd_asru_strcmp(ahp, 958 /* LINTED pointer alignment */ 959 FMD_ASRU_AL_HASH_NAME(alp, match_offset), name)) 960 *alpp++ = fmd_asru_al_hold(alp); 961 962 ASSERT(alpp == alps + alpc); 963 (void) pthread_rwlock_unlock(&ahp->ah_lock); 964 965 for (i = 0; i < alpc; i++) { 966 func(alps[i], arg); 967 fmd_asru_al_hash_release(ahp, alps[i]); 968 } 969 970 fmd_free(alps, alpc * sizeof (fmd_asru_link_t *)); 971 } 972 973 void 974 fmd_asru_hash_apply_by_asru(fmd_asru_hash_t *ahp, const char *name, 975 void (*func)(fmd_asru_link_t *, void *), void *arg) 976 { 977 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_asru_hash, 978 offsetof(fmd_asru_link_t, al_asru_name), 979 offsetof(fmd_asru_link_t, al_asru_next)); 980 } 981 982 void 983 
fmd_asru_hash_apply_by_case(fmd_asru_hash_t *ahp, fmd_case_t *cp, 984 void (*func)(fmd_asru_link_t *, void *), void *arg) 985 { 986 fmd_asru_do_hash_apply(ahp, ((fmd_case_impl_t *)cp)->ci_uuid, func, arg, 987 ahp->ah_case_hash, offsetof(fmd_asru_link_t, al_case_uuid), 988 offsetof(fmd_asru_link_t, al_case_next)); 989 } 990 991 void 992 fmd_asru_hash_apply_by_fru(fmd_asru_hash_t *ahp, const char *name, 993 void (*func)(fmd_asru_link_t *, void *), void *arg) 994 { 995 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_fru_hash, 996 offsetof(fmd_asru_link_t, al_fru_name), 997 offsetof(fmd_asru_link_t, al_fru_next)); 998 } 999 1000 void 1001 fmd_asru_hash_apply_by_rsrc(fmd_asru_hash_t *ahp, const char *name, 1002 void (*func)(fmd_asru_link_t *, void *), void *arg) 1003 { 1004 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_rsrc_hash, 1005 offsetof(fmd_asru_link_t, al_rsrc_name), 1006 offsetof(fmd_asru_link_t, al_rsrc_next)); 1007 } 1008 1009 void 1010 fmd_asru_hash_apply_by_label(fmd_asru_hash_t *ahp, const char *name, 1011 void (*func)(fmd_asru_link_t *, void *), void *arg) 1012 { 1013 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_label_hash, 1014 offsetof(fmd_asru_link_t, al_label), 1015 offsetof(fmd_asru_link_t, al_label_next)); 1016 } 1017 1018 /* 1019 * Lookup an asru in the hash by name and place a hold on it. If the asru is 1020 * not found, no entry is created and NULL is returned. 1021 */ 1022 fmd_asru_t * 1023 fmd_asru_hash_lookup_name(fmd_asru_hash_t *ahp, const char *name) 1024 { 1025 fmd_asru_t *ap; 1026 1027 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 1028 ap = fmd_asru_hash_lookup(ahp, name); 1029 (void) pthread_rwlock_unlock(&ahp->ah_lock); 1030 1031 return (ap); 1032 } 1033 1034 /* 1035 * Create a resource cache entry using the fault event "nvl" for one of the 1036 * suspects from the case "cp". 1037 * 1038 * The fault event can have the following components : FM_FAULT_ASRU, 1039 * FM_FAULT_FRU, FM_FAULT_RESOURCE. 
These should be set by the Diagnosis Engine
 * when calling fmd_nvl_create_fault(). In the general case, these are all
 * optional and an entry will always be added into the cache even if one or all
 * of these fields is missing.
 *
 * However, for hardware faults the recommended practice is that the fault
 * event should always have the FM_FAULT_RESOURCE field present and that this
 * should be represented in hc-scheme.
 *
 * Currently the DE should also add the FM_FAULT_ASRU and FM_FAULT_FRU fields
 * where known, though at some future stage fmd might be able to fill these
 * in automatically from the topology.
 */
fmd_asru_link_t *
fmd_asru_hash_create_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, nvlist_t *nvl)
{
	fmd_asru_link_t *link;
	char *uuidstr;
	uuid_t rawid;
	int uuidlen;

	/*
	 * The new entry is identified by a freshly generated UUID.  libuuid
	 * offers no way to pass in or query the size of the string buffer;
	 * the spec says 36 bytes, but the buffer is sized from the "uuidlen"
	 * tunable (uuidlen + 1 bytes, zero-filled) just to be safe.
	 */
	(void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &uuidlen);
	uuidstr = fmd_zalloc(uuidlen + 1, FMD_SLEEP);
	uuid_generate(rawid);
	uuid_unparse(rawid, uuidstr);

	/*
	 * fmd_case_hold_locked() is applied to the case before the resource
	 * cache link is created for it.
	 */
	fmd_case_hold_locked(cp);
	link = fmd_asru_al_create(ahp, nvl, cp, uuidstr);
	TRACE((FMD_DBG_ASRU, "asru %s created as %p",
	    link->al_uuid, (void *)link->al_asru));

	/* the UUID string buffer is released before returning the link */
	fmd_free(uuidstr, uuidlen + 1);
	return (link);
}

/*
 * Release the reference count on an asru obtained using fmd_asru_hash_lookup.
 * We take 'ahp' for symmetry and in case we need to use it in future work.
1086 */ 1087 /*ARGSUSED*/ 1088 void 1089 fmd_asru_hash_release(fmd_asru_hash_t *ahp, fmd_asru_t *ap) 1090 { 1091 (void) pthread_mutex_lock(&ap->asru_lock); 1092 1093 ASSERT(ap->asru_refs != 0); 1094 if (--ap->asru_refs == 0) 1095 fmd_asru_destroy(ap); 1096 else 1097 (void) pthread_mutex_unlock(&ap->asru_lock); 1098 } 1099 1100 static void 1101 fmd_asru_do_delete_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, 1102 fmd_asru_link_t **hash, size_t next_offset, char *name) 1103 { 1104 uint_t h; 1105 fmd_asru_link_t *alp, **pp, *alpnext, **alpnextp; 1106 1107 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 1108 h = fmd_asru_strhash(ahp, name); 1109 pp = &hash[h]; 1110 for (alp = *pp; alp != NULL; alp = alpnext) { 1111 /* LINTED pointer alignment */ 1112 alpnextp = FMD_ASRU_AL_HASH_NEXTP(alp, next_offset); 1113 alpnext = *alpnextp; 1114 if (alp->al_case == cp) { 1115 *pp = *alpnextp; 1116 *alpnextp = NULL; 1117 } else 1118 pp = alpnextp; 1119 } 1120 (void) pthread_rwlock_unlock(&ahp->ah_lock); 1121 } 1122 1123 static void 1124 fmd_asru_do_hash_delete(fmd_asru_hash_t *ahp, fmd_case_susp_t *cis, 1125 fmd_case_t *cp, fmd_asru_link_t **hash, size_t next_offset, char *nvname) 1126 { 1127 nvlist_t *nvl; 1128 char *name = NULL; 1129 ssize_t namelen; 1130 1131 if (nvlist_lookup_nvlist(cis->cis_nvl, nvname, &nvl) == 0 && 1132 (namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) != -1 && 1133 (name = fmd_alloc(namelen + 1, FMD_SLEEP)) != NULL) { 1134 if (fmd_fmri_nvl2str(nvl, name, namelen + 1) != -1) 1135 fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, 1136 name); 1137 fmd_free(name, namelen + 1); 1138 } else 1139 fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, ""); 1140 } 1141 1142 void 1143 fmd_asru_hash_delete_case(fmd_asru_hash_t *ahp, fmd_case_t *cp) 1144 { 1145 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1146 fmd_case_susp_t *cis; 1147 fmd_asru_link_t *alp, **plp, *alpnext; 1148 fmd_asru_t *ap; 1149 char path[PATH_MAX]; 1150 char *label; 1151 uint_t h; 1152 1153 /* 1154 * first 
delete hash entries for each suspect 1155 */ 1156 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) { 1157 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_fru_hash, 1158 offsetof(fmd_asru_link_t, al_fru_next), FM_FAULT_FRU); 1159 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_rsrc_hash, 1160 offsetof(fmd_asru_link_t, al_rsrc_next), FM_FAULT_RESOURCE); 1161 if (nvlist_lookup_string(cis->cis_nvl, FM_FAULT_LOCATION, 1162 &label) != 0) 1163 label = ""; 1164 fmd_asru_do_delete_entry(ahp, cp, ahp->ah_label_hash, 1165 offsetof(fmd_asru_link_t, al_label_next), label); 1166 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_asru_hash, 1167 offsetof(fmd_asru_link_t, al_asru_next), FM_FAULT_ASRU); 1168 } 1169 1170 /* 1171 * then delete associated case hash entries 1172 */ 1173 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 1174 h = fmd_asru_strhash(ahp, cip->ci_uuid); 1175 plp = &ahp->ah_case_hash[h]; 1176 for (alp = *plp; alp != NULL; alp = alpnext) { 1177 alpnext = alp->al_case_next; 1178 if (alp->al_case == cp) { 1179 *plp = alp->al_case_next; 1180 alp->al_case_next = NULL; 1181 ASSERT(ahp->ah_al_count != 0); 1182 ahp->ah_al_count--; 1183 1184 /* 1185 * decrement case ref. 1186 */ 1187 fmd_case_rele_locked(cp); 1188 alp->al_case = NULL; 1189 1190 /* 1191 * If we found a matching ASRU, unlink its log file and 1192 * then release the hash entry. Note that it may still 1193 * be referenced if another thread is manipulating it; 1194 * this is ok because once we unlink, the log file will 1195 * not be restored, and the log data will be freed when 1196 * all of the referencing threads release their 1197 * respective references. 
1198 */ 1199 (void) snprintf(path, sizeof (path), "%s/%s", 1200 ahp->ah_dirpath, alp->al_uuid); 1201 if (cip->ci_xprt == NULL && unlink(path) != 0) 1202 fmd_error(EFMD_ASRU_UNLINK, 1203 "failed to unlink asru %s", path); 1204 1205 /* 1206 * Now unlink from the global per-resource cache 1207 * and if this is the last link then remove that from 1208 * it's own hash too. 1209 */ 1210 ap = alp->al_asru; 1211 (void) pthread_mutex_lock(&ap->asru_lock); 1212 fmd_list_delete(&ap->asru_list, alp); 1213 if (ap->asru_list.l_next == NULL) { 1214 uint_t h; 1215 fmd_asru_t *ap2, **pp; 1216 fmd_asru_t *apnext, **apnextp; 1217 1218 ASSERT(ahp->ah_count != 0); 1219 ahp->ah_count--; 1220 h = fmd_asru_strhash(ahp, ap->asru_name); 1221 pp = &ahp->ah_hash[h]; 1222 for (ap2 = *pp; ap2 != NULL; ap2 = apnext) { 1223 apnextp = &ap2->asru_next; 1224 apnext = *apnextp; 1225 if (ap2 == ap) { 1226 *pp = *apnextp; 1227 *apnextp = NULL; 1228 } else 1229 pp = apnextp; 1230 } 1231 } 1232 (void) pthread_mutex_unlock(&ap->asru_lock); 1233 fmd_asru_al_hash_release(ahp, alp); 1234 } else 1235 plp = &alp->al_case_next; 1236 } 1237 (void) pthread_rwlock_unlock(&ahp->ah_lock); 1238 } 1239 1240 typedef struct { 1241 nvlist_t *farc_parent_fmri; 1242 uint8_t farc_reason; 1243 } fmd_asru_farc_t; 1244 1245 static void 1246 fmd_asru_repair_containee(fmd_asru_link_t *alp, void *arg) 1247 { 1248 fmd_asru_farc_t *farcp = (fmd_asru_farc_t *)arg; 1249 1250 if ((alp->al_asru->asru_flags & FMD_ASRU_INVISIBLE) && 1251 alp->al_asru_fmri && 1252 fmd_fmri_contains(farcp->farc_parent_fmri, alp->al_asru_fmri) > 0) { 1253 if (fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1254 farcp->farc_reason)) { 1255 if (alp->al_flags & FMD_ASRU_PROXY) 1256 fmd_case_xprt_updated(alp->al_case); 1257 else 1258 fmd_case_update(alp->al_case); 1259 } 1260 } 1261 } 1262 1263 static void 1264 fmd_asru_do_repair_containees(fmd_asru_link_t *alp, uint8_t reason) 1265 { 1266 int flags; 1267 1268 /* 1269 * Check if all entries associated with this asru 
are acquitted and 1270 * if so acquit containees. Don't try to repair containees on proxy 1271 * side unless we have local asru. 1272 */ 1273 if (alp->al_asru_fmri != NULL && (!(alp->al_flags & FMD_ASRU_PROXY) || 1274 (alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU))) { 1275 (void) pthread_mutex_lock(&alp->al_asru->asru_lock); 1276 flags = alp->al_asru->asru_flags; 1277 (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); 1278 if (!(flags & (FMD_ASRU_FAULTY | FMD_ASRU_INVISIBLE))) { 1279 fmd_asru_farc_t farc; 1280 1281 farc.farc_parent_fmri = alp->al_asru_fmri; 1282 farc.farc_reason = reason; 1283 fmd_asru_al_hash_apply(fmd.d_asrus, 1284 fmd_asru_repair_containee, &farc); 1285 } 1286 } 1287 } 1288 1289 void 1290 fmd_asru_repaired(fmd_asru_link_t *alp, void *arg) 1291 { 1292 int cleared; 1293 fmd_asru_rep_arg_t *farap = (fmd_asru_rep_arg_t *)arg; 1294 1295 /* 1296 * don't allow remote repair over readonly transport 1297 */ 1298 if (alp->al_flags & FMD_ASRU_PROXY_RDONLY) 1299 return; 1300 1301 /* 1302 * don't allow repair etc by asru on proxy unless asru is local 1303 */ 1304 if (farap->fara_bywhat == FARA_BY_ASRU && 1305 (alp->al_flags & FMD_ASRU_PROXY) && 1306 !(alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU)) 1307 return; 1308 /* 1309 * For acquit, need to check both name and uuid if specified 1310 */ 1311 if (farap->fara_reason == FMD_ASRU_ACQUITTED && 1312 farap->fara_rval != NULL && strcmp(farap->fara_uuid, "") != 0 && 1313 strcmp(farap->fara_uuid, alp->al_case_uuid) != 0) 1314 return; 1315 1316 /* 1317 * For replaced, verify it has been replaced if we have serial number. 1318 * If not set *farap->fara_rval to FARA_ERR_RSRCNOTR. 1319 */ 1320 if (farap->fara_reason == FMD_ASRU_REPLACED && 1321 !(alp->al_flags & FMD_ASRU_PROXY_EXTERNAL) && 1322 fmd_asru_replacement_state(alp->al_event, 1323 (alp->al_flags & FMD_ASRU_PROXY) ? 
HC_ONLY_TRUE : HC_ONLY_FALSE) == 1324 FMD_OBJ_STATE_STILL_PRESENT) { 1325 if (farap->fara_rval) 1326 *farap->fara_rval = FARA_ERR_RSRCNOTR; 1327 return; 1328 } 1329 1330 cleared = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, farap->fara_reason); 1331 fmd_asru_do_repair_containees(alp, farap->fara_reason); 1332 1333 /* 1334 * if called from fmd_adm_*() and we really did clear the bit then 1335 * we need to do a case update to see if the associated case can be 1336 * repaired. No need to do this if called from fmd_case_*() (ie 1337 * when arg is NULL) as the case will be explicitly repaired anyway. 1338 */ 1339 if (farap->fara_rval) { 1340 /* 1341 * *farap->fara_rval defaults to FARA_ERR_RSRCNOTF (not found). 1342 * If we find a valid cache entry which we repair then we 1343 * set it to FARA_OK. However we don't want to do this if 1344 * we have already set it to FARA_ERR_RSRCNOTR (not replaced) 1345 * in a previous iteration (see above). So only set it to 1346 * FARA_OK if the current value is still FARA_ERR_RSRCNOTF. 1347 */ 1348 if (*farap->fara_rval == FARA_ERR_RSRCNOTF) 1349 *farap->fara_rval = FARA_OK; 1350 if (cleared) { 1351 if (alp->al_flags & FMD_ASRU_PROXY) 1352 fmd_case_xprt_updated(alp->al_case); 1353 else 1354 fmd_case_update(alp->al_case); 1355 } 1356 } 1357 } 1358 1359 /* 1360 * Discard the case associated with this alp if it is in resolved state. 1361 * Called on "fmadm flush". 1362 */ 1363 /*ARGSUSED*/ 1364 void 1365 fmd_asru_flush(fmd_asru_link_t *alp, void *arg) 1366 { 1367 int check_if_aged = 0; 1368 int *rval = (int *)arg; 1369 1370 if (alp->al_case) 1371 fmd_case_discard_resolved(alp->al_case, &check_if_aged); 1372 *rval = 0; 1373 } 1374 1375 /* 1376 * This is only called for proxied faults. Set various flags so we can 1377 * find the nature of the transport from the resource cache code. 
1378 */ 1379 /*ARGSUSED*/ 1380 void 1381 fmd_asru_set_on_proxy(fmd_asru_link_t *alp, void *arg) 1382 { 1383 fmd_asru_set_on_proxy_t *entryp = (fmd_asru_set_on_proxy_t *)arg; 1384 1385 if (*entryp->fasp_countp >= entryp->fasp_maxcount) 1386 return; 1387 1388 /* 1389 * Note that this is a proxy fault and save whetehr transport is 1390 * RDONLY or EXTERNAL. 1391 */ 1392 alp->al_flags |= FMD_ASRU_PROXY; 1393 alp->al_asru->asru_flags |= FMD_ASRU_PROXY; 1394 1395 if (entryp->fasp_proxy_external) { 1396 alp->al_flags |= FMD_ASRU_PROXY_EXTERNAL; 1397 alp->al_asru->asru_flags |= FMD_ASRU_PROXY_EXTERNAL; 1398 } 1399 1400 if (entryp->fasp_proxy_rdonly) 1401 alp->al_flags |= FMD_ASRU_PROXY_RDONLY; 1402 1403 /* 1404 * Save whether asru is accessible in local domain 1405 */ 1406 if (entryp->fasp_proxy_asru[*entryp->fasp_countp]) { 1407 alp->al_flags |= FMD_ASRU_PROXY_WITH_ASRU; 1408 alp->al_asru->asru_flags |= FMD_ASRU_PROXY_WITH_ASRU; 1409 } 1410 (*entryp->fasp_countp)++; 1411 } 1412 1413 /*ARGSUSED*/ 1414 void 1415 fmd_asru_update_containees(fmd_asru_link_t *alp, void *arg) 1416 { 1417 fmd_asru_do_repair_containees(alp, alp->al_reason); 1418 } 1419 1420 /* 1421 * This function is used for fault proxying. It updates the resource status in 1422 * the resource cache based on information that has come from the other side of 1423 * the transport. This can be called on either the proxy side or the 1424 * diagnosing side. 1425 */ 1426 void 1427 fmd_asru_update_status(fmd_asru_link_t *alp, void *arg) 1428 { 1429 fmd_asru_update_status_t *entryp = (fmd_asru_update_status_t *)arg; 1430 uint8_t status; 1431 1432 if (*entryp->faus_countp >= entryp->faus_maxcount) 1433 return; 1434 1435 status = entryp->faus_ba[*entryp->faus_countp]; 1436 1437 /* 1438 * For proxy, if there is no asru on the proxy side, but there is on 1439 * the diag side, then take the diag side asru status. 1440 * For diag, if there is an asru on the proxy side, then take the proxy 1441 * side asru status. 
1442 */ 1443 if (entryp->faus_is_proxy ? 1444 (entryp->faus_diag_asru[*entryp->faus_countp] && 1445 !entryp->faus_proxy_asru[*entryp->faus_countp]) : 1446 entryp->faus_proxy_asru[*entryp->faus_countp]) { 1447 if (status & FM_SUSPECT_DEGRADED) 1448 alp->al_flags |= FMD_ASRU_DEGRADED; 1449 else 1450 alp->al_flags &= ~FMD_ASRU_DEGRADED; 1451 if (status & FM_SUSPECT_UNUSABLE) 1452 (void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE); 1453 else 1454 (void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE, 0); 1455 } 1456 1457 /* 1458 * Update the faulty status too. 1459 */ 1460 if (!(status & FM_SUSPECT_FAULTY)) 1461 (void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1462 (status & FM_SUSPECT_REPAIRED) ? FMD_ASRU_REPAIRED : 1463 (status & FM_SUSPECT_REPLACED) ? FMD_ASRU_REPLACED : 1464 (status & FM_SUSPECT_ACQUITTED) ? FMD_ASRU_ACQUITTED : 1465 FMD_ASRU_REMOVED); 1466 else if (entryp->faus_is_proxy) 1467 (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY); 1468 1469 /* 1470 * for proxy only, update the present status too. 1471 */ 1472 if (entryp->faus_is_proxy) { 1473 if (!(status & FM_SUSPECT_NOT_PRESENT)) { 1474 alp->al_flags |= FMD_ASRU_PRESENT; 1475 alp->al_asru->asru_flags |= FMD_ASRU_PRESENT; 1476 } else { 1477 alp->al_flags &= ~FMD_ASRU_PRESENT; 1478 alp->al_asru->asru_flags &= ~FMD_ASRU_PRESENT; 1479 } 1480 } 1481 (*entryp->faus_countp)++; 1482 } 1483 1484 /* 1485 * This function is called on the diagnosing side when fault proxying is 1486 * in use and the proxy has sent a uuclose. It updates the status of the 1487 * resource cache entries. 
1488 */ 1489 void 1490 fmd_asru_close_status(fmd_asru_link_t *alp, void *arg) 1491 { 1492 fmd_asru_close_status_t *entryp = (fmd_asru_close_status_t *)arg; 1493 1494 if (*entryp->facs_countp >= entryp->facs_maxcount) 1495 return; 1496 alp->al_flags &= ~FMD_ASRU_DEGRADED; 1497 (void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE); 1498 (*entryp->facs_countp)++; 1499 } 1500 1501 static void 1502 fmd_asru_logevent(fmd_asru_link_t *alp) 1503 { 1504 fmd_asru_t *ap = alp->al_asru; 1505 boolean_t faulty = (alp->al_flags & FMD_ASRU_FAULTY) != 0; 1506 boolean_t unusable = (alp->al_flags & FMD_ASRU_UNUSABLE) != 0; 1507 boolean_t message = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0; 1508 boolean_t repaired = (alp->al_reason == FMD_ASRU_REPAIRED); 1509 boolean_t replaced = (alp->al_reason == FMD_ASRU_REPLACED); 1510 boolean_t acquitted = (alp->al_reason == FMD_ASRU_ACQUITTED); 1511 1512 fmd_case_impl_t *cip; 1513 fmd_event_t *e; 1514 fmd_log_t *lp; 1515 nvlist_t *nvl; 1516 char *class; 1517 1518 ASSERT(MUTEX_HELD(&ap->asru_lock)); 1519 cip = (fmd_case_impl_t *)alp->al_case; 1520 ASSERT(cip != NULL); 1521 1522 /* 1523 * Don't log to disk on proxy side 1524 */ 1525 if (cip->ci_xprt != NULL) 1526 return; 1527 1528 if ((lp = alp->al_log) == NULL) 1529 lp = fmd_log_open(ap->asru_root, alp->al_uuid, FMD_LOG_ASRU); 1530 1531 if (lp == NULL) 1532 return; /* can't log events if we can't open the log */ 1533 1534 nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[faulty | (unusable << 1)], 1535 alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, faulty, unusable, 1536 message, alp->al_event, &cip->ci_tv, repaired, replaced, acquitted, 1537 cip->ci_state == FMD_CASE_RESOLVED, cip->ci_diag_de == NULL ? 
1538 cip->ci_mod->mod_fmri : cip->ci_diag_de, cip->ci_injected == 1); 1539 1540 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 1541 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 1542 1543 fmd_event_hold(e); 1544 fmd_log_append(lp, e, NULL); 1545 fmd_event_rele(e); 1546 1547 /* 1548 * For now, we close the log file after every update to conserve file 1549 * descriptors and daemon overhead. If this becomes a performance 1550 * issue this code can change to keep a fixed-size LRU cache of logs. 1551 */ 1552 fmd_log_rele(lp); 1553 alp->al_log = NULL; 1554 } 1555 1556 int 1557 fmd_asru_setflags(fmd_asru_link_t *alp, uint_t sflag) 1558 { 1559 fmd_asru_t *ap = alp->al_asru; 1560 uint_t nstate, ostate; 1561 1562 ASSERT(!(sflag & ~FMD_ASRU_STATE)); 1563 ASSERT(sflag != FMD_ASRU_STATE); 1564 1565 (void) pthread_mutex_lock(&ap->asru_lock); 1566 1567 ostate = alp->al_flags & FMD_ASRU_STATE; 1568 alp->al_flags |= sflag; 1569 nstate = alp->al_flags & FMD_ASRU_STATE; 1570 1571 if (nstate == ostate) { 1572 (void) pthread_mutex_unlock(&ap->asru_lock); 1573 return (0); 1574 } 1575 1576 ap->asru_flags |= sflag; 1577 TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid, 1578 _fmd_asru_snames[ostate], _fmd_asru_snames[nstate])); 1579 1580 fmd_asru_logevent(alp); 1581 1582 (void) pthread_cond_broadcast(&ap->asru_cv); 1583 (void) pthread_mutex_unlock(&ap->asru_lock); 1584 return (1); 1585 } 1586 1587 int 1588 fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag, uint8_t reason) 1589 { 1590 fmd_asru_t *ap = alp->al_asru; 1591 fmd_asru_link_t *nalp; 1592 uint_t nstate, ostate, flags = 0; 1593 1594 ASSERT(!(sflag & ~FMD_ASRU_STATE)); 1595 ASSERT(sflag != FMD_ASRU_STATE); 1596 1597 (void) pthread_mutex_lock(&ap->asru_lock); 1598 1599 ostate = alp->al_flags & FMD_ASRU_STATE; 1600 alp->al_flags &= ~sflag; 1601 nstate = alp->al_flags & FMD_ASRU_STATE; 1602 1603 if (nstate == ostate) { 1604 if (reason > alp->al_reason && 1605 ((fmd_case_impl_t *)alp->al_case)->ci_state < 
1606 FMD_CASE_REPAIRED) { 1607 alp->al_reason = reason; 1608 fmd_asru_logevent(alp); 1609 (void) pthread_cond_broadcast(&ap->asru_cv); 1610 } 1611 (void) pthread_mutex_unlock(&ap->asru_lock); 1612 return (0); 1613 } 1614 if (reason > alp->al_reason) 1615 alp->al_reason = reason; 1616 1617 if (sflag == FMD_ASRU_UNUSABLE) 1618 ap->asru_flags &= ~sflag; 1619 else if (sflag == FMD_ASRU_FAULTY) { 1620 /* 1621 * only clear the faulty bit if all links are clear 1622 */ 1623 for (nalp = fmd_list_next(&ap->asru_list); nalp != NULL; 1624 nalp = fmd_list_next(nalp)) 1625 flags |= nalp->al_flags; 1626 if (!(flags & FMD_ASRU_FAULTY)) 1627 ap->asru_flags &= ~sflag; 1628 } 1629 1630 TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid, 1631 _fmd_asru_snames[ostate], _fmd_asru_snames[nstate])); 1632 1633 fmd_asru_logevent(alp); 1634 1635 (void) pthread_cond_broadcast(&ap->asru_cv); 1636 (void) pthread_mutex_unlock(&ap->asru_lock); 1637 1638 return (1); 1639 } 1640 1641 /*ARGSUSED*/ 1642 void 1643 fmd_asru_log_resolved(fmd_asru_link_t *alp, void *unused) 1644 { 1645 fmd_asru_t *ap = alp->al_asru; 1646 1647 (void) pthread_mutex_lock(&ap->asru_lock); 1648 fmd_asru_logevent(alp); 1649 (void) pthread_cond_broadcast(&ap->asru_cv); 1650 (void) pthread_mutex_unlock(&ap->asru_lock); 1651 } 1652 1653 /* 1654 * Report the current known state of the link entry (ie this particular fault 1655 * affecting this particular ASRU). 1656 */ 1657 int 1658 fmd_asru_al_getstate(fmd_asru_link_t *alp) 1659 { 1660 int us, st = (alp->al_flags & (FMD_ASRU_FAULTY | FMD_ASRU_UNUSABLE)); 1661 nvlist_t *asru; 1662 int ps = FMD_OBJ_STATE_UNKNOWN; 1663 1664 /* 1665 * For fault proxying with an EXTERNAL transport, believe the presence 1666 * state as sent by the diagnosing side. Otherwise find the presence 1667 * state here. 
Note that if fault proxying with an INTERNAL transport 1668 * we can only trust the presence state where we are using hc-scheme 1669 * fmris which should be consistant across domains in the same system - 1670 * other schemes can refer to different devices in different domains. 1671 */ 1672 if (!(alp->al_flags & FMD_ASRU_PROXY_EXTERNAL)) { 1673 ps = fmd_asru_replacement_state(alp->al_event, (alp->al_flags & 1674 FMD_ASRU_PROXY)? HC_ONLY_TRUE : HC_ONLY_FALSE); 1675 if (ps == FMD_OBJ_STATE_NOT_PRESENT) 1676 return (st | FMD_ASRU_UNUSABLE); 1677 if (ps == FMD_OBJ_STATE_REPLACED) { 1678 if (alp->al_reason < FMD_ASRU_REPLACED) 1679 alp->al_reason = FMD_ASRU_REPLACED; 1680 return (st | FMD_ASRU_UNUSABLE); 1681 } 1682 } 1683 if (ps == FMD_OBJ_STATE_UNKNOWN && (alp->al_flags & FMD_ASRU_PROXY)) 1684 st |= (alp->al_flags & (FMD_ASRU_DEGRADED | FMD_ASRU_PRESENT)); 1685 else 1686 st |= (alp->al_flags & (FMD_ASRU_DEGRADED)) | FMD_ASRU_PRESENT; 1687 1688 /* 1689 * For fault proxying, unless we have a local ASRU, then believe the 1690 * service state sent by the diagnosing side. Otherwise find the service 1691 * state here. Try fmd_fmri_service_state() first, but if that's not 1692 * supported by the scheme then fall back to fmd_fmri_unusable(). 
1693 */ 1694 if ((!(alp->al_flags & FMD_ASRU_PROXY) || 1695 (alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU)) && 1696 nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0) { 1697 us = fmd_fmri_service_state(asru); 1698 if (us == -1 || us == FMD_SERVICE_STATE_UNKNOWN) { 1699 /* not supported by scheme - try fmd_fmri_unusable */ 1700 us = fmd_fmri_unusable(asru); 1701 if (us > 0) 1702 st |= FMD_ASRU_UNUSABLE; 1703 else if (us == 0) 1704 st &= ~FMD_ASRU_UNUSABLE; 1705 } else { 1706 if (us == FMD_SERVICE_STATE_UNUSABLE) { 1707 st &= ~FMD_ASRU_DEGRADED; 1708 st |= FMD_ASRU_UNUSABLE; 1709 } else if (us == FMD_SERVICE_STATE_OK) { 1710 st &= ~(FMD_ASRU_DEGRADED | FMD_ASRU_UNUSABLE); 1711 } else if (us == FMD_SERVICE_STATE_ISOLATE_PENDING) { 1712 st &= ~(FMD_ASRU_DEGRADED | FMD_ASRU_UNUSABLE); 1713 } else if (us == FMD_SERVICE_STATE_DEGRADED) { 1714 st &= ~FMD_ASRU_UNUSABLE; 1715 st |= FMD_ASRU_DEGRADED; 1716 } 1717 } 1718 } 1719 return (st); 1720 } 1721 1722 /* 1723 * Report the current known state of the ASRU by refreshing its unusable status 1724 * based upon the routines provided by the scheme module. If the unusable bit 1725 * is different, we do *not* generate a state change here because that change 1726 * may be unrelated to fmd activities and therefore we have no case or event. 1727 * The absence of the transition is harmless as this function is only provided 1728 * for RPC observability and fmd's clients are only concerned with ASRU_FAULTY. 1729 */ 1730 int 1731 fmd_asru_getstate(fmd_asru_t *ap) 1732 { 1733 int us, st, p = -1; 1734 char *s; 1735 1736 /* do not report non-fmd non-present resources */ 1737 if (!(ap->asru_flags & FMD_ASRU_INTERNAL)) { 1738 /* 1739 * As with fmd_asru_al_getstate(), we can only trust the 1740 * local presence state on a proxy if the transport is 1741 * internal and the scheme is hc. Otherwise we believe the 1742 * state as sent by the diagnosing side. 
1743 */ 1744 if (!(ap->asru_flags & FMD_ASRU_PROXY) || 1745 (!(ap->asru_flags & FMD_ASRU_PROXY_EXTERNAL) && 1746 (nvlist_lookup_string(ap->asru_fmri, FM_FMRI_SCHEME, 1747 &s) == 0 && strcmp(s, FM_FMRI_SCHEME_HC) == 0))) { 1748 if (fmd_asru_fake_not_present >= 1749 FMD_OBJ_STATE_REPLACED) 1750 return (0); 1751 p = fmd_fmri_present(ap->asru_fmri); 1752 } 1753 if (p == 0 || (p < 0 && !(ap->asru_flags & FMD_ASRU_PROXY) || 1754 !(ap->asru_flags & FMD_ASRU_PRESENT))) 1755 return (0); 1756 } 1757 1758 /* 1759 * As with fmd_asru_al_getstate(), we can only trust the local unusable 1760 * state on a proxy if there is a local ASRU. 1761 */ 1762 st = ap->asru_flags & (FMD_ASRU_FAULTY | FMD_ASRU_UNUSABLE); 1763 if (!(ap->asru_flags & FMD_ASRU_PROXY) || 1764 (ap->asru_flags & FMD_ASRU_PROXY_WITH_ASRU)) { 1765 us = fmd_fmri_unusable(ap->asru_fmri); 1766 if (us > 0) 1767 st |= FMD_ASRU_UNUSABLE; 1768 else if (us == 0) 1769 st &= ~FMD_ASRU_UNUSABLE; 1770 } 1771 return (st); 1772 } 1773