/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/fm/protocol.h>
#include <uuid/uuid.h>

#include <dirent.h>
#include <limits.h>
#include <unistd.h>
#include <alloca.h>
#include <stddef.h>
#include <fm/libtopo.h>

#include <fmd_alloc.h>
#include <fmd_string.h>
#include <fmd_error.h>
#include <fmd_subr.h>
#include <fmd_protocol.h>
#include <fmd_event.h>
#include <fmd_conf.h>
#include <fmd_fmri.h>
#include <fmd_dispq.h>
#include <fmd_case.h>
#include <fmd_module.h>
#include <fmd_asru.h>

#include <fmd.h>

/*
 * Resource event class names, one per combination of the UNUSABLE and FAULTED
 * state bits (the combination each entry stands for is noted alongside it).
 * NOTE(review): nothing in this part of the file indexes this table; it is
 * presumably indexed by (flags & FMD_ASRU_STATE) like _fmd_asru_snames below
 * -- confirm against the users further down the file.
 */
static const char *const _fmd_asru_events[] = {
	FMD_RSRC_CLASS "asru.ok",		/* UNUSABLE=0 FAULTED=0 */
	FMD_RSRC_CLASS "asru.degraded",		/* UNUSABLE=0 FAULTED=1 */
	FMD_RSRC_CLASS "asru.unknown",		/* UNUSABLE=1 FAULTED=0 */
	FMD_RSRC_CLASS "asru.faulted"		/* UNUSABLE=1 FAULTED=1 */
};

/*
 * Two-letter state abbreviations used in trace output.  The table is indexed
 * by (asru_flags & FMD_ASRU_STATE); an upper-case letter means the
 * corresponding bit (U = unusable, F = faulted) is set.
 */
static const char *const _fmd_asru_snames[] = {
	"uf", "uF", "Uf", "UF"		/* same order as above */
};

/*
 * Debugging override loaded from the "fakenotpresent" configuration property
 * by fmd_asru_hash_create().  When non-zero, fmd_asru_replacement_state()
 * returns this value directly instead of querying the FMRI schemes.
 */
volatile uint32_t fmd_asru_fake_not_present = 0;

static uint_t
66 fmd_asru_strhash(fmd_asru_hash_t *ahp, const char *val) 67 { 68 return (topo_fmri_strhash(ahp->ah_topo->ft_hdl, val) % ahp->ah_hashlen); 69 } 70 71 static boolean_t 72 fmd_asru_strcmp(fmd_asru_hash_t *ahp, const char *a, const char *b) 73 { 74 return (topo_fmri_strcmp(ahp->ah_topo->ft_hdl, a, b)); 75 } 76 77 static fmd_asru_t * 78 fmd_asru_create(fmd_asru_hash_t *ahp, const char *uuid, 79 const char *name, nvlist_t *fmri) 80 { 81 fmd_asru_t *ap = fmd_zalloc(sizeof (fmd_asru_t), FMD_SLEEP); 82 char *s; 83 84 (void) pthread_mutex_init(&ap->asru_lock, NULL); 85 (void) pthread_cond_init(&ap->asru_cv, NULL); 86 87 ap->asru_name = fmd_strdup(name, FMD_SLEEP); 88 if (fmri) 89 (void) nvlist_xdup(fmri, &ap->asru_fmri, &fmd.d_nva); 90 ap->asru_root = fmd_strdup(ahp->ah_dirpath, FMD_SLEEP); 91 ap->asru_uuid = fmd_strdup(uuid, FMD_SLEEP); 92 ap->asru_uuidlen = ap->asru_uuid ? strlen(ap->asru_uuid) : 0; 93 ap->asru_refs = 1; 94 95 if (fmri && nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &s) == 0 && 96 strcmp(s, FM_FMRI_SCHEME_FMD) == 0) 97 ap->asru_flags |= FMD_ASRU_INTERNAL; 98 99 return (ap); 100 } 101 102 static void 103 fmd_asru_destroy(fmd_asru_t *ap) 104 { 105 ASSERT(MUTEX_HELD(&ap->asru_lock)); 106 ASSERT(ap->asru_refs == 0); 107 108 nvlist_free(ap->asru_event); 109 fmd_strfree(ap->asru_name); 110 nvlist_free(ap->asru_fmri); 111 fmd_strfree(ap->asru_root); 112 fmd_free(ap->asru_uuid, ap->asru_uuidlen + 1); 113 fmd_free(ap, sizeof (fmd_asru_t)); 114 } 115 116 static void 117 fmd_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_t *ap) 118 { 119 uint_t h = fmd_asru_strhash(ahp, ap->asru_name); 120 121 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 122 ap->asru_next = ahp->ah_hash[h]; 123 ahp->ah_hash[h] = ap; 124 ahp->ah_count++; 125 } 126 127 static fmd_asru_t * 128 fmd_asru_hold(fmd_asru_t *ap) 129 { 130 (void) pthread_mutex_lock(&ap->asru_lock); 131 ap->asru_refs++; 132 ASSERT(ap->asru_refs != 0); 133 (void) pthread_mutex_unlock(&ap->asru_lock); 134 return (ap); 135 } 136 137 /* 
138 * Lookup an asru in the hash by name and place a hold on it. If the asru is 139 * not found, no entry is created and NULL is returned. This internal function 140 * is for callers who have the ah_lock held and is used by lookup_name below. 141 */ 142 fmd_asru_t * 143 fmd_asru_hash_lookup(fmd_asru_hash_t *ahp, const char *name) 144 { 145 fmd_asru_t *ap; 146 uint_t h; 147 148 ASSERT(RW_LOCK_HELD(&ahp->ah_lock)); 149 h = fmd_asru_strhash(ahp, name); 150 151 for (ap = ahp->ah_hash[h]; ap != NULL; ap = ap->asru_next) { 152 if (fmd_asru_strcmp(ahp, ap->asru_name, name)) 153 break; 154 } 155 156 if (ap != NULL) 157 (void) fmd_asru_hold(ap); 158 else 159 (void) fmd_set_errno(EFMD_ASRU_NOENT); 160 161 return (ap); 162 } 163 164 #define HC_ONLY_FALSE 0 165 #define HC_ONLY_TRUE 1 166 167 static int 168 fmd_asru_replacement_state(nvlist_t *event, int hc_only) 169 { 170 int ps = -1; 171 nvlist_t *asru, *fru, *rsrc; 172 char *s; 173 174 /* 175 * Check if there is evidence that this object is no longer present. 176 * In general fmd_fmri_present() should be supported on resources and/or 177 * frus, as those are the things that are physically present or not 178 * present - an asru can be spread over a number of frus some of which 179 * are present and some not, so fmd_fmri_present() is not generally 180 * meaningful. However retain a check for asru first for compatibility. 181 * If we have checked all three and we still get -1 then nothing knows 182 * whether it's present or not, so err on the safe side and treat it 183 * as still present. 184 * 185 * Note that if hc_only is set, then we only check status using fmris 186 * that are in hc-scheme. 
187 */ 188 if (fmd_asru_fake_not_present) 189 return (fmd_asru_fake_not_present); 190 if (nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0 && 191 (hc_only == HC_ONLY_FALSE || (nvlist_lookup_string(asru, 192 FM_FMRI_SCHEME, &s) == 0 && strcmp(s, FM_FMRI_SCHEME_HC) == 0))) 193 ps = fmd_fmri_replaced(asru); 194 if (ps == -1 || ps == FMD_OBJ_STATE_UNKNOWN) { 195 if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, 196 &rsrc) == 0 && (hc_only == HC_ONLY_FALSE || 197 (nvlist_lookup_string(rsrc, FM_FMRI_SCHEME, &s) == 0 && 198 strcmp(s, FM_FMRI_SCHEME_HC) == 0))) { 199 if (ps == -1) { 200 ps = fmd_fmri_replaced(rsrc); 201 } else { 202 /* see if we can improve on UNKNOWN */ 203 int ps2 = fmd_fmri_replaced(rsrc); 204 if (ps2 == FMD_OBJ_STATE_STILL_PRESENT || 205 ps2 == FMD_OBJ_STATE_REPLACED) 206 ps = ps2; 207 } 208 } 209 } 210 if (ps == -1 || ps == FMD_OBJ_STATE_UNKNOWN) { 211 if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0 && 212 (hc_only == HC_ONLY_FALSE || (nvlist_lookup_string(fru, 213 FM_FMRI_SCHEME, &s) == 0 && 214 strcmp(s, FM_FMRI_SCHEME_HC) == 0))) { 215 if (ps == -1) { 216 ps = fmd_fmri_replaced(fru); 217 } else { 218 /* see if we can improve on UNKNOWN */ 219 int ps2 = fmd_fmri_replaced(fru); 220 if (ps2 == FMD_OBJ_STATE_STILL_PRESENT || 221 ps2 == FMD_OBJ_STATE_REPLACED) 222 ps = ps2; 223 } 224 } 225 } 226 if (ps == -1) 227 ps = FMD_OBJ_STATE_UNKNOWN; 228 return (ps); 229 } 230 231 static void 232 fmd_asru_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 233 char *name) 234 { 235 uint_t h = fmd_asru_strhash(ahp, name); 236 237 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 238 alp->al_asru_next = ahp->ah_asru_hash[h]; 239 ahp->ah_asru_hash[h] = alp; 240 ahp->ah_al_count++; 241 } 242 243 static void 244 fmd_asru_case_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 245 char *name) 246 { 247 uint_t h = fmd_asru_strhash(ahp, name); 248 249 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 250 alp->al_case_next = ahp->ah_case_hash[h]; 251 
ahp->ah_case_hash[h] = alp; 252 } 253 254 static void 255 fmd_asru_fru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, char *name) 256 { 257 uint_t h = fmd_asru_strhash(ahp, name); 258 259 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 260 alp->al_fru_next = ahp->ah_fru_hash[h]; 261 ahp->ah_fru_hash[h] = alp; 262 } 263 264 static void 265 fmd_asru_label_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 266 char *name) 267 { 268 uint_t h = fmd_asru_strhash(ahp, name); 269 270 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 271 alp->al_label_next = ahp->ah_label_hash[h]; 272 ahp->ah_label_hash[h] = alp; 273 } 274 275 static void 276 fmd_asru_rsrc_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, 277 char *name) 278 { 279 uint_t h = fmd_asru_strhash(ahp, name); 280 281 ASSERT(RW_WRITE_HELD(&ahp->ah_lock)); 282 alp->al_rsrc_next = ahp->ah_rsrc_hash[h]; 283 ahp->ah_rsrc_hash[h] = alp; 284 } 285 286 static void 287 fmd_asru_al_destroy(fmd_asru_link_t *alp) 288 { 289 ASSERT(alp->al_refs == 0); 290 ASSERT(MUTEX_HELD(&alp->al_asru->asru_lock)); 291 292 if (alp->al_log != NULL) 293 fmd_log_rele(alp->al_log); 294 295 fmd_free(alp->al_uuid, alp->al_uuidlen + 1); 296 nvlist_free(alp->al_event); 297 fmd_strfree(alp->al_rsrc_name); 298 fmd_strfree(alp->al_case_uuid); 299 fmd_strfree(alp->al_fru_name); 300 fmd_strfree(alp->al_asru_name); 301 fmd_strfree(alp->al_label); 302 nvlist_free(alp->al_asru_fmri); 303 fmd_free(alp, sizeof (fmd_asru_link_t)); 304 } 305 306 static fmd_asru_link_t * 307 fmd_asru_al_hold(fmd_asru_link_t *alp) 308 { 309 fmd_asru_t *ap = alp->al_asru; 310 311 (void) pthread_mutex_lock(&ap->asru_lock); 312 ap->asru_refs++; 313 alp->al_refs++; 314 ASSERT(alp->al_refs != 0); 315 (void) pthread_mutex_unlock(&ap->asru_lock); 316 return (alp); 317 } 318 319 static void fmd_asru_destroy(fmd_asru_t *ap); 320 321 /*ARGSUSED*/ 322 static void 323 fmd_asru_al_hash_release(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp) 324 { 325 fmd_asru_t *ap = alp->al_asru; 326 327 (void) 
pthread_mutex_lock(&ap->asru_lock); 328 ASSERT(alp->al_refs != 0); 329 if (--alp->al_refs == 0) 330 fmd_asru_al_destroy(alp); 331 ASSERT(ap->asru_refs != 0); 332 if (--ap->asru_refs == 0) 333 fmd_asru_destroy(ap); 334 else 335 (void) pthread_mutex_unlock(&ap->asru_lock); 336 } 337 338 static int 339 fmd_asru_get_namestr(nvlist_t *nvl, char **name, ssize_t *namelen) 340 { 341 if ((*namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) == -1) 342 return (EFMD_ASRU_FMRI); 343 *name = fmd_alloc(*namelen + 1, FMD_SLEEP); 344 if (fmd_fmri_nvl2str(nvl, *name, *namelen + 1) == -1) { 345 if (*name != NULL) 346 fmd_free(*name, *namelen + 1); 347 return (EFMD_ASRU_FMRI); 348 } 349 return (0); 350 } 351 352 static fmd_asru_link_t * 353 fmd_asru_al_create(fmd_asru_hash_t *ahp, nvlist_t *nvl, fmd_case_t *cp, 354 const char *al_uuid) 355 { 356 nvlist_t *asru = NULL, *fru, *rsrc; 357 int got_rsrc = 0, got_asru = 0, got_fru = 0; 358 ssize_t fru_namelen, rsrc_namelen, asru_namelen; 359 char *asru_name, *rsrc_name, *fru_name, *name, *label; 360 fmd_asru_link_t *alp; 361 fmd_asru_t *ap; 362 boolean_t msg; 363 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 364 365 if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &asru) == 0 && 366 fmd_asru_get_namestr(asru, &asru_name, &asru_namelen) == 0) 367 got_asru = 1; 368 if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &fru) == 0 && 369 fmd_asru_get_namestr(fru, &fru_name, &fru_namelen) == 0) 370 got_fru = 1; 371 if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0 && 372 fmd_asru_get_namestr(rsrc, &rsrc_name, &rsrc_namelen) == 0) 373 got_rsrc = 1; 374 if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, &label) != 0) 375 label = ""; 376 377 /* 378 * Grab the rwlock as a writer; Then create and insert the asru with 379 * ahp->ah_lock held and hash it in. We'll then drop the rwlock and 380 * proceed to initializing the asru. 381 */ 382 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 383 384 /* 385 * Create and initialise the per-fault "link" structure. 
386 */ 387 alp = fmd_zalloc(sizeof (fmd_asru_link_t), FMD_SLEEP); 388 if (got_asru) 389 (void) nvlist_xdup(asru, &alp->al_asru_fmri, &fmd.d_nva); 390 alp->al_uuid = fmd_strdup(al_uuid, FMD_SLEEP); 391 alp->al_uuidlen = strlen(alp->al_uuid); 392 alp->al_refs = 1; 393 394 /* 395 * If this is the first fault for this asru, then create the per-asru 396 * structure and link into the hash. 397 */ 398 name = got_asru ? asru_name : ""; 399 if ((ap = fmd_asru_hash_lookup(ahp, name)) == NULL) { 400 ap = fmd_asru_create(ahp, al_uuid, name, got_asru ? asru : 401 NULL); 402 fmd_asru_hash_insert(ahp, ap); 403 } else 404 nvlist_free(ap->asru_event); 405 (void) nvlist_xdup(nvl, &ap->asru_event, &fmd.d_nva); 406 407 /* 408 * Put the link structure on the list associated with the per-asru 409 * structure. Then put the link structure on the various hashes. 410 */ 411 fmd_list_append(&ap->asru_list, (fmd_list_t *)alp); 412 alp->al_asru = ap; 413 alp->al_asru_name = got_asru ? asru_name : fmd_strdup("", FMD_SLEEP); 414 fmd_asru_asru_hash_insert(ahp, alp, alp->al_asru_name); 415 alp->al_fru_name = got_fru ? fru_name : fmd_strdup("", FMD_SLEEP); 416 fmd_asru_fru_hash_insert(ahp, alp, alp->al_fru_name); 417 alp->al_rsrc_name = got_rsrc ? 
rsrc_name : fmd_strdup("", FMD_SLEEP); 418 fmd_asru_rsrc_hash_insert(ahp, alp, alp->al_rsrc_name); 419 alp->al_label = fmd_strdup(label, FMD_SLEEP); 420 fmd_asru_label_hash_insert(ahp, alp, label); 421 alp->al_case_uuid = fmd_strdup(cip->ci_uuid, FMD_SLEEP); 422 fmd_asru_case_hash_insert(ahp, alp, cip->ci_uuid); 423 (void) pthread_mutex_lock(&ap->asru_lock); 424 (void) pthread_rwlock_unlock(&ahp->ah_lock); 425 426 ap->asru_case = alp->al_case = cp; 427 if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE, &msg) == 0 && 428 msg == B_FALSE) 429 ap->asru_flags |= FMD_ASRU_INVISIBLE; 430 (void) nvlist_xdup(nvl, &alp->al_event, &fmd.d_nva); 431 ap->asru_flags |= FMD_ASRU_VALID; 432 (void) pthread_cond_broadcast(&ap->asru_cv); 433 (void) pthread_mutex_unlock(&ap->asru_lock); 434 return (alp); 435 } 436 437 static void 438 fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp) 439 { 440 nvlist_t *nvl = FMD_EVENT_NVL(ep); 441 boolean_t faulty = FMD_B_FALSE, unusable = FMD_B_FALSE; 442 int ps; 443 boolean_t repaired = FMD_B_FALSE, replaced = FMD_B_FALSE; 444 boolean_t acquitted = FMD_B_FALSE, resolved = FMD_B_FALSE; 445 nvlist_t *flt, *flt_copy, *asru; 446 char *case_uuid = NULL, *case_code = NULL; 447 fmd_asru_t *ap; 448 fmd_asru_link_t *alp; 449 fmd_case_t *cp; 450 int64_t *diag_time; 451 nvlist_t *de_fmri, *de_fmri_dup; 452 uint_t nelem; 453 topo_hdl_t *thp; 454 char *class; 455 nvlist_t *rsrc; 456 int err; 457 boolean_t injected; 458 459 /* 460 * Extract the most recent values of 'faulty' from the event log. 
461 */ 462 if (nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY, 463 &faulty) != 0) { 464 fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: " 465 "invalid event log record\n", lp->log_name); 466 ahp->ah_error = EFMD_ASRU_EVENT; 467 return; 468 } 469 if (nvlist_lookup_nvlist(nvl, FM_RSRC_ASRU_EVENT, &flt) != 0) { 470 fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: " 471 "invalid event log record\n", lp->log_name); 472 ahp->ah_error = EFMD_ASRU_EVENT; 473 return; 474 } 475 (void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_UUID, &case_uuid); 476 (void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_CODE, &case_code); 477 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_UNUSABLE, 478 &unusable); 479 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPAIRED, 480 &repaired); 481 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPLACED, 482 &replaced); 483 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_ACQUITTED, 484 &acquitted); 485 (void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_RESOLVED, 486 &resolved); 487 488 /* 489 * Attempt to recreate the case in CLOSED, REPAIRED or RESOLVED state 490 * (depending on whether the faulty/resolved bits are set). 491 * If the case is already present, fmd_case_recreate() will return it. 492 * If not, we'll create a new orphaned case. Either way, we use the 493 * ASRU event to insert a suspect into the partially-restored case. 494 */ 495 fmd_module_lock(fmd.d_rmod); 496 cp = fmd_case_recreate(fmd.d_rmod, NULL, faulty ? FMD_CASE_CLOSED : 497 resolved ? 
FMD_CASE_RESOLVED : FMD_CASE_REPAIRED, case_uuid, 498 case_code); 499 fmd_case_hold(cp); 500 fmd_module_unlock(fmd.d_rmod); 501 if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_INJECTED, 502 &injected) == 0 && injected) 503 fmd_case_set_injected(cp); 504 if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time, 505 &nelem) == 0 && nelem >= 2) 506 fmd_case_settime(cp, diag_time[0], diag_time[1]); 507 else 508 fmd_case_settime(cp, lp->log_stat.st_ctime, 0); 509 if (nvlist_lookup_nvlist(nvl, FM_SUSPECT_DE, &de_fmri) == 0) { 510 (void) nvlist_xdup(de_fmri, &de_fmri_dup, &fmd.d_nva); 511 fmd_case_set_de_fmri(cp, de_fmri_dup); 512 } 513 (void) nvlist_xdup(flt, &flt_copy, &fmd.d_nva); 514 515 /* 516 * For faults with a resource, re-evaluate the asru from the resource. 517 */ 518 thp = fmd_fmri_topo_hold(TOPO_VERSION); 519 if (nvlist_lookup_string(flt_copy, FM_CLASS, &class) == 0 && 520 strncmp(class, "fault", 5) == 0 && 521 nvlist_lookup_nvlist(flt_copy, FM_FAULT_RESOURCE, &rsrc) == 0 && 522 rsrc != NULL && topo_fmri_asru(thp, rsrc, &asru, &err) == 0) { 523 (void) nvlist_remove(flt_copy, FM_FAULT_ASRU, DATA_TYPE_NVLIST); 524 (void) nvlist_add_nvlist(flt_copy, FM_FAULT_ASRU, asru); 525 nvlist_free(asru); 526 } 527 fmd_fmri_topo_rele(thp); 528 529 (void) nvlist_xdup(flt_copy, &flt, &fmd.d_nva); 530 531 fmd_case_recreate_suspect(cp, flt_copy); 532 533 /* 534 * Now create the resource cache entries. 535 */ 536 alp = fmd_asru_al_create(ahp, flt, cp, fmd_strbasename(lp->log_name)); 537 ap = alp->al_asru; 538 539 /* 540 * Check to see if the resource is still present in the system. 
541 */ 542 ps = fmd_asru_replacement_state(flt, HC_ONLY_FALSE); 543 if (ps == FMD_OBJ_STATE_REPLACED) { 544 replaced = FMD_B_TRUE; 545 } else if (ps == FMD_OBJ_STATE_STILL_PRESENT || 546 ps == FMD_OBJ_STATE_UNKNOWN) { 547 ap->asru_flags |= FMD_ASRU_PRESENT; 548 if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, 549 &asru) == 0) { 550 int us; 551 552 switch (fmd_fmri_service_state(asru)) { 553 case FMD_SERVICE_STATE_UNUSABLE: 554 unusable = FMD_B_TRUE; 555 break; 556 case FMD_SERVICE_STATE_OK: 557 case FMD_SERVICE_STATE_ISOLATE_PENDING: 558 case FMD_SERVICE_STATE_DEGRADED: 559 unusable = FMD_B_FALSE; 560 break; 561 case FMD_SERVICE_STATE_UNKNOWN: 562 case -1: 563 /* not supported by scheme */ 564 us = fmd_fmri_unusable(asru); 565 if (us > 0) 566 unusable = FMD_B_TRUE; 567 else if (us == 0) 568 unusable = FMD_B_FALSE; 569 break; 570 } 571 } 572 } 573 574 nvlist_free(flt); 575 576 ap->asru_flags |= FMD_ASRU_RECREATED; 577 if (faulty) { 578 alp->al_flags |= FMD_ASRU_FAULTY; 579 ap->asru_flags |= FMD_ASRU_FAULTY; 580 } 581 if (unusable) { 582 alp->al_flags |= FMD_ASRU_UNUSABLE; 583 ap->asru_flags |= FMD_ASRU_UNUSABLE; 584 } 585 if (replaced) 586 alp->al_reason = FMD_ASRU_REPLACED; 587 else if (repaired) 588 alp->al_reason = FMD_ASRU_REPAIRED; 589 else if (acquitted) 590 alp->al_reason = FMD_ASRU_ACQUITTED; 591 else 592 alp->al_reason = FMD_ASRU_REMOVED; 593 594 TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", alp->al_uuid, 595 (void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE])); 596 } 597 598 static void 599 fmd_asru_hash_discard(fmd_asru_hash_t *ahp, const char *uuid, int err) 600 { 601 char src[PATH_MAX], dst[PATH_MAX]; 602 603 (void) snprintf(src, PATH_MAX, "%s/%s", ahp->ah_dirpath, uuid); 604 (void) snprintf(dst, PATH_MAX, "%s/%s-", ahp->ah_dirpath, uuid); 605 606 if (err != 0) 607 err = rename(src, dst); 608 else 609 err = unlink(src); 610 611 if (err != 0 && errno != ENOENT) 612 fmd_error(EFMD_ASRU_EVENT, "failed to rename log %s", src); 613 } 
614 615 /* 616 * Open a saved log file and restore it into the ASRU hash. If we can't even 617 * open the log, rename the log file to <uuid>- to indicate it is corrupt. If 618 * fmd_log_replay() fails, we either delete the file (if it has reached the 619 * upper limit on cache age) or rename it for debugging if it was corrupted. 620 */ 621 static void 622 fmd_asru_hash_logopen(fmd_asru_hash_t *ahp, const char *uuid) 623 { 624 fmd_log_t *lp = fmd_log_tryopen(ahp->ah_dirpath, uuid, FMD_LOG_ASRU); 625 uint_t n; 626 627 if (lp == NULL) { 628 fmd_asru_hash_discard(ahp, uuid, errno); 629 return; 630 } 631 632 ahp->ah_error = 0; 633 n = ahp->ah_al_count; 634 635 fmd_log_replay(lp, (fmd_log_f *)fmd_asru_hash_recreate, ahp); 636 fmd_log_rele(lp); 637 638 if (ahp->ah_al_count == n) 639 fmd_asru_hash_discard(ahp, uuid, ahp->ah_error); 640 } 641 642 void 643 fmd_asru_hash_refresh(fmd_asru_hash_t *ahp) 644 { 645 struct dirent *dp; 646 DIR *dirp; 647 int zero; 648 649 if ((dirp = opendir(ahp->ah_dirpath)) == NULL) { 650 fmd_error(EFMD_ASRU_NODIR, 651 "failed to open asru cache directory %s", ahp->ah_dirpath); 652 return; 653 } 654 655 (void) fmd_conf_getprop(fmd.d_conf, "rsrc.zero", &zero); 656 657 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 658 659 while ((dp = readdir(dirp)) != NULL) { 660 if (dp->d_name[0] == '.') 661 continue; /* skip "." and ".." */ 662 663 if (zero) 664 fmd_asru_hash_discard(ahp, dp->d_name, 0); 665 else if (!fmd_strmatch(dp->d_name, "*-")) 666 fmd_asru_hash_logopen(ahp, dp->d_name); 667 } 668 669 (void) pthread_rwlock_unlock(&ahp->ah_lock); 670 (void) closedir(dirp); 671 } 672 673 /* 674 * If the resource is present and faulty but not unusable, replay the fault 675 * event that caused it be marked faulty. This will cause the agent 676 * subscribing to this fault class to again disable the resource. 
677 */ 678 /*ARGSUSED*/ 679 static void 680 fmd_asru_hash_replay_asru(fmd_asru_t *ap, void *data) 681 { 682 fmd_event_t *e; 683 nvlist_t *nvl; 684 char *class; 685 686 if (ap->asru_event != NULL && (ap->asru_flags & (FMD_ASRU_STATE | 687 FMD_ASRU_PRESENT)) == (FMD_ASRU_FAULTY | FMD_ASRU_PRESENT)) { 688 689 fmd_dprintf(FMD_DBG_ASRU, 690 "replaying fault event for %s", ap->asru_name); 691 692 (void) nvlist_xdup(ap->asru_event, &nvl, &fmd.d_nva); 693 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 694 695 (void) nvlist_add_string(nvl, FMD_EVN_UUID, 696 ((fmd_case_impl_t *)ap->asru_case)->ci_uuid); 697 698 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 699 fmd_dispq_dispatch(fmd.d_disp, e, class); 700 } 701 } 702 703 void 704 fmd_asru_hash_replay(fmd_asru_hash_t *ahp) 705 { 706 fmd_asru_hash_apply(ahp, fmd_asru_hash_replay_asru, NULL); 707 } 708 709 /* 710 * Check if the resource is still present. If not, and if the rsrc.age time 711 * has expired, then do an implicit repair on the resource. 712 */ 713 /*ARGSUSED*/ 714 static void 715 fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *arg) 716 { 717 struct timeval tv; 718 fmd_log_t *lp; 719 hrtime_t hrt; 720 int ps; 721 int err; 722 fmd_asru_rep_arg_t fara; 723 724 if (!(alp->al_flags & FMD_ASRU_FAULTY)) 725 return; 726 727 /* 728 * Checking for aged resources only happens on the diagnosing side 729 * not on a proxy. 
730 */ 731 if (alp->al_flags & FMD_ASRU_PROXY) 732 return; 733 734 ps = fmd_asru_replacement_state(alp->al_event, HC_ONLY_FALSE); 735 if (ps == FMD_OBJ_STATE_REPLACED) { 736 fara.fara_reason = FMD_ASRU_REPLACED; 737 fara.fara_bywhat = FARA_ALL; 738 fara.fara_rval = &err; 739 fmd_asru_repaired(alp, &fara); 740 } else if (ps == FMD_OBJ_STATE_NOT_PRESENT) { 741 fmd_time_gettimeofday(&tv); 742 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, 743 FMD_LOG_ASRU); 744 hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime); 745 fmd_log_rele(lp); 746 if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime) { 747 fara.fara_reason = FMD_ASRU_REMOVED; 748 fara.fara_bywhat = FARA_ALL; 749 fara.fara_rval = &err; 750 fmd_asru_repaired(alp, &fara); 751 } 752 } 753 } 754 755 /*ARGSUSED*/ 756 void 757 fmd_asru_check_if_aged(fmd_asru_link_t *alp, void *arg) 758 { 759 struct timeval tv; 760 fmd_log_t *lp; 761 hrtime_t hrt; 762 763 /* 764 * Case must be in resolved state for this to be called. So modified 765 * time on resource cache entry should be the time the resolve occurred. 766 * Return 0 if not yet hit rsrc.aged. 767 */ 768 fmd_time_gettimeofday(&tv); 769 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, FMD_LOG_ASRU); 770 if (lp == NULL) 771 return; 772 hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime); 773 fmd_log_rele(lp); 774 if (hrt * NANOSEC < fmd.d_asrus->ah_lifetime) 775 *(int *)arg = 0; 776 } 777 778 /*ARGSUSED*/ 779 void 780 fmd_asru_most_recent(fmd_asru_link_t *alp, void *arg) 781 { 782 fmd_log_t *lp; 783 uint64_t hrt; 784 785 /* 786 * Find most recent modified time of a set of resource cache entries. 
787 */ 788 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, FMD_LOG_ASRU); 789 if (lp == NULL) 790 return; 791 hrt = lp->log_stat.st_mtime; 792 fmd_log_rele(lp); 793 if (*(uint64_t *)arg < hrt) 794 *(uint64_t *)arg = hrt; 795 } 796 797 void 798 fmd_asru_clear_aged_rsrcs() 799 { 800 int check_if_aged = 1; 801 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, NULL); 802 fmd_case_hash_apply(fmd.d_cases, fmd_case_discard_resolved, 803 &check_if_aged); 804 } 805 806 fmd_asru_hash_t * 807 fmd_asru_hash_create(const char *root, const char *dir) 808 { 809 fmd_asru_hash_t *ahp; 810 char path[PATH_MAX]; 811 812 ahp = fmd_alloc(sizeof (fmd_asru_hash_t), FMD_SLEEP); 813 (void) pthread_rwlock_init(&ahp->ah_lock, NULL); 814 ahp->ah_hashlen = fmd.d_str_buckets; 815 ahp->ah_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, FMD_SLEEP); 816 ahp->ah_asru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 817 FMD_SLEEP); 818 ahp->ah_case_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 819 FMD_SLEEP); 820 ahp->ah_fru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 821 FMD_SLEEP); 822 ahp->ah_label_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 823 FMD_SLEEP); 824 ahp->ah_rsrc_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, 825 FMD_SLEEP); 826 (void) snprintf(path, sizeof (path), "%s/%s", root, dir); 827 ahp->ah_dirpath = fmd_strdup(path, FMD_SLEEP); 828 (void) fmd_conf_getprop(fmd.d_conf, "rsrc.age", &ahp->ah_lifetime); 829 (void) fmd_conf_getprop(fmd.d_conf, "fakenotpresent", 830 (uint32_t *)&fmd_asru_fake_not_present); 831 ahp->ah_al_count = 0; 832 ahp->ah_count = 0; 833 ahp->ah_error = 0; 834 ahp->ah_topo = fmd_topo_hold(); 835 836 return (ahp); 837 } 838 839 void 840 fmd_asru_hash_destroy(fmd_asru_hash_t *ahp) 841 { 842 fmd_asru_link_t *alp, *np; 843 uint_t i; 844 845 for (i = 0; i < ahp->ah_hashlen; i++) { 846 for (alp = ahp->ah_case_hash[i]; alp != NULL; alp = np) { 847 np = alp->al_case_next; 848 alp->al_case_next = NULL; 849 
fmd_case_rele(alp->al_case); 850 alp->al_case = NULL; 851 fmd_asru_al_hash_release(ahp, alp); 852 } 853 } 854 855 fmd_strfree(ahp->ah_dirpath); 856 fmd_free(ahp->ah_hash, sizeof (void *) * ahp->ah_hashlen); 857 fmd_free(ahp->ah_asru_hash, sizeof (void *) * ahp->ah_hashlen); 858 fmd_free(ahp->ah_case_hash, sizeof (void *) * ahp->ah_hashlen); 859 fmd_free(ahp->ah_fru_hash, sizeof (void *) * ahp->ah_hashlen); 860 fmd_free(ahp->ah_label_hash, sizeof (void *) * ahp->ah_hashlen); 861 fmd_free(ahp->ah_rsrc_hash, sizeof (void *) * ahp->ah_hashlen); 862 fmd_topo_rele(ahp->ah_topo); 863 fmd_free(ahp, sizeof (fmd_asru_hash_t)); 864 } 865 866 /* 867 * Take a snapshot of the ASRU database by placing an additional hold on each 868 * member in an auxiliary array, and then call 'func' for each ASRU. 869 */ 870 void 871 fmd_asru_hash_apply(fmd_asru_hash_t *ahp, 872 void (*func)(fmd_asru_t *, void *), void *arg) 873 { 874 fmd_asru_t *ap, **aps, **app; 875 uint_t apc, i; 876 877 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 878 879 aps = app = fmd_alloc(ahp->ah_count * sizeof (fmd_asru_t *), FMD_SLEEP); 880 apc = ahp->ah_count; 881 882 for (i = 0; i < ahp->ah_hashlen; i++) { 883 for (ap = ahp->ah_hash[i]; ap != NULL; ap = ap->asru_next) 884 *app++ = fmd_asru_hold(ap); 885 } 886 887 ASSERT(app == aps + apc); 888 (void) pthread_rwlock_unlock(&ahp->ah_lock); 889 890 for (i = 0; i < apc; i++) { 891 if (aps[i]->asru_fmri != NULL) 892 func(aps[i], arg); 893 fmd_asru_hash_release(ahp, aps[i]); 894 } 895 896 fmd_free(aps, apc * sizeof (fmd_asru_t *)); 897 } 898 899 void 900 fmd_asru_al_hash_apply(fmd_asru_hash_t *ahp, 901 void (*func)(fmd_asru_link_t *, void *), void *arg) 902 { 903 fmd_asru_link_t *alp, **alps, **alpp; 904 uint_t alpc, i; 905 906 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 907 908 alps = alpp = fmd_alloc(ahp->ah_al_count * sizeof (fmd_asru_link_t *), 909 FMD_SLEEP); 910 alpc = ahp->ah_al_count; 911 912 for (i = 0; i < ahp->ah_hashlen; i++) { 913 for (alp = 
ahp->ah_case_hash[i]; alp != NULL; 914 alp = alp->al_case_next) 915 *alpp++ = fmd_asru_al_hold(alp); 916 } 917 918 ASSERT(alpp == alps + alpc); 919 (void) pthread_rwlock_unlock(&ahp->ah_lock); 920 921 for (i = 0; i < alpc; i++) { 922 func(alps[i], arg); 923 fmd_asru_al_hash_release(ahp, alps[i]); 924 } 925 926 fmd_free(alps, alpc * sizeof (fmd_asru_link_t *)); 927 } 928 929 static void 930 fmd_asru_do_hash_apply(fmd_asru_hash_t *ahp, const char *name, 931 void (*func)(fmd_asru_link_t *, void *), void *arg, 932 fmd_asru_link_t **hash, size_t match_offset, size_t next_offset) 933 { 934 fmd_asru_link_t *alp, **alps, **alpp; 935 uint_t alpc = 0, i; 936 uint_t h; 937 938 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 939 940 h = fmd_asru_strhash(ahp, name); 941 942 for (alp = hash[h]; alp != NULL; alp = 943 /* LINTED pointer alignment */ 944 FMD_ASRU_AL_HASH_NEXT(alp, next_offset)) 945 if (fmd_asru_strcmp(ahp, 946 /* LINTED pointer alignment */ 947 FMD_ASRU_AL_HASH_NAME(alp, match_offset), name)) 948 alpc++; 949 950 alps = alpp = fmd_alloc(alpc * sizeof (fmd_asru_link_t *), FMD_SLEEP); 951 952 for (alp = hash[h]; alp != NULL; alp = 953 /* LINTED pointer alignment */ 954 FMD_ASRU_AL_HASH_NEXT(alp, next_offset)) 955 if (fmd_asru_strcmp(ahp, 956 /* LINTED pointer alignment */ 957 FMD_ASRU_AL_HASH_NAME(alp, match_offset), name)) 958 *alpp++ = fmd_asru_al_hold(alp); 959 960 ASSERT(alpp == alps + alpc); 961 (void) pthread_rwlock_unlock(&ahp->ah_lock); 962 963 for (i = 0; i < alpc; i++) { 964 func(alps[i], arg); 965 fmd_asru_al_hash_release(ahp, alps[i]); 966 } 967 968 fmd_free(alps, alpc * sizeof (fmd_asru_link_t *)); 969 } 970 971 void 972 fmd_asru_hash_apply_by_asru(fmd_asru_hash_t *ahp, const char *name, 973 void (*func)(fmd_asru_link_t *, void *), void *arg) 974 { 975 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_asru_hash, 976 offsetof(fmd_asru_link_t, al_asru_name), 977 offsetof(fmd_asru_link_t, al_asru_next)); 978 } 979 980 void 981 
fmd_asru_hash_apply_by_case(fmd_asru_hash_t *ahp, fmd_case_t *cp, 982 void (*func)(fmd_asru_link_t *, void *), void *arg) 983 { 984 fmd_asru_do_hash_apply(ahp, ((fmd_case_impl_t *)cp)->ci_uuid, func, arg, 985 ahp->ah_case_hash, offsetof(fmd_asru_link_t, al_case_uuid), 986 offsetof(fmd_asru_link_t, al_case_next)); 987 } 988 989 void 990 fmd_asru_hash_apply_by_fru(fmd_asru_hash_t *ahp, const char *name, 991 void (*func)(fmd_asru_link_t *, void *), void *arg) 992 { 993 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_fru_hash, 994 offsetof(fmd_asru_link_t, al_fru_name), 995 offsetof(fmd_asru_link_t, al_fru_next)); 996 } 997 998 void 999 fmd_asru_hash_apply_by_rsrc(fmd_asru_hash_t *ahp, const char *name, 1000 void (*func)(fmd_asru_link_t *, void *), void *arg) 1001 { 1002 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_rsrc_hash, 1003 offsetof(fmd_asru_link_t, al_rsrc_name), 1004 offsetof(fmd_asru_link_t, al_rsrc_next)); 1005 } 1006 1007 void 1008 fmd_asru_hash_apply_by_label(fmd_asru_hash_t *ahp, const char *name, 1009 void (*func)(fmd_asru_link_t *, void *), void *arg) 1010 { 1011 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_label_hash, 1012 offsetof(fmd_asru_link_t, al_label), 1013 offsetof(fmd_asru_link_t, al_label_next)); 1014 } 1015 1016 /* 1017 * Lookup an asru in the hash by name and place a hold on it. If the asru is 1018 * not found, no entry is created and NULL is returned. 1019 */ 1020 fmd_asru_t * 1021 fmd_asru_hash_lookup_name(fmd_asru_hash_t *ahp, const char *name) 1022 { 1023 fmd_asru_t *ap; 1024 1025 (void) pthread_rwlock_rdlock(&ahp->ah_lock); 1026 ap = fmd_asru_hash_lookup(ahp, name); 1027 (void) pthread_rwlock_unlock(&ahp->ah_lock); 1028 1029 return (ap); 1030 } 1031 1032 /* 1033 * Create a resource cache entry using the fault event "nvl" for one of the 1034 * suspects from the case "cp". 1035 * 1036 * The fault event can have the following components : FM_FAULT_ASRU, 1037 * FM_FAULT_FRU, FM_FAULT_RESOURCE. 
These should be set by the Diagnosis Engine
 * when calling fmd_nvl_create_fault().  In the general case, these are all
 * optional and an entry will always be added into the cache even if one or all
 * of these fields is missing.
 *
 * However, for hardware faults the recommended practice is that the fault
 * event should always have the FM_FAULT_RESOURCE field present and that this
 * should be represented in hc-scheme.
 *
 * Currently the DE should also add the FM_FAULT_ASRU and FM_FAULT_FRU fields
 * where known, though at some future stage fmd might be able to fill these
 * in automatically from the topology.
 */
fmd_asru_link_t *
fmd_asru_hash_create_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, nvlist_t *nvl)
{
	fmd_asru_link_t *link;
	char *uuidstr;
	uuid_t rawid;
	int uuidlen;

	/*
	 * The new entry is identified by a freshly generated UUID.  libuuid
	 * provides no way to pass in or ask for the size of the text buffer
	 * uuid_unparse() writes to; the spec says 36 bytes, but the buffer
	 * is sized from the "uuidlen" tunable (plus one byte) to be safe.
	 */
	(void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &uuidlen);
	uuidstr = fmd_zalloc(uuidlen + 1, FMD_SLEEP);

	uuid_generate(rawid);
	uuid_unparse(rawid, uuidstr);

	/*
	 * Take a hold on the case, then build the resource cache entries
	 * under the textual UUID.
	 */
	fmd_case_hold_locked(cp);
	link = fmd_asru_al_create(ahp, nvl, cp, uuidstr);
	TRACE((FMD_DBG_ASRU, "asru %s created as %p",
	    link->al_uuid, (void *)link->al_asru));

	/* The temporary UUID string is freed here once the link exists. */
	fmd_free(uuidstr, uuidlen + 1);
	return (link);
}

/*
 * Release the reference count on an asru obtained using fmd_asru_hash_lookup.
 * We take 'ahp' for symmetry and in case we need to use it in future work.
1084 */ 1085 /*ARGSUSED*/ 1086 void 1087 fmd_asru_hash_release(fmd_asru_hash_t *ahp, fmd_asru_t *ap) 1088 { 1089 (void) pthread_mutex_lock(&ap->asru_lock); 1090 1091 ASSERT(ap->asru_refs != 0); 1092 if (--ap->asru_refs == 0) 1093 fmd_asru_destroy(ap); 1094 else 1095 (void) pthread_mutex_unlock(&ap->asru_lock); 1096 } 1097 1098 static void 1099 fmd_asru_do_delete_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, 1100 fmd_asru_link_t **hash, size_t next_offset, char *name) 1101 { 1102 uint_t h; 1103 fmd_asru_link_t *alp, **pp, *alpnext, **alpnextp; 1104 1105 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 1106 h = fmd_asru_strhash(ahp, name); 1107 pp = &hash[h]; 1108 for (alp = *pp; alp != NULL; alp = alpnext) { 1109 /* LINTED pointer alignment */ 1110 alpnextp = FMD_ASRU_AL_HASH_NEXTP(alp, next_offset); 1111 alpnext = *alpnextp; 1112 if (alp->al_case == cp) { 1113 *pp = *alpnextp; 1114 *alpnextp = NULL; 1115 } else 1116 pp = alpnextp; 1117 } 1118 (void) pthread_rwlock_unlock(&ahp->ah_lock); 1119 } 1120 1121 static void 1122 fmd_asru_do_hash_delete(fmd_asru_hash_t *ahp, fmd_case_susp_t *cis, 1123 fmd_case_t *cp, fmd_asru_link_t **hash, size_t next_offset, char *nvname) 1124 { 1125 nvlist_t *nvl; 1126 char *name = NULL; 1127 ssize_t namelen; 1128 1129 if (nvlist_lookup_nvlist(cis->cis_nvl, nvname, &nvl) == 0 && 1130 (namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) != -1 && 1131 (name = fmd_alloc(namelen + 1, FMD_SLEEP)) != NULL) { 1132 if (fmd_fmri_nvl2str(nvl, name, namelen + 1) != -1) 1133 fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, 1134 name); 1135 fmd_free(name, namelen + 1); 1136 } else 1137 fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, ""); 1138 } 1139 1140 void 1141 fmd_asru_hash_delete_case(fmd_asru_hash_t *ahp, fmd_case_t *cp) 1142 { 1143 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1144 fmd_case_susp_t *cis; 1145 fmd_asru_link_t *alp, **plp, *alpnext; 1146 fmd_asru_t *ap; 1147 char path[PATH_MAX]; 1148 char *label; 1149 uint_t h; 1150 1151 /* 1152 * first 
delete hash entries for each suspect 1153 */ 1154 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) { 1155 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_fru_hash, 1156 offsetof(fmd_asru_link_t, al_fru_next), FM_FAULT_FRU); 1157 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_rsrc_hash, 1158 offsetof(fmd_asru_link_t, al_rsrc_next), FM_FAULT_RESOURCE); 1159 if (nvlist_lookup_string(cis->cis_nvl, FM_FAULT_LOCATION, 1160 &label) != 0) 1161 label = ""; 1162 fmd_asru_do_delete_entry(ahp, cp, ahp->ah_label_hash, 1163 offsetof(fmd_asru_link_t, al_label_next), label); 1164 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_asru_hash, 1165 offsetof(fmd_asru_link_t, al_asru_next), FM_FAULT_ASRU); 1166 } 1167 1168 /* 1169 * then delete associated case hash entries 1170 */ 1171 (void) pthread_rwlock_wrlock(&ahp->ah_lock); 1172 h = fmd_asru_strhash(ahp, cip->ci_uuid); 1173 plp = &ahp->ah_case_hash[h]; 1174 for (alp = *plp; alp != NULL; alp = alpnext) { 1175 alpnext = alp->al_case_next; 1176 if (alp->al_case == cp) { 1177 *plp = alp->al_case_next; 1178 alp->al_case_next = NULL; 1179 ASSERT(ahp->ah_al_count != 0); 1180 ahp->ah_al_count--; 1181 1182 /* 1183 * decrement case ref. 1184 */ 1185 fmd_case_rele_locked(cp); 1186 alp->al_case = NULL; 1187 1188 /* 1189 * If we found a matching ASRU, unlink its log file and 1190 * then release the hash entry. Note that it may still 1191 * be referenced if another thread is manipulating it; 1192 * this is ok because once we unlink, the log file will 1193 * not be restored, and the log data will be freed when 1194 * all of the referencing threads release their 1195 * respective references. 
1196 */ 1197 (void) snprintf(path, sizeof (path), "%s/%s", 1198 ahp->ah_dirpath, alp->al_uuid); 1199 if (cip->ci_xprt == NULL && unlink(path) != 0) 1200 fmd_error(EFMD_ASRU_UNLINK, 1201 "failed to unlink asru %s", path); 1202 1203 /* 1204 * Now unlink from the global per-resource cache 1205 * and if this is the last link then remove that from 1206 * it's own hash too. 1207 */ 1208 ap = alp->al_asru; 1209 (void) pthread_mutex_lock(&ap->asru_lock); 1210 fmd_list_delete(&ap->asru_list, alp); 1211 if (ap->asru_list.l_next == NULL) { 1212 uint_t h; 1213 fmd_asru_t *ap2, **pp; 1214 fmd_asru_t *apnext, **apnextp; 1215 1216 ASSERT(ahp->ah_count != 0); 1217 ahp->ah_count--; 1218 h = fmd_asru_strhash(ahp, ap->asru_name); 1219 pp = &ahp->ah_hash[h]; 1220 for (ap2 = *pp; ap2 != NULL; ap2 = apnext) { 1221 apnextp = &ap2->asru_next; 1222 apnext = *apnextp; 1223 if (ap2 == ap) { 1224 *pp = *apnextp; 1225 *apnextp = NULL; 1226 } else 1227 pp = apnextp; 1228 } 1229 } 1230 (void) pthread_mutex_unlock(&ap->asru_lock); 1231 fmd_asru_al_hash_release(ahp, alp); 1232 } else 1233 plp = &alp->al_case_next; 1234 } 1235 (void) pthread_rwlock_unlock(&ahp->ah_lock); 1236 } 1237 1238 typedef struct { 1239 nvlist_t *farc_parent_fmri; 1240 uint8_t farc_reason; 1241 } fmd_asru_farc_t; 1242 1243 static void 1244 fmd_asru_repair_containee(fmd_asru_link_t *alp, void *arg) 1245 { 1246 fmd_asru_farc_t *farcp = (fmd_asru_farc_t *)arg; 1247 1248 if ((alp->al_asru->asru_flags & FMD_ASRU_INVISIBLE) && 1249 alp->al_asru_fmri && 1250 fmd_fmri_contains(farcp->farc_parent_fmri, alp->al_asru_fmri) > 0) { 1251 if (fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1252 farcp->farc_reason)) { 1253 if (alp->al_flags & FMD_ASRU_PROXY) 1254 fmd_case_xprt_updated(alp->al_case); 1255 else 1256 fmd_case_update(alp->al_case); 1257 } 1258 } 1259 } 1260 1261 static void 1262 fmd_asru_do_repair_containees(fmd_asru_link_t *alp, uint8_t reason) 1263 { 1264 int flags; 1265 1266 /* 1267 * Check if all entries associated with this asru 
are acquitted and 1268 * if so acquit containees. Don't try to repair containees on proxy 1269 * side unless we have local asru. 1270 */ 1271 if (alp->al_asru_fmri != NULL && (!(alp->al_flags & FMD_ASRU_PROXY) || 1272 (alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU))) { 1273 (void) pthread_mutex_lock(&alp->al_asru->asru_lock); 1274 flags = alp->al_asru->asru_flags; 1275 (void) pthread_mutex_unlock(&alp->al_asru->asru_lock); 1276 if (!(flags & (FMD_ASRU_FAULTY | FMD_ASRU_INVISIBLE))) { 1277 fmd_asru_farc_t farc; 1278 1279 farc.farc_parent_fmri = alp->al_asru_fmri; 1280 farc.farc_reason = reason; 1281 fmd_asru_al_hash_apply(fmd.d_asrus, 1282 fmd_asru_repair_containee, &farc); 1283 } 1284 } 1285 } 1286 1287 void 1288 fmd_asru_repaired(fmd_asru_link_t *alp, void *arg) 1289 { 1290 int cleared; 1291 fmd_asru_rep_arg_t *farap = (fmd_asru_rep_arg_t *)arg; 1292 1293 /* 1294 * don't allow remote repair over readonly transport 1295 */ 1296 if (alp->al_flags & FMD_ASRU_PROXY_RDONLY) 1297 return; 1298 1299 /* 1300 * don't allow repair etc by asru on proxy unless asru is local 1301 */ 1302 if (farap->fara_bywhat == FARA_BY_ASRU && 1303 (alp->al_flags & FMD_ASRU_PROXY) && 1304 !(alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU)) 1305 return; 1306 /* 1307 * For acquit, need to check both name and uuid if specified 1308 */ 1309 if (farap->fara_reason == FMD_ASRU_ACQUITTED && 1310 farap->fara_rval != NULL && strcmp(farap->fara_uuid, "") != 0 && 1311 strcmp(farap->fara_uuid, alp->al_case_uuid) != 0) 1312 return; 1313 1314 /* 1315 * For replaced, verify it has been replaced if we have serial number. 1316 * If not set *farap->fara_rval to FARA_ERR_RSRCNOTR. 1317 */ 1318 if (farap->fara_reason == FMD_ASRU_REPLACED && 1319 !(alp->al_flags & FMD_ASRU_PROXY_EXTERNAL) && 1320 fmd_asru_replacement_state(alp->al_event, 1321 (alp->al_flags & FMD_ASRU_PROXY) ? 
HC_ONLY_TRUE : HC_ONLY_FALSE) == 1322 FMD_OBJ_STATE_STILL_PRESENT) { 1323 if (farap->fara_rval) 1324 *farap->fara_rval = FARA_ERR_RSRCNOTR; 1325 return; 1326 } 1327 1328 cleared = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, farap->fara_reason); 1329 fmd_asru_do_repair_containees(alp, farap->fara_reason); 1330 1331 /* 1332 * if called from fmd_adm_*() and we really did clear the bit then 1333 * we need to do a case update to see if the associated case can be 1334 * repaired. No need to do this if called from fmd_case_*() (ie 1335 * when arg is NULL) as the case will be explicitly repaired anyway. 1336 */ 1337 if (farap->fara_rval) { 1338 /* 1339 * *farap->fara_rval defaults to FARA_ERR_RSRCNOTF (not found). 1340 * If we find a valid cache entry which we repair then we 1341 * set it to FARA_OK. However we don't want to do this if 1342 * we have already set it to FARA_ERR_RSRCNOTR (not replaced) 1343 * in a previous iteration (see above). So only set it to 1344 * FARA_OK if the current value is still FARA_ERR_RSRCNOTF. 1345 */ 1346 if (*farap->fara_rval == FARA_ERR_RSRCNOTF) 1347 *farap->fara_rval = FARA_OK; 1348 if (cleared) { 1349 if (alp->al_flags & FMD_ASRU_PROXY) 1350 fmd_case_xprt_updated(alp->al_case); 1351 else 1352 fmd_case_update(alp->al_case); 1353 } 1354 } 1355 } 1356 1357 /* 1358 * Discard the case associated with this alp if it is in resolved state. 1359 * Called on "fmadm flush". 1360 */ 1361 /*ARGSUSED*/ 1362 void 1363 fmd_asru_flush(fmd_asru_link_t *alp, void *arg) 1364 { 1365 int check_if_aged = 0; 1366 int *rval = (int *)arg; 1367 1368 if (alp->al_case) 1369 fmd_case_discard_resolved(alp->al_case, &check_if_aged); 1370 *rval = 0; 1371 } 1372 1373 /* 1374 * This is only called for proxied faults. Set various flags so we can 1375 * find the nature of the transport from the resource cache code. 
1376 */ 1377 /*ARGSUSED*/ 1378 void 1379 fmd_asru_set_on_proxy(fmd_asru_link_t *alp, void *arg) 1380 { 1381 fmd_asru_set_on_proxy_t *entryp = (fmd_asru_set_on_proxy_t *)arg; 1382 1383 if (*entryp->fasp_countp >= entryp->fasp_maxcount) 1384 return; 1385 1386 /* 1387 * Note that this is a proxy fault and save whetehr transport is 1388 * RDONLY or EXTERNAL. 1389 */ 1390 alp->al_flags |= FMD_ASRU_PROXY; 1391 alp->al_asru->asru_flags |= FMD_ASRU_PROXY; 1392 1393 if (entryp->fasp_proxy_external) { 1394 alp->al_flags |= FMD_ASRU_PROXY_EXTERNAL; 1395 alp->al_asru->asru_flags |= FMD_ASRU_PROXY_EXTERNAL; 1396 } 1397 1398 if (entryp->fasp_proxy_rdonly) 1399 alp->al_flags |= FMD_ASRU_PROXY_RDONLY; 1400 1401 /* 1402 * Save whether asru is accessible in local domain 1403 */ 1404 if (entryp->fasp_proxy_asru[*entryp->fasp_countp]) { 1405 alp->al_flags |= FMD_ASRU_PROXY_WITH_ASRU; 1406 alp->al_asru->asru_flags |= FMD_ASRU_PROXY_WITH_ASRU; 1407 } 1408 (*entryp->fasp_countp)++; 1409 } 1410 1411 /*ARGSUSED*/ 1412 void 1413 fmd_asru_update_containees(fmd_asru_link_t *alp, void *arg) 1414 { 1415 fmd_asru_do_repair_containees(alp, alp->al_reason); 1416 } 1417 1418 /* 1419 * This function is used for fault proxying. It updates the resource status in 1420 * the resource cache based on information that has come from the other side of 1421 * the transport. This can be called on either the proxy side or the 1422 * diagnosing side. 1423 */ 1424 void 1425 fmd_asru_update_status(fmd_asru_link_t *alp, void *arg) 1426 { 1427 fmd_asru_update_status_t *entryp = (fmd_asru_update_status_t *)arg; 1428 uint8_t status; 1429 1430 if (*entryp->faus_countp >= entryp->faus_maxcount) 1431 return; 1432 1433 status = entryp->faus_ba[*entryp->faus_countp]; 1434 1435 /* 1436 * For proxy, if there is no asru on the proxy side, but there is on 1437 * the diag side, then take the diag side asru status. 1438 * For diag, if there is an asru on the proxy side, then take the proxy 1439 * side asru status. 
1440 */ 1441 if (entryp->faus_is_proxy ? 1442 (entryp->faus_diag_asru[*entryp->faus_countp] && 1443 !entryp->faus_proxy_asru[*entryp->faus_countp]) : 1444 entryp->faus_proxy_asru[*entryp->faus_countp]) { 1445 if (status & FM_SUSPECT_DEGRADED) 1446 alp->al_flags |= FMD_ASRU_DEGRADED; 1447 else 1448 alp->al_flags &= ~FMD_ASRU_DEGRADED; 1449 if (status & FM_SUSPECT_UNUSABLE) 1450 (void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE); 1451 else 1452 (void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE, 0); 1453 } 1454 1455 /* 1456 * Update the faulty status too. 1457 */ 1458 if (!(status & FM_SUSPECT_FAULTY)) 1459 (void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 1460 (status & FM_SUSPECT_REPAIRED) ? FMD_ASRU_REPAIRED : 1461 (status & FM_SUSPECT_REPLACED) ? FMD_ASRU_REPLACED : 1462 (status & FM_SUSPECT_ACQUITTED) ? FMD_ASRU_ACQUITTED : 1463 FMD_ASRU_REMOVED); 1464 else if (entryp->faus_is_proxy) 1465 (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY); 1466 1467 /* 1468 * for proxy only, update the present status too. 1469 */ 1470 if (entryp->faus_is_proxy) { 1471 if (!(status & FM_SUSPECT_NOT_PRESENT)) { 1472 alp->al_flags |= FMD_ASRU_PRESENT; 1473 alp->al_asru->asru_flags |= FMD_ASRU_PRESENT; 1474 } else { 1475 alp->al_flags &= ~FMD_ASRU_PRESENT; 1476 alp->al_asru->asru_flags &= ~FMD_ASRU_PRESENT; 1477 } 1478 } 1479 (*entryp->faus_countp)++; 1480 } 1481 1482 /* 1483 * This function is called on the diagnosing side when fault proxying is 1484 * in use and the proxy has sent a uuclose. It updates the status of the 1485 * resource cache entries. 
1486 */ 1487 void 1488 fmd_asru_close_status(fmd_asru_link_t *alp, void *arg) 1489 { 1490 fmd_asru_close_status_t *entryp = (fmd_asru_close_status_t *)arg; 1491 1492 if (*entryp->facs_countp >= entryp->facs_maxcount) 1493 return; 1494 alp->al_flags &= ~FMD_ASRU_DEGRADED; 1495 (void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE); 1496 (*entryp->facs_countp)++; 1497 } 1498 1499 static void 1500 fmd_asru_logevent(fmd_asru_link_t *alp) 1501 { 1502 fmd_asru_t *ap = alp->al_asru; 1503 boolean_t faulty = (alp->al_flags & FMD_ASRU_FAULTY) != 0; 1504 boolean_t unusable = (alp->al_flags & FMD_ASRU_UNUSABLE) != 0; 1505 boolean_t message = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0; 1506 boolean_t repaired = (alp->al_reason == FMD_ASRU_REPAIRED); 1507 boolean_t replaced = (alp->al_reason == FMD_ASRU_REPLACED); 1508 boolean_t acquitted = (alp->al_reason == FMD_ASRU_ACQUITTED); 1509 1510 fmd_case_impl_t *cip; 1511 fmd_event_t *e; 1512 fmd_log_t *lp; 1513 nvlist_t *nvl; 1514 char *class; 1515 1516 ASSERT(MUTEX_HELD(&ap->asru_lock)); 1517 cip = (fmd_case_impl_t *)alp->al_case; 1518 ASSERT(cip != NULL); 1519 1520 /* 1521 * Don't log to disk on proxy side 1522 */ 1523 if (cip->ci_xprt != NULL) 1524 return; 1525 1526 if ((lp = alp->al_log) == NULL) 1527 lp = fmd_log_open(ap->asru_root, alp->al_uuid, FMD_LOG_ASRU); 1528 1529 if (lp == NULL) 1530 return; /* can't log events if we can't open the log */ 1531 1532 nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[faulty | (unusable << 1)], 1533 alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, faulty, unusable, 1534 message, alp->al_event, &cip->ci_tv, repaired, replaced, acquitted, 1535 cip->ci_state == FMD_CASE_RESOLVED, cip->ci_diag_de == NULL ? 
1536 cip->ci_mod->mod_fmri : cip->ci_diag_de, cip->ci_injected == 1); 1537 1538 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 1539 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 1540 1541 fmd_event_hold(e); 1542 fmd_log_append(lp, e, NULL); 1543 fmd_event_rele(e); 1544 1545 /* 1546 * For now, we close the log file after every update to conserve file 1547 * descriptors and daemon overhead. If this becomes a performance 1548 * issue this code can change to keep a fixed-size LRU cache of logs. 1549 */ 1550 fmd_log_rele(lp); 1551 alp->al_log = NULL; 1552 } 1553 1554 int 1555 fmd_asru_setflags(fmd_asru_link_t *alp, uint_t sflag) 1556 { 1557 fmd_asru_t *ap = alp->al_asru; 1558 uint_t nstate, ostate; 1559 1560 ASSERT(!(sflag & ~FMD_ASRU_STATE)); 1561 ASSERT(sflag != FMD_ASRU_STATE); 1562 1563 (void) pthread_mutex_lock(&ap->asru_lock); 1564 1565 ostate = alp->al_flags & FMD_ASRU_STATE; 1566 alp->al_flags |= sflag; 1567 nstate = alp->al_flags & FMD_ASRU_STATE; 1568 1569 if (nstate == ostate) { 1570 (void) pthread_mutex_unlock(&ap->asru_lock); 1571 return (0); 1572 } 1573 1574 ap->asru_flags |= sflag; 1575 TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid, 1576 _fmd_asru_snames[ostate], _fmd_asru_snames[nstate])); 1577 1578 fmd_asru_logevent(alp); 1579 1580 (void) pthread_cond_broadcast(&ap->asru_cv); 1581 (void) pthread_mutex_unlock(&ap->asru_lock); 1582 return (1); 1583 } 1584 1585 int 1586 fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag, uint8_t reason) 1587 { 1588 fmd_asru_t *ap = alp->al_asru; 1589 fmd_asru_link_t *nalp; 1590 uint_t nstate, ostate, flags = 0; 1591 1592 ASSERT(!(sflag & ~FMD_ASRU_STATE)); 1593 ASSERT(sflag != FMD_ASRU_STATE); 1594 1595 (void) pthread_mutex_lock(&ap->asru_lock); 1596 1597 ostate = alp->al_flags & FMD_ASRU_STATE; 1598 alp->al_flags &= ~sflag; 1599 nstate = alp->al_flags & FMD_ASRU_STATE; 1600 1601 if (nstate == ostate) { 1602 if (reason > alp->al_reason && 1603 ((fmd_case_impl_t *)alp->al_case)->ci_state < 
1604 FMD_CASE_REPAIRED) { 1605 alp->al_reason = reason; 1606 fmd_asru_logevent(alp); 1607 (void) pthread_cond_broadcast(&ap->asru_cv); 1608 } 1609 (void) pthread_mutex_unlock(&ap->asru_lock); 1610 return (0); 1611 } 1612 if (reason > alp->al_reason) 1613 alp->al_reason = reason; 1614 1615 if (sflag == FMD_ASRU_UNUSABLE) 1616 ap->asru_flags &= ~sflag; 1617 else if (sflag == FMD_ASRU_FAULTY) { 1618 /* 1619 * only clear the faulty bit if all links are clear 1620 */ 1621 for (nalp = fmd_list_next(&ap->asru_list); nalp != NULL; 1622 nalp = fmd_list_next(nalp)) 1623 flags |= nalp->al_flags; 1624 if (!(flags & FMD_ASRU_FAULTY)) 1625 ap->asru_flags &= ~sflag; 1626 } 1627 1628 TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid, 1629 _fmd_asru_snames[ostate], _fmd_asru_snames[nstate])); 1630 1631 fmd_asru_logevent(alp); 1632 1633 (void) pthread_cond_broadcast(&ap->asru_cv); 1634 (void) pthread_mutex_unlock(&ap->asru_lock); 1635 1636 return (1); 1637 } 1638 1639 /*ARGSUSED*/ 1640 void 1641 fmd_asru_log_resolved(fmd_asru_link_t *alp, void *unused) 1642 { 1643 fmd_asru_t *ap = alp->al_asru; 1644 1645 (void) pthread_mutex_lock(&ap->asru_lock); 1646 fmd_asru_logevent(alp); 1647 (void) pthread_cond_broadcast(&ap->asru_cv); 1648 (void) pthread_mutex_unlock(&ap->asru_lock); 1649 } 1650 1651 /* 1652 * Report the current known state of the link entry (ie this particular fault 1653 * affecting this particular ASRU). 1654 */ 1655 int 1656 fmd_asru_al_getstate(fmd_asru_link_t *alp) 1657 { 1658 int us, st = (alp->al_flags & (FMD_ASRU_FAULTY | FMD_ASRU_UNUSABLE)); 1659 nvlist_t *asru; 1660 int ps = FMD_OBJ_STATE_UNKNOWN; 1661 1662 /* 1663 * For fault proxying with an EXTERNAL transport, believe the presence 1664 * state as sent by the diagnosing side. Otherwise find the presence 1665 * state here. 
Note that if fault proxying with an INTERNAL transport 1666 * we can only trust the presence state where we are using hc-scheme 1667 * fmris which should be consistant across domains in the same system - 1668 * other schemes can refer to different devices in different domains. 1669 */ 1670 if (!(alp->al_flags & FMD_ASRU_PROXY_EXTERNAL)) { 1671 ps = fmd_asru_replacement_state(alp->al_event, (alp->al_flags & 1672 FMD_ASRU_PROXY)? HC_ONLY_TRUE : HC_ONLY_FALSE); 1673 if (ps == FMD_OBJ_STATE_NOT_PRESENT) 1674 return (st | FMD_ASRU_UNUSABLE); 1675 if (ps == FMD_OBJ_STATE_REPLACED) { 1676 if (alp->al_reason < FMD_ASRU_REPLACED) 1677 alp->al_reason = FMD_ASRU_REPLACED; 1678 return (st | FMD_ASRU_UNUSABLE); 1679 } 1680 } 1681 if (ps == FMD_OBJ_STATE_UNKNOWN && (alp->al_flags & FMD_ASRU_PROXY)) 1682 st |= (alp->al_flags & (FMD_ASRU_DEGRADED | FMD_ASRU_PRESENT)); 1683 else 1684 st |= (alp->al_flags & (FMD_ASRU_DEGRADED)) | FMD_ASRU_PRESENT; 1685 1686 /* 1687 * For fault proxying, unless we have a local ASRU, then believe the 1688 * service state sent by the diagnosing side. Otherwise find the service 1689 * state here. Try fmd_fmri_service_state() first, but if that's not 1690 * supported by the scheme then fall back to fmd_fmri_unusable(). 
1691 */ 1692 if ((!(alp->al_flags & FMD_ASRU_PROXY) || 1693 (alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU)) && 1694 nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0) { 1695 us = fmd_fmri_service_state(asru); 1696 if (us == -1 || us == FMD_SERVICE_STATE_UNKNOWN) { 1697 /* not supported by scheme - try fmd_fmri_unusable */ 1698 us = fmd_fmri_unusable(asru); 1699 if (us > 0) 1700 st |= FMD_ASRU_UNUSABLE; 1701 else if (us == 0) 1702 st &= ~FMD_ASRU_UNUSABLE; 1703 } else { 1704 if (us == FMD_SERVICE_STATE_UNUSABLE) { 1705 st &= ~FMD_ASRU_DEGRADED; 1706 st |= FMD_ASRU_UNUSABLE; 1707 } else if (us == FMD_SERVICE_STATE_OK) { 1708 st &= ~(FMD_ASRU_DEGRADED | FMD_ASRU_UNUSABLE); 1709 } else if (us == FMD_SERVICE_STATE_ISOLATE_PENDING) { 1710 st &= ~(FMD_ASRU_DEGRADED | FMD_ASRU_UNUSABLE); 1711 } else if (us == FMD_SERVICE_STATE_DEGRADED) { 1712 st &= ~FMD_ASRU_UNUSABLE; 1713 st |= FMD_ASRU_DEGRADED; 1714 } 1715 } 1716 } 1717 return (st); 1718 } 1719 1720 /* 1721 * Report the current known state of the ASRU by refreshing its unusable status 1722 * based upon the routines provided by the scheme module. If the unusable bit 1723 * is different, we do *not* generate a state change here because that change 1724 * may be unrelated to fmd activities and therefore we have no case or event. 1725 * The absence of the transition is harmless as this function is only provided 1726 * for RPC observability and fmd's clients are only concerned with ASRU_FAULTY. 1727 */ 1728 int 1729 fmd_asru_getstate(fmd_asru_t *ap) 1730 { 1731 int us, st, p = -1; 1732 char *s; 1733 1734 /* do not report non-fmd non-present resources */ 1735 if (!(ap->asru_flags & FMD_ASRU_INTERNAL)) { 1736 /* 1737 * As with fmd_asru_al_getstate(), we can only trust the 1738 * local presence state on a proxy if the transport is 1739 * internal and the scheme is hc. Otherwise we believe the 1740 * state as sent by the diagnosing side. 
1741 */ 1742 if (!(ap->asru_flags & FMD_ASRU_PROXY) || 1743 (!(ap->asru_flags & FMD_ASRU_PROXY_EXTERNAL) && 1744 (nvlist_lookup_string(ap->asru_fmri, FM_FMRI_SCHEME, 1745 &s) == 0 && strcmp(s, FM_FMRI_SCHEME_HC) == 0))) { 1746 if (fmd_asru_fake_not_present >= 1747 FMD_OBJ_STATE_REPLACED) 1748 return (0); 1749 p = fmd_fmri_present(ap->asru_fmri); 1750 } 1751 if (p == 0 || (p < 0 && !(ap->asru_flags & FMD_ASRU_PROXY) || 1752 !(ap->asru_flags & FMD_ASRU_PRESENT))) 1753 return (0); 1754 } 1755 1756 /* 1757 * As with fmd_asru_al_getstate(), we can only trust the local unusable 1758 * state on a proxy if there is a local ASRU. 1759 */ 1760 st = ap->asru_flags & (FMD_ASRU_FAULTY | FMD_ASRU_UNUSABLE); 1761 if (!(ap->asru_flags & FMD_ASRU_PROXY) || 1762 (ap->asru_flags & FMD_ASRU_PROXY_WITH_ASRU)) { 1763 us = fmd_fmri_unusable(ap->asru_fmri); 1764 if (us > 0) 1765 st |= FMD_ASRU_UNUSABLE; 1766 else if (us == 0) 1767 st &= ~FMD_ASRU_UNUSABLE; 1768 } 1769 return (st); 1770 } 1771