1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * FMD Case Subsystem 29 * 30 * Diagnosis engines are expected to group telemetry events related to the 31 * diagnosis of a particular problem on the system into a set of cases. The 32 * diagnosis engine may have any number of cases open at a given point in time. 33 * Some cases may eventually be *solved* by associating a suspect list of one 34 * or more problems with the case, at which point fmd publishes a list.suspect 35 * event for the case and it becomes visible to administrators and agents. 36 * 37 * Every case is named using a UUID, and is globally visible in the case hash. 38 * Cases are reference-counted, except for the reference from the case hash 39 * itself. Consumers of case references include modules, which store active 40 * cases on the mod_cases list, ASRUs in the resource cache, and the RPC code. 41 * 42 * Cases obey the following state machine. In states UNSOLVED, SOLVED, and 43 * CLOSE_WAIT, a case's module refers to the owning module (a diagnosis engine 44 * or transport) and the case is referenced by the mod_cases list. Once the 45 * case reaches the CLOSED or REPAIRED states, a case's module changes to refer 46 * to the root module (fmd.d_rmod) and is deleted from the owner's mod_cases. 47 * 48 * +------------+ 49 * +----------| UNSOLVED | 50 * | +------------+ 51 * | 1 | 52 * | | 53 * | +-------v----+ 54 * 2 | | SOLVED | 55 * | +------------+ 56 * | 3 | 5 | 57 * +------------+ | | 58 * | | | 59 * +-v---v----v-+ 60 * | CLOSE_WAIT | 61 * +------------+ 62 * | | | 63 * +-----------+ | +------------+ 64 * | 4 | | 65 * v +-----v------+ | 66 * discard | CLOSED | 6 | 67 * +------------+ | 68 * | | 69 * | +------------+ 70 * 7 | | 71 * +-----v----v-+ 72 * | REPAIRED | 73 * +------------+ 74 * | 75 * 8 | 76 * +-----v------+ 77 * | RESOLVED | 78 * +------------+ 79 * | 80 * v 81 * discard 82 * 83 * The state machine changes are triggered by calls to fmd_case_transition() 84 * from various locations inside of fmd, as described below: 85 * 86 * [1] Called by: fmd_case_solve() 87 * Actions: FMD_CF_SOLVED flag is set in ci_flags 88 * conviction policy is applied to suspect list 89 * suspects convicted are marked faulty (F) in R$ 90 * list.suspect event logged and dispatched 91 * 92 * [2] Called by: fmd_case_close(), fmd_case_uuclose() 93 * Actions: diagnosis engine fmdo_close() entry point scheduled 94 * case discarded upon exit from CLOSE_WAIT 95 * 96 * [3] Called by: fmd_case_close(), fmd_case_uuclose(), fmd_xprt_event_uuclose() 97 * Actions: FMD_CF_ISOLATED flag is set in ci_flags 98 * suspects convicted (F) are marked unusable (U) in R$ 99 * diagnosis engine fmdo_close() entry point scheduled 100 * case transitions to CLOSED [4] upon exit from CLOSE_WAIT 101 * 102 * [4] Called by: fmd_case_delete() (after fmdo_close() entry point returns) 103 * Actions: list.isolated event dispatched 104 * case deleted from module's list of open cases 105 * 106 * [5] Called by: fmd_case_repair(), fmd_case_update() 107 * Actions: FMD_CF_REPAIR flag is set in ci_flags 108 * diagnosis engine fmdo_close() entry point scheduled 109 * case transitions to REPAIRED [6] upon exit from CLOSE_WAIT 110 * 111 * [6] Called by: fmd_case_delete() (after fmdo_close() entry point returns) 112 * Actions: suspects convicted are marked non faulty (!F) in R$ 113 * list.repaired or list.updated event dispatched 114 * 115 * [7] Called by: fmd_case_repair(), fmd_case_update() 116 * Actions: FMD_CF_REPAIR flag is set in ci_flags 117 * suspects convicted are marked non faulty (!F) in R$ 118 * list.repaired or list.updated event dispatched 119 * 120 * [8] Called by: fmd_case_uuresolve() 121 * Actions: list.resolved event dispatched 122 * case is discarded 123 */ 124 125 #include <sys/fm/protocol.h> 126 #include <uuid/uuid.h> 127 #include <alloca.h> 128 129 #include <fmd_alloc.h> 130 #include <fmd_module.h> 131 #include <fmd_error.h> 132 #include <fmd_conf.h> 133 #include <fmd_case.h> 134 #include <fmd_string.h> 135 #include <fmd_subr.h> 136 #include <fmd_protocol.h> 137 #include <fmd_event.h> 138 #include <fmd_eventq.h> 139 #include <fmd_dispq.h> 140 #include <fmd_buf.h> 141 #include <fmd_log.h> 142 #include <fmd_asru.h> 143 #include <fmd_fmri.h> 144 #include <fmd_xprt.h> 145 146 #include <fmd.h> 147 148 static const char *const _fmd_case_snames[] = { 149 "UNSOLVED", /* FMD_CASE_UNSOLVED */ 150 "SOLVED", /* FMD_CASE_SOLVED */ 151 "CLOSE_WAIT", /* FMD_CASE_CLOSE_WAIT */ 152 "CLOSED", /* FMD_CASE_CLOSED */ 153 "REPAIRED", /* FMD_CASE_REPAIRED */ 154 "RESOLVED" /* FMD_CASE_RESOLVED */ 155 }; 156 157 static fmd_case_impl_t *fmd_case_tryhold(fmd_case_impl_t *); 158 159 fmd_case_hash_t * 160 fmd_case_hash_create(void) 161 { 162 fmd_case_hash_t *chp = fmd_alloc(sizeof (fmd_case_hash_t), FMD_SLEEP); 163 164 (void) pthread_rwlock_init(&chp->ch_lock, NULL); 165 chp->ch_hashlen = fmd.d_str_buckets; 166 chp->ch_hash = fmd_zalloc(sizeof (void *) * chp->ch_hashlen, FMD_SLEEP); 167 chp->ch_code_hash = fmd_zalloc(sizeof (void *) * chp->ch_hashlen, 168 FMD_SLEEP); 169 chp->ch_count = 0; 170 171 return (chp); 172 } 173 174 /* 175 * Destroy the case hash. Unlike most of our hash tables, no active references 176 * are kept by the case hash itself; all references come from other subsystems. 177 * The hash must be destroyed after all modules are unloaded; if anything was 178 * present in the hash it would be by definition a reference count leak. 179 */ 180 void 181 fmd_case_hash_destroy(fmd_case_hash_t *chp) 182 { 183 fmd_free(chp->ch_hash, sizeof (void *) * chp->ch_hashlen); 184 fmd_free(chp->ch_code_hash, sizeof (void *) * chp->ch_hashlen); 185 fmd_free(chp, sizeof (fmd_case_hash_t)); 186 } 187 188 /* 189 * Take a snapshot of the case hash by placing an additional hold on each 190 * member in an auxiliary array, and then call 'func' for each case. 191 */ 192 void 193 fmd_case_hash_apply(fmd_case_hash_t *chp, 194 void (*func)(fmd_case_t *, void *), void *arg) 195 { 196 fmd_case_impl_t *cp, **cps, **cpp; 197 uint_t cpc, i; 198 199 (void) pthread_rwlock_rdlock(&chp->ch_lock); 200 201 cps = cpp = fmd_alloc(chp->ch_count * sizeof (fmd_case_t *), FMD_SLEEP); 202 cpc = chp->ch_count; 203 204 for (i = 0; i < chp->ch_hashlen; i++) { 205 for (cp = chp->ch_hash[i]; cp != NULL; cp = cp->ci_next) 206 *cpp++ = fmd_case_tryhold(cp); 207 } 208 209 ASSERT(cpp == cps + cpc); 210 (void) pthread_rwlock_unlock(&chp->ch_lock); 211 212 for (i = 0; i < cpc; i++) { 213 if (cps[i] != NULL) { 214 func((fmd_case_t *)cps[i], arg); 215 fmd_case_rele((fmd_case_t *)cps[i]); 216 } 217 } 218 219 fmd_free(cps, cpc * sizeof (fmd_case_t *)); 220 } 221 222 static void 223 fmd_case_code_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip) 224 { 225 uint_t h = fmd_strhash(cip->ci_code) % chp->ch_hashlen; 226 227 cip->ci_code_next = chp->ch_code_hash[h]; 228 chp->ch_code_hash[h] = cip; 229 } 230 231 static void 232 fmd_case_code_hash_delete(fmd_case_hash_t *chp, fmd_case_impl_t *cip) 233 { 234 fmd_case_impl_t **pp, *cp; 235 236 if (cip->ci_code) { 237 uint_t h = fmd_strhash(cip->ci_code) % chp->ch_hashlen; 238 239 pp = &chp->ch_code_hash[h]; 240 for (cp = *pp; cp != NULL; cp = cp->ci_code_next) { 241 if (cp != cip) 242 pp = &cp->ci_code_next; 243 else 244 break; 245 } 246 if (cp != NULL) { 247 *pp = cp->ci_code_next; 248 cp->ci_code_next = NULL; 249 } 250 } 251 } 252 253 /* 254 * Look up the diagcode for this case and cache it in ci_code. If no suspects 255 * were defined for this case or if the lookup fails, the event dictionary or 256 * module code is broken, and we set the event code to a precomputed default. 257 */ 258 static const char * 259 fmd_case_mkcode(fmd_case_t *cp) 260 { 261 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 262 fmd_case_susp_t *cis; 263 fmd_case_hash_t *chp = fmd.d_cases; 264 265 char **keys, **keyp; 266 const char *s; 267 268 ASSERT(MUTEX_HELD(&cip->ci_lock)); 269 ASSERT(cip->ci_state >= FMD_CASE_SOLVED); 270 271 /* 272 * delete any existing entry from code hash if it is on it 273 */ 274 fmd_case_code_hash_delete(chp, cip); 275 276 fmd_free(cip->ci_code, cip->ci_codelen); 277 cip->ci_codelen = cip->ci_mod->mod_codelen; 278 cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP); 279 keys = keyp = alloca(sizeof (char *) * (cip->ci_nsuspects + 1)); 280 281 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) { 282 if (nvlist_lookup_string(cis->cis_nvl, FM_CLASS, keyp) == 0) 283 keyp++; 284 } 285 286 *keyp = NULL; /* mark end of keys[] array for libdiagcode */ 287 288 if (cip->ci_nsuspects == 0 || fmd_module_dc_key2code( 289 cip->ci_mod, keys, cip->ci_code, cip->ci_codelen) != 0) { 290 (void) fmd_conf_getprop(fmd.d_conf, "nodiagcode", &s); 291 fmd_free(cip->ci_code, cip->ci_codelen); 292 cip->ci_codelen = strlen(s) + 1; 293 cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP); 294 (void) strcpy(cip->ci_code, s); 295 } 296 297 /* 298 * add into hash of solved cases 299 */ 300 fmd_case_code_hash_insert(chp, cip); 301 302 return (cip->ci_code); 303 } 304 305 typedef struct { 306 int *fcl_countp; 307 int fcl_maxcount; 308 uint8_t *fcl_ba; 309 nvlist_t **fcl_nva; 310 int *fcl_msgp; 311 } fmd_case_lst_t; 312 313 static void 314 fmd_case_set_lst(fmd_asru_link_t *alp, void *arg) 315 { 316 fmd_case_lst_t *entryp = (fmd_case_lst_t *)arg; 317 boolean_t b; 318 int state; 319 320 if (*entryp->fcl_countp >= entryp->fcl_maxcount) 321 return; 322 if (nvlist_lookup_boolean_value(alp->al_event, FM_SUSPECT_MESSAGE, 323 &b) == 0 && b == B_FALSE) 324 *entryp->fcl_msgp = B_FALSE; 325 entryp->fcl_ba[*entryp->fcl_countp] = 0; 326 state = fmd_asru_al_getstate(alp); 327 if (state & FMD_ASRU_DEGRADED) 328 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_DEGRADED; 329 if (state & FMD_ASRU_UNUSABLE) 330 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_UNUSABLE; 331 if (state & FMD_ASRU_FAULTY) 332 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_FAULTY; 333 if (!(state & FMD_ASRU_PRESENT)) 334 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_NOT_PRESENT; 335 if (alp->al_reason == FMD_ASRU_REPAIRED) 336 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPAIRED; 337 else if (alp->al_reason == FMD_ASRU_REPLACED) 338 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPLACED; 339 else if (alp->al_reason == FMD_ASRU_ACQUITTED) 340 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_ACQUITTED; 341 entryp->fcl_nva[*entryp->fcl_countp] = alp->al_event; 342 (*entryp->fcl_countp)++; 343 } 344 345 static void 346 fmd_case_faulty(fmd_asru_link_t *alp, void *arg) 347 { 348 int *faultyp = (int *)arg; 349 350 *faultyp |= (alp->al_flags & FMD_ASRU_FAULTY); 351 } 352 353 static void 354 fmd_case_usable(fmd_asru_link_t *alp, void *arg) 355 { 356 int *usablep = (int *)arg; 357 358 *usablep |= !(fmd_asru_al_getstate(alp) & FMD_ASRU_UNUSABLE); 359 } 360 361 static void 362 fmd_case_not_faulty(fmd_asru_link_t *alp, void *arg) 363 { 364 int *not_faultyp = (int *)arg; 365 366 *not_faultyp |= !(alp->al_flags & FMD_ASRU_FAULTY); 367 } 368 369 /* 370 * Have we got any suspects with an asru that are still unusable and present? 371 */ 372 static void 373 fmd_case_unusable_and_present(fmd_asru_link_t *alp, void *arg) 374 { 375 int *rvalp = (int *)arg; 376 int state = fmd_asru_al_getstate(alp); 377 nvlist_t *asru; 378 379 if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) != 0) 380 return; 381 *rvalp |= ((state & FMD_ASRU_UNUSABLE) && (state & FMD_ASRU_PRESENT)); 382 } 383 384 nvlist_t * 385 fmd_case_mkevent(fmd_case_t *cp, const char *class) 386 { 387 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 388 nvlist_t **nva, *nvl; 389 uint8_t *ba; 390 int msg = B_TRUE; 391 const char *code; 392 fmd_case_lst_t fcl; 393 int count = 0; 394 395 (void) pthread_mutex_lock(&cip->ci_lock); 396 ASSERT(cip->ci_state >= FMD_CASE_SOLVED); 397 398 nva = alloca(sizeof (nvlist_t *) * cip->ci_nsuspects); 399 ba = alloca(sizeof (uint8_t) * cip->ci_nsuspects); 400 401 /* 402 * For each suspect associated with the case, store its fault event 403 * nvlist in 'nva'. We also look to see if any of the suspect faults 404 * have asked not to be messaged. If any of them have made such a 405 * request, propagate that attribute to the composite list.* event. 406 * Finally, store each suspect's faulty status into the bitmap 'ba'. 407 */ 408 fcl.fcl_countp = &count; 409 fcl.fcl_maxcount = cip->ci_nsuspects; 410 fcl.fcl_msgp = &msg; 411 fcl.fcl_ba = ba; 412 fcl.fcl_nva = nva; 413 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_set_lst, &fcl); 414 415 if (cip->ci_code == NULL) 416 (void) fmd_case_mkcode(cp); 417 /* 418 * For repair and updated event, we lookup diagcode from dict using key 419 * "list.repaired" or "list.updated" or "list.resolved". 420 */ 421 if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) 422 (void) fmd_conf_getprop(fmd.d_conf, "repaircode", &code); 423 else if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0) 424 (void) fmd_conf_getprop(fmd.d_conf, "resolvecode", &code); 425 else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0) 426 (void) fmd_conf_getprop(fmd.d_conf, "updatecode", &code); 427 else 428 code = cip->ci_code; 429 430 if (msg == B_FALSE) 431 cip->ci_flags |= FMD_CF_INVISIBLE; 432 433 nvl = fmd_protocol_list(class, cip->ci_mod->mod_fmri, cip->ci_uuid, 434 code, count, nva, ba, msg, &cip->ci_tv); 435 436 (void) pthread_mutex_unlock(&cip->ci_lock); 437 return (nvl); 438 } 439 440 static boolean_t 441 fmd_case_compare_elem(nvlist_t *nvl, nvlist_t *xnvl, const char *elem) 442 { 443 nvlist_t *new_rsrc; 444 nvlist_t *rsrc; 445 char *new_name = NULL; 446 char *name = NULL; 447 ssize_t new_namelen; 448 ssize_t namelen; 449 int fmri_present = 1; 450 int new_fmri_present = 1; 451 int match = B_FALSE; 452 fmd_topo_t *ftp = fmd_topo_hold(); 453 454 if (nvlist_lookup_nvlist(xnvl, elem, &rsrc) != 0) 455 fmri_present = 0; 456 else { 457 if ((namelen = fmd_fmri_nvl2str(rsrc, NULL, 0)) == -1) 458 goto done; 459 name = fmd_alloc(namelen + 1, FMD_SLEEP); 460 if (fmd_fmri_nvl2str(rsrc, name, namelen + 1) == -1) 461 goto done; 462 } 463 if (nvlist_lookup_nvlist(nvl, elem, &new_rsrc) != 0) 464 new_fmri_present = 0; 465 else { 466 if ((new_namelen = fmd_fmri_nvl2str(new_rsrc, NULL, 0)) == -1) 467 goto done; 468 new_name = fmd_alloc(new_namelen + 1, FMD_SLEEP); 469 if (fmd_fmri_nvl2str(new_rsrc, new_name, new_namelen + 1) == -1) 470 goto done; 471 } 472 match = (fmri_present == new_fmri_present && 473 (fmri_present == 0 || 474 topo_fmri_strcmp(ftp->ft_hdl, name, new_name))); 475 done: 476 if (name != NULL) 477 fmd_free(name, namelen + 1); 478 if (new_name != NULL) 479 fmd_free(new_name, new_namelen + 1); 480 fmd_topo_rele(ftp); 481 return (match); 482 } 483 484 static int 485 fmd_case_match_suspect(fmd_case_susp_t *cis, fmd_case_susp_t *xcis) 486 { 487 char *class, *new_class; 488 489 if (!fmd_case_compare_elem(cis->cis_nvl, xcis->cis_nvl, FM_FAULT_ASRU)) 490 return (0); 491 if (!fmd_case_compare_elem(cis->cis_nvl, xcis->cis_nvl, 492 FM_FAULT_RESOURCE)) 493 return (0); 494 if (!fmd_case_compare_elem(cis->cis_nvl, xcis->cis_nvl, FM_FAULT_FRU)) 495 return (0); 496 (void) nvlist_lookup_string(xcis->cis_nvl, FM_CLASS, &class); 497 (void) nvlist_lookup_string(cis->cis_nvl, FM_CLASS, &new_class); 498 return (strcmp(class, new_class) == 0); 499 } 500 501 /* 502 * see if an identical suspect list already exists in the cache 503 */ 504 static int 505 fmd_case_check_for_dups(fmd_case_t *cp) 506 { 507 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp, *xcip; 508 fmd_case_hash_t *chp = fmd.d_cases; 509 fmd_case_susp_t *xcis, *cis; 510 int match = 0, match_susp; 511 uint_t h; 512 513 (void) pthread_rwlock_rdlock(&chp->ch_lock); 514 515 /* 516 * Find all cases with this code 517 */ 518 h = fmd_strhash(cip->ci_code) % chp->ch_hashlen; 519 for (xcip = chp->ch_code_hash[h]; xcip != NULL; 520 xcip = xcip->ci_code_next) { 521 /* 522 * only look for any cases (apart from this one) 523 * whose code and number of suspects match 524 */ 525 if (xcip == cip || fmd_case_tryhold(xcip) == NULL) 526 continue; 527 if (strcmp(xcip->ci_code, cip->ci_code) != 0 || 528 xcip->ci_nsuspects != cip->ci_nsuspects) { 529 fmd_case_rele((fmd_case_t *)xcip); 530 continue; 531 } 532 533 /* 534 * For each suspect in one list, check if there 535 * is an identical suspect in the other list 536 */ 537 match = 1; 538 for (xcis = xcip->ci_suspects; xcis != NULL; 539 xcis = xcis->cis_next) { 540 match_susp = 0; 541 for (cis = cip->ci_suspects; cis != NULL; 542 cis = cis->cis_next) { 543 if (fmd_case_match_suspect(cis, xcis) == 1) { 544 match_susp = 1; 545 break; 546 } 547 } 548 if (match_susp == 0) { 549 match = 0; 550 break; 551 } 552 } 553 fmd_case_rele((fmd_case_t *)xcip); 554 if (match) { 555 (void) pthread_rwlock_unlock(&chp->ch_lock); 556 return (1); 557 } 558 } 559 (void) pthread_rwlock_unlock(&chp->ch_lock); 560 return (0); 561 } 562 563 /* 564 * Convict suspects in a case by applying a conviction policy and updating the 565 * resource cache prior to emitting the list.suspect event for the given case. 566 * At present, our policy is very simple: convict every suspect in the case. 567 * In the future, this policy can be extended and made configurable to permit: 568 * 569 * - convicting the suspect with the highest FIT rate 570 * - convicting the suspect with the cheapest FRU 571 * - convicting the suspect with the FRU that is in a depot's inventory 572 * - convicting the suspect with the longest lifetime 573 * 574 * and so forth. A word to the wise: this problem is significantly harder that 575 * it seems at first glance. Future work should heed the following advice: 576 * 577 * Hacking the policy into C code here is a very bad idea. The policy needs to 578 * be decided upon very carefully and fundamentally encodes knowledge of what 579 * suspect list combinations can be emitted by what diagnosis engines. As such 580 * fmd's code is the wrong location, because that would require fmd itself to 581 * be updated for every diagnosis engine change, defeating the entire design. 582 * The FMA Event Registry knows the suspect list combinations: policy inputs 583 * can be derived from it and used to produce per-module policy configuration. 584 * 585 * If the policy needs to be dynamic and not statically fixed at either fmd 586 * startup or module load time, any implementation of dynamic policy retrieval 587 * must employ some kind of caching mechanism or be part of a built-in module. 588 * The fmd_case_convict() function is called with locks held inside of fmd and 589 * is not a place where unbounded blocking on some inter-process or inter- 590 * system communication to another service (e.g. another daemon) can occur. 591 */ 592 static int 593 fmd_case_convict(fmd_case_t *cp) 594 { 595 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 596 fmd_asru_hash_t *ahp = fmd.d_asrus; 597 598 fmd_case_susp_t *cis; 599 fmd_asru_link_t *alp; 600 601 (void) pthread_mutex_lock(&cip->ci_lock); 602 (void) fmd_case_mkcode(cp); 603 if (fmd_case_check_for_dups(cp) == 1) { 604 (void) pthread_mutex_unlock(&cip->ci_lock); 605 return (1); 606 } 607 608 /* 609 * no suspect list already exists - allocate new cache entries 610 */ 611 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) { 612 if ((alp = fmd_asru_hash_create_entry(ahp, 613 cp, cis->cis_nvl)) == NULL) { 614 fmd_error(EFMD_CASE_EVENT, "cannot convict suspect in " 615 "%s: %s\n", cip->ci_uuid, fmd_strerror(errno)); 616 continue; 617 } 618 (void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE, 0); 619 (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY); 620 } 621 622 (void) pthread_mutex_unlock(&cip->ci_lock); 623 return (0); 624 } 625 626 void 627 fmd_case_publish(fmd_case_t *cp, uint_t state) 628 { 629 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 630 fmd_event_t *e; 631 nvlist_t *nvl; 632 char *class; 633 634 if (state == FMD_CASE_CURRENT) 635 state = cip->ci_state; /* use current state */ 636 637 switch (state) { 638 case FMD_CASE_SOLVED: 639 (void) pthread_mutex_lock(&cip->ci_lock); 640 641 /* 642 * If we already have a code, then case is already solved. 643 */ 644 if (cip->ci_code != NULL) { 645 (void) pthread_mutex_unlock(&cip->ci_lock); 646 break; 647 } 648 649 if (cip->ci_tv_valid == 0) { 650 fmd_time_gettimeofday(&cip->ci_tv); 651 cip->ci_tv_valid = 1; 652 } 653 (void) pthread_mutex_unlock(&cip->ci_lock); 654 655 if (fmd_case_convict(cp) == 1) { /* dupclose */ 656 cip->ci_flags &= ~FMD_CF_SOLVED; 657 fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, 0); 658 break; 659 } 660 nvl = fmd_case_mkevent(cp, FM_LIST_SUSPECT_CLASS); 661 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 662 663 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 664 (void) pthread_rwlock_rdlock(&fmd.d_log_lock); 665 fmd_log_append(fmd.d_fltlog, e, cp); 666 (void) pthread_rwlock_unlock(&fmd.d_log_lock); 667 fmd_dispq_dispatch(fmd.d_disp, e, class); 668 669 (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock); 670 cip->ci_mod->mod_stats->ms_casesolved.fmds_value.ui64++; 671 (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock); 672 673 break; 674 675 case FMD_CASE_CLOSE_WAIT: 676 fmd_case_hold(cp); 677 e = fmd_event_create(FMD_EVT_CLOSE, FMD_HRT_NOW, NULL, cp); 678 fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e); 679 680 (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock); 681 cip->ci_mod->mod_stats->ms_caseclosed.fmds_value.ui64++; 682 (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock); 683 684 break; 685 686 case FMD_CASE_CLOSED: 687 nvl = fmd_case_mkevent(cp, FM_LIST_ISOLATED_CLASS); 688 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 689 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 690 fmd_dispq_dispatch(fmd.d_disp, e, class); 691 break; 692 693 case FMD_CASE_REPAIRED: 694 nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS); 695 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 696 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 697 (void) pthread_rwlock_rdlock(&fmd.d_log_lock); 698 fmd_log_append(fmd.d_fltlog, e, cp); 699 (void) pthread_rwlock_unlock(&fmd.d_log_lock); 700 fmd_dispq_dispatch(fmd.d_disp, e, class); 701 break; 702 703 case FMD_CASE_RESOLVED: 704 nvl = fmd_case_mkevent(cp, FM_LIST_RESOLVED_CLASS); 705 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 706 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 707 (void) pthread_rwlock_rdlock(&fmd.d_log_lock); 708 fmd_log_append(fmd.d_fltlog, e, cp); 709 (void) pthread_rwlock_unlock(&fmd.d_log_lock); 710 fmd_dispq_dispatch(fmd.d_disp, e, class); 711 break; 712 } 713 } 714 715 fmd_case_t * 716 fmd_case_hash_lookup(fmd_case_hash_t *chp, const char *uuid) 717 { 718 fmd_case_impl_t *cip; 719 uint_t h; 720 721 (void) pthread_rwlock_rdlock(&chp->ch_lock); 722 h = fmd_strhash(uuid) % chp->ch_hashlen; 723 724 for (cip = chp->ch_hash[h]; cip != NULL; cip = cip->ci_next) { 725 if (strcmp(cip->ci_uuid, uuid) == 0) 726 break; 727 } 728 729 /* 730 * If deleting bit is set, treat the case as if it doesn't exist. 731 */ 732 if (cip != NULL) 733 cip = fmd_case_tryhold(cip); 734 735 if (cip == NULL) 736 (void) fmd_set_errno(EFMD_CASE_INVAL); 737 738 (void) pthread_rwlock_unlock(&chp->ch_lock); 739 return ((fmd_case_t *)cip); 740 } 741 742 static fmd_case_impl_t * 743 fmd_case_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip) 744 { 745 fmd_case_impl_t *eip; 746 uint_t h; 747 748 (void) pthread_rwlock_wrlock(&chp->ch_lock); 749 h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen; 750 751 for (eip = chp->ch_hash[h]; eip != NULL; eip = eip->ci_next) { 752 if (strcmp(cip->ci_uuid, eip->ci_uuid) == 0 && 753 fmd_case_tryhold(eip) != NULL) { 754 (void) pthread_rwlock_unlock(&chp->ch_lock); 755 return (eip); /* uuid already present */ 756 } 757 } 758 759 cip->ci_next = chp->ch_hash[h]; 760 chp->ch_hash[h] = cip; 761 762 chp->ch_count++; 763 ASSERT(chp->ch_count != 0); 764 765 (void) pthread_rwlock_unlock(&chp->ch_lock); 766 return (cip); 767 } 768 769 static void 770 fmd_case_hash_delete(fmd_case_hash_t *chp, fmd_case_impl_t *cip) 771 { 772 fmd_case_impl_t *cp, **pp; 773 uint_t h; 774 775 ASSERT(MUTEX_HELD(&cip->ci_lock)); 776 777 cip->ci_flags |= FMD_CF_DELETING; 778 (void) pthread_mutex_unlock(&cip->ci_lock); 779 780 (void) pthread_rwlock_wrlock(&chp->ch_lock); 781 782 h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen; 783 pp = &chp->ch_hash[h]; 784 785 for (cp = *pp; cp != NULL; cp = cp->ci_next) { 786 if (cp != cip) 787 pp = &cp->ci_next; 788 else 789 break; 790 } 791 792 if (cp == NULL) { 793 fmd_panic("case %p (%s) not found on hash chain %u\n", 794 (void *)cip, cip->ci_uuid, h); 795 } 796 797 *pp = cp->ci_next; 798 cp->ci_next = NULL; 799 800 /* 801 * delete from code hash if it is on it 802 */ 803 fmd_case_code_hash_delete(chp, cip); 804 805 ASSERT(chp->ch_count != 0); 806 chp->ch_count--; 807 808 (void) pthread_rwlock_unlock(&chp->ch_lock); 809 810 (void) pthread_mutex_lock(&cip->ci_lock); 811 ASSERT(cip->ci_flags & FMD_CF_DELETING); 812 } 813 814 fmd_case_t * 815 fmd_case_create(fmd_module_t *mp, void *data) 816 { 817 fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP); 818 fmd_case_impl_t *eip = NULL; 819 uuid_t uuid; 820 821 (void) pthread_mutex_init(&cip->ci_lock, NULL); 822 fmd_buf_hash_create(&cip->ci_bufs); 823 824 fmd_module_hold(mp); 825 cip->ci_mod = mp; 826 cip->ci_refs = 1; 827 cip->ci_state = FMD_CASE_UNSOLVED; 828 cip->ci_flags = FMD_CF_DIRTY; 829 cip->ci_data = data; 830 831 /* 832 * Calling libuuid: get a clue. The library interfaces cleverly do not 833 * define any constant for the length of an unparse string, and do not 834 * permit the caller to specify a buffer length for safety. The spec 835 * says it will be 36 bytes, but we make it tunable just in case. 836 */ 837 (void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &cip->ci_uuidlen); 838 cip->ci_uuid = fmd_zalloc(cip->ci_uuidlen + 1, FMD_SLEEP); 839 840 /* 841 * We expect this loop to execute only once, but code it defensively 842 * against the possibility of libuuid bugs. Keep generating uuids and 843 * attempting to do a hash insert until we get a unique one. 844 */ 845 do { 846 if (eip != NULL) 847 fmd_case_rele((fmd_case_t *)eip); 848 uuid_generate(uuid); 849 uuid_unparse(uuid, cip->ci_uuid); 850 } while ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip); 851 852 ASSERT(fmd_module_locked(mp)); 853 fmd_list_append(&mp->mod_cases, cip); 854 fmd_module_setcdirty(mp); 855 856 (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock); 857 cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++; 858 (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock); 859 860 return ((fmd_case_t *)cip); 861 } 862 863 static void 864 fmd_case_destroy_suspects(fmd_case_impl_t *cip) 865 { 866 fmd_case_susp_t *cis, *ncis; 867 868 ASSERT(MUTEX_HELD(&cip->ci_lock)); 869 870 for (cis = cip->ci_suspects; cis != NULL; cis = ncis) { 871 ncis = cis->cis_next; 872 nvlist_free(cis->cis_nvl); 873 fmd_free(cis, sizeof (fmd_case_susp_t)); 874 } 875 876 cip->ci_suspects = NULL; 877 cip->ci_nsuspects = 0; 878 } 879 880 fmd_case_t * 881 fmd_case_recreate(fmd_module_t *mp, fmd_xprt_t *xp, 882 uint_t state, const char *uuid, const char *code) 883 { 884 fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP); 885 fmd_case_impl_t *eip; 886 887 ASSERT(state < FMD_CASE_RESOLVED); 888 889 (void) pthread_mutex_init(&cip->ci_lock, NULL); 890 fmd_buf_hash_create(&cip->ci_bufs); 891 892 fmd_module_hold(mp); 893 cip->ci_mod = mp; 894 cip->ci_xprt = xp; 895 cip->ci_refs = 1; 896 cip->ci_state = state; 897 cip->ci_uuid = fmd_strdup(uuid, FMD_SLEEP); 898 cip->ci_uuidlen = strlen(cip->ci_uuid); 899 cip->ci_code = fmd_strdup(code, FMD_SLEEP); 900 cip->ci_codelen = cip->ci_code ? strlen(cip->ci_code) + 1 : 0; 901 902 if (state > FMD_CASE_CLOSE_WAIT) 903 cip->ci_flags |= FMD_CF_SOLVED; 904 905 /* 906 * Insert the case into the global case hash. If the specified UUID is 907 * already present, check to see if it is an orphan: if so, reclaim it; 908 * otherwise if it is owned by a different module then return NULL. 909 */ 910 if ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip) { 911 (void) pthread_mutex_lock(&cip->ci_lock); 912 cip->ci_refs--; /* decrement to zero */ 913 fmd_case_destroy((fmd_case_t *)cip, B_FALSE); 914 915 cip = eip; /* switch 'cip' to the existing case */ 916 (void) pthread_mutex_lock(&cip->ci_lock); 917 918 /* 919 * If the ASRU cache is trying to recreate an orphan, then just 920 * return the existing case that we found without changing it. 921 */ 922 if (mp == fmd.d_rmod) { 923 /* 924 * When recreating an orphan case, state passed in may 925 * either be CLOSED (faulty) or REPAIRED (!faulty). If 926 * any suspects are still CLOSED (faulty) then the 927 * overall state needs to be CLOSED. 928 */ 929 if (state == FMD_CASE_CLOSED) 930 cip->ci_state = FMD_CASE_CLOSED; 931 (void) pthread_mutex_unlock(&cip->ci_lock); 932 fmd_case_rele((fmd_case_t *)cip); 933 return ((fmd_case_t *)cip); 934 } 935 936 /* 937 * If the existing case isn't an orphan or is being proxied, 938 * then we have a UUID conflict: return failure to the caller. 939 */ 940 if (cip->ci_mod != fmd.d_rmod || xp != NULL) { 941 (void) pthread_mutex_unlock(&cip->ci_lock); 942 fmd_case_rele((fmd_case_t *)cip); 943 return (NULL); 944 } 945 946 /* 947 * If the new module is reclaiming an orphaned case, remove 948 * the case from the root module, switch ci_mod, and then fall 949 * through to adding the case to the new owner module 'mp'. 950 */ 951 fmd_module_lock(cip->ci_mod); 952 fmd_list_delete(&cip->ci_mod->mod_cases, cip); 953 fmd_module_unlock(cip->ci_mod); 954 955 fmd_module_rele(cip->ci_mod); 956 cip->ci_mod = mp; 957 fmd_module_hold(mp); 958 959 /* 960 * It's possible that fmd crashed or was restarted during a 961 * previous solve operation between the asru cache being created 962 * and the ckpt file being updated to SOLVED. Thus when the DE 963 * recreates the case here from the checkpoint file, the state 964 * will be UNSOLVED and yet we are having to reclaim because 965 * the case was in the asru cache. If this happens, revert the 966 * case back to the UNSOLVED state and let the DE solve it again 967 */ 968 if (state == FMD_CASE_UNSOLVED) { 969 fmd_asru_hash_delete_case(fmd.d_asrus, 970 (fmd_case_t *)cip); 971 fmd_case_destroy_suspects(cip); 972 fmd_case_code_hash_delete(fmd.d_cases, cip); 973 fmd_free(cip->ci_code, cip->ci_codelen); 974 cip->ci_code = NULL; 975 cip->ci_codelen = 0; 976 cip->ci_tv_valid = 0; 977 } 978 979 cip->ci_state = state; 980 981 (void) pthread_mutex_unlock(&cip->ci_lock); 982 fmd_case_rele((fmd_case_t *)cip); 983 } else { 984 /* 985 * add into hash of solved cases 986 */ 987 if (cip->ci_code) 988 fmd_case_code_hash_insert(fmd.d_cases, cip); 989 } 990 991 ASSERT(fmd_module_locked(mp)); 992 fmd_list_append(&mp->mod_cases, cip); 993 994 (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock); 995 cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++; 996 (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock); 997 998 return ((fmd_case_t *)cip); 999 } 1000 1001 void 1002 fmd_case_destroy(fmd_case_t *cp, int visible) 1003 { 1004 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1005 fmd_case_item_t *cit, *ncit; 1006 1007 ASSERT(MUTEX_HELD(&cip->ci_lock)); 1008 ASSERT(cip->ci_refs == 0); 1009 1010 if (visible) { 1011 TRACE((FMD_DBG_CASE, "deleting case %s", cip->ci_uuid)); 1012 fmd_case_hash_delete(fmd.d_cases, cip); 1013 } 1014 1015 for (cit = cip->ci_items; cit != NULL; cit = ncit) { 1016 ncit = cit->cit_next; 1017 fmd_event_rele(cit->cit_event); 1018 fmd_free(cit, sizeof (fmd_case_item_t)); 1019 } 1020 1021 fmd_case_destroy_suspects(cip); 1022 1023 if (cip->ci_principal != NULL) 1024 fmd_event_rele(cip->ci_principal); 1025 1026 fmd_free(cip->ci_uuid, cip->ci_uuidlen + 1); 1027 fmd_free(cip->ci_code, cip->ci_codelen); 1028 (void) fmd_buf_hash_destroy(&cip->ci_bufs); 1029 1030 fmd_module_rele(cip->ci_mod); 1031 fmd_free(cip, sizeof (fmd_case_impl_t)); 1032 } 1033 1034 void 1035 fmd_case_hold(fmd_case_t *cp) 1036 { 1037 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1038 1039 (void) pthread_mutex_lock(&cip->ci_lock); 1040 fmd_case_hold_locked(cp); 1041 (void) pthread_mutex_unlock(&cip->ci_lock); 1042 } 1043 1044 void 1045 fmd_case_hold_locked(fmd_case_t *cp) 1046 { 1047 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1048 1049 ASSERT(MUTEX_HELD(&cip->ci_lock)); 1050 if (cip->ci_flags & FMD_CF_DELETING) 1051 fmd_panic("attempt to hold a deleting case %p (%s)\n", 1052 (void *)cip, cip->ci_uuid); 1053 cip->ci_refs++; 1054 ASSERT(cip->ci_refs != 0); 1055 } 1056 1057 static fmd_case_impl_t * 1058 fmd_case_tryhold(fmd_case_impl_t *cip) 1059 { 1060 /* 1061 * If the case's "deleting" bit is unset, hold and return case, 1062 * otherwise, return NULL. 1063 */ 1064 (void) pthread_mutex_lock(&cip->ci_lock); 1065 if (cip->ci_flags & FMD_CF_DELETING) { 1066 (void) pthread_mutex_unlock(&cip->ci_lock); 1067 cip = NULL; 1068 } else { 1069 fmd_case_hold_locked((fmd_case_t *)cip); 1070 (void) pthread_mutex_unlock(&cip->ci_lock); 1071 } 1072 return (cip); 1073 } 1074 1075 void 1076 fmd_case_rele(fmd_case_t *cp) 1077 { 1078 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1079 1080 (void) pthread_mutex_lock(&cip->ci_lock); 1081 ASSERT(cip->ci_refs != 0); 1082 1083 if (--cip->ci_refs == 0) 1084 fmd_case_destroy((fmd_case_t *)cip, B_TRUE); 1085 else 1086 (void) pthread_mutex_unlock(&cip->ci_lock); 1087 } 1088 1089 void 1090 fmd_case_rele_locked(fmd_case_t *cp) 1091 { 1092 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1093 1094 ASSERT(MUTEX_HELD(&cip->ci_lock)); 1095 --cip->ci_refs; 1096 ASSERT(cip->ci_refs != 0); 1097 } 1098 1099 int 1100 fmd_case_insert_principal(fmd_case_t *cp, fmd_event_t *ep) 1101 { 1102 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1103 fmd_case_item_t *cit; 1104 fmd_event_t *oep; 1105 uint_t state; 1106 int new; 1107 1108 fmd_event_hold(ep); 1109 (void) pthread_mutex_lock(&cip->ci_lock); 1110 1111 if (cip->ci_flags & FMD_CF_SOLVED) 1112 state = FMD_EVS_DIAGNOSED; 1113 else 1114 state = FMD_EVS_ACCEPTED; 1115 1116 oep = cip->ci_principal; 1117 cip->ci_principal = ep; 1118 1119 for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) { 1120 if (cit->cit_event == ep) 1121 break; 1122 } 1123 1124 cip->ci_flags |= FMD_CF_DIRTY; 1125 new = cit == NULL && ep != oep; 1126 1127 (void) pthread_mutex_unlock(&cip->ci_lock); 1128 1129 fmd_module_setcdirty(cip->ci_mod); 1130 fmd_event_transition(ep, state); 1131 1132 if (oep != NULL) 1133 fmd_event_rele(oep); 1134 1135 return (new); 1136 } 1137 1138 int 1139 fmd_case_insert_event(fmd_case_t *cp, fmd_event_t *ep) 1140 { 1141 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1142 fmd_case_item_t *cit; 1143 uint_t state; 1144 int new; 1145 1146 (void) pthread_mutex_lock(&cip->ci_lock); 1147 1148 if (cip->ci_flags & FMD_CF_SOLVED) 1149 state = FMD_EVS_DIAGNOSED; 1150 else 1151 state = FMD_EVS_ACCEPTED; 1152 1153 for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) { 1154 if (cit->cit_event == ep) 1155 break; 1156 } 1157 1158 new = cit == NULL && ep != cip->ci_principal; 1159 1160 /* 1161 * If the event is already in the case or the case is already solved, 1162 * there is no reason to save it: just transition it appropriately. 1163 */ 1164 if (cit != NULL || (cip->ci_flags & FMD_CF_SOLVED)) { 1165 (void) pthread_mutex_unlock(&cip->ci_lock); 1166 fmd_event_transition(ep, state); 1167 return (new); 1168 } 1169 1170 cit = fmd_alloc(sizeof (fmd_case_item_t), FMD_SLEEP); 1171 fmd_event_hold(ep); 1172 1173 cit->cit_next = cip->ci_items; 1174 cit->cit_event = ep; 1175 1176 cip->ci_items = cit; 1177 cip->ci_nitems++; 1178 1179 cip->ci_flags |= FMD_CF_DIRTY; 1180 (void) pthread_mutex_unlock(&cip->ci_lock); 1181 1182 fmd_module_setcdirty(cip->ci_mod); 1183 fmd_event_transition(ep, state); 1184 1185 return (new); 1186 } 1187 1188 void 1189 fmd_case_insert_suspect(fmd_case_t *cp, nvlist_t *nvl) 1190 { 1191 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1192 fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP); 1193 1194 (void) pthread_mutex_lock(&cip->ci_lock); 1195 ASSERT(cip->ci_state < FMD_CASE_CLOSE_WAIT); 1196 cip->ci_flags |= FMD_CF_DIRTY; 1197 1198 cis->cis_next = cip->ci_suspects; 1199 cis->cis_nvl = nvl; 1200 1201 cip->ci_suspects = cis; 1202 cip->ci_nsuspects++; 1203 1204 (void) pthread_mutex_unlock(&cip->ci_lock); 1205 fmd_module_setcdirty(cip->ci_mod); 1206 } 1207 1208 void 1209 fmd_case_recreate_suspect(fmd_case_t *cp, nvlist_t *nvl) 1210 { 1211 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1212 fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP); 1213 boolean_t b; 1214 1215 (void) pthread_mutex_lock(&cip->ci_lock); 1216 ASSERT(cip->ci_state == FMD_CASE_CLOSED || 1217 cip->ci_state == FMD_CASE_REPAIRED); 1218 ASSERT(cip->ci_mod == fmd.d_rmod); 1219 1220 cis->cis_next = cip->ci_suspects; 1221 cis->cis_nvl = nvl; 1222 1223 if (nvlist_lookup_boolean_value(nvl, 1224 FM_SUSPECT_MESSAGE, &b) == 0 && b == B_FALSE) 1225 cip->ci_flags |= FMD_CF_INVISIBLE; 1226 1227 cip->ci_suspects = cis; 1228 cip->ci_nsuspects++; 1229 1230 (void) pthread_mutex_unlock(&cip->ci_lock); 1231 } 1232 1233 void 1234 fmd_case_reset_suspects(fmd_case_t *cp) 1235 { 1236 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1237 1238 (void) pthread_mutex_lock(&cip->ci_lock); 1239 ASSERT(cip->ci_state < FMD_CASE_SOLVED); 1240 1241 fmd_case_destroy_suspects(cip); 1242 cip->ci_flags |= FMD_CF_DIRTY; 1243 1244 (void) pthread_mutex_unlock(&cip->ci_lock); 1245 fmd_module_setcdirty(cip->ci_mod); 1246 } 1247 1248 /*ARGSUSED*/ 1249 static void 1250 fmd_case_unusable(fmd_asru_link_t *alp, void *arg) 1251 { 1252 (void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE); 1253 } 1254 1255 /* 1256 * Grab ci_lock and update the case state and set the dirty bit. Then perform 1257 * whatever actions and emit whatever events are appropriate for the state. 1258 * Refer to the topmost block comment explaining the state machine for details. 1259 */ 1260 void 1261 fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags) 1262 { 1263 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1264 fmd_case_item_t *cit; 1265 fmd_event_t *e; 1266 int resolved = 0; 1267 int any_unusable_and_present = 0; 1268 1269 ASSERT(state <= FMD_CASE_RESOLVED); 1270 (void) pthread_mutex_lock(&cip->ci_lock); 1271 1272 if (!(cip->ci_flags & FMD_CF_SOLVED) && !(flags & FMD_CF_SOLVED)) 1273 flags &= ~(FMD_CF_ISOLATED | FMD_CF_REPAIRED); 1274 1275 cip->ci_flags |= flags; 1276 1277 if (cip->ci_state >= state) { 1278 (void) pthread_mutex_unlock(&cip->ci_lock); 1279 return; /* already in specified state */ 1280 } 1281 1282 TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid, 1283 _fmd_case_snames[cip->ci_state], _fmd_case_snames[state])); 1284 1285 cip->ci_state = state; 1286 cip->ci_flags |= FMD_CF_DIRTY; 1287 1288 if (cip->ci_xprt == NULL && cip->ci_mod != fmd.d_rmod) 1289 fmd_module_setcdirty(cip->ci_mod); 1290 1291 switch (state) { 1292 case FMD_CASE_SOLVED: 1293 for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) 1294 fmd_event_transition(cit->cit_event, FMD_EVS_DIAGNOSED); 1295 1296 if (cip->ci_principal != NULL) { 1297 fmd_event_transition(cip->ci_principal, 1298 FMD_EVS_DIAGNOSED); 1299 } 1300 break; 1301 1302 case FMD_CASE_CLOSE_WAIT: 1303 /* 1304 * If the case was never solved, do not change ASRUs. 1305 * If the case was never fmd_case_closed, do not change ASRUs. 1306 * If the case was repaired, do not change ASRUs. 1307 */ 1308 if ((cip->ci_flags & (FMD_CF_SOLVED | FMD_CF_ISOLATED | 1309 FMD_CF_REPAIRED)) == (FMD_CF_SOLVED | FMD_CF_ISOLATED)) 1310 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, 1311 fmd_case_unusable, NULL); 1312 1313 /* 1314 * If an orphaned case transitions to CLOSE_WAIT, the owning 1315 * module is no longer loaded: continue on to CASE_CLOSED. 1316 */ 1317 if (fmd_case_orphaned(cp)) 1318 state = cip->ci_state = FMD_CASE_CLOSED; 1319 break; 1320 1321 case FMD_CASE_REPAIRED: 1322 ASSERT(fmd_case_orphaned(cp)); 1323 1324 /* 1325 * If all suspects are already either usable or not present then 1326 * transition straight to RESOLVED state, publishing both the 1327 * list.repaired and list.resolved. 1328 */ 1329 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, 1330 fmd_case_unusable_and_present, &any_unusable_and_present); 1331 if (any_unusable_and_present) 1332 break; 1333 1334 fmd_module_lock(cip->ci_mod); 1335 fmd_list_delete(&cip->ci_mod->mod_cases, cip); 1336 fmd_module_unlock(cip->ci_mod); 1337 cip->ci_state = FMD_CASE_RESOLVED; 1338 (void) pthread_mutex_unlock(&cip->ci_lock); 1339 fmd_case_publish(cp, state); 1340 TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid, 1341 _fmd_case_snames[FMD_CASE_REPAIRED], 1342 _fmd_case_snames[FMD_CASE_RESOLVED])); 1343 state = FMD_CASE_RESOLVED; 1344 resolved = 1; 1345 (void) pthread_mutex_lock(&cip->ci_lock); 1346 break; 1347 1348 case FMD_CASE_RESOLVED: 1349 ASSERT(fmd_case_orphaned(cp)); 1350 1351 /* 1352 * If all suspects are already either usable or not present then 1353 * carry on, publish list.resolved and discard the case. 1354 */ 1355 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, 1356 fmd_case_unusable_and_present, &any_unusable_and_present); 1357 if (any_unusable_and_present) { 1358 (void) pthread_mutex_unlock(&cip->ci_lock); 1359 return; 1360 } 1361 1362 fmd_module_lock(cip->ci_mod); 1363 fmd_list_delete(&cip->ci_mod->mod_cases, cip); 1364 fmd_module_unlock(cip->ci_mod); 1365 resolved = 1; 1366 break; 1367 } 1368 1369 (void) pthread_mutex_unlock(&cip->ci_lock); 1370 1371 /* 1372 * If the module has initialized, then publish the appropriate event 1373 * for the new case state. If not, we are being called from the 1374 * checkpoint code during module load, in which case the module's 1375 * _fmd_init() routine hasn't finished yet, and our event dictionaries 1376 * may not be open yet, which will prevent us from computing the event 1377 * code. Defer the call to fmd_case_publish() by enqueuing a PUBLISH 1378 * event in our queue: this won't be processed until _fmd_init is done. 1379 */ 1380 if (cip->ci_mod->mod_flags & FMD_MOD_INIT) 1381 fmd_case_publish(cp, state); 1382 else { 1383 fmd_case_hold(cp); 1384 e = fmd_event_create(FMD_EVT_PUBLISH, FMD_HRT_NOW, NULL, cp); 1385 fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e); 1386 } 1387 1388 if (resolved) { 1389 /* 1390 * If we transitioned to RESOLVED, adjust the reference count to 1391 * reflect our removal from fmd.d_rmod->mod_cases above. If the 1392 * caller has not placed an additional hold on the case, it 1393 * will now be freed. 1394 */ 1395 (void) pthread_mutex_lock(&cip->ci_lock); 1396 fmd_asru_hash_delete_case(fmd.d_asrus, cp); 1397 (void) pthread_mutex_unlock(&cip->ci_lock); 1398 fmd_case_rele(cp); 1399 } 1400 } 1401 1402 /* 1403 * Transition the specified case to *at least* the specified state by first 1404 * re-validating the suspect list using the resource cache. This function is 1405 * employed by the checkpoint code when restoring a saved, solved case to see 1406 * if the state of the case has effectively changed while fmd was not running 1407 * or the module was not loaded. 1408 */ 1409 void 1410 fmd_case_transition_update(fmd_case_t *cp, uint_t state, uint_t flags) 1411 { 1412 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1413 1414 int usable = 0; /* are any suspects usable? */ 1415 1416 ASSERT(state >= FMD_CASE_SOLVED); 1417 (void) pthread_mutex_lock(&cip->ci_lock); 1418 1419 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_usable, &usable); 1420 1421 (void) pthread_mutex_unlock(&cip->ci_lock); 1422 1423 if (!usable) { 1424 state = MAX(state, FMD_CASE_CLOSE_WAIT); 1425 flags |= FMD_CF_ISOLATED; 1426 } 1427 1428 fmd_case_transition(cp, state, flags); 1429 } 1430 1431 void 1432 fmd_case_setdirty(fmd_case_t *cp) 1433 { 1434 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1435 1436 (void) pthread_mutex_lock(&cip->ci_lock); 1437 cip->ci_flags |= FMD_CF_DIRTY; 1438 (void) pthread_mutex_unlock(&cip->ci_lock); 1439 1440 fmd_module_setcdirty(cip->ci_mod); 1441 } 1442 1443 void 1444 fmd_case_clrdirty(fmd_case_t *cp) 1445 { 1446 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1447 1448 (void) pthread_mutex_lock(&cip->ci_lock); 1449 cip->ci_flags &= ~FMD_CF_DIRTY; 1450 (void) pthread_mutex_unlock(&cip->ci_lock); 1451 } 1452 1453 void 1454 fmd_case_commit(fmd_case_t *cp) 1455 { 1456 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1457 fmd_case_item_t *cit; 1458 1459 (void) pthread_mutex_lock(&cip->ci_lock); 1460 1461 if (cip->ci_flags & FMD_CF_DIRTY) { 1462 for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) 1463 fmd_event_commit(cit->cit_event); 1464 1465 if (cip->ci_principal != NULL) 1466 fmd_event_commit(cip->ci_principal); 1467 1468 fmd_buf_hash_commit(&cip->ci_bufs); 1469 cip->ci_flags &= ~FMD_CF_DIRTY; 1470 } 1471 1472 (void) pthread_mutex_unlock(&cip->ci_lock); 1473 } 1474 1475 /* 1476 * Indicate that the case may need to change state because one or more of the 1477 * ASRUs named as a suspect has changed state. We examine all the suspects 1478 * and if none are still faulty, we initiate a case close transition. 1479 */ 1480 void 1481 fmd_case_update(fmd_case_t *cp) 1482 { 1483 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1484 uint_t cstate; 1485 int faulty = 0; 1486 1487 (void) pthread_mutex_lock(&cip->ci_lock); 1488 cstate = cip->ci_state; 1489 1490 if (cip->ci_xprt != NULL || cip->ci_state < FMD_CASE_SOLVED) { 1491 (void) pthread_mutex_unlock(&cip->ci_lock); 1492 return; /* update is not appropriate */ 1493 } 1494 1495 if (cip->ci_flags & FMD_CF_REPAIRED) { 1496 (void) pthread_mutex_unlock(&cip->ci_lock); 1497 return; /* already repaired */ 1498 } 1499 1500 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty); 1501 (void) pthread_mutex_unlock(&cip->ci_lock); 1502 1503 if (faulty) { 1504 nvlist_t *nvl; 1505 fmd_event_t *e; 1506 char *class; 1507 1508 nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS); 1509 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 1510 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 1511 (void) pthread_rwlock_rdlock(&fmd.d_log_lock); 1512 fmd_log_append(fmd.d_fltlog, e, cp); 1513 (void) pthread_rwlock_unlock(&fmd.d_log_lock); 1514 fmd_dispq_dispatch(fmd.d_disp, e, class); 1515 return; /* one or more suspects are still marked faulty */ 1516 } 1517 1518 if (cstate == FMD_CASE_CLOSED) 1519 fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED); 1520 else 1521 fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED); 1522 } 1523 1524 /* 1525 * Delete a closed case from the module's case list once the fmdo_close() entry 1526 * point has run to completion. If the case is owned by a transport module, 1527 * tell the transport to proxy a case close on the other end of the transport. 1528 * If not, transition to the appropriate next state based on ci_flags. This 1529 * function represents the end of CLOSE_WAIT and transitions the case to either 1530 * CLOSED or REPAIRED or discards it entirely because it was never solved; 1531 * refer to the topmost block comment explaining the state machine for details. 1532 */ 1533 void 1534 fmd_case_delete(fmd_case_t *cp) 1535 { 1536 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1537 fmd_modstat_t *msp; 1538 size_t buftotal; 1539 1540 ASSERT(fmd_module_locked(cip->ci_mod)); 1541 fmd_list_delete(&cip->ci_mod->mod_cases, cip); 1542 buftotal = fmd_buf_hash_destroy(&cip->ci_bufs); 1543 1544 (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock); 1545 msp = cip->ci_mod->mod_stats; 1546 1547 ASSERT(msp->ms_caseopen.fmds_value.ui64 != 0); 1548 msp->ms_caseopen.fmds_value.ui64--; 1549 1550 ASSERT(msp->ms_buftotal.fmds_value.ui64 >= buftotal); 1551 msp->ms_buftotal.fmds_value.ui64 -= buftotal; 1552 1553 (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock); 1554 1555 if (cip->ci_xprt == NULL) 1556 fmd_module_setcdirty(cip->ci_mod); 1557 1558 fmd_module_rele(cip->ci_mod); 1559 cip->ci_mod = fmd.d_rmod; 1560 fmd_module_hold(cip->ci_mod); 1561 1562 /* 1563 * If the case is not proxied and it has been solved, then retain it 1564 * on the root module's case list at least until we're transitioned. 1565 * Otherwise free the case with our final fmd_case_rele() below. 1566 */ 1567 if (cip->ci_xprt == NULL && (cip->ci_flags & FMD_CF_SOLVED)) { 1568 fmd_module_lock(cip->ci_mod); 1569 fmd_list_append(&cip->ci_mod->mod_cases, cip); 1570 fmd_module_unlock(cip->ci_mod); 1571 fmd_case_hold(cp); 1572 } 1573 1574 /* 1575 * If a proxied case finishes CLOSE_WAIT, then it can be discarded 1576 * rather than orphaned because by definition it can have no entries 1577 * in the resource cache of the current fault manager. 1578 */ 1579 if (cip->ci_xprt != NULL) 1580 fmd_xprt_uuclose(cip->ci_xprt, cip->ci_uuid); 1581 else if (cip->ci_flags & FMD_CF_REPAIRED) 1582 fmd_case_transition(cp, FMD_CASE_REPAIRED, 0); 1583 else if (cip->ci_flags & FMD_CF_ISOLATED) 1584 fmd_case_transition(cp, FMD_CASE_CLOSED, 0); 1585 1586 fmd_case_rele(cp); 1587 } 1588 1589 void 1590 fmd_case_discard(fmd_case_t *cp) 1591 { 1592 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1593 1594 (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock); 1595 cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64--; 1596 (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock); 1597 1598 ASSERT(fmd_module_locked(cip->ci_mod)); 1599 fmd_list_delete(&cip->ci_mod->mod_cases, cip); 1600 fmd_case_rele(cp); 1601 } 1602 1603 /* 1604 * Indicate that the problem corresponding to a case has been repaired by 1605 * clearing the faulty bit on each ASRU named as a suspect. If the case hasn't 1606 * already been closed, this function initiates the transition to CLOSE_WAIT. 1607 * The caller must have the case held from fmd_case_hash_lookup(), so we can 1608 * grab and drop ci_lock without the case being able to be freed in between. 1609 */ 1610 int 1611 fmd_case_repair(fmd_case_t *cp) 1612 { 1613 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1614 uint_t cstate; 1615 1616 (void) pthread_mutex_lock(&cip->ci_lock); 1617 cstate = cip->ci_state; 1618 1619 if (cip->ci_xprt != NULL) { 1620 (void) pthread_mutex_unlock(&cip->ci_lock); 1621 return (fmd_set_errno(EFMD_CASE_OWNER)); 1622 } 1623 1624 if (cstate < FMD_CASE_SOLVED) { 1625 (void) pthread_mutex_unlock(&cip->ci_lock); 1626 return (fmd_set_errno(EFMD_CASE_STATE)); 1627 } 1628 1629 if (cip->ci_flags & FMD_CF_REPAIRED) { 1630 (void) pthread_mutex_unlock(&cip->ci_lock); 1631 return (0); /* already repaired */ 1632 } 1633 1634 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, NULL); 1635 (void) pthread_mutex_unlock(&cip->ci_lock); 1636 1637 if (cstate == FMD_CASE_CLOSED) 1638 fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED); 1639 else 1640 fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED); 1641 1642 return (0); 1643 } 1644 1645 int 1646 fmd_case_acquit(fmd_case_t *cp) 1647 { 1648 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1649 uint_t cstate; 1650 1651 (void) pthread_mutex_lock(&cip->ci_lock); 1652 cstate = cip->ci_state; 1653 1654 if (cip->ci_xprt != NULL) { 1655 (void) pthread_mutex_unlock(&cip->ci_lock); 1656 return (fmd_set_errno(EFMD_CASE_OWNER)); 1657 } 1658 1659 if (cstate < FMD_CASE_SOLVED) { 1660 (void) pthread_mutex_unlock(&cip->ci_lock); 1661 return (fmd_set_errno(EFMD_CASE_STATE)); 1662 } 1663 1664 if (cip->ci_flags & FMD_CF_REPAIRED) { 1665 (void) pthread_mutex_unlock(&cip->ci_lock); 1666 return (0); /* already repaired */ 1667 } 1668 1669 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_acquit, NULL); 1670 (void) pthread_mutex_unlock(&cip->ci_lock); 1671 1672 if (cstate == FMD_CASE_CLOSED) 1673 fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED); 1674 else 1675 fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED); 1676 1677 return (0); 1678 } 1679 1680 int 1681 fmd_case_contains(fmd_case_t *cp, fmd_event_t *ep) 1682 { 1683 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1684 fmd_case_item_t *cit; 1685 uint_t state; 1686 int rv = 0; 1687 1688 (void) pthread_mutex_lock(&cip->ci_lock); 1689 1690 if (cip->ci_state >= FMD_CASE_SOLVED) 1691 state = FMD_EVS_DIAGNOSED; 1692 else 1693 state = FMD_EVS_ACCEPTED; 1694 1695 for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) { 1696 if ((rv = fmd_event_equal(ep, cit->cit_event)) != 0) 1697 break; 1698 } 1699 1700 if (rv == 0 && cip->ci_principal != NULL) 1701 rv = fmd_event_equal(ep, cip->ci_principal); 1702 1703 (void) pthread_mutex_unlock(&cip->ci_lock); 1704 1705 if (rv != 0) 1706 fmd_event_transition(ep, state); 1707 1708 return (rv); 1709 } 1710 1711 int 1712 fmd_case_orphaned(fmd_case_t *cp) 1713 { 1714 return (((fmd_case_impl_t *)cp)->ci_mod == fmd.d_rmod); 1715 } 1716 1717 void 1718 fmd_case_settime(fmd_case_t *cp, time_t tv_sec, suseconds_t tv_usec) 1719 { 1720 ((fmd_case_impl_t *)cp)->ci_tv.tv_sec = tv_sec; 1721 ((fmd_case_impl_t *)cp)->ci_tv.tv_usec = tv_usec; 1722 ((fmd_case_impl_t *)cp)->ci_tv_valid = 1; 1723 } 1724 1725 /*ARGSUSED*/ 1726 void 1727 fmd_case_repair_replay_case(fmd_case_t *cp, void *arg) 1728 { 1729 int not_faulty = 0; 1730 int faulty = 0; 1731 nvlist_t *nvl; 1732 fmd_event_t *e; 1733 char *class; 1734 int any_unusable_and_present = 0; 1735 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1736 1737 if (cip->ci_state < FMD_CASE_SOLVED) 1738 return; 1739 1740 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty); 1741 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_not_faulty, 1742 ¬_faulty); 1743 1744 if (cip->ci_state >= FMD_CASE_REPAIRED && !faulty) { 1745 /* 1746 * If none of the suspects is faulty, replay the list.repaired. 1747 * If all suspects are already either usable or not present then 1748 * also transition straight to RESOLVED state. 1749 */ 1750 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, 1751 fmd_case_unusable_and_present, &any_unusable_and_present); 1752 if (!any_unusable_and_present) { 1753 fmd_module_lock(cip->ci_mod); 1754 fmd_list_delete(&cip->ci_mod->mod_cases, cip); 1755 fmd_module_unlock(cip->ci_mod); 1756 cip->ci_state = FMD_CASE_RESOLVED; 1757 1758 nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS); 1759 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 1760 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, 1761 class); 1762 fmd_dispq_dispatch(fmd.d_disp, e, class); 1763 1764 fmd_case_publish(cp, FMD_CASE_RESOLVED); 1765 (void) pthread_mutex_lock(&cip->ci_lock); 1766 fmd_asru_hash_delete_case(fmd.d_asrus, cp); 1767 (void) pthread_mutex_unlock(&cip->ci_lock); 1768 fmd_case_rele(cp); 1769 } else { 1770 nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS); 1771 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 1772 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, 1773 class); 1774 fmd_dispq_dispatch(fmd.d_disp, e, class); 1775 } 1776 } else if (faulty && not_faulty) { 1777 /* 1778 * if some but not all of the suspects are not faulty, replay 1779 * the list.updated. 1780 */ 1781 nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS); 1782 (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 1783 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 1784 fmd_dispq_dispatch(fmd.d_disp, e, class); 1785 } 1786 } 1787 1788 void 1789 fmd_case_repair_replay() 1790 { 1791 fmd_case_hash_apply(fmd.d_cases, fmd_case_repair_replay_case, NULL); 1792 } 1793