17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5749f21d3Swesolows * Common Development and Distribution License (the "License"). 6749f21d3Swesolows * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 21d9638e54Smws 227c478bd9Sstevel@tonic-gate /* 23*f6e214c7SGavin Maltby * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 26d9638e54Smws /* 27d9638e54Smws * FMD Case Subsystem 28d9638e54Smws * 29d9638e54Smws * Diagnosis engines are expected to group telemetry events related to the 30d9638e54Smws * diagnosis of a particular problem on the system into a set of cases. The 31d9638e54Smws * diagnosis engine may have any number of cases open at a given point in time. 
32d9638e54Smws * Some cases may eventually be *solved* by associating a suspect list of one 33d9638e54Smws * or more problems with the case, at which point fmd publishes a list.suspect 34d9638e54Smws * event for the case and it becomes visible to administrators and agents. 35d9638e54Smws * 36d9638e54Smws * Every case is named using a UUID, and is globally visible in the case hash. 37d9638e54Smws * Cases are reference-counted, except for the reference from the case hash 38d9638e54Smws * itself. Consumers of case references include modules, which store active 39d9638e54Smws * cases on the mod_cases list, ASRUs in the resource cache, and the RPC code. 40d9638e54Smws * 41d9638e54Smws * Cases obey the following state machine. In states UNSOLVED, SOLVED, and 42d9638e54Smws * CLOSE_WAIT, a case's module refers to the owning module (a diagnosis engine 43d9638e54Smws * or transport) and the case is referenced by the mod_cases list. Once the 44d9638e54Smws * case reaches the CLOSED or REPAIRED states, a case's module changes to refer 45d9638e54Smws * to the root module (fmd.d_rmod) and is deleted from the owner's mod_cases. 
46d9638e54Smws * 47d9638e54Smws * +------------+ 48d9638e54Smws * +----------| UNSOLVED | 49d9638e54Smws * | +------------+ 5025c6ff4bSstephh * | 1 | 51d9638e54Smws * | | 5225c6ff4bSstephh * | +-------v----+ 5325c6ff4bSstephh * 2 | | SOLVED | 5425c6ff4bSstephh * | +------------+ 5525c6ff4bSstephh * | 3 | 5 | 5625c6ff4bSstephh * +------------+ | | 5725c6ff4bSstephh * | | | 5825c6ff4bSstephh * +-v---v----v-+ 5925c6ff4bSstephh * | CLOSE_WAIT | 60d9638e54Smws * +------------+ 6125c6ff4bSstephh * | | | 6225c6ff4bSstephh * +-----------+ | +------------+ 6325c6ff4bSstephh * | 4 | | 6425c6ff4bSstephh * v +-----v------+ | 6525c6ff4bSstephh * discard | CLOSED | 6 | 6625c6ff4bSstephh * +------------+ | 6725c6ff4bSstephh * | | 6825c6ff4bSstephh * | +------------+ 6925c6ff4bSstephh * 7 | | 7025c6ff4bSstephh * +-----v----v-+ 7125c6ff4bSstephh * | REPAIRED | 7225c6ff4bSstephh * +------------+ 7325c6ff4bSstephh * | 7425c6ff4bSstephh * 8 | 7525c6ff4bSstephh * +-----v------+ 7625c6ff4bSstephh * | RESOLVED | 7725c6ff4bSstephh * +------------+ 7825c6ff4bSstephh * | 7925c6ff4bSstephh * v 8025c6ff4bSstephh * discard 81d9638e54Smws * 82d9638e54Smws * The state machine changes are triggered by calls to fmd_case_transition() 83d9638e54Smws * from various locations inside of fmd, as described below: 84d9638e54Smws * 85d9638e54Smws * [1] Called by: fmd_case_solve() 86d9638e54Smws * Actions: FMD_CF_SOLVED flag is set in ci_flags 87d9638e54Smws * conviction policy is applied to suspect list 88d9638e54Smws * suspects convicted are marked faulty (F) in R$ 89d9638e54Smws * list.suspect event logged and dispatched 90d9638e54Smws * 9125c6ff4bSstephh * [2] Called by: fmd_case_close(), fmd_case_uuclose() 9225c6ff4bSstephh * Actions: diagnosis engine fmdo_close() entry point scheduled 9325c6ff4bSstephh * case discarded upon exit from CLOSE_WAIT 9425c6ff4bSstephh * 9525c6ff4bSstephh * [3] Called by: fmd_case_close(), fmd_case_uuclose(), fmd_xprt_event_uuclose() 96d9638e54Smws * Actions: FMD_CF_ISOLATED 
flag is set in ci_flags 97d9638e54Smws * suspects convicted (F) are marked unusable (U) in R$ 98d9638e54Smws * diagnosis engine fmdo_close() entry point scheduled 9925c6ff4bSstephh * case transitions to CLOSED [4] upon exit from CLOSE_WAIT 100d9638e54Smws * 10125c6ff4bSstephh * [4] Called by: fmd_case_delete() (after fmdo_close() entry point returns) 102d9638e54Smws * Actions: list.isolated event dispatched 103d9638e54Smws * case deleted from module's list of open cases 104d9638e54Smws * 105d9638e54Smws * [5] Called by: fmd_case_repair(), fmd_case_update() 106d9638e54Smws * Actions: FMD_CF_REPAIR flag is set in ci_flags 107d9638e54Smws * diagnosis engine fmdo_close() entry point scheduled 108d9638e54Smws * case transitions to REPAIRED [6] upon exit from CLOSE_WAIT 109d9638e54Smws * 11025c6ff4bSstephh * [6] Called by: fmd_case_delete() (after fmdo_close() entry point returns) 11125c6ff4bSstephh * Actions: suspects convicted are marked non faulty (!F) in R$ 11225c6ff4bSstephh * list.repaired or list.updated event dispatched 113d9638e54Smws * 114d9638e54Smws * [7] Called by: fmd_case_repair(), fmd_case_update() 115d9638e54Smws * Actions: FMD_CF_REPAIR flag is set in ci_flags 116d9638e54Smws * suspects convicted are marked non faulty (!F) in R$ 11725c6ff4bSstephh * list.repaired or list.updated event dispatched 11825c6ff4bSstephh * 11925c6ff4bSstephh * [8] Called by: fmd_case_uuresolve() 12025c6ff4bSstephh * Actions: list.resolved event dispatched 12125c6ff4bSstephh * case is discarded 122d9638e54Smws */ 123d9638e54Smws 1247c478bd9Sstevel@tonic-gate #include <sys/fm/protocol.h> 1257c478bd9Sstevel@tonic-gate #include <uuid/uuid.h> 1267c478bd9Sstevel@tonic-gate #include <alloca.h> 1277c478bd9Sstevel@tonic-gate 1287c478bd9Sstevel@tonic-gate #include <fmd_alloc.h> 1297c478bd9Sstevel@tonic-gate #include <fmd_module.h> 1307c478bd9Sstevel@tonic-gate #include <fmd_error.h> 1317c478bd9Sstevel@tonic-gate #include <fmd_conf.h> 1327c478bd9Sstevel@tonic-gate #include <fmd_case.h> 
1337c478bd9Sstevel@tonic-gate #include <fmd_string.h> 1347c478bd9Sstevel@tonic-gate #include <fmd_subr.h> 1357c478bd9Sstevel@tonic-gate #include <fmd_protocol.h> 1367c478bd9Sstevel@tonic-gate #include <fmd_event.h> 1377c478bd9Sstevel@tonic-gate #include <fmd_eventq.h> 1387c478bd9Sstevel@tonic-gate #include <fmd_dispq.h> 1397c478bd9Sstevel@tonic-gate #include <fmd_buf.h> 1407c478bd9Sstevel@tonic-gate #include <fmd_log.h> 1417c478bd9Sstevel@tonic-gate #include <fmd_asru.h> 1420b9e3e76Smws #include <fmd_fmri.h> 143d9638e54Smws #include <fmd_xprt.h> 1447c478bd9Sstevel@tonic-gate 1457c478bd9Sstevel@tonic-gate #include <fmd.h> 1467c478bd9Sstevel@tonic-gate 1477c478bd9Sstevel@tonic-gate static const char *const _fmd_case_snames[] = { 1487c478bd9Sstevel@tonic-gate "UNSOLVED", /* FMD_CASE_UNSOLVED */ 1497c478bd9Sstevel@tonic-gate "SOLVED", /* FMD_CASE_SOLVED */ 150d9638e54Smws "CLOSE_WAIT", /* FMD_CASE_CLOSE_WAIT */ 1517c478bd9Sstevel@tonic-gate "CLOSED", /* FMD_CASE_CLOSED */ 15225c6ff4bSstephh "REPAIRED", /* FMD_CASE_REPAIRED */ 15325c6ff4bSstephh "RESOLVED" /* FMD_CASE_RESOLVED */ 1547c478bd9Sstevel@tonic-gate }; 1557c478bd9Sstevel@tonic-gate 15697c04605Scy152378 static fmd_case_impl_t *fmd_case_tryhold(fmd_case_impl_t *); 15797c04605Scy152378 1587c478bd9Sstevel@tonic-gate fmd_case_hash_t * 1597c478bd9Sstevel@tonic-gate fmd_case_hash_create(void) 1607c478bd9Sstevel@tonic-gate { 1617c478bd9Sstevel@tonic-gate fmd_case_hash_t *chp = fmd_alloc(sizeof (fmd_case_hash_t), FMD_SLEEP); 1627c478bd9Sstevel@tonic-gate 1637c478bd9Sstevel@tonic-gate (void) pthread_rwlock_init(&chp->ch_lock, NULL); 1647c478bd9Sstevel@tonic-gate chp->ch_hashlen = fmd.d_str_buckets; 1657c478bd9Sstevel@tonic-gate chp->ch_hash = fmd_zalloc(sizeof (void *) * chp->ch_hashlen, FMD_SLEEP); 166567cc2e6Sstephh chp->ch_code_hash = fmd_zalloc(sizeof (void *) * chp->ch_hashlen, 167567cc2e6Sstephh FMD_SLEEP); 168d9638e54Smws chp->ch_count = 0; 1697c478bd9Sstevel@tonic-gate 1707c478bd9Sstevel@tonic-gate return (chp); 
1717c478bd9Sstevel@tonic-gate } 1727c478bd9Sstevel@tonic-gate 1737c478bd9Sstevel@tonic-gate /* 1747c478bd9Sstevel@tonic-gate * Destroy the case hash. Unlike most of our hash tables, no active references 175d9638e54Smws * are kept by the case hash itself; all references come from other subsystems. 1767c478bd9Sstevel@tonic-gate * The hash must be destroyed after all modules are unloaded; if anything was 1777c478bd9Sstevel@tonic-gate * present in the hash it would be by definition a reference count leak. 1787c478bd9Sstevel@tonic-gate */ 1797c478bd9Sstevel@tonic-gate void 1807c478bd9Sstevel@tonic-gate fmd_case_hash_destroy(fmd_case_hash_t *chp) 1817c478bd9Sstevel@tonic-gate { 1827c478bd9Sstevel@tonic-gate fmd_free(chp->ch_hash, sizeof (void *) * chp->ch_hashlen); 183567cc2e6Sstephh fmd_free(chp->ch_code_hash, sizeof (void *) * chp->ch_hashlen); 1847c478bd9Sstevel@tonic-gate fmd_free(chp, sizeof (fmd_case_hash_t)); 1857c478bd9Sstevel@tonic-gate } 1867c478bd9Sstevel@tonic-gate 187d9638e54Smws /* 188d9638e54Smws * Take a snapshot of the case hash by placing an additional hold on each 189d9638e54Smws * member in an auxiliary array, and then call 'func' for each case. 
190d9638e54Smws */ 191d9638e54Smws void 192d9638e54Smws fmd_case_hash_apply(fmd_case_hash_t *chp, 193d9638e54Smws void (*func)(fmd_case_t *, void *), void *arg) 194d9638e54Smws { 195d9638e54Smws fmd_case_impl_t *cp, **cps, **cpp; 196d9638e54Smws uint_t cpc, i; 197d9638e54Smws 198d9638e54Smws (void) pthread_rwlock_rdlock(&chp->ch_lock); 199d9638e54Smws 200d9638e54Smws cps = cpp = fmd_alloc(chp->ch_count * sizeof (fmd_case_t *), FMD_SLEEP); 201d9638e54Smws cpc = chp->ch_count; 202d9638e54Smws 203d9638e54Smws for (i = 0; i < chp->ch_hashlen; i++) { 204c297654fSCheng Sean Ye for (cp = chp->ch_hash[i]; cp != NULL; cp = cp->ci_next) 205c297654fSCheng Sean Ye *cpp++ = fmd_case_tryhold(cp); 206d9638e54Smws } 207d9638e54Smws 208d9638e54Smws ASSERT(cpp == cps + cpc); 209d9638e54Smws (void) pthread_rwlock_unlock(&chp->ch_lock); 210d9638e54Smws 211d9638e54Smws for (i = 0; i < cpc; i++) { 212c297654fSCheng Sean Ye if (cps[i] != NULL) { 213d9638e54Smws func((fmd_case_t *)cps[i], arg); 214d9638e54Smws fmd_case_rele((fmd_case_t *)cps[i]); 215d9638e54Smws } 216c297654fSCheng Sean Ye } 217d9638e54Smws 218d9638e54Smws fmd_free(cps, cpc * sizeof (fmd_case_t *)); 219d9638e54Smws } 220d9638e54Smws 221567cc2e6Sstephh static void 222567cc2e6Sstephh fmd_case_code_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip) 223567cc2e6Sstephh { 224567cc2e6Sstephh uint_t h = fmd_strhash(cip->ci_code) % chp->ch_hashlen; 225567cc2e6Sstephh 226567cc2e6Sstephh cip->ci_code_next = chp->ch_code_hash[h]; 227567cc2e6Sstephh chp->ch_code_hash[h] = cip; 228567cc2e6Sstephh } 229567cc2e6Sstephh 230567cc2e6Sstephh static void 231567cc2e6Sstephh fmd_case_code_hash_delete(fmd_case_hash_t *chp, fmd_case_impl_t *cip) 232567cc2e6Sstephh { 233567cc2e6Sstephh fmd_case_impl_t **pp, *cp; 234567cc2e6Sstephh 235567cc2e6Sstephh if (cip->ci_code) { 236567cc2e6Sstephh uint_t h = fmd_strhash(cip->ci_code) % chp->ch_hashlen; 237567cc2e6Sstephh 238567cc2e6Sstephh pp = &chp->ch_code_hash[h]; 239567cc2e6Sstephh for (cp = *pp; 
cp != NULL; cp = cp->ci_code_next) { 240567cc2e6Sstephh if (cp != cip) 241567cc2e6Sstephh pp = &cp->ci_code_next; 242567cc2e6Sstephh else 243567cc2e6Sstephh break; 244567cc2e6Sstephh } 245567cc2e6Sstephh if (cp != NULL) { 246567cc2e6Sstephh *pp = cp->ci_code_next; 247567cc2e6Sstephh cp->ci_code_next = NULL; 248567cc2e6Sstephh } 249567cc2e6Sstephh } 250567cc2e6Sstephh } 251567cc2e6Sstephh 252d9638e54Smws /* 253d9638e54Smws * Look up the diagcode for this case and cache it in ci_code. If no suspects 254d9638e54Smws * were defined for this case or if the lookup fails, the event dictionary or 255d9638e54Smws * module code is broken, and we set the event code to a precomputed default. 256d9638e54Smws */ 257d9638e54Smws static const char * 258d9638e54Smws fmd_case_mkcode(fmd_case_t *cp) 2597c478bd9Sstevel@tonic-gate { 2607c478bd9Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 2617c478bd9Sstevel@tonic-gate fmd_case_susp_t *cis; 262567cc2e6Sstephh fmd_case_hash_t *chp = fmd.d_cases; 2637c478bd9Sstevel@tonic-gate 264d9638e54Smws char **keys, **keyp; 2657c478bd9Sstevel@tonic-gate const char *s; 2667c478bd9Sstevel@tonic-gate 2677c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cip->ci_lock)); 2687c478bd9Sstevel@tonic-gate ASSERT(cip->ci_state >= FMD_CASE_SOLVED); 2697c478bd9Sstevel@tonic-gate 270567cc2e6Sstephh /* 271567cc2e6Sstephh * delete any existing entry from code hash if it is on it 272567cc2e6Sstephh */ 273567cc2e6Sstephh fmd_case_code_hash_delete(chp, cip); 274567cc2e6Sstephh 275d9638e54Smws fmd_free(cip->ci_code, cip->ci_codelen); 276d9638e54Smws cip->ci_codelen = cip->ci_mod->mod_codelen; 277d9638e54Smws cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP); 2787c478bd9Sstevel@tonic-gate keys = keyp = alloca(sizeof (char *) * (cip->ci_nsuspects + 1)); 2797c478bd9Sstevel@tonic-gate 2807c478bd9Sstevel@tonic-gate for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) { 2817c478bd9Sstevel@tonic-gate if (nvlist_lookup_string(cis->cis_nvl, FM_CLASS, 
keyp) == 0) 2827c478bd9Sstevel@tonic-gate keyp++; 2837c478bd9Sstevel@tonic-gate } 2847c478bd9Sstevel@tonic-gate 2857c478bd9Sstevel@tonic-gate *keyp = NULL; /* mark end of keys[] array for libdiagcode */ 2867c478bd9Sstevel@tonic-gate 2877c478bd9Sstevel@tonic-gate if (cip->ci_nsuspects == 0 || fmd_module_dc_key2code( 288d9638e54Smws cip->ci_mod, keys, cip->ci_code, cip->ci_codelen) != 0) { 2897c478bd9Sstevel@tonic-gate (void) fmd_conf_getprop(fmd.d_conf, "nodiagcode", &s); 290d9638e54Smws fmd_free(cip->ci_code, cip->ci_codelen); 291d9638e54Smws cip->ci_codelen = strlen(s) + 1; 292d9638e54Smws cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP); 293d9638e54Smws (void) strcpy(cip->ci_code, s); 2947c478bd9Sstevel@tonic-gate } 2957c478bd9Sstevel@tonic-gate 296567cc2e6Sstephh /* 297567cc2e6Sstephh * add into hash of solved cases 298567cc2e6Sstephh */ 299567cc2e6Sstephh fmd_case_code_hash_insert(chp, cip); 300567cc2e6Sstephh 301d9638e54Smws return (cip->ci_code); 302d9638e54Smws } 303d9638e54Smws 304567cc2e6Sstephh typedef struct { 305567cc2e6Sstephh int *fcl_countp; 306c7d6cfd6SStephen Hanson int fcl_maxcount; 307567cc2e6Sstephh uint8_t *fcl_ba; 308567cc2e6Sstephh nvlist_t **fcl_nva; 309567cc2e6Sstephh int *fcl_msgp; 310567cc2e6Sstephh } fmd_case_lst_t; 311567cc2e6Sstephh 312567cc2e6Sstephh static void 313567cc2e6Sstephh fmd_case_set_lst(fmd_asru_link_t *alp, void *arg) 314567cc2e6Sstephh { 315567cc2e6Sstephh fmd_case_lst_t *entryp = (fmd_case_lst_t *)arg; 316567cc2e6Sstephh boolean_t b; 317567cc2e6Sstephh int state; 318567cc2e6Sstephh 319c7d6cfd6SStephen Hanson if (*entryp->fcl_countp >= entryp->fcl_maxcount) 320c7d6cfd6SStephen Hanson return; 321567cc2e6Sstephh if (nvlist_lookup_boolean_value(alp->al_event, FM_SUSPECT_MESSAGE, 322567cc2e6Sstephh &b) == 0 && b == B_FALSE) 323567cc2e6Sstephh *entryp->fcl_msgp = B_FALSE; 324567cc2e6Sstephh entryp->fcl_ba[*entryp->fcl_countp] = 0; 325567cc2e6Sstephh state = fmd_asru_al_getstate(alp); 32625c6ff4bSstephh if (state & 
FMD_ASRU_DEGRADED) 32725c6ff4bSstephh entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_DEGRADED; 328567cc2e6Sstephh if (state & FMD_ASRU_UNUSABLE) 329567cc2e6Sstephh entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_UNUSABLE; 330567cc2e6Sstephh if (state & FMD_ASRU_FAULTY) 331567cc2e6Sstephh entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_FAULTY; 332567cc2e6Sstephh if (!(state & FMD_ASRU_PRESENT)) 333567cc2e6Sstephh entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_NOT_PRESENT; 33425c6ff4bSstephh if (alp->al_reason == FMD_ASRU_REPAIRED) 33525c6ff4bSstephh entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPAIRED; 33625c6ff4bSstephh else if (alp->al_reason == FMD_ASRU_REPLACED) 33725c6ff4bSstephh entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPLACED; 33825c6ff4bSstephh else if (alp->al_reason == FMD_ASRU_ACQUITTED) 33925c6ff4bSstephh entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_ACQUITTED; 340567cc2e6Sstephh entryp->fcl_nva[*entryp->fcl_countp] = alp->al_event; 341567cc2e6Sstephh (*entryp->fcl_countp)++; 342567cc2e6Sstephh } 343567cc2e6Sstephh 344567cc2e6Sstephh static void 345567cc2e6Sstephh fmd_case_faulty(fmd_asru_link_t *alp, void *arg) 346567cc2e6Sstephh { 347567cc2e6Sstephh int *faultyp = (int *)arg; 348567cc2e6Sstephh 349567cc2e6Sstephh *faultyp |= (alp->al_flags & FMD_ASRU_FAULTY); 350567cc2e6Sstephh } 351567cc2e6Sstephh 352567cc2e6Sstephh static void 353567cc2e6Sstephh fmd_case_usable(fmd_asru_link_t *alp, void *arg) 354567cc2e6Sstephh { 355567cc2e6Sstephh int *usablep = (int *)arg; 356567cc2e6Sstephh 357567cc2e6Sstephh *usablep |= !(fmd_asru_al_getstate(alp) & FMD_ASRU_UNUSABLE); 358567cc2e6Sstephh } 359567cc2e6Sstephh 36025c6ff4bSstephh static void 36125c6ff4bSstephh fmd_case_not_faulty(fmd_asru_link_t *alp, void *arg) 36225c6ff4bSstephh { 36325c6ff4bSstephh int *not_faultyp = (int *)arg; 36425c6ff4bSstephh 36525c6ff4bSstephh *not_faultyp |= !(alp->al_flags & FMD_ASRU_FAULTY); 36625c6ff4bSstephh } 36725c6ff4bSstephh 36825c6ff4bSstephh /* 
36925c6ff4bSstephh * Have we got any suspects with an asru that are still unusable and present? 37025c6ff4bSstephh */ 37125c6ff4bSstephh static void 37225c6ff4bSstephh fmd_case_unusable_and_present(fmd_asru_link_t *alp, void *arg) 37325c6ff4bSstephh { 37425c6ff4bSstephh int *rvalp = (int *)arg; 375cbf75e67SStephen Hanson int state; 37625c6ff4bSstephh nvlist_t *asru; 37725c6ff4bSstephh 378cbf75e67SStephen Hanson /* 379cbf75e67SStephen Hanson * if this a proxy case and this suspect doesn't have an local asru 380cbf75e67SStephen Hanson * then state is unknown so we must assume it may still be unusable. 381cbf75e67SStephen Hanson */ 382cbf75e67SStephen Hanson if ((alp->al_flags & FMD_ASRU_PROXY) && 383cbf75e67SStephen Hanson !(alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU)) { 384cbf75e67SStephen Hanson *rvalp |= B_TRUE; 385cbf75e67SStephen Hanson return; 386cbf75e67SStephen Hanson } 387cbf75e67SStephen Hanson 388cbf75e67SStephen Hanson state = fmd_asru_al_getstate(alp); 38925c6ff4bSstephh if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) != 0) 39025c6ff4bSstephh return; 39125c6ff4bSstephh *rvalp |= ((state & FMD_ASRU_UNUSABLE) && (state & FMD_ASRU_PRESENT)); 39225c6ff4bSstephh } 39325c6ff4bSstephh 394d9638e54Smws nvlist_t * 395d9638e54Smws fmd_case_mkevent(fmd_case_t *cp, const char *class) 396d9638e54Smws { 397d9638e54Smws fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 398567cc2e6Sstephh nvlist_t **nva, *nvl; 399567cc2e6Sstephh uint8_t *ba; 400d9638e54Smws int msg = B_TRUE; 401627351e3Scy152378 const char *code; 402567cc2e6Sstephh fmd_case_lst_t fcl; 403567cc2e6Sstephh int count = 0; 404d9638e54Smws 405d9638e54Smws (void) pthread_mutex_lock(&cip->ci_lock); 406d9638e54Smws ASSERT(cip->ci_state >= FMD_CASE_SOLVED); 407d9638e54Smws 408567cc2e6Sstephh nva = alloca(sizeof (nvlist_t *) * cip->ci_nsuspects); 409567cc2e6Sstephh ba = alloca(sizeof (uint8_t) * cip->ci_nsuspects); 410d9638e54Smws 411d9638e54Smws /* 412d9638e54Smws * For each suspect associated with the 
case, store its fault event 413d9638e54Smws * nvlist in 'nva'. We also look to see if any of the suspect faults 414d9638e54Smws * have asked not to be messaged. If any of them have made such a 415d9638e54Smws * request, propagate that attribute to the composite list.* event. 416d9638e54Smws * Finally, store each suspect's faulty status into the bitmap 'ba'. 417d9638e54Smws */ 418567cc2e6Sstephh fcl.fcl_countp = &count; 419c7d6cfd6SStephen Hanson fcl.fcl_maxcount = cip->ci_nsuspects; 420567cc2e6Sstephh fcl.fcl_msgp = &msg; 421567cc2e6Sstephh fcl.fcl_ba = ba; 422567cc2e6Sstephh fcl.fcl_nva = nva; 423567cc2e6Sstephh fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_set_lst, &fcl); 424d9638e54Smws 425d9638e54Smws if (cip->ci_code == NULL) 426d9638e54Smws (void) fmd_case_mkcode(cp); 427627351e3Scy152378 /* 42825c6ff4bSstephh * For repair and updated event, we lookup diagcode from dict using key 42925c6ff4bSstephh * "list.repaired" or "list.updated" or "list.resolved". 430627351e3Scy152378 */ 431627351e3Scy152378 if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) 432627351e3Scy152378 (void) fmd_conf_getprop(fmd.d_conf, "repaircode", &code); 43325c6ff4bSstephh else if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0) 43425c6ff4bSstephh (void) fmd_conf_getprop(fmd.d_conf, "resolvecode", &code); 43525c6ff4bSstephh else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0) 43625c6ff4bSstephh (void) fmd_conf_getprop(fmd.d_conf, "updatecode", &code); 437627351e3Scy152378 else 438627351e3Scy152378 code = cip->ci_code; 439d9638e54Smws 44044743693Sstephh if (msg == B_FALSE) 44144743693Sstephh cip->ci_flags |= FMD_CF_INVISIBLE; 44244743693Sstephh 443cbf75e67SStephen Hanson /* 444cbf75e67SStephen Hanson * Use the ci_diag_de if one has been saved (eg for an injected fault). 445cbf75e67SStephen Hanson * Otherwise use the authority for the current module. 446cbf75e67SStephen Hanson */ 447cbf75e67SStephen Hanson nvl = fmd_protocol_list(class, cip->ci_diag_de == NULL ? 
448cbf75e67SStephen Hanson cip->ci_mod->mod_fmri : cip->ci_diag_de, cip->ci_uuid, code, count, 449540db9a9SStephen Hanson nva, ba, msg, &cip->ci_tv, cip->ci_injected); 450d9638e54Smws 451d9638e54Smws (void) pthread_mutex_unlock(&cip->ci_lock); 452d9638e54Smws return (nvl); 4537c478bd9Sstevel@tonic-gate } 4547c478bd9Sstevel@tonic-gate 4555750ef5cSStephen Hanson static int fmd_case_match_on_faulty_overlap = 1; 4565750ef5cSStephen Hanson static int fmd_case_match_on_acquit_overlap = 1; 4575750ef5cSStephen Hanson static int fmd_case_auto_acquit_isolated = 1; 4585750ef5cSStephen Hanson static int fmd_case_auto_acquit_non_acquitted = 1; 4595750ef5cSStephen Hanson static int fmd_case_too_recent = 10; /* time in seconds */ 4605750ef5cSStephen Hanson 461567cc2e6Sstephh static boolean_t 462567cc2e6Sstephh fmd_case_compare_elem(nvlist_t *nvl, nvlist_t *xnvl, const char *elem) 463567cc2e6Sstephh { 464567cc2e6Sstephh nvlist_t *new_rsrc; 465567cc2e6Sstephh nvlist_t *rsrc; 466567cc2e6Sstephh char *new_name = NULL; 467567cc2e6Sstephh char *name = NULL; 468567cc2e6Sstephh ssize_t new_namelen; 469567cc2e6Sstephh ssize_t namelen; 470567cc2e6Sstephh int fmri_present = 1; 471567cc2e6Sstephh int new_fmri_present = 1; 472567cc2e6Sstephh int match = B_FALSE; 473940d71d2Seschrock fmd_topo_t *ftp = fmd_topo_hold(); 474567cc2e6Sstephh 475567cc2e6Sstephh if (nvlist_lookup_nvlist(xnvl, elem, &rsrc) != 0) 476567cc2e6Sstephh fmri_present = 0; 477567cc2e6Sstephh else { 478567cc2e6Sstephh if ((namelen = fmd_fmri_nvl2str(rsrc, NULL, 0)) == -1) 479567cc2e6Sstephh goto done; 480567cc2e6Sstephh name = fmd_alloc(namelen + 1, FMD_SLEEP); 481567cc2e6Sstephh if (fmd_fmri_nvl2str(rsrc, name, namelen + 1) == -1) 482567cc2e6Sstephh goto done; 483567cc2e6Sstephh } 484567cc2e6Sstephh if (nvlist_lookup_nvlist(nvl, elem, &new_rsrc) != 0) 485567cc2e6Sstephh new_fmri_present = 0; 486567cc2e6Sstephh else { 487567cc2e6Sstephh if ((new_namelen = fmd_fmri_nvl2str(new_rsrc, NULL, 0)) == -1) 488567cc2e6Sstephh goto 
done; 489567cc2e6Sstephh new_name = fmd_alloc(new_namelen + 1, FMD_SLEEP); 490567cc2e6Sstephh if (fmd_fmri_nvl2str(new_rsrc, new_name, new_namelen + 1) == -1) 491567cc2e6Sstephh goto done; 492567cc2e6Sstephh } 493567cc2e6Sstephh match = (fmri_present == new_fmri_present && 494940d71d2Seschrock (fmri_present == 0 || 495940d71d2Seschrock topo_fmri_strcmp(ftp->ft_hdl, name, new_name))); 496567cc2e6Sstephh done: 497567cc2e6Sstephh if (name != NULL) 498567cc2e6Sstephh fmd_free(name, namelen + 1); 499567cc2e6Sstephh if (new_name != NULL) 500567cc2e6Sstephh fmd_free(new_name, new_namelen + 1); 501940d71d2Seschrock fmd_topo_rele(ftp); 502567cc2e6Sstephh return (match); 503567cc2e6Sstephh } 504567cc2e6Sstephh 505567cc2e6Sstephh static int 5065750ef5cSStephen Hanson fmd_case_match_suspect(nvlist_t *nvl1, nvlist_t *nvl2) 507567cc2e6Sstephh { 508567cc2e6Sstephh char *class, *new_class; 509567cc2e6Sstephh 5105750ef5cSStephen Hanson if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_ASRU)) 511567cc2e6Sstephh return (0); 5125750ef5cSStephen Hanson if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_RESOURCE)) 513567cc2e6Sstephh return (0); 5145750ef5cSStephen Hanson if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_FRU)) 515567cc2e6Sstephh return (0); 5165750ef5cSStephen Hanson (void) nvlist_lookup_string(nvl2, FM_CLASS, &class); 5175750ef5cSStephen Hanson (void) nvlist_lookup_string(nvl1, FM_CLASS, &new_class); 518567cc2e6Sstephh return (strcmp(class, new_class) == 0); 519567cc2e6Sstephh } 520567cc2e6Sstephh 5215750ef5cSStephen Hanson typedef struct { 5225750ef5cSStephen Hanson int *fcms_countp; 5235750ef5cSStephen Hanson int fcms_maxcount; 5245750ef5cSStephen Hanson fmd_case_impl_t *fcms_cip; 5255750ef5cSStephen Hanson uint8_t *fcms_new_susp_state; 5265750ef5cSStephen Hanson uint8_t *fcms_old_susp_state; 5275750ef5cSStephen Hanson uint8_t *fcms_old_match_state; 5285750ef5cSStephen Hanson } fcms_t; 5295750ef5cSStephen Hanson #define SUSPECT_STATE_FAULTY 0x1 5305750ef5cSStephen Hanson 
#define SUSPECT_STATE_ISOLATED 0x2 5315750ef5cSStephen Hanson #define SUSPECT_STATE_REMOVED 0x4 5325750ef5cSStephen Hanson #define SUSPECT_STATE_ACQUITED 0x8 5335750ef5cSStephen Hanson #define SUSPECT_STATE_REPAIRED 0x10 5345750ef5cSStephen Hanson #define SUSPECT_STATE_REPLACED 0x20 5355750ef5cSStephen Hanson #define SUSPECT_STATE_NO_MATCH 0x1 5365750ef5cSStephen Hanson 537567cc2e6Sstephh /* 5385750ef5cSStephen Hanson * This is called for each suspect in the old case. Compare it against each 5395750ef5cSStephen Hanson * suspect in the new case, setting fcms_old_susp_state and fcms_new_susp_state 5405750ef5cSStephen Hanson * as appropriate. fcms_new_susp_state will left as 0 if the suspect is not 5415750ef5cSStephen Hanson * found in the old case. 542567cc2e6Sstephh */ 5435750ef5cSStephen Hanson static void 5445750ef5cSStephen Hanson fmd_case_match_suspects(fmd_asru_link_t *alp, void *arg) 545567cc2e6Sstephh { 5465750ef5cSStephen Hanson fcms_t *fcmsp = (fcms_t *)arg; 5475750ef5cSStephen Hanson fmd_case_impl_t *cip = fcmsp->fcms_cip; 5485750ef5cSStephen Hanson fmd_case_susp_t *cis; 5495750ef5cSStephen Hanson int i = 0; 5505750ef5cSStephen Hanson int state = fmd_asru_al_getstate(alp); 551567cc2e6Sstephh 5525750ef5cSStephen Hanson if (*fcmsp->fcms_countp >= fcmsp->fcms_maxcount) 5535750ef5cSStephen Hanson return; 554567cc2e6Sstephh 5555750ef5cSStephen Hanson if (!(state & FMD_ASRU_PRESENT) || (!(state & FMD_ASRU_FAULTY) && 5565750ef5cSStephen Hanson alp->al_reason == FMD_ASRU_REMOVED)) 5575750ef5cSStephen Hanson fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] = 5585750ef5cSStephen Hanson SUSPECT_STATE_REMOVED; 5595750ef5cSStephen Hanson else if ((state & FMD_ASRU_UNUSABLE) && (state & FMD_ASRU_FAULTY)) 5605750ef5cSStephen Hanson fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] = 5615750ef5cSStephen Hanson SUSPECT_STATE_ISOLATED; 5625750ef5cSStephen Hanson else if (state & FMD_ASRU_FAULTY) 5635750ef5cSStephen Hanson fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] = 
5645750ef5cSStephen Hanson SUSPECT_STATE_FAULTY; 5655750ef5cSStephen Hanson else if (alp->al_reason == FMD_ASRU_REPLACED) 5665750ef5cSStephen Hanson fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] = 5675750ef5cSStephen Hanson SUSPECT_STATE_REPLACED; 5685750ef5cSStephen Hanson else if (alp->al_reason == FMD_ASRU_ACQUITTED) 5695750ef5cSStephen Hanson fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] = 5705750ef5cSStephen Hanson SUSPECT_STATE_ACQUITED; 5715750ef5cSStephen Hanson else 5725750ef5cSStephen Hanson fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] = 5735750ef5cSStephen Hanson SUSPECT_STATE_REPAIRED; 574567cc2e6Sstephh 5755750ef5cSStephen Hanson for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next, i++) 5765750ef5cSStephen Hanson if (fmd_case_match_suspect(cis->cis_nvl, alp->al_event) == 1) 577567cc2e6Sstephh break; 5785750ef5cSStephen Hanson if (cis != NULL) 5795750ef5cSStephen Hanson fcmsp->fcms_new_susp_state[i] = 5805750ef5cSStephen Hanson fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp]; 5815750ef5cSStephen Hanson else 5825750ef5cSStephen Hanson fcmsp->fcms_old_match_state[*fcmsp->fcms_countp] |= 5835750ef5cSStephen Hanson SUSPECT_STATE_NO_MATCH; 5845750ef5cSStephen Hanson (*fcmsp->fcms_countp)++; 585567cc2e6Sstephh } 5865750ef5cSStephen Hanson 5875750ef5cSStephen Hanson typedef struct { 5885750ef5cSStephen Hanson int *fca_do_update; 5895750ef5cSStephen Hanson fmd_case_impl_t *fca_cip; 5905750ef5cSStephen Hanson } fca_t; 5915750ef5cSStephen Hanson 5925750ef5cSStephen Hanson /* 5935750ef5cSStephen Hanson * Re-fault all acquitted suspects that are still present in the new list. 
5945750ef5cSStephen Hanson */ 5955750ef5cSStephen Hanson static void 5965750ef5cSStephen Hanson fmd_case_fault_acquitted_matching(fmd_asru_link_t *alp, void *arg) 5975750ef5cSStephen Hanson { 5985750ef5cSStephen Hanson fca_t *fcap = (fca_t *)arg; 5995750ef5cSStephen Hanson fmd_case_impl_t *cip = fcap->fca_cip; 6005750ef5cSStephen Hanson fmd_case_susp_t *cis; 6015750ef5cSStephen Hanson int state = fmd_asru_al_getstate(alp); 6025750ef5cSStephen Hanson 6035750ef5cSStephen Hanson if (!(state & FMD_ASRU_FAULTY) && 6045750ef5cSStephen Hanson alp->al_reason == FMD_ASRU_ACQUITTED) { 6055750ef5cSStephen Hanson for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) 6065750ef5cSStephen Hanson if (fmd_case_match_suspect(cis->cis_nvl, 6075750ef5cSStephen Hanson alp->al_event) == 1) 608567cc2e6Sstephh break; 6095750ef5cSStephen Hanson if (cis != NULL) { 6105750ef5cSStephen Hanson (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY); 6115750ef5cSStephen Hanson *fcap->fca_do_update = 1; 612567cc2e6Sstephh } 613567cc2e6Sstephh } 6145750ef5cSStephen Hanson } 6155750ef5cSStephen Hanson 6165750ef5cSStephen Hanson /* 6175750ef5cSStephen Hanson * Re-fault all suspects that are still present in the new list. 
6185750ef5cSStephen Hanson */ 6195750ef5cSStephen Hanson static void 6205750ef5cSStephen Hanson fmd_case_fault_all_matching(fmd_asru_link_t *alp, void *arg) 6215750ef5cSStephen Hanson { 6225750ef5cSStephen Hanson fca_t *fcap = (fca_t *)arg; 6235750ef5cSStephen Hanson fmd_case_impl_t *cip = fcap->fca_cip; 6245750ef5cSStephen Hanson fmd_case_susp_t *cis; 6255750ef5cSStephen Hanson int state = fmd_asru_al_getstate(alp); 6265750ef5cSStephen Hanson 6275750ef5cSStephen Hanson if (!(state & FMD_ASRU_FAULTY)) { 6285750ef5cSStephen Hanson for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) 6295750ef5cSStephen Hanson if (fmd_case_match_suspect(cis->cis_nvl, 6305750ef5cSStephen Hanson alp->al_event) == 1) 6315750ef5cSStephen Hanson break; 6325750ef5cSStephen Hanson if (cis != NULL) { 6335750ef5cSStephen Hanson (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY); 6345750ef5cSStephen Hanson *fcap->fca_do_update = 1; 635567cc2e6Sstephh } 636567cc2e6Sstephh } 6375750ef5cSStephen Hanson } 6385750ef5cSStephen Hanson 6395750ef5cSStephen Hanson /* 6405750ef5cSStephen Hanson * Acquit all suspects that are no longer present in the new list. 
6415750ef5cSStephen Hanson */ 6425750ef5cSStephen Hanson static void 6435750ef5cSStephen Hanson fmd_case_acquit_no_match(fmd_asru_link_t *alp, void *arg) 6445750ef5cSStephen Hanson { 6455750ef5cSStephen Hanson fca_t *fcap = (fca_t *)arg; 6465750ef5cSStephen Hanson fmd_case_impl_t *cip = fcap->fca_cip; 6475750ef5cSStephen Hanson fmd_case_susp_t *cis; 6485750ef5cSStephen Hanson int state = fmd_asru_al_getstate(alp); 6495750ef5cSStephen Hanson 6505750ef5cSStephen Hanson if (state & FMD_ASRU_FAULTY) { 6515750ef5cSStephen Hanson for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) 6525750ef5cSStephen Hanson if (fmd_case_match_suspect(cis->cis_nvl, 6535750ef5cSStephen Hanson alp->al_event) == 1) 6545750ef5cSStephen Hanson break; 6555750ef5cSStephen Hanson if (cis == NULL) { 6565750ef5cSStephen Hanson (void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 6575750ef5cSStephen Hanson FMD_ASRU_ACQUITTED); 6585750ef5cSStephen Hanson *fcap->fca_do_update = 1; 6595750ef5cSStephen Hanson } 6605750ef5cSStephen Hanson } 6615750ef5cSStephen Hanson } 6625750ef5cSStephen Hanson 6635750ef5cSStephen Hanson /* 6645750ef5cSStephen Hanson * Acquit all isolated suspects. 
6655750ef5cSStephen Hanson */ 6665750ef5cSStephen Hanson static void 6675750ef5cSStephen Hanson fmd_case_acquit_isolated(fmd_asru_link_t *alp, void *arg) 6685750ef5cSStephen Hanson { 6695750ef5cSStephen Hanson int *do_update = (int *)arg; 6705750ef5cSStephen Hanson int state = fmd_asru_al_getstate(alp); 6715750ef5cSStephen Hanson 6725750ef5cSStephen Hanson if ((state & FMD_ASRU_PRESENT) && (state & FMD_ASRU_UNUSABLE) && 6735750ef5cSStephen Hanson (state & FMD_ASRU_FAULTY)) { 6745750ef5cSStephen Hanson (void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 6755750ef5cSStephen Hanson FMD_ASRU_ACQUITTED); 6765750ef5cSStephen Hanson *do_update = 1; 6775750ef5cSStephen Hanson } 6785750ef5cSStephen Hanson } 6795750ef5cSStephen Hanson 6805750ef5cSStephen Hanson /* 6815750ef5cSStephen Hanson * Acquit suspect which matches specified nvlist 6825750ef5cSStephen Hanson */ 6835750ef5cSStephen Hanson static void 6845750ef5cSStephen Hanson fmd_case_acquit_suspect(fmd_asru_link_t *alp, void *arg) 6855750ef5cSStephen Hanson { 6865750ef5cSStephen Hanson nvlist_t *nvl = (nvlist_t *)arg; 6875750ef5cSStephen Hanson int state = fmd_asru_al_getstate(alp); 6885750ef5cSStephen Hanson 6895750ef5cSStephen Hanson if ((state & FMD_ASRU_FAULTY) && 6905750ef5cSStephen Hanson fmd_case_match_suspect(nvl, alp->al_event) == 1) 6915750ef5cSStephen Hanson (void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 6925750ef5cSStephen Hanson FMD_ASRU_ACQUITTED); 6935750ef5cSStephen Hanson } 6945750ef5cSStephen Hanson 6955750ef5cSStephen Hanson typedef struct { 6965750ef5cSStephen Hanson fmd_case_impl_t *fccd_cip; 6975750ef5cSStephen Hanson uint8_t *fccd_new_susp_state; 6985750ef5cSStephen Hanson uint8_t *fccd_new_match_state; 6995750ef5cSStephen Hanson int *fccd_discard_new; 7005750ef5cSStephen Hanson int *fccd_adjust_new; 7015750ef5cSStephen Hanson } fccd_t; 7025750ef5cSStephen Hanson 7035750ef5cSStephen Hanson /* 7045750ef5cSStephen Hanson * see if a matching suspect list already exists in the cache 7055750ef5cSStephen 
 *
 * old_cp is an existing case to compare against; arg is an fccd_t describing
 * the new case (fccd_cip) and the caller's result slots.
 *
 * If the two suspect lists are judged to match and the old case is in a state
 * >= FMD_CASE_REPAIRED, the new case is kept: when
 * fmd_case_auto_acquit_non_acquitted is set, *fccd_adjust_new is set to 1 and
 * the per-suspect state found here is merged into fccd_new_susp_state.
 *
 * If they match and the old case is in an earlier state, *fccd_discard_new is
 * set to 1 and the old case's ASRU links are re-faulted and/or acquitted as
 * described below; fmd_case_update() is called on the old case if any link
 * was changed.
 *
 * If the lists do not match, this function returns without touching the
 * caller's result slots.
 */
static void
fmd_case_check_for_dups(fmd_case_t *old_cp, void *arg)
{
	fccd_t *fccdp = (fccd_t *)arg;
	fmd_case_impl_t *new_cip = fccdp->fccd_cip;
	fmd_case_impl_t *old_cip = (fmd_case_impl_t *)old_cp;
	int i, count = 0, do_update = 0, got_isolated_overlap = 0;
	int got_faulty_overlap = 0;
	int got_acquit_overlap = 0;
	boolean_t too_recent;
	uint64_t most_recent = 0;
	fcms_t fcms;
	fca_t fca;
	uint8_t *new_susp_state;
	uint8_t *old_susp_state;
	uint8_t *old_match_state;

	/*
	 * Scratch state arrays, one byte per suspect, allocated on the stack
	 * and zeroed by hand: one for the new case's suspects and two for the
	 * old case's suspects.
	 */
	new_susp_state = alloca(new_cip->ci_nsuspects * sizeof (uint8_t));
	for (i = 0; i < new_cip->ci_nsuspects; i++)
		new_susp_state[i] = 0;
	old_susp_state = alloca(old_cip->ci_nsuspects * sizeof (uint8_t));
	for (i = 0; i < old_cip->ci_nsuspects; i++)
		old_susp_state[i] = 0;
	old_match_state = alloca(old_cip->ci_nsuspects * sizeof (uint8_t));
	for (i = 0; i < old_cip->ci_nsuspects; i++)
		old_match_state[i] = 0;

	/*
	 * Compare with each suspect in the existing case.
	 */
	fcms.fcms_countp = &count;
	fcms.fcms_maxcount = old_cip->ci_nsuspects;
	fcms.fcms_cip = new_cip;
	fcms.fcms_new_susp_state = new_susp_state;
	fcms.fcms_old_susp_state = old_susp_state;
	fcms.fcms_old_match_state = old_match_state;
	fmd_asru_hash_apply_by_case(fmd.d_asrus, (fmd_case_t *)old_cip,
	    fmd_case_match_suspects, &fcms);

	/*
	 * If we have some faulty, non-isolated suspects that overlap, then most
	 * likely it is the suspects that overlap in the suspect lists that are
	 * to blame. So we can consider this to be a match.
	 */
	for (i = 0; i < new_cip->ci_nsuspects; i++)
		if (new_susp_state[i] == SUSPECT_STATE_FAULTY)
			got_faulty_overlap = 1;
	if (got_faulty_overlap && fmd_case_match_on_faulty_overlap)
		goto got_match;

	/*
	 * If we have no faulty, non-isolated suspects in the old case, but we
	 * do have some acquitted suspects that overlap, then most likely it is
	 * the acquitted suspects that overlap in the suspect lists that are
	 * to blame. So we can consider this to be a match.
	 */
	for (i = 0; i < new_cip->ci_nsuspects; i++)
		if (new_susp_state[i] == SUSPECT_STATE_ACQUITED)
			got_acquit_overlap = 1;
	/* Any faulty suspect in the old case cancels the acquit overlap. */
	for (i = 0; i < old_cip->ci_nsuspects; i++)
		if (old_susp_state[i] == SUSPECT_STATE_FAULTY)
			got_acquit_overlap = 0;
	if (got_acquit_overlap && fmd_case_match_on_acquit_overlap)
		goto got_match;

	/*
	 * Check that all suspects in the new list are present in the old list.
	 * Return if we find one that isn't.
	 */
	for (i = 0; i < new_cip->ci_nsuspects; i++)
		if (new_susp_state[i] == 0)
			return;

	/*
	 * Check that all suspects in the old list are present in the new list
	 * *or* they are isolated or removed/replaced (which would explain why
	 * they are not present in the new list). Return if we find one that is
	 * faulty and unisolated or repaired or acquitted, and that is not
	 * present in the new case.
	 */
	for (i = 0; i < old_cip->ci_nsuspects; i++)
		if (old_match_state[i] == SUSPECT_STATE_NO_MATCH &&
		    (old_susp_state[i] == SUSPECT_STATE_FAULTY ||
		    old_susp_state[i] == SUSPECT_STATE_ACQUITED ||
		    old_susp_state[i] == SUSPECT_STATE_REPAIRED))
			return;

got_match:
	/*
	 * If the old case is already in repaired/resolved state, we can't
	 * do anything more with it, so keep the new case, but acquit some
	 * of the suspects if appropriate.
	 *
	 * NOTE(review): SUSPECT_STATE_NO_MATCH is stored into
	 * fccd_new_susp_state below and this function never writes
	 * fccd_new_match_state, yet fmd_case_convict() tests
	 * new_match_state[i] & SUSPECT_STATE_NO_MATCH.  Confirm which array
	 * the assignment is meant to target.
	 */
	if (old_cip->ci_state >= FMD_CASE_REPAIRED) {
		if (fmd_case_auto_acquit_non_acquitted) {
			*fccdp->fccd_adjust_new = 1;
			for (i = 0; i < new_cip->ci_nsuspects; i++) {
				fccdp->fccd_new_susp_state[i] |=
				    new_susp_state[i];
				if (new_susp_state[i] == 0)
					fccdp->fccd_new_susp_state[i] =
					    SUSPECT_STATE_NO_MATCH;
			}
		}
		return;
	}

	/*
	 * Otherwise discard the new case and keep the old, again updating the
	 * state of the suspects as appropriate
	 */
	*fccdp->fccd_discard_new = 1;
	fca.fca_cip = new_cip;
	fca.fca_do_update = &do_update;

	/*
	 * See if new case occurred within fmd_case_too_recent seconds of the
	 * most recent modification to the old case and if so don't do
	 * auto-acquit. This avoids problems if a flood of ereports come in and
	 * they don't all get diagnosed before the first case causes some of
	 * the devices to be isolated making it appear that an isolated device
	 * was in the suspect list.
	 *
	 * NOTE(review): most_recent is a uint64_t, so the subtraction below is
	 * presumably carried out unsigned; a most_recent later than
	 * ci_tv.tv_sec would then wrap to a large value and leave too_recent
	 * false.  Confirm that this is the intended outcome.
	 */
	fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
	    fmd_asru_most_recent, &most_recent);
	too_recent = (new_cip->ci_tv.tv_sec - most_recent <
	    fmd_case_too_recent);

	if (got_faulty_overlap) {
		/*
		 * Acquit any suspects not present in the new list, plus
		 * any that are present but are isolated.
		 */
		fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
		    fmd_case_acquit_no_match, &fca);
		if (fmd_case_auto_acquit_isolated && !too_recent)
			fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
			    fmd_case_acquit_isolated, &do_update);
	} else if (got_acquit_overlap) {
		/*
		 * Re-fault the acquitted matching suspects and acquit all
		 * isolated suspects.
		 */
		if (fmd_case_auto_acquit_isolated && !too_recent) {
			fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
			    fmd_case_fault_acquitted_matching, &fca);
			fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
			    fmd_case_acquit_isolated, &do_update);
		}
	} else if (fmd_case_auto_acquit_isolated) {
		/*
		 * To get here, there must be no faulty or acquitted suspects,
		 * but there must be at least one isolated suspect. Just acquit
		 * non-matching isolated suspects. If there are no matching
		 * isolated suspects, then re-fault all matching suspects.
		 */
		for (i = 0; i < new_cip->ci_nsuspects; i++)
			if (new_susp_state[i] == SUSPECT_STATE_ISOLATED)
				got_isolated_overlap = 1;
		if (!got_isolated_overlap)
			fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
			    fmd_case_fault_all_matching, &fca);
		fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
		    fmd_case_acquit_no_match, &fca);
	}

	/*
	 * If we've updated anything in the old case, call fmd_case_update()
	 */
	if (do_update)
		fmd_case_update(old_cp);
}

/*
 * Convict suspects in a case by applying a conviction policy and updating the
 * resource cache prior to emitting the list.suspect event for the given
 * case.
 * At present, our policy is very simple: convict every suspect in the case.
 * In the future, this policy can be extended and made configurable to permit:
 *
 * - convicting the suspect with the highest FIT rate
 * - convicting the suspect with the cheapest FRU
 * - convicting the suspect with the FRU that is in a depot's inventory
 * - convicting the suspect with the longest lifetime
 *
 * and so forth.  A word to the wise: this problem is significantly harder than
 * it seems at first glance.  Future work should heed the following advice:
 *
 * Hacking the policy into C code here is a very bad idea.  The policy needs to
 * be decided upon very carefully and fundamentally encodes knowledge of what
 * suspect list combinations can be emitted by what diagnosis engines.  As such
 * fmd's code is the wrong location, because that would require fmd itself to
 * be updated for every diagnosis engine change, defeating the entire design.
 * The FMA Event Registry knows the suspect list combinations: policy inputs
 * can be derived from it and used to produce per-module policy configuration.
 *
 * If the policy needs to be dynamic and not statically fixed at either fmd
 * startup or module load time, any implementation of dynamic policy retrieval
 * must employ some kind of caching mechanism or be part of a built-in module.
 * The fmd_case_convict() function is called with locks held inside of fmd and
 * is not a place where unbounded blocking on some inter-process or inter-
 * system communication to another service (e.g. another daemon) can occur.
 *
 * Returns 1 if fmd_case_check_for_dups() asked for this case to be discarded
 * as a duplicate of an existing case (no resource cache entries are created
 * in that situation), and 0 otherwise.
 */
static int
fmd_case_convict(fmd_case_t *cp)
{
	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
	fmd_asru_hash_t *ahp = fmd.d_asrus;
	int discard_new = 0, i;
	fmd_case_susp_t *cis;
	fmd_asru_link_t *alp;
	uint8_t *new_susp_state;
	uint8_t *new_match_state;
	int adjust_new = 0;
	fccd_t fccd;
	fmd_case_impl_t *ncp, **cps, **cpp;
	uint_t cpc;
	fmd_case_hash_t *chp;

	/*
	 * First we must see if any matching cases already exist.
	 */
	new_susp_state = alloca(cip->ci_nsuspects * sizeof (uint8_t));
	for (i = 0; i < cip->ci_nsuspects; i++)
		new_susp_state[i] = 0;
	new_match_state = alloca(cip->ci_nsuspects * sizeof (uint8_t));
	for (i = 0; i < cip->ci_nsuspects; i++)
		new_match_state[i] = 0;
	fccd.fccd_cip = cip;
	fccd.fccd_adjust_new = &adjust_new;
	fccd.fccd_new_susp_state = new_susp_state;
	fccd.fccd_new_match_state = new_match_state;
	fccd.fccd_discard_new = &discard_new;

	/*
	 * Hold all cases
	 *
	 * The case hash is walked with ch_lock held as reader and the result
	 * of fmd_case_tryhold() is recorded for every case on every chain.
	 * A slot may be NULL when tryhold fails; such slots are skipped
	 * below.  The ASSERT checks that the number of cases walked equals
	 * ch_count.
	 */
	chp = fmd.d_cases;
	(void) pthread_rwlock_rdlock(&chp->ch_lock);
	cps = cpp = fmd_alloc(chp->ch_count * sizeof (fmd_case_t *), FMD_SLEEP);
	cpc = chp->ch_count;
	for (i = 0; i < chp->ch_hashlen; i++)
		for (ncp = chp->ch_hash[i]; ncp != NULL; ncp = ncp->ci_next)
			*cpp++ = fmd_case_tryhold(ncp);
	ASSERT(cpp == cps + cpc);
	(void) pthread_rwlock_unlock(&chp->ch_lock);

	/*
	 * Run fmd_case_check_for_dups() on all cases except the current one.
	 * Every non-NULL slot, including the current case, is released again.
	 */
	for (i = 0; i < cpc; i++) {
		if (cps[i] != NULL) {
			if (cps[i] != (fmd_case_impl_t *)cp)
				fmd_case_check_for_dups((fmd_case_t *)cps[i],
				    &fccd);
			fmd_case_rele((fmd_case_t *)cps[i]);
		}
	}
	fmd_free(cps, cpc * sizeof (fmd_case_t *));

	/*
	 * With ci_lock held: generate the case code if there is none yet;
	 * a precanned case that already has a code is instead inserted into
	 * the code hash.
	 */
	(void) pthread_mutex_lock(&cip->ci_lock);
	if (cip->ci_code == NULL)
		(void) fmd_case_mkcode(cp);
	else if (cip->ci_precanned)
		fmd_case_code_hash_insert(fmd.d_cases, cip);

	if (discard_new) {
		/*
		 * We've found an existing case that is a match and it is not
		 * already in repaired or resolved state. So we can close this
		 * one as a duplicate.
		 */
		(void) pthread_mutex_unlock(&cip->ci_lock);
		return (1);
	}

	/*
	 * Allocate new cache entries
	 *
	 * Each suspect gets a resource cache entry that is marked present,
	 * not unusable, and faulty.  A suspect whose entry cannot be created
	 * is reported through fmd_error() and skipped.
	 */
	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
		if ((alp = fmd_asru_hash_create_entry(ahp,
		    cp, cis->cis_nvl)) == NULL) {
			fmd_error(EFMD_CASE_EVENT, "cannot convict suspect in "
			    "%s: %s\n", cip->ci_uuid, fmd_strerror(errno));
			continue;
		}
		alp->al_flags |= FMD_ASRU_PRESENT;
		alp->al_asru->asru_flags |= FMD_ASRU_PRESENT;
		(void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE, 0);
		(void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
	}

	if (adjust_new) {
		int some_suspect = 0, some_not_suspect = 0;

		/*
		 * There is one or more matching case but they are already in
		 * repaired or resolved state. So we need to keep the new
		 * case, but we can adjust it. Repaired/removed/replaced
		 * suspects are unlikely to be to blame (unless there are
		 * actually two separate faults). So if we have a combination of
		 * repaired/replaced/removed suspects and acquitted suspects in
		 * the old lists, then we should acquit in the new list those
		 * that were repaired/replaced/removed in the old.
		 */
		for (i = 0; i < cip->ci_nsuspects; i++) {
			if ((new_susp_state[i] & SUSPECT_STATE_REPLACED) ||
			    (new_susp_state[i] & SUSPECT_STATE_REPAIRED) ||
			    (new_susp_state[i] & SUSPECT_STATE_REMOVED) ||
			    (new_match_state[i] & SUSPECT_STATE_NO_MATCH))
				some_not_suspect = 1;
			else
				some_suspect = 1;
		}
		/*
		 * Only acquit when the list is mixed; index i walks the state
		 * arrays in step with the suspect list.
		 */
		if (some_suspect && some_not_suspect) {
			for (cis = cip->ci_suspects, i = 0; cis != NULL;
			    cis = cis->cis_next, i++)
				if ((new_susp_state[i] &
				    SUSPECT_STATE_REPLACED) ||
				    (new_susp_state[i] &
				    SUSPECT_STATE_REPAIRED) ||
				    (new_susp_state[i] &
				    SUSPECT_STATE_REMOVED) ||
				    (new_match_state[i] &
				    SUSPECT_STATE_NO_MATCH))
					fmd_asru_hash_apply_by_case(fmd.d_asrus,
					    cp, fmd_case_acquit_suspect,
					    cis->cis_nvl);
		}
	}

	(void) pthread_mutex_unlock(&cip->ci_lock);
	return (0);
}

/*
 * Publish the event(s) that correspond to the given state of case cp.
 * Passing FMD_CASE_CURRENT selects the case's current ci_state.  States that
 * have no case label in the switch below are ignored.
 *
 * FMD_CASE_SOLVED:	convict the suspects, then log and dispatch a
 *			list.suspect event; if fmd_case_convict() reports a
 *			duplicate, the case is moved to CLOSE_WAIT instead.
 * FMD_CASE_CLOSE_WAIT:	queue an FMD_EVT_CLOSE event at the head of the owning
 *			module's queue.
 * FMD_CASE_CLOSED:	dispatch a list.isolated event (not logged here).
 * FMD_CASE_REPAIRED:	log and dispatch a list.repaired event.
 * FMD_CASE_RESOLVED:	log and dispatch a list.resolved event.
 */
void
fmd_case_publish(fmd_case_t *cp, uint_t state)
{
	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
	fmd_event_t *e;
	nvlist_t *nvl;
	char *class;

	if (state == FMD_CASE_CURRENT)
		state = cip->ci_state;	/* use current state */

	switch (state) {
	case FMD_CASE_SOLVED:
		(void) pthread_mutex_lock(&cip->ci_lock);

		/*
		 * If we already have a code, then case is already solved.
		 */
		if (cip->ci_precanned == 0 && cip->ci_xprt == NULL &&
		    cip->ci_code != NULL) {
			(void) pthread_mutex_unlock(&cip->ci_lock);
			break;
		}

		/* Record the time of solving once only. */
		if (cip->ci_tv_valid == 0) {
			fmd_time_gettimeofday(&cip->ci_tv);
			cip->ci_tv_valid = 1;
		}
		(void) pthread_mutex_unlock(&cip->ci_lock);

		/*
		 * NOTE(review): ci_flags is modified below after ci_lock has
		 * been dropped; confirm that callers serialize this update.
		 */
		if (fmd_case_convict(cp) == 1) { /* dupclose */
			cip->ci_flags &= ~FMD_CF_SOLVED;
			fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, 0);
			break;
		}
		if (cip->ci_xprt != NULL) {
			/*
			 * For proxy, save some information about the transport
			 * in the resource cache.
			 */
			int count = 0;
			fmd_asru_set_on_proxy_t fasp;
			fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)cip->ci_xprt;

			fasp.fasp_countp = &count;
			fasp.fasp_maxcount = cip->ci_nsuspects;
			fasp.fasp_proxy_asru = cip->ci_proxy_asru;
			fasp.fasp_proxy_external = xip->xi_flags &
			    FMD_XPRT_EXTERNAL;
			fasp.fasp_proxy_rdonly = ((xip->xi_flags &
			    FMD_XPRT_RDWR) == FMD_XPRT_RDONLY);
			fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
			    fmd_asru_set_on_proxy, &fasp);
		}
		/*
		 * The lookup result is ignored here and in the cases below;
		 * presumably fmd_case_mkevent() always sets FM_CLASS, so
		 * class is valid on return -- TODO confirm.
		 */
		nvl = fmd_case_mkevent(cp, FM_LIST_SUSPECT_CLASS);
		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);

		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
		(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
		fmd_log_append(fmd.d_fltlog, e, cp);
		(void) pthread_rwlock_unlock(&fmd.d_log_lock);
		fmd_dispq_dispatch(fmd.d_disp, e, class);

		(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
		cip->ci_mod->mod_stats->ms_casesolved.fmds_value.ui64++;
		(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);

		break;

	case FMD_CASE_CLOSE_WAIT:
		/* The queued close event carries cp, so hold the case first. */
		fmd_case_hold(cp);
		e = fmd_event_create(FMD_EVT_CLOSE, FMD_HRT_NOW, NULL, cp);
		fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e);

		(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
		cip->ci_mod->mod_stats->ms_caseclosed.fmds_value.ui64++;
		(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);

		break;

	case FMD_CASE_CLOSED:
		nvl = fmd_case_mkevent(cp, FM_LIST_ISOLATED_CLASS);
		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
		fmd_dispq_dispatch(fmd.d_disp, e, class);
		break;

	case FMD_CASE_REPAIRED:
		nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
		(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
		fmd_log_append(fmd.d_fltlog, e, cp);
		(void) pthread_rwlock_unlock(&fmd.d_log_lock);
		fmd_dispq_dispatch(fmd.d_disp, e, class);
		break;

	case FMD_CASE_RESOLVED:
		nvl = fmd_case_mkevent(cp, FM_LIST_RESOLVED_CLASS);
		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
		(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
		fmd_log_append(fmd.d_fltlog, e, cp);
		(void) pthread_rwlock_unlock(&fmd.d_log_lock);
		fmd_dispq_dispatch(fmd.d_disp, e, class);
		break;
	}
}

/*
 * Look up the case with the given uuid in case hash chp.  On success the
 * result of fmd_case_tryhold() on the matching case is returned.  If no case
 * matches, or tryhold returns NULL, the fmd errno is set to EFMD_CASE_INVAL
 * and NULL is returned.  The search runs with ch_lock held as reader.
 */
fmd_case_t *
fmd_case_hash_lookup(fmd_case_hash_t *chp, const char *uuid)
{
	fmd_case_impl_t *cip;
	uint_t h;

	(void) pthread_rwlock_rdlock(&chp->ch_lock);
	h = fmd_strhash(uuid) % chp->ch_hashlen;

	for (cip = chp->ch_hash[h]; cip != NULL; cip = cip->ci_next) {
		if (strcmp(cip->ci_uuid, uuid) == 0)
			break;
	}

	/*
	 * If deleting bit is set, treat the case as if it doesn't exist.
	 */
	if (cip != NULL)
		cip = fmd_case_tryhold(cip);

	if (cip == NULL)
		(void) fmd_set_errno(EFMD_CASE_INVAL);

	(void) pthread_rwlock_unlock(&chp->ch_lock);
	return ((fmd_case_t *)cip);
}

/*
 * Insert cip into case hash chp, keyed by ci_uuid, with ch_lock held as
 * writer.  If a case with the same uuid is already on the chain and
 * fmd_case_tryhold() succeeds on it, that existing case is returned and cip
 * is not inserted.  Otherwise cip is pushed onto the head of its chain,
 * ch_count is incremented and cip itself is returned.
 */
static fmd_case_impl_t *
fmd_case_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
{
	fmd_case_impl_t *eip;
	uint_t h;

	(void) pthread_rwlock_wrlock(&chp->ch_lock);
	h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen;

	for (eip = chp->ch_hash[h]; eip != NULL; eip = eip->ci_next) {
		if (strcmp(cip->ci_uuid, eip->ci_uuid) == 0 &&
		    fmd_case_tryhold(eip) != NULL) {
			(void) pthread_rwlock_unlock(&chp->ch_lock);
			return (eip); /* uuid already present */
		}
	}

	cip->ci_next = chp->ch_hash[h];
	chp->ch_hash[h] = cip;

	chp->ch_count++;
	ASSERT(chp->ch_count != 0);	/* catch counter wrap-around */

	(void) pthread_rwlock_unlock(&chp->ch_lock);
	return (cip);
}

/*
 * Remove cip from case hash chp.  The caller must hold cip->ci_lock (this is
 * asserted).  The case is first flagged FMD_CF_DELETING, then ci_lock is
 * dropped while ch_lock is held as writer to unlink the case, remove it from
 * the code hash and decrement ch_count.  ci_lock is re-acquired before
 * returning, so the caller gets it back held.  Panics if cip is not found on
 * its hash chain.
 */
static void
fmd_case_hash_delete(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
{
	fmd_case_impl_t *cp, **pp;
	uint_t h;

	ASSERT(MUTEX_HELD(&cip->ci_lock));

	cip->ci_flags |= FMD_CF_DELETING;
	(void) pthread_mutex_unlock(&cip->ci_lock);

	(void) pthread_rwlock_wrlock(&chp->ch_lock);

	h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen;
	pp = &chp->ch_hash[h];

	/* Walk the chain; pp tracks the link that points at the current cp. */
	for (cp = *pp; cp != NULL; cp = cp->ci_next) {
		if (cp != cip)
			pp = &cp->ci_next;
		else
			break;
	}

	if (cp == NULL) {
		fmd_panic("case %p (%s) not found on hash chain %u\n",
		    (void *)cip, cip->ci_uuid, h);
	}

	*pp = cp->ci_next;
	cp->ci_next = NULL;

	/*
	 * delete from code hash if it is on it
	 */
	fmd_case_code_hash_delete(chp, cip);

	ASSERT(chp->ch_count != 0);
	chp->ch_count--;

	(void) pthread_rwlock_unlock(&chp->ch_lock);

	(void) pthread_mutex_lock(&cip->ci_lock);
	ASSERT(cip->ci_flags & FMD_CF_DELETING);
}

fmd_case_t *
fmd_case_create(fmd_module_t *mp, const char *uuidstr, void *data)
{
	fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
	fmd_case_impl_t *eip = NULL;
	uuid_t uuid;

	(void) pthread_mutex_init(&cip->ci_lock, NULL);
	fmd_buf_hash_create(&cip->ci_bufs);

	fmd_module_hold(mp);
	cip->ci_mod = mp;
	cip->ci_refs = 1;
	cip->ci_state = FMD_CASE_UNSOLVED;
	cip->ci_flags = FMD_CF_DIRTY;
	cip->ci_data = data;

	/*
	 * Calling libuuid: get a clue.
The library interfaces cleverly do not 12677c478bd9Sstevel@tonic-gate * define any constant for the length of an unparse string, and do not 12687c478bd9Sstevel@tonic-gate * permit the caller to specify a buffer length for safety. The spec 12697c478bd9Sstevel@tonic-gate * says it will be 36 bytes, but we make it tunable just in case. 12707c478bd9Sstevel@tonic-gate */ 12717c478bd9Sstevel@tonic-gate (void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &cip->ci_uuidlen); 12727c478bd9Sstevel@tonic-gate cip->ci_uuid = fmd_zalloc(cip->ci_uuidlen + 1, FMD_SLEEP); 12737c478bd9Sstevel@tonic-gate 1274*f6e214c7SGavin Maltby if (uuidstr == NULL) { 12757c478bd9Sstevel@tonic-gate /* 1276*f6e214c7SGavin Maltby * We expect this loop to execute only once, but code it 1277*f6e214c7SGavin Maltby * defensively against the possibility of libuuid bugs. 1278*f6e214c7SGavin Maltby * Keep generating uuids and attempting to do a hash insert 1279*f6e214c7SGavin Maltby * until we get a unique one. 12807c478bd9Sstevel@tonic-gate */ 12817c478bd9Sstevel@tonic-gate do { 1282d9638e54Smws if (eip != NULL) 1283d9638e54Smws fmd_case_rele((fmd_case_t *)eip); 12847c478bd9Sstevel@tonic-gate uuid_generate(uuid); 12857c478bd9Sstevel@tonic-gate uuid_unparse(uuid, cip->ci_uuid); 1286d9638e54Smws } while ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip); 1287*f6e214c7SGavin Maltby } else { 1288*f6e214c7SGavin Maltby /* 1289*f6e214c7SGavin Maltby * If a uuid was specified we must succeed with that uuid, 1290*f6e214c7SGavin Maltby * or return NULL indicating a case with that uuid already 1291*f6e214c7SGavin Maltby * exists. 
1292*f6e214c7SGavin Maltby */ 1293*f6e214c7SGavin Maltby (void) strncpy(cip->ci_uuid, uuidstr, cip->ci_uuidlen + 1); 1294*f6e214c7SGavin Maltby if (fmd_case_hash_insert(fmd.d_cases, cip) != cip) { 1295*f6e214c7SGavin Maltby fmd_free(cip->ci_uuid, cip->ci_uuidlen + 1); 1296*f6e214c7SGavin Maltby (void) fmd_buf_hash_destroy(&cip->ci_bufs); 1297*f6e214c7SGavin Maltby fmd_module_rele(mp); 1298*f6e214c7SGavin Maltby pthread_mutex_destroy(&cip->ci_lock); 1299*f6e214c7SGavin Maltby fmd_free(cip, sizeof (*cip)); 1300*f6e214c7SGavin Maltby return (NULL); 1301*f6e214c7SGavin Maltby } 1302*f6e214c7SGavin Maltby } 13037c478bd9Sstevel@tonic-gate 13047c478bd9Sstevel@tonic-gate ASSERT(fmd_module_locked(mp)); 13057c478bd9Sstevel@tonic-gate fmd_list_append(&mp->mod_cases, cip); 13067c478bd9Sstevel@tonic-gate fmd_module_setcdirty(mp); 13077c478bd9Sstevel@tonic-gate 13087c478bd9Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock); 13097c478bd9Sstevel@tonic-gate cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++; 13107c478bd9Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock); 13117c478bd9Sstevel@tonic-gate 13127c478bd9Sstevel@tonic-gate return ((fmd_case_t *)cip); 13137c478bd9Sstevel@tonic-gate } 13147c478bd9Sstevel@tonic-gate 1315162ba6eaSmws static void 1316162ba6eaSmws fmd_case_destroy_suspects(fmd_case_impl_t *cip) 1317162ba6eaSmws { 1318162ba6eaSmws fmd_case_susp_t *cis, *ncis; 1319162ba6eaSmws 1320162ba6eaSmws ASSERT(MUTEX_HELD(&cip->ci_lock)); 1321162ba6eaSmws 1322cbf75e67SStephen Hanson if (cip->ci_proxy_asru) 1323cbf75e67SStephen Hanson fmd_free(cip->ci_proxy_asru, sizeof (uint8_t) * 1324cbf75e67SStephen Hanson cip->ci_nsuspects); 1325cbf75e67SStephen Hanson nvlist_free(cip->ci_diag_de); 1326cbf75e67SStephen Hanson if (cip->ci_diag_asru) 1327cbf75e67SStephen Hanson fmd_free(cip->ci_diag_asru, sizeof (uint8_t) * 1328cbf75e67SStephen Hanson cip->ci_nsuspects); 1329cbf75e67SStephen Hanson 1330162ba6eaSmws for (cis = 
cip->ci_suspects; cis != NULL; cis = ncis) { 1331162ba6eaSmws ncis = cis->cis_next; 1332162ba6eaSmws nvlist_free(cis->cis_nvl); 1333162ba6eaSmws fmd_free(cis, sizeof (fmd_case_susp_t)); 1334162ba6eaSmws } 1335162ba6eaSmws 1336162ba6eaSmws cip->ci_suspects = NULL; 1337162ba6eaSmws cip->ci_nsuspects = 0; 1338162ba6eaSmws } 1339162ba6eaSmws 13407c478bd9Sstevel@tonic-gate fmd_case_t * 1341d9638e54Smws fmd_case_recreate(fmd_module_t *mp, fmd_xprt_t *xp, 1342d9638e54Smws uint_t state, const char *uuid, const char *code) 13437c478bd9Sstevel@tonic-gate { 13447c478bd9Sstevel@tonic-gate fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP); 1345d9638e54Smws fmd_case_impl_t *eip; 1346d9638e54Smws 13477c478bd9Sstevel@tonic-gate (void) pthread_mutex_init(&cip->ci_lock, NULL); 13487c478bd9Sstevel@tonic-gate fmd_buf_hash_create(&cip->ci_bufs); 13497c478bd9Sstevel@tonic-gate 13507c478bd9Sstevel@tonic-gate fmd_module_hold(mp); 13517c478bd9Sstevel@tonic-gate cip->ci_mod = mp; 1352d9638e54Smws cip->ci_xprt = xp; 13537c478bd9Sstevel@tonic-gate cip->ci_refs = 1; 1354d9638e54Smws cip->ci_state = state; 13557c478bd9Sstevel@tonic-gate cip->ci_uuid = fmd_strdup(uuid, FMD_SLEEP); 13567c478bd9Sstevel@tonic-gate cip->ci_uuidlen = strlen(cip->ci_uuid); 1357d9638e54Smws cip->ci_code = fmd_strdup(code, FMD_SLEEP); 1358d9638e54Smws cip->ci_codelen = cip->ci_code ? strlen(cip->ci_code) + 1 : 0; 1359d9638e54Smws 1360d9638e54Smws if (state > FMD_CASE_CLOSE_WAIT) 1361d9638e54Smws cip->ci_flags |= FMD_CF_SOLVED; 1362d9638e54Smws 1363d9638e54Smws /* 1364d9638e54Smws * Insert the case into the global case hash. If the specified UUID is 1365d9638e54Smws * already present, check to see if it is an orphan: if so, reclaim it; 1366d9638e54Smws * otherwise if it is owned by a different module then return NULL. 
1367d9638e54Smws */ 1368d9638e54Smws if ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip) { 1369d9638e54Smws (void) pthread_mutex_lock(&cip->ci_lock); 1370d9638e54Smws cip->ci_refs--; /* decrement to zero */ 1371d9638e54Smws fmd_case_destroy((fmd_case_t *)cip, B_FALSE); 1372d9638e54Smws 1373d9638e54Smws cip = eip; /* switch 'cip' to the existing case */ 1374d9638e54Smws (void) pthread_mutex_lock(&cip->ci_lock); 1375d9638e54Smws 1376d9638e54Smws /* 1377d9638e54Smws * If the ASRU cache is trying to recreate an orphan, then just 1378d9638e54Smws * return the existing case that we found without changing it. 1379d9638e54Smws */ 1380d9638e54Smws if (mp == fmd.d_rmod) { 138125c6ff4bSstephh /* 1382cbf75e67SStephen Hanson * In case the case has already been created from 1383cbf75e67SStephen Hanson * a checkpoint file we need to set up code now. 1384cbf75e67SStephen Hanson */ 1385cbf75e67SStephen Hanson if (cip->ci_state < FMD_CASE_CLOSED) { 1386cbf75e67SStephen Hanson if (code != NULL && cip->ci_code == NULL) { 1387cbf75e67SStephen Hanson cip->ci_code = fmd_strdup(code, 1388cbf75e67SStephen Hanson FMD_SLEEP); 1389cbf75e67SStephen Hanson cip->ci_codelen = cip->ci_code ? 1390cbf75e67SStephen Hanson strlen(cip->ci_code) + 1 : 0; 1391cbf75e67SStephen Hanson fmd_case_code_hash_insert(fmd.d_cases, 1392cbf75e67SStephen Hanson cip); 1393cbf75e67SStephen Hanson } 1394cbf75e67SStephen Hanson } 1395cbf75e67SStephen Hanson 1396cbf75e67SStephen Hanson /* 139725c6ff4bSstephh * When recreating an orphan case, state passed in may 13985750ef5cSStephen Hanson * be CLOSED (faulty) or REPAIRED/RESOLVED (!faulty). If 139925c6ff4bSstephh * any suspects are still CLOSED (faulty) then the 140025c6ff4bSstephh * overall state needs to be CLOSED. 
140125c6ff4bSstephh */ 14025750ef5cSStephen Hanson if ((cip->ci_state == FMD_CASE_REPAIRED || 14035750ef5cSStephen Hanson cip->ci_state == FMD_CASE_RESOLVED) && 1404cbf75e67SStephen Hanson state == FMD_CASE_CLOSED) 140525c6ff4bSstephh cip->ci_state = FMD_CASE_CLOSED; 1406d9638e54Smws (void) pthread_mutex_unlock(&cip->ci_lock); 1407d9638e54Smws fmd_case_rele((fmd_case_t *)cip); 1408d9638e54Smws return ((fmd_case_t *)cip); 1409d9638e54Smws } 1410d9638e54Smws 1411d9638e54Smws /* 1412d9638e54Smws * If the existing case isn't an orphan or is being proxied, 1413d9638e54Smws * then we have a UUID conflict: return failure to the caller. 1414d9638e54Smws */ 1415d9638e54Smws if (cip->ci_mod != fmd.d_rmod || xp != NULL) { 1416d9638e54Smws (void) pthread_mutex_unlock(&cip->ci_lock); 1417d9638e54Smws fmd_case_rele((fmd_case_t *)cip); 1418d9638e54Smws return (NULL); 1419d9638e54Smws } 1420d9638e54Smws 1421d9638e54Smws /* 1422d9638e54Smws * If the new module is reclaiming an orphaned case, remove 1423d9638e54Smws * the case from the root module, switch ci_mod, and then fall 1424d9638e54Smws * through to adding the case to the new owner module 'mp'. 1425d9638e54Smws */ 1426d9638e54Smws fmd_module_lock(cip->ci_mod); 1427d9638e54Smws fmd_list_delete(&cip->ci_mod->mod_cases, cip); 1428d9638e54Smws fmd_module_unlock(cip->ci_mod); 1429d9638e54Smws 1430d9638e54Smws fmd_module_rele(cip->ci_mod); 1431d9638e54Smws cip->ci_mod = mp; 1432d9638e54Smws fmd_module_hold(mp); 1433d9638e54Smws 1434c7d6cfd6SStephen Hanson /* 1435c7d6cfd6SStephen Hanson * It's possible that fmd crashed or was restarted during a 1436c7d6cfd6SStephen Hanson * previous solve operation between the asru cache being created 1437c7d6cfd6SStephen Hanson * and the ckpt file being updated to SOLVED. 
Thus when the DE 1438c7d6cfd6SStephen Hanson * recreates the case here from the checkpoint file, the state 1439c7d6cfd6SStephen Hanson * will be UNSOLVED and yet we are having to reclaim because 1440c7d6cfd6SStephen Hanson * the case was in the asru cache. If this happens, revert the 1441c7d6cfd6SStephen Hanson * case back to the UNSOLVED state and let the DE solve it again 1442c7d6cfd6SStephen Hanson */ 1443c7d6cfd6SStephen Hanson if (state == FMD_CASE_UNSOLVED) { 1444c7d6cfd6SStephen Hanson fmd_asru_hash_delete_case(fmd.d_asrus, 1445c7d6cfd6SStephen Hanson (fmd_case_t *)cip); 1446162ba6eaSmws fmd_case_destroy_suspects(cip); 1447c7d6cfd6SStephen Hanson fmd_case_code_hash_delete(fmd.d_cases, cip); 1448c7d6cfd6SStephen Hanson fmd_free(cip->ci_code, cip->ci_codelen); 1449c7d6cfd6SStephen Hanson cip->ci_code = NULL; 1450c7d6cfd6SStephen Hanson cip->ci_codelen = 0; 1451c7d6cfd6SStephen Hanson cip->ci_tv_valid = 0; 1452c7d6cfd6SStephen Hanson } 1453c7d6cfd6SStephen Hanson 1454162ba6eaSmws cip->ci_state = state; 1455162ba6eaSmws 1456d9638e54Smws (void) pthread_mutex_unlock(&cip->ci_lock); 1457d9638e54Smws fmd_case_rele((fmd_case_t *)cip); 1458567cc2e6Sstephh } else { 1459567cc2e6Sstephh /* 1460567cc2e6Sstephh * add into hash of solved cases 1461567cc2e6Sstephh */ 1462567cc2e6Sstephh if (cip->ci_code) 1463567cc2e6Sstephh fmd_case_code_hash_insert(fmd.d_cases, cip); 1464d9638e54Smws } 14657c478bd9Sstevel@tonic-gate 14667c478bd9Sstevel@tonic-gate ASSERT(fmd_module_locked(mp)); 14677c478bd9Sstevel@tonic-gate fmd_list_append(&mp->mod_cases, cip); 14687c478bd9Sstevel@tonic-gate 14697c478bd9Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock); 14707c478bd9Sstevel@tonic-gate cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++; 14717c478bd9Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock); 14727c478bd9Sstevel@tonic-gate 14737c478bd9Sstevel@tonic-gate return ((fmd_case_t *)cip); 14747c478bd9Sstevel@tonic-gate } 
14757c478bd9Sstevel@tonic-gate 14767c478bd9Sstevel@tonic-gate void 1477d9638e54Smws fmd_case_destroy(fmd_case_t *cp, int visible) 14787c478bd9Sstevel@tonic-gate { 14797c478bd9Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 14807c478bd9Sstevel@tonic-gate fmd_case_item_t *cit, *ncit; 14817c478bd9Sstevel@tonic-gate 14827c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cip->ci_lock)); 14837c478bd9Sstevel@tonic-gate ASSERT(cip->ci_refs == 0); 14847c478bd9Sstevel@tonic-gate 1485d9638e54Smws if (visible) { 1486d9638e54Smws TRACE((FMD_DBG_CASE, "deleting case %s", cip->ci_uuid)); 14877c478bd9Sstevel@tonic-gate fmd_case_hash_delete(fmd.d_cases, cip); 1488d9638e54Smws } 14897c478bd9Sstevel@tonic-gate 14907c478bd9Sstevel@tonic-gate for (cit = cip->ci_items; cit != NULL; cit = ncit) { 14917c478bd9Sstevel@tonic-gate ncit = cit->cit_next; 14927c478bd9Sstevel@tonic-gate fmd_event_rele(cit->cit_event); 14937c478bd9Sstevel@tonic-gate fmd_free(cit, sizeof (fmd_case_item_t)); 14947c478bd9Sstevel@tonic-gate } 14957c478bd9Sstevel@tonic-gate 1496162ba6eaSmws fmd_case_destroy_suspects(cip); 14977c478bd9Sstevel@tonic-gate 14987c478bd9Sstevel@tonic-gate if (cip->ci_principal != NULL) 14997c478bd9Sstevel@tonic-gate fmd_event_rele(cip->ci_principal); 15007c478bd9Sstevel@tonic-gate 15017c478bd9Sstevel@tonic-gate fmd_free(cip->ci_uuid, cip->ci_uuidlen + 1); 1502d9638e54Smws fmd_free(cip->ci_code, cip->ci_codelen); 15030b9e3e76Smws (void) fmd_buf_hash_destroy(&cip->ci_bufs); 15047c478bd9Sstevel@tonic-gate 15057c478bd9Sstevel@tonic-gate fmd_module_rele(cip->ci_mod); 15067c478bd9Sstevel@tonic-gate fmd_free(cip, sizeof (fmd_case_impl_t)); 15077c478bd9Sstevel@tonic-gate } 15087c478bd9Sstevel@tonic-gate 15097c478bd9Sstevel@tonic-gate void 15107c478bd9Sstevel@tonic-gate fmd_case_hold(fmd_case_t *cp) 15117c478bd9Sstevel@tonic-gate { 15127c478bd9Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 15137c478bd9Sstevel@tonic-gate 15147c478bd9Sstevel@tonic-gate (void) 
pthread_mutex_lock(&cip->ci_lock); 151597c04605Scy152378 fmd_case_hold_locked(cp); 15167c478bd9Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock); 15177c478bd9Sstevel@tonic-gate } 15187c478bd9Sstevel@tonic-gate 15197c478bd9Sstevel@tonic-gate void 1520d9638e54Smws fmd_case_hold_locked(fmd_case_t *cp) 1521d9638e54Smws { 1522d9638e54Smws fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1523d9638e54Smws 1524d9638e54Smws ASSERT(MUTEX_HELD(&cip->ci_lock)); 152597c04605Scy152378 if (cip->ci_flags & FMD_CF_DELETING) 152697c04605Scy152378 fmd_panic("attempt to hold a deleting case %p (%s)\n", 152797c04605Scy152378 (void *)cip, cip->ci_uuid); 1528d9638e54Smws cip->ci_refs++; 1529d9638e54Smws ASSERT(cip->ci_refs != 0); 1530d9638e54Smws } 1531d9638e54Smws 153297c04605Scy152378 static fmd_case_impl_t * 153397c04605Scy152378 fmd_case_tryhold(fmd_case_impl_t *cip) 153497c04605Scy152378 { 153597c04605Scy152378 /* 153697c04605Scy152378 * If the case's "deleting" bit is unset, hold and return case, 153797c04605Scy152378 * otherwise, return NULL. 
153897c04605Scy152378 */ 153997c04605Scy152378 (void) pthread_mutex_lock(&cip->ci_lock); 154097c04605Scy152378 if (cip->ci_flags & FMD_CF_DELETING) { 154197c04605Scy152378 (void) pthread_mutex_unlock(&cip->ci_lock); 154297c04605Scy152378 cip = NULL; 154397c04605Scy152378 } else { 154497c04605Scy152378 fmd_case_hold_locked((fmd_case_t *)cip); 154597c04605Scy152378 (void) pthread_mutex_unlock(&cip->ci_lock); 154697c04605Scy152378 } 154797c04605Scy152378 return (cip); 154897c04605Scy152378 } 154997c04605Scy152378 1550d9638e54Smws void 15517c478bd9Sstevel@tonic-gate fmd_case_rele(fmd_case_t *cp) 15527c478bd9Sstevel@tonic-gate { 15537c478bd9Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 15547c478bd9Sstevel@tonic-gate 15557c478bd9Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock); 15567c478bd9Sstevel@tonic-gate ASSERT(cip->ci_refs != 0); 15577c478bd9Sstevel@tonic-gate 15587c478bd9Sstevel@tonic-gate if (--cip->ci_refs == 0) 1559d9638e54Smws fmd_case_destroy((fmd_case_t *)cip, B_TRUE); 15607c478bd9Sstevel@tonic-gate else 15617c478bd9Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock); 15627c478bd9Sstevel@tonic-gate } 15637c478bd9Sstevel@tonic-gate 1564567cc2e6Sstephh void 1565567cc2e6Sstephh fmd_case_rele_locked(fmd_case_t *cp) 1566567cc2e6Sstephh { 1567567cc2e6Sstephh fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1568567cc2e6Sstephh 1569567cc2e6Sstephh ASSERT(MUTEX_HELD(&cip->ci_lock)); 1570567cc2e6Sstephh --cip->ci_refs; 1571567cc2e6Sstephh ASSERT(cip->ci_refs != 0); 1572567cc2e6Sstephh } 1573567cc2e6Sstephh 15747aec1d6eScindi int 15757c478bd9Sstevel@tonic-gate fmd_case_insert_principal(fmd_case_t *cp, fmd_event_t *ep) 15767c478bd9Sstevel@tonic-gate { 15777c478bd9Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 15787aec1d6eScindi fmd_case_item_t *cit; 15797c478bd9Sstevel@tonic-gate fmd_event_t *oep; 15807c478bd9Sstevel@tonic-gate uint_t state; 15817aec1d6eScindi int new; 15827c478bd9Sstevel@tonic-gate 
15837c478bd9Sstevel@tonic-gate fmd_event_hold(ep); 15847c478bd9Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock); 15857c478bd9Sstevel@tonic-gate 1586d9638e54Smws if (cip->ci_flags & FMD_CF_SOLVED) 15877c478bd9Sstevel@tonic-gate state = FMD_EVS_DIAGNOSED; 15887c478bd9Sstevel@tonic-gate else 15897c478bd9Sstevel@tonic-gate state = FMD_EVS_ACCEPTED; 15907c478bd9Sstevel@tonic-gate 15917c478bd9Sstevel@tonic-gate oep = cip->ci_principal; 15927c478bd9Sstevel@tonic-gate cip->ci_principal = ep; 15937c478bd9Sstevel@tonic-gate 15947aec1d6eScindi for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) { 15957aec1d6eScindi if (cit->cit_event == ep) 15967aec1d6eScindi break; 15977aec1d6eScindi } 15987aec1d6eScindi 15997c478bd9Sstevel@tonic-gate cip->ci_flags |= FMD_CF_DIRTY; 16007aec1d6eScindi new = cit == NULL && ep != oep; 16017aec1d6eScindi 16027c478bd9Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock); 16037c478bd9Sstevel@tonic-gate 16047c478bd9Sstevel@tonic-gate fmd_module_setcdirty(cip->ci_mod); 16057c478bd9Sstevel@tonic-gate fmd_event_transition(ep, state); 16067c478bd9Sstevel@tonic-gate 16077c478bd9Sstevel@tonic-gate if (oep != NULL) 16087c478bd9Sstevel@tonic-gate fmd_event_rele(oep); 16097aec1d6eScindi 16107aec1d6eScindi return (new); 16117c478bd9Sstevel@tonic-gate } 16127c478bd9Sstevel@tonic-gate 16137aec1d6eScindi int 16147c478bd9Sstevel@tonic-gate fmd_case_insert_event(fmd_case_t *cp, fmd_event_t *ep) 16157c478bd9Sstevel@tonic-gate { 16167c478bd9Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 16177aec1d6eScindi fmd_case_item_t *cit; 16187c478bd9Sstevel@tonic-gate uint_t state; 16197aec1d6eScindi int new; 1620540db9a9SStephen Hanson boolean_t injected; 16217c478bd9Sstevel@tonic-gate 16227c478bd9Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock); 16237c478bd9Sstevel@tonic-gate 16247aec1d6eScindi if (cip->ci_flags & FMD_CF_SOLVED) 16257aec1d6eScindi state = FMD_EVS_DIAGNOSED; 16267aec1d6eScindi else 16277aec1d6eScindi 
state = FMD_EVS_ACCEPTED; 16287aec1d6eScindi 16297aec1d6eScindi for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) { 16307aec1d6eScindi if (cit->cit_event == ep) 16317aec1d6eScindi break; 16327aec1d6eScindi } 16337aec1d6eScindi 16347aec1d6eScindi new = cit == NULL && ep != cip->ci_principal; 16357aec1d6eScindi 16367aec1d6eScindi /* 16377aec1d6eScindi * If the event is already in the case or the case is already solved, 16387aec1d6eScindi * there is no reason to save it: just transition it appropriately. 16397aec1d6eScindi */ 16407aec1d6eScindi if (cit != NULL || (cip->ci_flags & FMD_CF_SOLVED)) { 16417aec1d6eScindi (void) pthread_mutex_unlock(&cip->ci_lock); 16427aec1d6eScindi fmd_event_transition(ep, state); 16437aec1d6eScindi return (new); 16447aec1d6eScindi } 16457aec1d6eScindi 16467aec1d6eScindi cit = fmd_alloc(sizeof (fmd_case_item_t), FMD_SLEEP); 16477aec1d6eScindi fmd_event_hold(ep); 16487aec1d6eScindi 1649540db9a9SStephen Hanson if (nvlist_lookup_boolean_value(((fmd_event_impl_t *)ep)->ev_nvl, 1650540db9a9SStephen Hanson "__injected", &injected) == 0 && injected) 1651540db9a9SStephen Hanson fmd_case_set_injected(cp); 1652540db9a9SStephen Hanson 16537c478bd9Sstevel@tonic-gate cit->cit_next = cip->ci_items; 16547c478bd9Sstevel@tonic-gate cit->cit_event = ep; 16557c478bd9Sstevel@tonic-gate 16567c478bd9Sstevel@tonic-gate cip->ci_items = cit; 16577c478bd9Sstevel@tonic-gate cip->ci_nitems++; 16587c478bd9Sstevel@tonic-gate 16597c478bd9Sstevel@tonic-gate cip->ci_flags |= FMD_CF_DIRTY; 16607c478bd9Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock); 16617c478bd9Sstevel@tonic-gate 16627c478bd9Sstevel@tonic-gate fmd_module_setcdirty(cip->ci_mod); 16637c478bd9Sstevel@tonic-gate fmd_event_transition(ep, state); 16647aec1d6eScindi 16657aec1d6eScindi return (new); 16667c478bd9Sstevel@tonic-gate } 16677c478bd9Sstevel@tonic-gate 16687c478bd9Sstevel@tonic-gate void 16697c478bd9Sstevel@tonic-gate fmd_case_insert_suspect(fmd_case_t *cp, nvlist_t *nvl) 
16707c478bd9Sstevel@tonic-gate { 16717c478bd9Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 16727c478bd9Sstevel@tonic-gate fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP); 16737c478bd9Sstevel@tonic-gate 16747c478bd9Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock); 1675567cc2e6Sstephh ASSERT(cip->ci_state < FMD_CASE_CLOSE_WAIT); 16767c478bd9Sstevel@tonic-gate cip->ci_flags |= FMD_CF_DIRTY; 16777c478bd9Sstevel@tonic-gate 16787c478bd9Sstevel@tonic-gate cis->cis_next = cip->ci_suspects; 16797c478bd9Sstevel@tonic-gate cis->cis_nvl = nvl; 16807c478bd9Sstevel@tonic-gate 16817c478bd9Sstevel@tonic-gate cip->ci_suspects = cis; 16827c478bd9Sstevel@tonic-gate cip->ci_nsuspects++; 16837c478bd9Sstevel@tonic-gate 16847c478bd9Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock); 1685cbf75e67SStephen Hanson if (cip->ci_xprt == NULL) 16867c478bd9Sstevel@tonic-gate fmd_module_setcdirty(cip->ci_mod); 16877c478bd9Sstevel@tonic-gate } 16887c478bd9Sstevel@tonic-gate 16897c478bd9Sstevel@tonic-gate void 1690d9638e54Smws fmd_case_recreate_suspect(fmd_case_t *cp, nvlist_t *nvl) 1691d9638e54Smws { 1692d9638e54Smws fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1693d9638e54Smws fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP); 169444743693Sstephh boolean_t b; 1695d9638e54Smws 1696d9638e54Smws (void) pthread_mutex_lock(&cip->ci_lock); 1697d9638e54Smws 1698d9638e54Smws cis->cis_next = cip->ci_suspects; 1699d9638e54Smws cis->cis_nvl = nvl; 1700d9638e54Smws 170144743693Sstephh if (nvlist_lookup_boolean_value(nvl, 170244743693Sstephh FM_SUSPECT_MESSAGE, &b) == 0 && b == B_FALSE) 170344743693Sstephh cip->ci_flags |= FMD_CF_INVISIBLE; 170444743693Sstephh 1705d9638e54Smws cip->ci_suspects = cis; 1706d9638e54Smws cip->ci_nsuspects++; 1707d9638e54Smws 1708d9638e54Smws (void) pthread_mutex_unlock(&cip->ci_lock); 1709d9638e54Smws } 1710d9638e54Smws 1711d9638e54Smws void 17127c478bd9Sstevel@tonic-gate 
fmd_case_reset_suspects(fmd_case_t *cp) 17137c478bd9Sstevel@tonic-gate { 17147c478bd9Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 17157c478bd9Sstevel@tonic-gate 17167c478bd9Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock); 17177c478bd9Sstevel@tonic-gate ASSERT(cip->ci_state < FMD_CASE_SOLVED); 17187c478bd9Sstevel@tonic-gate 1719162ba6eaSmws fmd_case_destroy_suspects(cip); 17207c478bd9Sstevel@tonic-gate cip->ci_flags |= FMD_CF_DIRTY; 17217c478bd9Sstevel@tonic-gate 17227c478bd9Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock); 17237c478bd9Sstevel@tonic-gate fmd_module_setcdirty(cip->ci_mod); 17247c478bd9Sstevel@tonic-gate } 17257c478bd9Sstevel@tonic-gate 1726567cc2e6Sstephh /*ARGSUSED*/ 1727567cc2e6Sstephh static void 1728567cc2e6Sstephh fmd_case_unusable(fmd_asru_link_t *alp, void *arg) 1729567cc2e6Sstephh { 1730567cc2e6Sstephh (void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE); 1731567cc2e6Sstephh } 1732567cc2e6Sstephh 1733d9638e54Smws /* 1734d9638e54Smws * Grab ci_lock and update the case state and set the dirty bit. Then perform 1735d9638e54Smws * whatever actions and emit whatever events are appropriate for the state. 1736d9638e54Smws * Refer to the topmost block comment explaining the state machine for details. 
1737d9638e54Smws */ 17387c478bd9Sstevel@tonic-gate void 1739d9638e54Smws fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags) 17407c478bd9Sstevel@tonic-gate { 17417c478bd9Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 1742d9638e54Smws fmd_case_item_t *cit; 1743d9638e54Smws fmd_event_t *e; 174425c6ff4bSstephh int resolved = 0; 174525c6ff4bSstephh int any_unusable_and_present = 0; 17467c478bd9Sstevel@tonic-gate 174725c6ff4bSstephh ASSERT(state <= FMD_CASE_RESOLVED); 17487c478bd9Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock); 1749162ba6eaSmws 175044743693Sstephh if (!(cip->ci_flags & FMD_CF_SOLVED) && !(flags & FMD_CF_SOLVED)) 1751cbf75e67SStephen Hanson flags &= ~(FMD_CF_ISOLATED | FMD_CF_REPAIRED | FMD_CF_RESOLVED); 1752162ba6eaSmws 1753d9638e54Smws cip->ci_flags |= flags; 17547c478bd9Sstevel@tonic-gate 17557c478bd9Sstevel@tonic-gate if (cip->ci_state >= state) { 17567c478bd9Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock); 17577c478bd9Sstevel@tonic-gate return; /* already in specified state */ 17587c478bd9Sstevel@tonic-gate } 17597c478bd9Sstevel@tonic-gate 17607c478bd9Sstevel@tonic-gate TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid, 17617c478bd9Sstevel@tonic-gate _fmd_case_snames[cip->ci_state], _fmd_case_snames[state])); 17627c478bd9Sstevel@tonic-gate 17637c478bd9Sstevel@tonic-gate cip->ci_state = state; 17647c478bd9Sstevel@tonic-gate cip->ci_flags |= FMD_CF_DIRTY; 17657c478bd9Sstevel@tonic-gate 1766d9638e54Smws if (cip->ci_xprt == NULL && cip->ci_mod != fmd.d_rmod) 1767d9638e54Smws fmd_module_setcdirty(cip->ci_mod); 1768d9638e54Smws 17697c478bd9Sstevel@tonic-gate switch (state) { 1770d9638e54Smws case FMD_CASE_SOLVED: 17717c478bd9Sstevel@tonic-gate for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) 17727c478bd9Sstevel@tonic-gate fmd_event_transition(cit->cit_event, FMD_EVS_DIAGNOSED); 17737c478bd9Sstevel@tonic-gate 17747c478bd9Sstevel@tonic-gate if (cip->ci_principal != NULL) { 
17757c478bd9Sstevel@tonic-gate fmd_event_transition(cip->ci_principal, 17767c478bd9Sstevel@tonic-gate FMD_EVS_DIAGNOSED); 17777c478bd9Sstevel@tonic-gate } 17787c478bd9Sstevel@tonic-gate break; 17797c478bd9Sstevel@tonic-gate 1780d9638e54Smws case FMD_CASE_CLOSE_WAIT: 1781d9638e54Smws /* 1782d9638e54Smws * If the case was never solved, do not change ASRUs. 1783d9638e54Smws * If the case was never fmd_case_closed, do not change ASRUs. 1784d9638e54Smws * If the case was repaired, do not change ASRUs. 1785d9638e54Smws */ 1786d9638e54Smws if ((cip->ci_flags & (FMD_CF_SOLVED | FMD_CF_ISOLATED | 1787567cc2e6Sstephh FMD_CF_REPAIRED)) == (FMD_CF_SOLVED | FMD_CF_ISOLATED)) 1788567cc2e6Sstephh fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, 1789567cc2e6Sstephh fmd_case_unusable, NULL); 17907c478bd9Sstevel@tonic-gate 17917c478bd9Sstevel@tonic-gate /* 1792d9638e54Smws * If an orphaned case transitions to CLOSE_WAIT, the owning 1793705e9f42SStephen Hanson * module is no longer loaded: continue on to CASE_CLOSED or 1794705e9f42SStephen Hanson * CASE_REPAIRED as appropriate. 
1795d9638e54Smws */ 1796705e9f42SStephen Hanson if (fmd_case_orphaned(cp)) { 1797705e9f42SStephen Hanson if (cip->ci_flags & FMD_CF_REPAIRED) { 1798705e9f42SStephen Hanson state = cip->ci_state = FMD_CASE_REPAIRED; 1799705e9f42SStephen Hanson TRACE((FMD_DBG_CASE, "case %s %s->%s", 1800705e9f42SStephen Hanson cip->ci_uuid, 1801705e9f42SStephen Hanson _fmd_case_snames[FMD_CASE_CLOSE_WAIT], 1802705e9f42SStephen Hanson _fmd_case_snames[FMD_CASE_REPAIRED])); 1803705e9f42SStephen Hanson goto do_repair; 1804705e9f42SStephen Hanson } else { 1805d9638e54Smws state = cip->ci_state = FMD_CASE_CLOSED; 1806705e9f42SStephen Hanson TRACE((FMD_DBG_CASE, "case %s %s->%s", 1807705e9f42SStephen Hanson cip->ci_uuid, 1808705e9f42SStephen Hanson _fmd_case_snames[FMD_CASE_CLOSE_WAIT], 1809705e9f42SStephen Hanson _fmd_case_snames[FMD_CASE_CLOSED])); 1810705e9f42SStephen Hanson } 1811705e9f42SStephen Hanson } 18127c478bd9Sstevel@tonic-gate break; 1813d9638e54Smws 1814d9638e54Smws case FMD_CASE_REPAIRED: 1815705e9f42SStephen Hanson do_repair: 1816cbf75e67SStephen Hanson ASSERT(cip->ci_xprt != NULL || fmd_case_orphaned(cp)); 181725c6ff4bSstephh 181825c6ff4bSstephh /* 1819cbf75e67SStephen Hanson * If we've been requested to transition straight on to the 1820cbf75e67SStephen Hanson * RESOLVED state (which can happen with fault proxying where a 1821cbf75e67SStephen Hanson * list.resolved or a uuresolved is received from the other 1822cbf75e67SStephen Hanson * side), or if all suspects are already either usable or not 1823cbf75e67SStephen Hanson * present then transition straight to RESOLVED state, 1824cbf75e67SStephen Hanson * publishing both the list.repaired and list.resolved. For a 1825cbf75e67SStephen Hanson * proxy, if we discover here that all suspects are already 1826cbf75e67SStephen Hanson * either usable or not present, notify the diag side instead 1827cbf75e67SStephen Hanson * using fmd_xprt_uuresolved(). 
182825c6ff4bSstephh */ 1829cbf75e67SStephen Hanson if (flags & FMD_CF_RESOLVED) { 18305750ef5cSStephen Hanson if (cip->ci_xprt != NULL) 1831cbf75e67SStephen Hanson fmd_list_delete(&cip->ci_mod->mod_cases, cip); 1832cbf75e67SStephen Hanson } else { 1833cbf75e67SStephen Hanson fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, 1834cbf75e67SStephen Hanson fmd_case_unusable_and_present, 1835cbf75e67SStephen Hanson &any_unusable_and_present); 1836cbf75e67SStephen Hanson if (any_unusable_and_present) 1837cbf75e67SStephen Hanson break; 1838cbf75e67SStephen Hanson if (cip->ci_xprt != NULL) { 1839cbf75e67SStephen Hanson fmd_xprt_uuresolved(cip->ci_xprt, cip->ci_uuid); 1840cbf75e67SStephen Hanson break; 1841cbf75e67SStephen Hanson } 1842cbf75e67SStephen Hanson } 1843cbf75e67SStephen Hanson 184425c6ff4bSstephh cip->ci_state = FMD_CASE_RESOLVED; 184525c6ff4bSstephh (void) pthread_mutex_unlock(&cip->ci_lock); 184625c6ff4bSstephh fmd_case_publish(cp, state); 184725c6ff4bSstephh TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid, 184825c6ff4bSstephh _fmd_case_snames[FMD_CASE_REPAIRED], 184925c6ff4bSstephh _fmd_case_snames[FMD_CASE_RESOLVED])); 185025c6ff4bSstephh state = FMD_CASE_RESOLVED; 185125c6ff4bSstephh resolved = 1; 185225c6ff4bSstephh (void) pthread_mutex_lock(&cip->ci_lock); 185325c6ff4bSstephh break; 185425c6ff4bSstephh 185525c6ff4bSstephh case FMD_CASE_RESOLVED: 1856cbf75e67SStephen Hanson /* 1857cbf75e67SStephen Hanson * For a proxy, no need to check that all suspects are already 1858cbf75e67SStephen Hanson * either usable or not present - this request has come from 1859cbf75e67SStephen Hanson * the diagnosing side which makes the final decision on this. 
1860cbf75e67SStephen Hanson */ 1861cbf75e67SStephen Hanson if (cip->ci_xprt != NULL) { 1862cbf75e67SStephen Hanson fmd_list_delete(&cip->ci_mod->mod_cases, cip); 1863cbf75e67SStephen Hanson resolved = 1; 1864cbf75e67SStephen Hanson break; 1865cbf75e67SStephen Hanson } 1866cbf75e67SStephen Hanson 186725c6ff4bSstephh ASSERT(fmd_case_orphaned(cp)); 186825c6ff4bSstephh 186925c6ff4bSstephh /* 187025c6ff4bSstephh * If all suspects are already either usable or not present then 187125c6ff4bSstephh * carry on, publish list.resolved and discard the case. 187225c6ff4bSstephh */ 187325c6ff4bSstephh fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, 187425c6ff4bSstephh fmd_case_unusable_and_present, &any_unusable_and_present); 187525c6ff4bSstephh if (any_unusable_and_present) { 187625c6ff4bSstephh (void) pthread_mutex_unlock(&cip->ci_lock); 187725c6ff4bSstephh return; 187825c6ff4bSstephh } 187925c6ff4bSstephh 188025c6ff4bSstephh resolved = 1; 1881d9638e54Smws break; 18827c478bd9Sstevel@tonic-gate } 18837c478bd9Sstevel@tonic-gate 18847c478bd9Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock); 18857c478bd9Sstevel@tonic-gate 18867c478bd9Sstevel@tonic-gate /* 1887d9638e54Smws * If the module has initialized, then publish the appropriate event 1888d9638e54Smws * for the new case state. If not, we are being called from the 1889d9638e54Smws * checkpoint code during module load, in which case the module's 1890d9638e54Smws * _fmd_init() routine hasn't finished yet, and our event dictionaries 1891d9638e54Smws * may not be open yet, which will prevent us from computing the event 1892d9638e54Smws * code. Defer the call to fmd_case_publish() by enqueuing a PUBLISH 1893d9638e54Smws * event in our queue: this won't be processed until _fmd_init is done. 
18947c478bd9Sstevel@tonic-gate */ 18957c478bd9Sstevel@tonic-gate if (cip->ci_mod->mod_flags & FMD_MOD_INIT) 18967c478bd9Sstevel@tonic-gate fmd_case_publish(cp, state); 1897d9638e54Smws else { 1898d9638e54Smws fmd_case_hold(cp); 1899d9638e54Smws e = fmd_event_create(FMD_EVT_PUBLISH, FMD_HRT_NOW, NULL, cp); 1900d9638e54Smws fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e); 1901d9638e54Smws } 1902d9638e54Smws 190325c6ff4bSstephh if (resolved) { 19045750ef5cSStephen Hanson if (cip->ci_xprt != NULL) { 1905d9638e54Smws /* 19065750ef5cSStephen Hanson * If we transitioned to RESOLVED, adjust the reference 19075750ef5cSStephen Hanson * count to reflect our removal from 19085750ef5cSStephen Hanson * fmd.d_rmod->mod_cases above. If the caller has not 19095750ef5cSStephen Hanson * placed an additional hold on the case, it will now 19105750ef5cSStephen Hanson * be freed. 1911d9638e54Smws */ 1912567cc2e6Sstephh (void) pthread_mutex_lock(&cip->ci_lock); 1913567cc2e6Sstephh fmd_asru_hash_delete_case(fmd.d_asrus, cp); 1914567cc2e6Sstephh (void) pthread_mutex_unlock(&cip->ci_lock); 1915d9638e54Smws fmd_case_rele(cp); 19165750ef5cSStephen Hanson } else { 19175750ef5cSStephen Hanson fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, 19185750ef5cSStephen Hanson fmd_asru_log_resolved, NULL); 19195750ef5cSStephen Hanson (void) pthread_mutex_lock(&cip->ci_lock); 19205750ef5cSStephen Hanson /* mark as "ready to be discarded */ 19215750ef5cSStephen Hanson cip->ci_flags |= FMD_CF_RES_CMPL; 19225750ef5cSStephen Hanson (void) pthread_mutex_unlock(&cip->ci_lock); 19237c478bd9Sstevel@tonic-gate } 1924567cc2e6Sstephh } 19255750ef5cSStephen Hanson } 19265750ef5cSStephen Hanson 19275750ef5cSStephen Hanson /* 19285750ef5cSStephen Hanson * Discard any case if it is in RESOLVED state (and if check_if_aged argument 19295750ef5cSStephen Hanson * is set if all suspects have passed the rsrc.aged time). 
19305750ef5cSStephen Hanson */ 19315750ef5cSStephen Hanson void 19325750ef5cSStephen Hanson fmd_case_discard_resolved(fmd_case_t *cp, void *arg) 19335750ef5cSStephen Hanson { 19345750ef5cSStephen Hanson int check_if_aged = *(int *)arg; 19355750ef5cSStephen Hanson fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 19365750ef5cSStephen Hanson 19375750ef5cSStephen Hanson /* 19385750ef5cSStephen Hanson * First check if case has completed transition to resolved. 19395750ef5cSStephen Hanson */ 19405750ef5cSStephen Hanson (void) pthread_mutex_lock(&cip->ci_lock); 19415750ef5cSStephen Hanson if (!(cip->ci_flags & FMD_CF_RES_CMPL)) { 19425750ef5cSStephen Hanson (void) pthread_mutex_unlock(&cip->ci_lock); 19435750ef5cSStephen Hanson return; 19445750ef5cSStephen Hanson } 19455750ef5cSStephen Hanson 19465750ef5cSStephen Hanson /* 19475750ef5cSStephen Hanson * Now if check_is_aged is set, see if all suspects have aged. 19485750ef5cSStephen Hanson */ 19495750ef5cSStephen Hanson if (check_if_aged) { 19505750ef5cSStephen Hanson int aged = 1; 19515750ef5cSStephen Hanson 19525750ef5cSStephen Hanson fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, 19535750ef5cSStephen Hanson fmd_asru_check_if_aged, &aged); 19545750ef5cSStephen Hanson if (!aged) { 19555750ef5cSStephen Hanson (void) pthread_mutex_unlock(&cip->ci_lock); 19565750ef5cSStephen Hanson return; 19575750ef5cSStephen Hanson } 19585750ef5cSStephen Hanson } 19595750ef5cSStephen Hanson 19605750ef5cSStephen Hanson /* 19615750ef5cSStephen Hanson * Finally discard the case, clearing FMD_CF_RES_CMPL so we don't 19625750ef5cSStephen Hanson * do it twice. 
19635750ef5cSStephen Hanson */ 19645750ef5cSStephen Hanson fmd_module_lock(cip->ci_mod); 19655750ef5cSStephen Hanson fmd_list_delete(&cip->ci_mod->mod_cases, cip); 19665750ef5cSStephen Hanson fmd_module_unlock(cip->ci_mod); 19675750ef5cSStephen Hanson fmd_asru_hash_delete_case(fmd.d_asrus, cp); 19685750ef5cSStephen Hanson cip->ci_flags &= ~FMD_CF_RES_CMPL; 19695750ef5cSStephen Hanson (void) pthread_mutex_unlock(&cip->ci_lock); 19705750ef5cSStephen Hanson fmd_case_rele(cp); 19715750ef5cSStephen Hanson } 19727c478bd9Sstevel@tonic-gate 19730b9e3e76Smws /* 19740b9e3e76Smws * Transition the specified case to *at least* the specified state by first 19750b9e3e76Smws * re-validating the suspect list using the resource cache. This function is 19760b9e3e76Smws * employed by the checkpoint code when restoring a saved, solved case to see 19770b9e3e76Smws * if the state of the case has effectively changed while fmd was not running 197825c6ff4bSstephh * or the module was not loaded. 19790b9e3e76Smws */ 19800b9e3e76Smws void 19810b9e3e76Smws fmd_case_transition_update(fmd_case_t *cp, uint_t state, uint_t flags) 19820b9e3e76Smws { 19830b9e3e76Smws fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 19840b9e3e76Smws 19850b9e3e76Smws int usable = 0; /* are any suspects usable? 
*/ 19860b9e3e76Smws 19870b9e3e76Smws ASSERT(state >= FMD_CASE_SOLVED); 19880b9e3e76Smws (void) pthread_mutex_lock(&cip->ci_lock); 19890b9e3e76Smws 1990567cc2e6Sstephh fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_usable, &usable); 19910b9e3e76Smws 19920b9e3e76Smws (void) pthread_mutex_unlock(&cip->ci_lock); 19930b9e3e76Smws 199425c6ff4bSstephh if (!usable) { 19950b9e3e76Smws state = MAX(state, FMD_CASE_CLOSE_WAIT); 19960b9e3e76Smws flags |= FMD_CF_ISOLATED; 19970b9e3e76Smws } 19980b9e3e76Smws 19990b9e3e76Smws fmd_case_transition(cp, state, flags); 20000b9e3e76Smws } 20010b9e3e76Smws 20027c478bd9Sstevel@tonic-gate void 20037c478bd9Sstevel@tonic-gate fmd_case_setdirty(fmd_case_t *cp) 20047c478bd9Sstevel@tonic-gate { 20057c478bd9Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 20067c478bd9Sstevel@tonic-gate 20077c478bd9Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock); 20087c478bd9Sstevel@tonic-gate cip->ci_flags |= FMD_CF_DIRTY; 20097c478bd9Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock); 20107c478bd9Sstevel@tonic-gate 20117c478bd9Sstevel@tonic-gate fmd_module_setcdirty(cip->ci_mod); 20127c478bd9Sstevel@tonic-gate } 20137c478bd9Sstevel@tonic-gate 20147c478bd9Sstevel@tonic-gate void 20157c478bd9Sstevel@tonic-gate fmd_case_clrdirty(fmd_case_t *cp) 20167c478bd9Sstevel@tonic-gate { 20177c478bd9Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 20187c478bd9Sstevel@tonic-gate 20197c478bd9Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock); 20207c478bd9Sstevel@tonic-gate cip->ci_flags &= ~FMD_CF_DIRTY; 20217c478bd9Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock); 20227c478bd9Sstevel@tonic-gate } 20237c478bd9Sstevel@tonic-gate 20247c478bd9Sstevel@tonic-gate void 20257c478bd9Sstevel@tonic-gate fmd_case_commit(fmd_case_t *cp) 20267c478bd9Sstevel@tonic-gate { 20277c478bd9Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 20287c478bd9Sstevel@tonic-gate fmd_case_item_t *cit; 
20297c478bd9Sstevel@tonic-gate 20307c478bd9Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock); 20317c478bd9Sstevel@tonic-gate 20327c478bd9Sstevel@tonic-gate if (cip->ci_flags & FMD_CF_DIRTY) { 20337c478bd9Sstevel@tonic-gate for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) 20347c478bd9Sstevel@tonic-gate fmd_event_commit(cit->cit_event); 20357c478bd9Sstevel@tonic-gate 20367c478bd9Sstevel@tonic-gate if (cip->ci_principal != NULL) 20377c478bd9Sstevel@tonic-gate fmd_event_commit(cip->ci_principal); 20387c478bd9Sstevel@tonic-gate 20397c478bd9Sstevel@tonic-gate fmd_buf_hash_commit(&cip->ci_bufs); 20407c478bd9Sstevel@tonic-gate cip->ci_flags &= ~FMD_CF_DIRTY; 20417c478bd9Sstevel@tonic-gate } 20427c478bd9Sstevel@tonic-gate 20437c478bd9Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock); 20447c478bd9Sstevel@tonic-gate } 20457c478bd9Sstevel@tonic-gate 20467c478bd9Sstevel@tonic-gate /* 2047cbf75e67SStephen Hanson * On proxy side, send back repair/acquit/etc request to diagnosing side 2048cbf75e67SStephen Hanson */ 2049cbf75e67SStephen Hanson void 2050cbf75e67SStephen Hanson fmd_case_xprt_updated(fmd_case_t *cp) 2051cbf75e67SStephen Hanson { 2052cbf75e67SStephen Hanson fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 2053cbf75e67SStephen Hanson nvlist_t **nva; 2054cbf75e67SStephen Hanson uint8_t *ba; 2055cbf75e67SStephen Hanson int msg = B_TRUE; 2056cbf75e67SStephen Hanson int count = 0; 2057cbf75e67SStephen Hanson fmd_case_lst_t fcl; 2058cbf75e67SStephen Hanson 2059cbf75e67SStephen Hanson ASSERT(cip->ci_xprt != NULL); 2060cbf75e67SStephen Hanson (void) pthread_mutex_lock(&cip->ci_lock); 2061cbf75e67SStephen Hanson ba = alloca(sizeof (uint8_t) * cip->ci_nsuspects); 2062cbf75e67SStephen Hanson nva = alloca(sizeof (nvlist_t *) * cip->ci_nsuspects); 2063cbf75e67SStephen Hanson fcl.fcl_countp = &count; 2064cbf75e67SStephen Hanson fcl.fcl_maxcount = cip->ci_nsuspects; 2065cbf75e67SStephen Hanson fcl.fcl_msgp = &msg; 2066cbf75e67SStephen Hanson fcl.fcl_ba = 
ba; 2067cbf75e67SStephen Hanson fcl.fcl_nva = nva; 2068cbf75e67SStephen Hanson fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_set_lst, &fcl); 2069cbf75e67SStephen Hanson (void) pthread_mutex_unlock(&cip->ci_lock); 2070cbf75e67SStephen Hanson fmd_xprt_updated(cip->ci_xprt, cip->ci_uuid, ba, cip->ci_proxy_asru, 2071cbf75e67SStephen Hanson count); 2072cbf75e67SStephen Hanson } 2073cbf75e67SStephen Hanson 2074cbf75e67SStephen Hanson /* 2075cbf75e67SStephen Hanson * fmd_case_update_status() can be called on either the proxy side when a 2076cbf75e67SStephen Hanson * list.suspect is received, or on the diagnosing side when an update request 2077cbf75e67SStephen Hanson * is received from the proxy. It updates the status in the resource cache. 2078cbf75e67SStephen Hanson */ 2079cbf75e67SStephen Hanson void 2080cbf75e67SStephen Hanson fmd_case_update_status(fmd_case_t *cp, uint8_t *statusp, uint8_t *proxy_asrup, 2081cbf75e67SStephen Hanson uint8_t *diag_asrup) 2082cbf75e67SStephen Hanson { 2083cbf75e67SStephen Hanson fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 2084cbf75e67SStephen Hanson int count = 0; 2085cbf75e67SStephen Hanson fmd_asru_update_status_t faus; 2086cbf75e67SStephen Hanson 2087cbf75e67SStephen Hanson /* 2088cbf75e67SStephen Hanson * update status of resource cache entries 2089cbf75e67SStephen Hanson */ 2090cbf75e67SStephen Hanson faus.faus_countp = &count; 2091cbf75e67SStephen Hanson faus.faus_maxcount = cip->ci_nsuspects; 2092cbf75e67SStephen Hanson faus.faus_ba = statusp; 2093cbf75e67SStephen Hanson faus.faus_proxy_asru = proxy_asrup; 2094cbf75e67SStephen Hanson faus.faus_diag_asru = diag_asrup; 2095cbf75e67SStephen Hanson faus.faus_is_proxy = (cip->ci_xprt != NULL); 2096cbf75e67SStephen Hanson (void) pthread_mutex_lock(&cip->ci_lock); 2097cbf75e67SStephen Hanson fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_update_status, 2098cbf75e67SStephen Hanson &faus); 2099cbf75e67SStephen Hanson (void) pthread_mutex_unlock(&cip->ci_lock); 
2100cbf75e67SStephen Hanson } 2101cbf75e67SStephen Hanson 2102cbf75e67SStephen Hanson /* 2103cbf75e67SStephen Hanson * Called on either the proxy side or the diag side when a repair has taken 2104cbf75e67SStephen Hanson * place on the other side but this side may know the asru "contains" 2105cbf75e67SStephen Hanson * relationships. 2106cbf75e67SStephen Hanson */ 2107cbf75e67SStephen Hanson void 2108cbf75e67SStephen Hanson fmd_case_update_containees(fmd_case_t *cp) 2109cbf75e67SStephen Hanson { 2110cbf75e67SStephen Hanson fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 2111cbf75e67SStephen Hanson 2112cbf75e67SStephen Hanson (void) pthread_mutex_lock(&cip->ci_lock); 2113cbf75e67SStephen Hanson fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, 2114cbf75e67SStephen Hanson fmd_asru_update_containees, NULL); 2115cbf75e67SStephen Hanson (void) pthread_mutex_unlock(&cip->ci_lock); 2116cbf75e67SStephen Hanson } 2117cbf75e67SStephen Hanson 2118cbf75e67SStephen Hanson /* 2119cbf75e67SStephen Hanson * fmd_case_close_status() is called on diagnosing side when proxy side 2120cbf75e67SStephen Hanson * has had a uuclose. It updates the status in the resource cache. 
2121cbf75e67SStephen Hanson */ 2122cbf75e67SStephen Hanson void 2123cbf75e67SStephen Hanson fmd_case_close_status(fmd_case_t *cp) 2124cbf75e67SStephen Hanson { 2125cbf75e67SStephen Hanson fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 2126cbf75e67SStephen Hanson int count = 0; 2127cbf75e67SStephen Hanson fmd_asru_close_status_t facs; 2128cbf75e67SStephen Hanson 2129cbf75e67SStephen Hanson /* 2130cbf75e67SStephen Hanson * update status of resource cache entries 2131cbf75e67SStephen Hanson */ 2132cbf75e67SStephen Hanson facs.facs_countp = &count; 2133cbf75e67SStephen Hanson facs.facs_maxcount = cip->ci_nsuspects; 2134cbf75e67SStephen Hanson (void) pthread_mutex_lock(&cip->ci_lock); 2135cbf75e67SStephen Hanson fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_close_status, 2136cbf75e67SStephen Hanson &facs); 2137cbf75e67SStephen Hanson (void) pthread_mutex_unlock(&cip->ci_lock); 2138cbf75e67SStephen Hanson } 2139cbf75e67SStephen Hanson 2140cbf75e67SStephen Hanson /* 21417c478bd9Sstevel@tonic-gate * Indicate that the case may need to change state because one or more of the 21427c478bd9Sstevel@tonic-gate * ASRUs named as a suspect has changed state. We examine all the suspects 21437c478bd9Sstevel@tonic-gate * and if none are still faulty, we initiate a case close transition. 
21447c478bd9Sstevel@tonic-gate */ 21457c478bd9Sstevel@tonic-gate void 21467c478bd9Sstevel@tonic-gate fmd_case_update(fmd_case_t *cp) 21477c478bd9Sstevel@tonic-gate { 21487c478bd9Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 2149d9638e54Smws uint_t cstate; 2150567cc2e6Sstephh int faulty = 0; 21517c478bd9Sstevel@tonic-gate 21527c478bd9Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock); 2153d9638e54Smws cstate = cip->ci_state; 21547c478bd9Sstevel@tonic-gate 2155cbf75e67SStephen Hanson if (cip->ci_state < FMD_CASE_SOLVED) { 21567c478bd9Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock); 2157d9638e54Smws return; /* update is not appropriate */ 21587c478bd9Sstevel@tonic-gate } 21597c478bd9Sstevel@tonic-gate 2160567cc2e6Sstephh if (cip->ci_flags & FMD_CF_REPAIRED) { 2161567cc2e6Sstephh (void) pthread_mutex_unlock(&cip->ci_lock); 2162567cc2e6Sstephh return; /* already repaired */ 21637c478bd9Sstevel@tonic-gate } 21647c478bd9Sstevel@tonic-gate 2165cbf75e67SStephen Hanson TRACE((FMD_DBG_CASE, "case update %s", cip->ci_uuid)); 2166567cc2e6Sstephh fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty); 21677c478bd9Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock); 21687c478bd9Sstevel@tonic-gate 216925c6ff4bSstephh if (faulty) { 217025c6ff4bSstephh nvlist_t *nvl; 217125c6ff4bSstephh fmd_event_t *e; 217225c6ff4bSstephh char *class; 217325c6ff4bSstephh 2174cbf75e67SStephen Hanson TRACE((FMD_DBG_CASE, "sending list.updated %s", cip->ci_uuid)); 217525c6ff4bSstephh nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS); 217625c6ff4bSstephh (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 217725c6ff4bSstephh e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 217825c6ff4bSstephh (void) pthread_rwlock_rdlock(&fmd.d_log_lock); 217925c6ff4bSstephh fmd_log_append(fmd.d_fltlog, e, cp); 218025c6ff4bSstephh (void) pthread_rwlock_unlock(&fmd.d_log_lock); 218125c6ff4bSstephh fmd_dispq_dispatch(fmd.d_disp, e, class); 
2182d9638e54Smws return; /* one or more suspects are still marked faulty */ 218325c6ff4bSstephh } 2184d9638e54Smws 2185d9638e54Smws if (cstate == FMD_CASE_CLOSED) 2186d9638e54Smws fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED); 2187d9638e54Smws else 2188d9638e54Smws fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED); 2189d9638e54Smws } 2190d9638e54Smws 2191d9638e54Smws /* 2192d9638e54Smws * Delete a closed case from the module's case list once the fmdo_close() entry 2193d9638e54Smws * point has run to completion. If the case is owned by a transport module, 2194d9638e54Smws * tell the transport to proxy a case close on the other end of the transport. 2195cbf75e67SStephen Hanson * Transition to the appropriate next state based on ci_flags. This 2196d9638e54Smws * function represents the end of CLOSE_WAIT and transitions the case to either 2197d9638e54Smws * CLOSED or REPAIRED or discards it entirely because it was never solved; 2198d9638e54Smws * refer to the topmost block comment explaining the state machine for details. 
2199d9638e54Smws */ 2200d9638e54Smws void 2201d9638e54Smws fmd_case_delete(fmd_case_t *cp) 2202d9638e54Smws { 2203d9638e54Smws fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 22040b9e3e76Smws fmd_modstat_t *msp; 22050b9e3e76Smws size_t buftotal; 2206d9638e54Smws 2207cbf75e67SStephen Hanson TRACE((FMD_DBG_CASE, "case delete %s", cip->ci_uuid)); 2208d9638e54Smws ASSERT(fmd_module_locked(cip->ci_mod)); 2209d9638e54Smws fmd_list_delete(&cip->ci_mod->mod_cases, cip); 22100b9e3e76Smws buftotal = fmd_buf_hash_destroy(&cip->ci_bufs); 22110b9e3e76Smws 22120b9e3e76Smws (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock); 22130b9e3e76Smws msp = cip->ci_mod->mod_stats; 22140b9e3e76Smws 22150b9e3e76Smws ASSERT(msp->ms_caseopen.fmds_value.ui64 != 0); 22160b9e3e76Smws msp->ms_caseopen.fmds_value.ui64--; 22170b9e3e76Smws 22180b9e3e76Smws ASSERT(msp->ms_buftotal.fmds_value.ui64 >= buftotal); 22190b9e3e76Smws msp->ms_buftotal.fmds_value.ui64 -= buftotal; 22200b9e3e76Smws 22210b9e3e76Smws (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock); 2222d9638e54Smws 2223d9638e54Smws if (cip->ci_xprt == NULL) 2224d9638e54Smws fmd_module_setcdirty(cip->ci_mod); 2225d9638e54Smws 2226d9638e54Smws fmd_module_rele(cip->ci_mod); 2227d9638e54Smws cip->ci_mod = fmd.d_rmod; 2228d9638e54Smws fmd_module_hold(cip->ci_mod); 2229d9638e54Smws 2230d9638e54Smws /* 2231cbf75e67SStephen Hanson * If the case has been solved, then retain it 2232162ba6eaSmws * on the root module's case list at least until we're transitioned. 2233162ba6eaSmws * Otherwise free the case with our final fmd_case_rele() below. 
2234162ba6eaSmws */ 2235cbf75e67SStephen Hanson if (cip->ci_flags & FMD_CF_SOLVED) { 2236162ba6eaSmws fmd_module_lock(cip->ci_mod); 2237162ba6eaSmws fmd_list_append(&cip->ci_mod->mod_cases, cip); 2238162ba6eaSmws fmd_module_unlock(cip->ci_mod); 2239162ba6eaSmws fmd_case_hold(cp); 2240162ba6eaSmws } 2241162ba6eaSmws 2242162ba6eaSmws /* 2243cbf75e67SStephen Hanson * Transition onwards to REPAIRED or CLOSED as originally requested. 2244cbf75e67SStephen Hanson * Note that for proxy case if we're transitioning to CLOSED it means 2245cbf75e67SStephen Hanson * the case was isolated locally, so call fmd_xprt_uuclose() to notify 2246cbf75e67SStephen Hanson * the diagnosing side. No need to notify the diagnosing side if we are 2247cbf75e67SStephen Hanson * transitioning to REPAIRED as we only do this when requested to do 2248cbf75e67SStephen Hanson * so by the diagnosing side anyway. 2249d9638e54Smws */ 2250cbf75e67SStephen Hanson if (cip->ci_flags & FMD_CF_REPAIRED) 2251cbf75e67SStephen Hanson fmd_case_transition(cp, FMD_CASE_REPAIRED, 0); 2252cbf75e67SStephen Hanson else if (cip->ci_flags & FMD_CF_ISOLATED) { 2253cbf75e67SStephen Hanson fmd_case_transition(cp, FMD_CASE_CLOSED, 0); 2254d9638e54Smws if (cip->ci_xprt != NULL) 2255d9638e54Smws fmd_xprt_uuclose(cip->ci_xprt, cip->ci_uuid); 2256cbf75e67SStephen Hanson } 2257d9638e54Smws 2258d9638e54Smws fmd_case_rele(cp); 2259d9638e54Smws } 2260d9638e54Smws 2261d9638e54Smws void 2262cbf75e67SStephen Hanson fmd_case_discard(fmd_case_t *cp, boolean_t delete_from_asru_cache) 2263d9638e54Smws { 2264d9638e54Smws fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 2265d9638e54Smws 2266d9638e54Smws (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock); 2267d9638e54Smws cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64--; 2268d9638e54Smws (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock); 2269d9638e54Smws 2270d9638e54Smws ASSERT(fmd_module_locked(cip->ci_mod)); 2271d9638e54Smws fmd_list_delete(&cip->ci_mod->mod_cases, cip); 
2272cbf75e67SStephen Hanson if (delete_from_asru_cache) { 2273cbf75e67SStephen Hanson (void) pthread_mutex_lock(&cip->ci_lock); 2274cbf75e67SStephen Hanson fmd_asru_hash_delete_case(fmd.d_asrus, cp); 2275cbf75e67SStephen Hanson (void) pthread_mutex_unlock(&cip->ci_lock); 2276cbf75e67SStephen Hanson } 2277d9638e54Smws fmd_case_rele(cp); 22787c478bd9Sstevel@tonic-gate } 22797c478bd9Sstevel@tonic-gate 22807c478bd9Sstevel@tonic-gate /* 22817c478bd9Sstevel@tonic-gate * Indicate that the problem corresponding to a case has been repaired by 2282d9638e54Smws * clearing the faulty bit on each ASRU named as a suspect. If the case hasn't 2283d9638e54Smws * already been closed, this function initiates the transition to CLOSE_WAIT. 2284d9638e54Smws * The caller must have the case held from fmd_case_hash_lookup(), so we can 2285d9638e54Smws * grab and drop ci_lock without the case being able to be freed in between. 22867c478bd9Sstevel@tonic-gate */ 22877c478bd9Sstevel@tonic-gate int 22887c478bd9Sstevel@tonic-gate fmd_case_repair(fmd_case_t *cp) 22897c478bd9Sstevel@tonic-gate { 22907c478bd9Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 2291d9638e54Smws uint_t cstate; 2292cbf75e67SStephen Hanson fmd_asru_rep_arg_t fara; 2293d9638e54Smws 22947c478bd9Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock); 2295d9638e54Smws cstate = cip->ci_state; 22967c478bd9Sstevel@tonic-gate 2297567cc2e6Sstephh if (cstate < FMD_CASE_SOLVED) { 22987c478bd9Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock); 22997c478bd9Sstevel@tonic-gate return (fmd_set_errno(EFMD_CASE_STATE)); 23007c478bd9Sstevel@tonic-gate } 23017c478bd9Sstevel@tonic-gate 2302567cc2e6Sstephh if (cip->ci_flags & FMD_CF_REPAIRED) { 23037c478bd9Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock); 2304567cc2e6Sstephh return (0); /* already repaired */ 2305162ba6eaSmws } 2306162ba6eaSmws 2307cbf75e67SStephen Hanson TRACE((FMD_DBG_CASE, "case repair %s", cip->ci_uuid)); 2308cbf75e67SStephen 
Hanson fara.fara_reason = FMD_ASRU_REPAIRED; 2309cbf75e67SStephen Hanson fara.fara_bywhat = FARA_BY_CASE; 2310cbf75e67SStephen Hanson fara.fara_rval = NULL; 2311cbf75e67SStephen Hanson fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, &fara); 231225c6ff4bSstephh (void) pthread_mutex_unlock(&cip->ci_lock); 231325c6ff4bSstephh 2314cbf75e67SStephen Hanson /* 2315cbf75e67SStephen Hanson * if this is a proxied case, send the repair across the transport. 2316cbf75e67SStephen Hanson * The remote side will then do the repair and send a list.repaired back 2317cbf75e67SStephen Hanson * again such that we can finally repair the case on this side. 2318cbf75e67SStephen Hanson */ 2319cbf75e67SStephen Hanson if (cip->ci_xprt != NULL) { 2320cbf75e67SStephen Hanson fmd_case_xprt_updated(cp); 2321cbf75e67SStephen Hanson return (0); 2322cbf75e67SStephen Hanson } 2323cbf75e67SStephen Hanson 232425c6ff4bSstephh if (cstate == FMD_CASE_CLOSED) 232525c6ff4bSstephh fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED); 232625c6ff4bSstephh else 232725c6ff4bSstephh fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED); 232825c6ff4bSstephh 232925c6ff4bSstephh return (0); 233025c6ff4bSstephh } 233125c6ff4bSstephh 233225c6ff4bSstephh int 233325c6ff4bSstephh fmd_case_acquit(fmd_case_t *cp) 233425c6ff4bSstephh { 233525c6ff4bSstephh fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 233625c6ff4bSstephh uint_t cstate; 2337cbf75e67SStephen Hanson fmd_asru_rep_arg_t fara; 233825c6ff4bSstephh 233925c6ff4bSstephh (void) pthread_mutex_lock(&cip->ci_lock); 234025c6ff4bSstephh cstate = cip->ci_state; 234125c6ff4bSstephh 234225c6ff4bSstephh if (cstate < FMD_CASE_SOLVED) { 234325c6ff4bSstephh (void) pthread_mutex_unlock(&cip->ci_lock); 234425c6ff4bSstephh return (fmd_set_errno(EFMD_CASE_STATE)); 234525c6ff4bSstephh } 234625c6ff4bSstephh 234725c6ff4bSstephh if (cip->ci_flags & FMD_CF_REPAIRED) { 234825c6ff4bSstephh (void) pthread_mutex_unlock(&cip->ci_lock); 234925c6ff4bSstephh return 
(0); /* already repaired */ 235025c6ff4bSstephh } 235125c6ff4bSstephh 2352cbf75e67SStephen Hanson TRACE((FMD_DBG_CASE, "case acquit %s", cip->ci_uuid)); 2353cbf75e67SStephen Hanson fara.fara_reason = FMD_ASRU_ACQUITTED; 2354cbf75e67SStephen Hanson fara.fara_bywhat = FARA_BY_CASE; 2355cbf75e67SStephen Hanson fara.fara_rval = NULL; 2356cbf75e67SStephen Hanson fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, &fara); 2357162ba6eaSmws (void) pthread_mutex_unlock(&cip->ci_lock); 2358162ba6eaSmws 2359cbf75e67SStephen Hanson /* 2360cbf75e67SStephen Hanson * if this is a proxied case, send the repair across the transport. 2361cbf75e67SStephen Hanson * The remote side will then do the repair and send a list.repaired back 2362cbf75e67SStephen Hanson * again such that we can finally repair the case on this side. 2363cbf75e67SStephen Hanson */ 2364cbf75e67SStephen Hanson if (cip->ci_xprt != NULL) { 2365cbf75e67SStephen Hanson fmd_case_xprt_updated(cp); 2366cbf75e67SStephen Hanson return (0); 2367cbf75e67SStephen Hanson } 2368cbf75e67SStephen Hanson 2369d9638e54Smws if (cstate == FMD_CASE_CLOSED) 2370d9638e54Smws fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED); 2371d9638e54Smws else 2372d9638e54Smws fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED); 2373d9638e54Smws 23747c478bd9Sstevel@tonic-gate return (0); 23757c478bd9Sstevel@tonic-gate } 23767c478bd9Sstevel@tonic-gate 23777c478bd9Sstevel@tonic-gate int 23787c478bd9Sstevel@tonic-gate fmd_case_contains(fmd_case_t *cp, fmd_event_t *ep) 23797c478bd9Sstevel@tonic-gate { 23807c478bd9Sstevel@tonic-gate fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 23817c478bd9Sstevel@tonic-gate fmd_case_item_t *cit; 23827c478bd9Sstevel@tonic-gate uint_t state; 23837c478bd9Sstevel@tonic-gate int rv = 0; 23847c478bd9Sstevel@tonic-gate 23857c478bd9Sstevel@tonic-gate (void) pthread_mutex_lock(&cip->ci_lock); 23867c478bd9Sstevel@tonic-gate 23877c478bd9Sstevel@tonic-gate if (cip->ci_state >= FMD_CASE_SOLVED) 
23887c478bd9Sstevel@tonic-gate state = FMD_EVS_DIAGNOSED; 23897c478bd9Sstevel@tonic-gate else 23907c478bd9Sstevel@tonic-gate state = FMD_EVS_ACCEPTED; 23917c478bd9Sstevel@tonic-gate 23927c478bd9Sstevel@tonic-gate for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) { 23937c478bd9Sstevel@tonic-gate if ((rv = fmd_event_equal(ep, cit->cit_event)) != 0) 23947c478bd9Sstevel@tonic-gate break; 23957c478bd9Sstevel@tonic-gate } 23967c478bd9Sstevel@tonic-gate 23977c478bd9Sstevel@tonic-gate if (rv == 0 && cip->ci_principal != NULL) 23987c478bd9Sstevel@tonic-gate rv = fmd_event_equal(ep, cip->ci_principal); 23997c478bd9Sstevel@tonic-gate 24007c478bd9Sstevel@tonic-gate (void) pthread_mutex_unlock(&cip->ci_lock); 24017c478bd9Sstevel@tonic-gate 24027c478bd9Sstevel@tonic-gate if (rv != 0) 24037c478bd9Sstevel@tonic-gate fmd_event_transition(ep, state); 24047c478bd9Sstevel@tonic-gate 24057c478bd9Sstevel@tonic-gate return (rv); 24067c478bd9Sstevel@tonic-gate } 2407d9638e54Smws 2408d9638e54Smws int 2409d9638e54Smws fmd_case_orphaned(fmd_case_t *cp) 2410d9638e54Smws { 2411d9638e54Smws return (((fmd_case_impl_t *)cp)->ci_mod == fmd.d_rmod); 2412d9638e54Smws } 241344743693Sstephh 241444743693Sstephh void 241544743693Sstephh fmd_case_settime(fmd_case_t *cp, time_t tv_sec, suseconds_t tv_usec) 241644743693Sstephh { 241744743693Sstephh ((fmd_case_impl_t *)cp)->ci_tv.tv_sec = tv_sec; 241844743693Sstephh ((fmd_case_impl_t *)cp)->ci_tv.tv_usec = tv_usec; 241944743693Sstephh ((fmd_case_impl_t *)cp)->ci_tv_valid = 1; 242044743693Sstephh } 242125c6ff4bSstephh 2422cbf75e67SStephen Hanson void 2423540db9a9SStephen Hanson fmd_case_set_injected(fmd_case_t *cp) 2424540db9a9SStephen Hanson { 2425540db9a9SStephen Hanson ((fmd_case_impl_t *)cp)->ci_injected = 1; 2426540db9a9SStephen Hanson } 2427540db9a9SStephen Hanson 2428540db9a9SStephen Hanson void 2429cbf75e67SStephen Hanson fmd_case_set_de_fmri(fmd_case_t *cp, nvlist_t *nvl) 2430cbf75e67SStephen Hanson { 2431cbf75e67SStephen Hanson 
fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 2432cbf75e67SStephen Hanson 2433cbf75e67SStephen Hanson nvlist_free(cip->ci_diag_de); 2434cbf75e67SStephen Hanson cip->ci_diag_de = nvl; 2435cbf75e67SStephen Hanson } 2436cbf75e67SStephen Hanson 2437cbf75e67SStephen Hanson void 2438cbf75e67SStephen Hanson fmd_case_setcode(fmd_case_t *cp, char *code) 2439cbf75e67SStephen Hanson { 2440cbf75e67SStephen Hanson fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 2441cbf75e67SStephen Hanson 2442cbf75e67SStephen Hanson cip->ci_code = fmd_strdup(code, FMD_SLEEP); 2443cbf75e67SStephen Hanson cip->ci_codelen = cip->ci_code ? strlen(cip->ci_code) + 1 : 0; 2444cbf75e67SStephen Hanson } 2445cbf75e67SStephen Hanson 244625c6ff4bSstephh /*ARGSUSED*/ 24475750ef5cSStephen Hanson static void 244825c6ff4bSstephh fmd_case_repair_replay_case(fmd_case_t *cp, void *arg) 244925c6ff4bSstephh { 245025c6ff4bSstephh int not_faulty = 0; 245125c6ff4bSstephh int faulty = 0; 245225c6ff4bSstephh nvlist_t *nvl; 245325c6ff4bSstephh fmd_event_t *e; 245425c6ff4bSstephh char *class; 245525c6ff4bSstephh int any_unusable_and_present = 0; 245625c6ff4bSstephh fmd_case_impl_t *cip = (fmd_case_impl_t *)cp; 245725c6ff4bSstephh 2458cbf75e67SStephen Hanson if (cip->ci_state < FMD_CASE_SOLVED || cip->ci_xprt != NULL) 245925c6ff4bSstephh return; 246025c6ff4bSstephh 24615750ef5cSStephen Hanson if (cip->ci_state == FMD_CASE_RESOLVED) { 24625750ef5cSStephen Hanson cip->ci_flags |= FMD_CF_RES_CMPL; 24635750ef5cSStephen Hanson return; 24645750ef5cSStephen Hanson } 24655750ef5cSStephen Hanson 246625c6ff4bSstephh fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty); 246725c6ff4bSstephh fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_not_faulty, 246825c6ff4bSstephh ¬_faulty); 246925c6ff4bSstephh 2470c7d6cfd6SStephen Hanson if (cip->ci_state >= FMD_CASE_REPAIRED && !faulty) { 247125c6ff4bSstephh /* 247225c6ff4bSstephh * If none of the suspects is faulty, replay the list.repaired. 
247325c6ff4bSstephh * If all suspects are already either usable or not present then 247425c6ff4bSstephh * also transition straight to RESOLVED state. 247525c6ff4bSstephh */ 247625c6ff4bSstephh fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, 247725c6ff4bSstephh fmd_case_unusable_and_present, &any_unusable_and_present); 247825c6ff4bSstephh if (!any_unusable_and_present) { 247925c6ff4bSstephh cip->ci_state = FMD_CASE_RESOLVED; 248025c6ff4bSstephh 2481cbf75e67SStephen Hanson TRACE((FMD_DBG_CASE, "replay sending list.repaired %s", 2482cbf75e67SStephen Hanson cip->ci_uuid)); 248325c6ff4bSstephh nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS); 248425c6ff4bSstephh (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 248525c6ff4bSstephh e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, 248625c6ff4bSstephh class); 248725c6ff4bSstephh fmd_dispq_dispatch(fmd.d_disp, e, class); 248825c6ff4bSstephh 2489cbf75e67SStephen Hanson TRACE((FMD_DBG_CASE, "replay sending list.resolved %s", 2490cbf75e67SStephen Hanson cip->ci_uuid)); 249125c6ff4bSstephh fmd_case_publish(cp, FMD_CASE_RESOLVED); 2492540db9a9SStephen Hanson fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, 2493540db9a9SStephen Hanson fmd_asru_log_resolved, NULL); 24945750ef5cSStephen Hanson cip->ci_flags |= FMD_CF_RES_CMPL; 249525c6ff4bSstephh } else { 2496cbf75e67SStephen Hanson TRACE((FMD_DBG_CASE, "replay sending list.repaired %s", 2497cbf75e67SStephen Hanson cip->ci_uuid)); 249825c6ff4bSstephh nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS); 249925c6ff4bSstephh (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 250025c6ff4bSstephh e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, 250125c6ff4bSstephh class); 250225c6ff4bSstephh fmd_dispq_dispatch(fmd.d_disp, e, class); 250325c6ff4bSstephh } 2504c7d6cfd6SStephen Hanson } else if (faulty && not_faulty) { 250525c6ff4bSstephh /* 250625c6ff4bSstephh * if some but not all of the suspects are not faulty, replay 250725c6ff4bSstephh * the list.updated. 
250825c6ff4bSstephh */ 2509cbf75e67SStephen Hanson TRACE((FMD_DBG_CASE, "replay sending list.updated %s", 2510cbf75e67SStephen Hanson cip->ci_uuid)); 251125c6ff4bSstephh nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS); 251225c6ff4bSstephh (void) nvlist_lookup_string(nvl, FM_CLASS, &class); 251325c6ff4bSstephh e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class); 251425c6ff4bSstephh fmd_dispq_dispatch(fmd.d_disp, e, class); 251525c6ff4bSstephh } 251625c6ff4bSstephh } 251725c6ff4bSstephh 251825c6ff4bSstephh void 251925c6ff4bSstephh fmd_case_repair_replay() 252025c6ff4bSstephh { 252125c6ff4bSstephh fmd_case_hash_apply(fmd.d_cases, fmd_case_repair_replay_case, NULL); 252225c6ff4bSstephh } 2523