xref: /illumos-gate/usr/src/cmd/fm/fmd/common/fmd_case.c (revision fe3e2633be44d2f5361a7bba26abeb80fcc04fbc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * FMD Case Subsystem
31  *
32  * Diagnosis engines are expected to group telemetry events related to the
33  * diagnosis of a particular problem on the system into a set of cases.  The
34  * diagnosis engine may have any number of cases open at a given point in time.
35  * Some cases may eventually be *solved* by associating a suspect list of one
36  * or more problems with the case, at which point fmd publishes a list.suspect
37  * event for the case and it becomes visible to administrators and agents.
38  *
39  * Every case is named using a UUID, and is globally visible in the case hash.
40  * Cases are reference-counted, except for the reference from the case hash
41  * itself.  Consumers of case references include modules, which store active
42  * cases on the mod_cases list, ASRUs in the resource cache, and the RPC code.
43  *
44  * Cases obey the following state machine.  In states UNSOLVED, SOLVED, and
45  * CLOSE_WAIT, a case's module refers to the owning module (a diagnosis engine
46  * or transport) and the case is referenced by the mod_cases list.  Once the
47  * case reaches the CLOSED or REPAIRED states, a case's module changes to refer
48  * to the root module (fmd.d_rmod) and is deleted from the owner's mod_cases.
49  *
50  *			+------------+
51  *	     +----------|  UNSOLVED  |
52  *	     |		+------------+
53  *	     |		      1 |
54  *	     |			|
55  *	     |		+-------v----+
56  *	   2 |		|    SOLVED  |
57  *	     |		+------------+
58  *	     |		    3 |  5 |
59  *	     +------------+   |    |
60  *			  |   |    |
61  *			+-v---v----v-+
62  *			| CLOSE_WAIT |
63  *			+------------+
64  *			  |   |    |
65  *	      +-----------+   |    +------------+
66  *	      |		    4 |			|
67  *	      v		+-----v------+		|
68  *	   discard      |   CLOSED   |	      6	|
69  *			+------------+		|
70  *			      |			|
71  *			      |	   +------------+
72  *			    7 |	   |
73  *			+-----v----v-+
74  *			|  REPAIRED  |
75  *			+------------+
76  *			      |
77  *			    8 |
78  *			+-----v------+
79  *			|  RESOLVED  |
80  *			+------------+
81  *			      |
82  *			      v
83  *			   discard
84  *
85  * The state machine changes are triggered by calls to fmd_case_transition()
86  * from various locations inside of fmd, as described below:
87  *
88  * [1] Called by: fmd_case_solve()
89  *       Actions: FMD_CF_SOLVED flag is set in ci_flags
90  *                conviction policy is applied to suspect list
91  *                suspects convicted are marked faulty (F) in R$
92  *                list.suspect event logged and dispatched
93  *
94  * [2] Called by: fmd_case_close(), fmd_case_uuclose()
95  *       Actions: diagnosis engine fmdo_close() entry point scheduled
96  *                case discarded upon exit from CLOSE_WAIT
97  *
98  * [3] Called by: fmd_case_close(), fmd_case_uuclose(), fmd_xprt_event_uuclose()
99  *       Actions: FMD_CF_ISOLATED flag is set in ci_flags
100  *                suspects convicted (F) are marked unusable (U) in R$
101  *                diagnosis engine fmdo_close() entry point scheduled
102  *                case transitions to CLOSED [4] upon exit from CLOSE_WAIT
103  *
104  * [4] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
105  *       Actions: list.isolated event dispatched
106  *                case deleted from module's list of open cases
107  *
108  * [5] Called by: fmd_case_repair(), fmd_case_update()
109  *       Actions: FMD_CF_REPAIR flag is set in ci_flags
110  *                diagnosis engine fmdo_close() entry point scheduled
111  *                case transitions to REPAIRED [6] upon exit from CLOSE_WAIT
112  *
113  * [6] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
114  *       Actions: suspects convicted are marked non faulty (!F) in R$
115  *                list.repaired or list.updated event dispatched
116  *
117  * [7] Called by: fmd_case_repair(), fmd_case_update()
118  *       Actions: FMD_CF_REPAIR flag is set in ci_flags
119  *                suspects convicted are marked non faulty (!F) in R$
120  *                list.repaired or list.updated event dispatched
121  *
122  * [8] Called by: fmd_case_uuresolve()
123  *       Actions: list.resolved event dispatched
124  *		  case is discarded
125  */
126 
127 #include <sys/fm/protocol.h>
128 #include <uuid/uuid.h>
129 #include <alloca.h>
130 
131 #include <fmd_alloc.h>
132 #include <fmd_module.h>
133 #include <fmd_error.h>
134 #include <fmd_conf.h>
135 #include <fmd_case.h>
136 #include <fmd_string.h>
137 #include <fmd_subr.h>
138 #include <fmd_protocol.h>
139 #include <fmd_event.h>
140 #include <fmd_eventq.h>
141 #include <fmd_dispq.h>
142 #include <fmd_buf.h>
143 #include <fmd_log.h>
144 #include <fmd_asru.h>
145 #include <fmd_fmri.h>
146 #include <fmd_xprt.h>
147 
148 #include <fmd.h>
149 
/*
 * Printable names for the case states.  Each string is annotated with the
 * FMD_CASE_* state it describes; the table is laid out in that order.
 */
static const char *const _fmd_case_snames[] = {
	/* FMD_CASE_UNSOLVED */		"UNSOLVED",
	/* FMD_CASE_SOLVED */		"SOLVED",
	/* FMD_CASE_CLOSE_WAIT */	"CLOSE_WAIT",
	/* FMD_CASE_CLOSED */		"CLOSED",
	/* FMD_CASE_REPAIRED */		"REPAIRED",
	/* FMD_CASE_RESOLVED */		"RESOLVED"
};
158 
159 static fmd_case_impl_t *fmd_case_tryhold(fmd_case_impl_t *);
160 
161 fmd_case_hash_t *
162 fmd_case_hash_create(void)
163 {
164 	fmd_case_hash_t *chp = fmd_alloc(sizeof (fmd_case_hash_t), FMD_SLEEP);
165 
166 	(void) pthread_rwlock_init(&chp->ch_lock, NULL);
167 	chp->ch_hashlen = fmd.d_str_buckets;
168 	chp->ch_hash = fmd_zalloc(sizeof (void *) * chp->ch_hashlen, FMD_SLEEP);
169 	chp->ch_code_hash = fmd_zalloc(sizeof (void *) * chp->ch_hashlen,
170 	    FMD_SLEEP);
171 	chp->ch_count = 0;
172 
173 	return (chp);
174 }
175 
176 /*
177  * Destroy the case hash.  Unlike most of our hash tables, no active references
178  * are kept by the case hash itself; all references come from other subsystems.
179  * The hash must be destroyed after all modules are unloaded; if anything was
180  * present in the hash it would be by definition a reference count leak.
181  */
182 void
183 fmd_case_hash_destroy(fmd_case_hash_t *chp)
184 {
185 	fmd_free(chp->ch_hash, sizeof (void *) * chp->ch_hashlen);
186 	fmd_free(chp->ch_code_hash, sizeof (void *) * chp->ch_hashlen);
187 	fmd_free(chp, sizeof (fmd_case_hash_t));
188 }
189 
190 /*
191  * Take a snapshot of the case hash by placing an additional hold on each
192  * member in an auxiliary array, and then call 'func' for each case.
193  */
194 void
195 fmd_case_hash_apply(fmd_case_hash_t *chp,
196     void (*func)(fmd_case_t *, void *), void *arg)
197 {
198 	fmd_case_impl_t *cp, **cps, **cpp;
199 	uint_t cpc, i;
200 
201 	(void) pthread_rwlock_rdlock(&chp->ch_lock);
202 
203 	cps = cpp = fmd_alloc(chp->ch_count * sizeof (fmd_case_t *), FMD_SLEEP);
204 	cpc = chp->ch_count;
205 
206 	for (i = 0; i < chp->ch_hashlen; i++) {
207 		for (cp = chp->ch_hash[i]; cp != NULL; cp = cp->ci_next) {
208 			if (fmd_case_tryhold(cp) != NULL)
209 				*cpp++ = cp;
210 		}
211 	}
212 
213 	ASSERT(cpp == cps + cpc);
214 	(void) pthread_rwlock_unlock(&chp->ch_lock);
215 
216 	for (i = 0; i < cpc; i++) {
217 		func((fmd_case_t *)cps[i], arg);
218 		fmd_case_rele((fmd_case_t *)cps[i]);
219 	}
220 
221 	fmd_free(cps, cpc * sizeof (fmd_case_t *));
222 }
223 
224 static void
225 fmd_case_code_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
226 {
227 	uint_t h = fmd_strhash(cip->ci_code) % chp->ch_hashlen;
228 
229 	cip->ci_code_next = chp->ch_code_hash[h];
230 	chp->ch_code_hash[h] = cip;
231 }
232 
233 static void
234 fmd_case_code_hash_delete(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
235 {
236 	fmd_case_impl_t **pp, *cp;
237 
238 	if (cip->ci_code) {
239 		uint_t h = fmd_strhash(cip->ci_code) % chp->ch_hashlen;
240 
241 		pp = &chp->ch_code_hash[h];
242 		for (cp = *pp; cp != NULL; cp = cp->ci_code_next) {
243 			if (cp != cip)
244 				pp = &cp->ci_code_next;
245 			else
246 				break;
247 		}
248 		if (cp != NULL) {
249 			*pp = cp->ci_code_next;
250 			cp->ci_code_next = NULL;
251 		}
252 	}
253 }
254 
255 /*
256  * Look up the diagcode for this case and cache it in ci_code.  If no suspects
257  * were defined for this case or if the lookup fails, the event dictionary or
258  * module code is broken, and we set the event code to a precomputed default.
259  */
260 static const char *
261 fmd_case_mkcode(fmd_case_t *cp)
262 {
263 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
264 	fmd_case_susp_t *cis;
265 	fmd_case_hash_t *chp = fmd.d_cases;
266 
267 	char **keys, **keyp;
268 	const char *s;
269 
270 	ASSERT(MUTEX_HELD(&cip->ci_lock));
271 	ASSERT(cip->ci_state >= FMD_CASE_SOLVED);
272 
273 	/*
274 	 * delete any existing entry from code hash if it is on it
275 	 */
276 	fmd_case_code_hash_delete(chp, cip);
277 
278 	fmd_free(cip->ci_code, cip->ci_codelen);
279 	cip->ci_codelen = cip->ci_mod->mod_codelen;
280 	cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP);
281 	keys = keyp = alloca(sizeof (char *) * (cip->ci_nsuspects + 1));
282 
283 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
284 		if (nvlist_lookup_string(cis->cis_nvl, FM_CLASS, keyp) == 0)
285 			keyp++;
286 	}
287 
288 	*keyp = NULL; /* mark end of keys[] array for libdiagcode */
289 
290 	if (cip->ci_nsuspects == 0 || fmd_module_dc_key2code(
291 	    cip->ci_mod, keys, cip->ci_code, cip->ci_codelen) != 0) {
292 		(void) fmd_conf_getprop(fmd.d_conf, "nodiagcode", &s);
293 		fmd_free(cip->ci_code, cip->ci_codelen);
294 		cip->ci_codelen = strlen(s) + 1;
295 		cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP);
296 		(void) strcpy(cip->ci_code, s);
297 	}
298 
299 	/*
300 	 * add into hash of solved cases
301 	 */
302 	fmd_case_code_hash_insert(chp, cip);
303 
304 	return (cip->ci_code);
305 }
306 
/*
 * Argument block handed to fmd_case_set_lst() while it is applied to each
 * ASRU link of a case.  All members point at storage owned by the caller.
 */
typedef struct {
	int	*fcl_countp;	/* number of entries filled in so far */
	uint8_t *fcl_ba;	/* per-entry FM_SUSPECT_* status bits */
	nvlist_t **fcl_nva;	/* per-entry fault event nvlist */
	int	*fcl_msgp;	/* set to B_FALSE if any entry says no msg */
} fmd_case_lst_t;
313 
314 static void
315 fmd_case_set_lst(fmd_asru_link_t *alp, void *arg)
316 {
317 	fmd_case_lst_t *entryp = (fmd_case_lst_t *)arg;
318 	boolean_t b;
319 	int state;
320 
321 	if (nvlist_lookup_boolean_value(alp->al_event, FM_SUSPECT_MESSAGE,
322 	    &b) == 0 && b == B_FALSE)
323 		*entryp->fcl_msgp = B_FALSE;
324 	entryp->fcl_ba[*entryp->fcl_countp] = 0;
325 	state = fmd_asru_al_getstate(alp);
326 	if (state & FMD_ASRU_DEGRADED)
327 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_DEGRADED;
328 	if (state & FMD_ASRU_UNUSABLE)
329 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_UNUSABLE;
330 	if (state & FMD_ASRU_FAULTY)
331 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_FAULTY;
332 	if (!(state & FMD_ASRU_PRESENT))
333 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_NOT_PRESENT;
334 	if (alp->al_reason == FMD_ASRU_REPAIRED)
335 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPAIRED;
336 	else if (alp->al_reason == FMD_ASRU_REPLACED)
337 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPLACED;
338 	else if (alp->al_reason == FMD_ASRU_ACQUITTED)
339 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_ACQUITTED;
340 	entryp->fcl_nva[*entryp->fcl_countp] = alp->al_event;
341 	(*entryp->fcl_countp)++;
342 }
343 
344 static void
345 fmd_case_faulty(fmd_asru_link_t *alp, void *arg)
346 {
347 	int *faultyp = (int *)arg;
348 
349 	*faultyp |= (alp->al_flags & FMD_ASRU_FAULTY);
350 }
351 
352 static void
353 fmd_case_usable(fmd_asru_link_t *alp, void *arg)
354 {
355 	int *usablep = (int *)arg;
356 
357 	*usablep |= !(fmd_asru_al_getstate(alp) & FMD_ASRU_UNUSABLE);
358 }
359 
360 static void
361 fmd_case_not_faulty(fmd_asru_link_t *alp, void *arg)
362 {
363 	int *not_faultyp = (int *)arg;
364 
365 	*not_faultyp |= !(alp->al_flags & FMD_ASRU_FAULTY);
366 }
367 
368 /*
369  * Have we got any suspects with an asru that are still unusable and present?
370  */
371 static void
372 fmd_case_unusable_and_present(fmd_asru_link_t *alp, void *arg)
373 {
374 	int *rvalp = (int *)arg;
375 	int state = fmd_asru_al_getstate(alp);
376 	nvlist_t *asru;
377 
378 	if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) != 0)
379 		return;
380 	*rvalp |= ((state & FMD_ASRU_UNUSABLE) && (state & FMD_ASRU_PRESENT));
381 }
382 
383 nvlist_t *
384 fmd_case_mkevent(fmd_case_t *cp, const char *class)
385 {
386 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
387 	nvlist_t **nva, *nvl;
388 	uint8_t *ba;
389 	int msg = B_TRUE;
390 	const char *code;
391 	fmd_case_lst_t fcl;
392 	int count = 0;
393 
394 	(void) pthread_mutex_lock(&cip->ci_lock);
395 	ASSERT(cip->ci_state >= FMD_CASE_SOLVED);
396 
397 	nva = alloca(sizeof (nvlist_t *) * cip->ci_nsuspects);
398 	ba = alloca(sizeof (uint8_t) * cip->ci_nsuspects);
399 
400 	/*
401 	 * For each suspect associated with the case, store its fault event
402 	 * nvlist in 'nva'.  We also look to see if any of the suspect faults
403 	 * have asked not to be messaged.  If any of them have made such a
404 	 * request, propagate that attribute to the composite list.* event.
405 	 * Finally, store each suspect's faulty status into the bitmap 'ba'.
406 	 */
407 	fcl.fcl_countp = &count;
408 	fcl.fcl_msgp = &msg;
409 	fcl.fcl_ba = ba;
410 	fcl.fcl_nva = nva;
411 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_set_lst, &fcl);
412 
413 	if (cip->ci_code == NULL)
414 		(void) fmd_case_mkcode(cp);
415 	/*
416 	 * For repair and updated event, we lookup diagcode from dict using key
417 	 * "list.repaired" or "list.updated" or "list.resolved".
418 	 */
419 	if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
420 		(void) fmd_conf_getprop(fmd.d_conf, "repaircode", &code);
421 	else if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0)
422 		(void) fmd_conf_getprop(fmd.d_conf, "resolvecode", &code);
423 	else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0)
424 		(void) fmd_conf_getprop(fmd.d_conf, "updatecode", &code);
425 	else
426 		code = cip->ci_code;
427 
428 	if (msg == B_FALSE)
429 		cip->ci_flags |= FMD_CF_INVISIBLE;
430 
431 	nvl = fmd_protocol_list(class, cip->ci_mod->mod_fmri, cip->ci_uuid,
432 	    code, count, nva, ba, msg, &cip->ci_tv);
433 
434 	(void) pthread_mutex_unlock(&cip->ci_lock);
435 	return (nvl);
436 }
437 
438 static boolean_t
439 fmd_case_compare_elem(nvlist_t *nvl, nvlist_t *xnvl, const char *elem)
440 {
441 	nvlist_t *new_rsrc;
442 	nvlist_t *rsrc;
443 	char *new_name = NULL;
444 	char *name = NULL;
445 	ssize_t new_namelen;
446 	ssize_t namelen;
447 	int fmri_present = 1;
448 	int new_fmri_present = 1;
449 	int match = B_FALSE;
450 	fmd_topo_t *ftp = fmd_topo_hold();
451 
452 	if (nvlist_lookup_nvlist(xnvl, elem, &rsrc) != 0)
453 		fmri_present = 0;
454 	else {
455 		if ((namelen = fmd_fmri_nvl2str(rsrc, NULL, 0)) == -1)
456 			goto done;
457 		name = fmd_alloc(namelen + 1, FMD_SLEEP);
458 		if (fmd_fmri_nvl2str(rsrc, name, namelen + 1) == -1)
459 			goto done;
460 	}
461 	if (nvlist_lookup_nvlist(nvl, elem, &new_rsrc) != 0)
462 		new_fmri_present = 0;
463 	else {
464 		if ((new_namelen = fmd_fmri_nvl2str(new_rsrc, NULL, 0)) == -1)
465 			goto done;
466 		new_name = fmd_alloc(new_namelen + 1, FMD_SLEEP);
467 		if (fmd_fmri_nvl2str(new_rsrc, new_name, new_namelen + 1) == -1)
468 			goto done;
469 	}
470 	match = (fmri_present == new_fmri_present &&
471 	    (fmri_present == 0 ||
472 	    topo_fmri_strcmp(ftp->ft_hdl, name, new_name)));
473 done:
474 	if (name != NULL)
475 		fmd_free(name, namelen + 1);
476 	if (new_name != NULL)
477 		fmd_free(new_name, new_namelen + 1);
478 	fmd_topo_rele(ftp);
479 	return (match);
480 }
481 
482 static int
483 fmd_case_match_suspect(fmd_case_susp_t *cis, fmd_case_susp_t *xcis)
484 {
485 	char *class, *new_class;
486 
487 	if (!fmd_case_compare_elem(cis->cis_nvl, xcis->cis_nvl, FM_FAULT_ASRU))
488 		return (0);
489 	if (!fmd_case_compare_elem(cis->cis_nvl, xcis->cis_nvl,
490 	    FM_FAULT_RESOURCE))
491 		return (0);
492 	if (!fmd_case_compare_elem(cis->cis_nvl, xcis->cis_nvl, FM_FAULT_FRU))
493 		return (0);
494 	(void) nvlist_lookup_string(xcis->cis_nvl, FM_CLASS, &class);
495 	(void) nvlist_lookup_string(cis->cis_nvl, FM_CLASS, &new_class);
496 	return (strcmp(class, new_class) == 0);
497 }
498 
499 /*
500  * see if an identical suspect list already exists in the cache
501  */
502 static int
503 fmd_case_check_for_dups(fmd_case_t *cp)
504 {
505 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp, *xcip;
506 	fmd_case_hash_t *chp = fmd.d_cases;
507 	fmd_case_susp_t *xcis, *cis;
508 	int match = 0, match_susp;
509 	uint_t h;
510 
511 	(void) pthread_rwlock_rdlock(&chp->ch_lock);
512 
513 	/*
514 	 * Find all cases with this code
515 	 */
516 	h = fmd_strhash(cip->ci_code) % chp->ch_hashlen;
517 	for (xcip = chp->ch_code_hash[h]; xcip != NULL;
518 	    xcip = xcip->ci_code_next) {
519 		/*
520 		 * only look for any cases (apart from this one)
521 		 * whose code and number of suspects match
522 		 */
523 		if (xcip == cip || fmd_case_tryhold(xcip) == NULL)
524 			continue;
525 		if (strcmp(xcip->ci_code, cip->ci_code) != 0 ||
526 		    xcip->ci_nsuspects != cip->ci_nsuspects) {
527 			fmd_case_rele((fmd_case_t *)xcip);
528 			continue;
529 		}
530 
531 		/*
532 		 * For each suspect in one list, check if there
533 		 * is an identical suspect in the other list
534 		 */
535 		match = 1;
536 		for (xcis = xcip->ci_suspects; xcis != NULL;
537 		    xcis = xcis->cis_next) {
538 			match_susp = 0;
539 			for (cis = cip->ci_suspects; cis != NULL;
540 			    cis = cis->cis_next) {
541 				if (fmd_case_match_suspect(cis, xcis) == 1) {
542 					match_susp = 1;
543 					break;
544 				}
545 			}
546 			if (match_susp == 0) {
547 				match = 0;
548 				break;
549 			}
550 		}
551 		fmd_case_rele((fmd_case_t *)xcip);
552 		if (match) {
553 			(void) pthread_rwlock_unlock(&chp->ch_lock);
554 			return (1);
555 		}
556 	}
557 	(void) pthread_rwlock_unlock(&chp->ch_lock);
558 	return (0);
559 }
560 
561 /*
562  * Convict suspects in a case by applying a conviction policy and updating the
563  * resource cache prior to emitting the list.suspect event for the given case.
564  * At present, our policy is very simple: convict every suspect in the case.
565  * In the future, this policy can be extended and made configurable to permit:
566  *
567  * - convicting the suspect with the highest FIT rate
568  * - convicting the suspect with the cheapest FRU
569  * - convicting the suspect with the FRU that is in a depot's inventory
570  * - convicting the suspect with the longest lifetime
571  *
572  * and so forth.  A word to the wise: this problem is significantly harder than
573  * it seems at first glance.  Future work should heed the following advice:
574  *
575  * Hacking the policy into C code here is a very bad idea.  The policy needs to
576  * be decided upon very carefully and fundamentally encodes knowledge of what
577  * suspect list combinations can be emitted by what diagnosis engines.  As such
578  * fmd's code is the wrong location, because that would require fmd itself to
579  * be updated for every diagnosis engine change, defeating the entire design.
580  * The FMA Event Registry knows the suspect list combinations: policy inputs
581  * can be derived from it and used to produce per-module policy configuration.
582  *
583  * If the policy needs to be dynamic and not statically fixed at either fmd
584  * startup or module load time, any implementation of dynamic policy retrieval
585  * must employ some kind of caching mechanism or be part of a built-in module.
586  * The fmd_case_convict() function is called with locks held inside of fmd and
587  * is not a place where unbounded blocking on some inter-process or inter-
588  * system communication to another service (e.g. another daemon) can occur.
589  */
590 static int
591 fmd_case_convict(fmd_case_t *cp)
592 {
593 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
594 	fmd_asru_hash_t *ahp = fmd.d_asrus;
595 
596 	fmd_case_susp_t *cis;
597 	fmd_asru_link_t *alp;
598 
599 	(void) pthread_mutex_lock(&cip->ci_lock);
600 	(void) fmd_case_mkcode(cp);
601 	if (fmd_case_check_for_dups(cp) == 1) {
602 		(void) pthread_mutex_unlock(&cip->ci_lock);
603 		return (1);
604 	}
605 
606 	/*
607 	 * no suspect list already exists  - allocate new cache entries
608 	 */
609 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
610 		if ((alp = fmd_asru_hash_create_entry(ahp,
611 		    cp, cis->cis_nvl)) == NULL) {
612 			fmd_error(EFMD_CASE_EVENT, "cannot convict suspect in "
613 			    "%s: %s\n", cip->ci_uuid, fmd_strerror(errno));
614 			continue;
615 		}
616 		(void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE, 0);
617 		(void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
618 	}
619 
620 	(void) pthread_mutex_unlock(&cip->ci_lock);
621 	return (0);
622 }
623 
624 void
625 fmd_case_publish(fmd_case_t *cp, uint_t state)
626 {
627 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
628 	fmd_event_t *e;
629 	nvlist_t *nvl;
630 	char *class;
631 
632 	if (state == FMD_CASE_CURRENT)
633 		state = cip->ci_state; /* use current state */
634 
635 	switch (state) {
636 	case FMD_CASE_SOLVED:
637 		(void) pthread_mutex_lock(&cip->ci_lock);
638 		if (cip->ci_tv_valid == 0) {
639 			fmd_time_gettimeofday(&cip->ci_tv);
640 			cip->ci_tv_valid = 1;
641 		}
642 		(void) pthread_mutex_unlock(&cip->ci_lock);
643 
644 		if (fmd_case_convict(cp) == 1) { /* dupclose */
645 			cip->ci_flags &= ~FMD_CF_SOLVED;
646 			fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, 0);
647 			break;
648 		}
649 		nvl = fmd_case_mkevent(cp, FM_LIST_SUSPECT_CLASS);
650 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
651 
652 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
653 		(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
654 		fmd_log_append(fmd.d_fltlog, e, cp);
655 		(void) pthread_rwlock_unlock(&fmd.d_log_lock);
656 		fmd_dispq_dispatch(fmd.d_disp, e, class);
657 
658 		(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
659 		cip->ci_mod->mod_stats->ms_casesolved.fmds_value.ui64++;
660 		(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
661 
662 		break;
663 
664 	case FMD_CASE_CLOSE_WAIT:
665 		fmd_case_hold(cp);
666 		e = fmd_event_create(FMD_EVT_CLOSE, FMD_HRT_NOW, NULL, cp);
667 		fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e);
668 
669 		(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
670 		cip->ci_mod->mod_stats->ms_caseclosed.fmds_value.ui64++;
671 		(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
672 
673 		break;
674 
675 	case FMD_CASE_CLOSED:
676 		nvl = fmd_case_mkevent(cp, FM_LIST_ISOLATED_CLASS);
677 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
678 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
679 		fmd_dispq_dispatch(fmd.d_disp, e, class);
680 		break;
681 
682 	case FMD_CASE_REPAIRED:
683 		nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
684 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
685 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
686 		(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
687 		fmd_log_append(fmd.d_fltlog, e, cp);
688 		(void) pthread_rwlock_unlock(&fmd.d_log_lock);
689 		fmd_dispq_dispatch(fmd.d_disp, e, class);
690 		break;
691 
692 	case FMD_CASE_RESOLVED:
693 		nvl = fmd_case_mkevent(cp, FM_LIST_RESOLVED_CLASS);
694 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
695 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
696 		(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
697 		fmd_log_append(fmd.d_fltlog, e, cp);
698 		(void) pthread_rwlock_unlock(&fmd.d_log_lock);
699 		fmd_dispq_dispatch(fmd.d_disp, e, class);
700 		break;
701 	}
702 }
703 
704 fmd_case_t *
705 fmd_case_hash_lookup(fmd_case_hash_t *chp, const char *uuid)
706 {
707 	fmd_case_impl_t *cip;
708 	uint_t h;
709 
710 	(void) pthread_rwlock_rdlock(&chp->ch_lock);
711 	h = fmd_strhash(uuid) % chp->ch_hashlen;
712 
713 	for (cip = chp->ch_hash[h]; cip != NULL; cip = cip->ci_next) {
714 		if (strcmp(cip->ci_uuid, uuid) == 0)
715 			break;
716 	}
717 
718 	/*
719 	 * If deleting bit is set, treat the case as if it doesn't exist.
720 	 */
721 	if (cip != NULL)
722 		cip = fmd_case_tryhold(cip);
723 
724 	if (cip == NULL)
725 		(void) fmd_set_errno(EFMD_CASE_INVAL);
726 
727 	(void) pthread_rwlock_unlock(&chp->ch_lock);
728 	return ((fmd_case_t *)cip);
729 }
730 
731 static fmd_case_impl_t *
732 fmd_case_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
733 {
734 	fmd_case_impl_t *eip;
735 	uint_t h;
736 
737 	(void) pthread_rwlock_wrlock(&chp->ch_lock);
738 	h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen;
739 
740 	for (eip = chp->ch_hash[h]; eip != NULL; eip = eip->ci_next) {
741 		if (strcmp(cip->ci_uuid, eip->ci_uuid) == 0 &&
742 		    fmd_case_tryhold(eip) != NULL) {
743 			(void) pthread_rwlock_unlock(&chp->ch_lock);
744 			return (eip); /* uuid already present */
745 		}
746 	}
747 
748 	cip->ci_next = chp->ch_hash[h];
749 	chp->ch_hash[h] = cip;
750 
751 	chp->ch_count++;
752 	ASSERT(chp->ch_count != 0);
753 
754 	(void) pthread_rwlock_unlock(&chp->ch_lock);
755 	return (cip);
756 }
757 
758 static void
759 fmd_case_hash_delete(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
760 {
761 	fmd_case_impl_t *cp, **pp;
762 	uint_t h;
763 
764 	ASSERT(MUTEX_HELD(&cip->ci_lock));
765 
766 	cip->ci_flags |= FMD_CF_DELETING;
767 	(void) pthread_mutex_unlock(&cip->ci_lock);
768 
769 	(void) pthread_rwlock_wrlock(&chp->ch_lock);
770 
771 	h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen;
772 	pp = &chp->ch_hash[h];
773 
774 	for (cp = *pp; cp != NULL; cp = cp->ci_next) {
775 		if (cp != cip)
776 			pp = &cp->ci_next;
777 		else
778 			break;
779 	}
780 
781 	if (cp == NULL) {
782 		fmd_panic("case %p (%s) not found on hash chain %u\n",
783 		    (void *)cip, cip->ci_uuid, h);
784 	}
785 
786 	*pp = cp->ci_next;
787 	cp->ci_next = NULL;
788 
789 	/*
790 	 * delete from code hash if it is on it
791 	 */
792 	fmd_case_code_hash_delete(chp, cip);
793 
794 	ASSERT(chp->ch_count != 0);
795 	chp->ch_count--;
796 
797 	(void) pthread_rwlock_unlock(&chp->ch_lock);
798 
799 	(void) pthread_mutex_lock(&cip->ci_lock);
800 	ASSERT(cip->ci_flags & FMD_CF_DELETING);
801 }
802 
803 fmd_case_t *
804 fmd_case_create(fmd_module_t *mp, void *data)
805 {
806 	fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
807 	fmd_case_impl_t *eip = NULL;
808 	uuid_t uuid;
809 
810 	(void) pthread_mutex_init(&cip->ci_lock, NULL);
811 	fmd_buf_hash_create(&cip->ci_bufs);
812 
813 	fmd_module_hold(mp);
814 	cip->ci_mod = mp;
815 	cip->ci_refs = 1;
816 	cip->ci_state = FMD_CASE_UNSOLVED;
817 	cip->ci_flags = FMD_CF_DIRTY;
818 	cip->ci_data = data;
819 
820 	/*
821 	 * Calling libuuid: get a clue.  The library interfaces cleverly do not
822 	 * define any constant for the length of an unparse string, and do not
823 	 * permit the caller to specify a buffer length for safety.  The spec
824 	 * says it will be 36 bytes, but we make it tunable just in case.
825 	 */
826 	(void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &cip->ci_uuidlen);
827 	cip->ci_uuid = fmd_zalloc(cip->ci_uuidlen + 1, FMD_SLEEP);
828 
829 	/*
830 	 * We expect this loop to execute only once, but code it defensively
831 	 * against the possibility of libuuid bugs.  Keep generating uuids and
832 	 * attempting to do a hash insert until we get a unique one.
833 	 */
834 	do {
835 		if (eip != NULL)
836 			fmd_case_rele((fmd_case_t *)eip);
837 		uuid_generate(uuid);
838 		uuid_unparse(uuid, cip->ci_uuid);
839 	} while ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip);
840 
841 	ASSERT(fmd_module_locked(mp));
842 	fmd_list_append(&mp->mod_cases, cip);
843 	fmd_module_setcdirty(mp);
844 
845 	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
846 	cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++;
847 	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
848 
849 	return ((fmd_case_t *)cip);
850 }
851 
852 static void
853 fmd_case_destroy_suspects(fmd_case_impl_t *cip)
854 {
855 	fmd_case_susp_t *cis, *ncis;
856 
857 	ASSERT(MUTEX_HELD(&cip->ci_lock));
858 
859 	for (cis = cip->ci_suspects; cis != NULL; cis = ncis) {
860 		ncis = cis->cis_next;
861 		nvlist_free(cis->cis_nvl);
862 		fmd_free(cis, sizeof (fmd_case_susp_t));
863 	}
864 
865 	cip->ci_suspects = NULL;
866 	cip->ci_nsuspects = 0;
867 }
868 
/*
 * Re-establish a case with a known UUID, state and (optionally) diagcode on
 * behalf of module 'mp', optionally associated with transport 'xp'.  'state'
 * must be below FMD_CASE_RESOLVED.  The module must be locked by the caller
 * on every path that reaches the final list append.
 *
 * If the UUID is not yet in the case hash, a new case is created, filed on
 * the code hash when a code was supplied, and added to mp's case list.
 *
 * If a case with that UUID already exists, the newly built case is thrown
 * away and the outcome depends on who is asking:
 *
 *   - 'mp' is the root module (fmd.d_rmod): the existing case is returned
 *     as-is, except that its state is forced to CLOSED when 'state' is
 *     CLOSED.  The reference obtained by the hash insert is dropped first.
 *   - the existing case is not owned by the root module, or 'xp' is set:
 *     this is a UUID conflict and NULL is returned.
 *   - otherwise 'mp' reclaims the orphan: it is moved from the root module's
 *     case list to mp's, its suspects are discarded and its state is set to
 *     'state'.
 *
 * On success the module's ms_caseopen statistic is incremented (this is
 * skipped on the root-module early return).
 */
fmd_case_t *
fmd_case_recreate(fmd_module_t *mp, fmd_xprt_t *xp,
    uint_t state, const char *uuid, const char *code)
{
	fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
	fmd_case_impl_t *eip;

	ASSERT(state < FMD_CASE_RESOLVED);

	(void) pthread_mutex_init(&cip->ci_lock, NULL);
	fmd_buf_hash_create(&cip->ci_bufs);

	fmd_module_hold(mp);
	cip->ci_mod = mp;
	cip->ci_xprt = xp;
	cip->ci_refs = 1;
	cip->ci_state = state;
	cip->ci_uuid = fmd_strdup(uuid, FMD_SLEEP);
	cip->ci_uuidlen = strlen(cip->ci_uuid);
	cip->ci_code = fmd_strdup(code, FMD_SLEEP);
	/* ci_codelen counts the terminating NUL; it is 0 when there is no code */
	cip->ci_codelen = cip->ci_code ? strlen(cip->ci_code) + 1 : 0;

	if (state > FMD_CASE_CLOSE_WAIT)
		cip->ci_flags |= FMD_CF_SOLVED;

	/*
	 * Insert the case into the global case hash.  If the specified UUID is
	 * already present, check to see if it is an orphan: if so, reclaim it;
	 * otherwise if it is owned by a different module then return NULL.
	 */
	if ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip) {
		/*
		 * The new case never made it into the hash, so destroy it
		 * with visible == B_FALSE (no hash delete is attempted).
		 * fmd_case_destroy() requires ci_lock held and ci_refs == 0.
		 */
		(void) pthread_mutex_lock(&cip->ci_lock);
		cip->ci_refs--; /* decrement to zero */
		fmd_case_destroy((fmd_case_t *)cip, B_FALSE);

		cip = eip; /* switch 'cip' to the existing case */
		(void) pthread_mutex_lock(&cip->ci_lock);

		/*
		 * If the ASRU cache is trying to recreate an orphan, then just
		 * return the existing case that we found without changing it.
		 */
		if (mp == fmd.d_rmod) {
			/*
			 * When recreating an orphan case, state passed in may
			 * either be CLOSED (faulty) or REPAIRED (!faulty). If
			 * any suspects are still CLOSED (faulty) then the
			 * overall state needs to be CLOSED.
			 */
			if (state == FMD_CASE_CLOSED)
				cip->ci_state = FMD_CASE_CLOSED;
			(void) pthread_mutex_unlock(&cip->ci_lock);
			fmd_case_rele((fmd_case_t *)cip);
			return ((fmd_case_t *)cip);
		}

		/*
		 * If the existing case isn't an orphan or is being proxied,
		 * then we have a UUID conflict: return failure to the caller.
		 */
		if (cip->ci_mod != fmd.d_rmod || xp != NULL) {
			(void) pthread_mutex_unlock(&cip->ci_lock);
			fmd_case_rele((fmd_case_t *)cip);
			return (NULL);
		}

		/*
		 * If the new module is reclaiming an orphaned case, remove
		 * the case from the root module, switch ci_mod, and then fall
		 * through to adding the case to the new owner module 'mp'.
		 */
		fmd_module_lock(cip->ci_mod);
		fmd_list_delete(&cip->ci_mod->mod_cases, cip);
		fmd_module_unlock(cip->ci_mod);

		/* trade the case's hold on the root module for one on 'mp' */
		fmd_module_rele(cip->ci_mod);
		cip->ci_mod = mp;
		fmd_module_hold(mp);

		fmd_case_destroy_suspects(cip);
		cip->ci_state = state;

		(void) pthread_mutex_unlock(&cip->ci_lock);
		fmd_case_rele((fmd_case_t *)cip);
	} else {
		/*
		 * add into hash of solved cases
		 */
		if (cip->ci_code)
			fmd_case_code_hash_insert(fmd.d_cases, cip);
	}

	ASSERT(fmd_module_locked(mp));
	fmd_list_append(&mp->mod_cases, cip);

	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
	cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++;
	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);

	return ((fmd_case_t *)cip);
}
970 
971 void
972 fmd_case_destroy(fmd_case_t *cp, int visible)
973 {
974 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
975 	fmd_case_item_t *cit, *ncit;
976 
977 	ASSERT(MUTEX_HELD(&cip->ci_lock));
978 	ASSERT(cip->ci_refs == 0);
979 
980 	if (visible) {
981 		TRACE((FMD_DBG_CASE, "deleting case %s", cip->ci_uuid));
982 		fmd_case_hash_delete(fmd.d_cases, cip);
983 	}
984 
985 	for (cit = cip->ci_items; cit != NULL; cit = ncit) {
986 		ncit = cit->cit_next;
987 		fmd_event_rele(cit->cit_event);
988 		fmd_free(cit, sizeof (fmd_case_item_t));
989 	}
990 
991 	fmd_case_destroy_suspects(cip);
992 
993 	if (cip->ci_principal != NULL)
994 		fmd_event_rele(cip->ci_principal);
995 
996 	fmd_free(cip->ci_uuid, cip->ci_uuidlen + 1);
997 	fmd_free(cip->ci_code, cip->ci_codelen);
998 	(void) fmd_buf_hash_destroy(&cip->ci_bufs);
999 
1000 	fmd_module_rele(cip->ci_mod);
1001 	fmd_free(cip, sizeof (fmd_case_impl_t));
1002 }
1003 
/*
 * Place a hold on the specified case.  This is the unlocked entry point: it
 * acquires ci_lock, takes the reference via fmd_case_hold_locked(), and then
 * drops the lock again.
 */
void
fmd_case_hold(fmd_case_t *cp)
{
	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;

	(void) pthread_mutex_lock(&cip->ci_lock);
	fmd_case_hold_locked(cp);
	(void) pthread_mutex_unlock(&cip->ci_lock);
}
1013 
1014 void
1015 fmd_case_hold_locked(fmd_case_t *cp)
1016 {
1017 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1018 
1019 	ASSERT(MUTEX_HELD(&cip->ci_lock));
1020 	if (cip->ci_flags & FMD_CF_DELETING)
1021 		fmd_panic("attempt to hold a deleting case %p (%s)\n",
1022 		    (void *)cip, cip->ci_uuid);
1023 	cip->ci_refs++;
1024 	ASSERT(cip->ci_refs != 0);
1025 }
1026 
1027 static fmd_case_impl_t *
1028 fmd_case_tryhold(fmd_case_impl_t *cip)
1029 {
1030 	/*
1031 	 * If the case's "deleting" bit is unset, hold and return case,
1032 	 * otherwise, return NULL.
1033 	 */
1034 	(void) pthread_mutex_lock(&cip->ci_lock);
1035 	if (cip->ci_flags & FMD_CF_DELETING) {
1036 		(void) pthread_mutex_unlock(&cip->ci_lock);
1037 		cip = NULL;
1038 	} else {
1039 		fmd_case_hold_locked((fmd_case_t *)cip);
1040 		(void) pthread_mutex_unlock(&cip->ci_lock);
1041 	}
1042 	return (cip);
1043 }
1044 
1045 void
1046 fmd_case_rele(fmd_case_t *cp)
1047 {
1048 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1049 
1050 	(void) pthread_mutex_lock(&cip->ci_lock);
1051 	ASSERT(cip->ci_refs != 0);
1052 
1053 	if (--cip->ci_refs == 0)
1054 		fmd_case_destroy((fmd_case_t *)cip, B_TRUE);
1055 	else
1056 		(void) pthread_mutex_unlock(&cip->ci_lock);
1057 }
1058 
1059 void
1060 fmd_case_rele_locked(fmd_case_t *cp)
1061 {
1062 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1063 
1064 	ASSERT(MUTEX_HELD(&cip->ci_lock));
1065 	--cip->ci_refs;
1066 	ASSERT(cip->ci_refs != 0);
1067 }
1068 
1069 int
1070 fmd_case_insert_principal(fmd_case_t *cp, fmd_event_t *ep)
1071 {
1072 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1073 	fmd_case_item_t *cit;
1074 	fmd_event_t *oep;
1075 	uint_t state;
1076 	int new;
1077 
1078 	fmd_event_hold(ep);
1079 	(void) pthread_mutex_lock(&cip->ci_lock);
1080 
1081 	if (cip->ci_flags & FMD_CF_SOLVED)
1082 		state = FMD_EVS_DIAGNOSED;
1083 	else
1084 		state = FMD_EVS_ACCEPTED;
1085 
1086 	oep = cip->ci_principal;
1087 	cip->ci_principal = ep;
1088 
1089 	for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
1090 		if (cit->cit_event == ep)
1091 			break;
1092 	}
1093 
1094 	cip->ci_flags |= FMD_CF_DIRTY;
1095 	new = cit == NULL && ep != oep;
1096 
1097 	(void) pthread_mutex_unlock(&cip->ci_lock);
1098 
1099 	fmd_module_setcdirty(cip->ci_mod);
1100 	fmd_event_transition(ep, state);
1101 
1102 	if (oep != NULL)
1103 		fmd_event_rele(oep);
1104 
1105 	return (new);
1106 }
1107 
1108 int
1109 fmd_case_insert_event(fmd_case_t *cp, fmd_event_t *ep)
1110 {
1111 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1112 	fmd_case_item_t *cit;
1113 	uint_t state;
1114 	int new;
1115 
1116 	(void) pthread_mutex_lock(&cip->ci_lock);
1117 
1118 	if (cip->ci_flags & FMD_CF_SOLVED)
1119 		state = FMD_EVS_DIAGNOSED;
1120 	else
1121 		state = FMD_EVS_ACCEPTED;
1122 
1123 	for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
1124 		if (cit->cit_event == ep)
1125 			break;
1126 	}
1127 
1128 	new = cit == NULL && ep != cip->ci_principal;
1129 
1130 	/*
1131 	 * If the event is already in the case or the case is already solved,
1132 	 * there is no reason to save it: just transition it appropriately.
1133 	 */
1134 	if (cit != NULL || (cip->ci_flags & FMD_CF_SOLVED)) {
1135 		(void) pthread_mutex_unlock(&cip->ci_lock);
1136 		fmd_event_transition(ep, state);
1137 		return (new);
1138 	}
1139 
1140 	cit = fmd_alloc(sizeof (fmd_case_item_t), FMD_SLEEP);
1141 	fmd_event_hold(ep);
1142 
1143 	cit->cit_next = cip->ci_items;
1144 	cit->cit_event = ep;
1145 
1146 	cip->ci_items = cit;
1147 	cip->ci_nitems++;
1148 
1149 	cip->ci_flags |= FMD_CF_DIRTY;
1150 	(void) pthread_mutex_unlock(&cip->ci_lock);
1151 
1152 	fmd_module_setcdirty(cip->ci_mod);
1153 	fmd_event_transition(ep, state);
1154 
1155 	return (new);
1156 }
1157 
1158 void
1159 fmd_case_insert_suspect(fmd_case_t *cp, nvlist_t *nvl)
1160 {
1161 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1162 	fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP);
1163 
1164 	(void) pthread_mutex_lock(&cip->ci_lock);
1165 	ASSERT(cip->ci_state < FMD_CASE_CLOSE_WAIT);
1166 	cip->ci_flags |= FMD_CF_DIRTY;
1167 
1168 	cis->cis_next = cip->ci_suspects;
1169 	cis->cis_nvl = nvl;
1170 
1171 	cip->ci_suspects = cis;
1172 	cip->ci_nsuspects++;
1173 
1174 	(void) pthread_mutex_unlock(&cip->ci_lock);
1175 	fmd_module_setcdirty(cip->ci_mod);
1176 }
1177 
1178 void
1179 fmd_case_recreate_suspect(fmd_case_t *cp, nvlist_t *nvl)
1180 {
1181 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1182 	fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP);
1183 	boolean_t b;
1184 
1185 	(void) pthread_mutex_lock(&cip->ci_lock);
1186 	ASSERT(cip->ci_state == FMD_CASE_CLOSED ||
1187 	    cip->ci_state == FMD_CASE_REPAIRED);
1188 	ASSERT(cip->ci_mod == fmd.d_rmod);
1189 
1190 	cis->cis_next = cip->ci_suspects;
1191 	cis->cis_nvl = nvl;
1192 
1193 	if (nvlist_lookup_boolean_value(nvl,
1194 	    FM_SUSPECT_MESSAGE, &b) == 0 && b == B_FALSE)
1195 		cip->ci_flags |= FMD_CF_INVISIBLE;
1196 
1197 	cip->ci_suspects = cis;
1198 	cip->ci_nsuspects++;
1199 
1200 	(void) pthread_mutex_unlock(&cip->ci_lock);
1201 }
1202 
/*
 * Discard the case's current suspect list via fmd_case_destroy_suspects() and
 * mark the case and its owning module dirty.  The case must not yet have
 * reached the SOLVED state.
 */
void
fmd_case_reset_suspects(fmd_case_t *cp)
{
	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;

	(void) pthread_mutex_lock(&cip->ci_lock);
	ASSERT(cip->ci_state < FMD_CASE_SOLVED);

	fmd_case_destroy_suspects(cip);
	cip->ci_flags |= FMD_CF_DIRTY;

	(void) pthread_mutex_unlock(&cip->ci_lock);
	fmd_module_setcdirty(cip->ci_mod);
}
1217 
/*
 * Callback for fmd_asru_hash_apply_by_case(): set FMD_ASRU_UNUSABLE on the
 * given ASRU link.  The result of fmd_asru_setflags() is ignored and 'arg'
 * is unused.
 */
/*ARGSUSED*/
static void
fmd_case_unusable(fmd_asru_link_t *alp, void *arg)
{
	(void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE);
}
1224 
/*
 * Grab ci_lock and update the case state and set the dirty bit.  Then perform
 * whatever actions and emit whatever events are appropriate for the state.
 * Refer to the topmost block comment explaining the state machine for details.
 *
 * 'state' is the requested new state and must not exceed FMD_CASE_RESOLVED.
 * 'flags' is OR-ed into ci_flags even when no state change takes place.  The
 * state only ever moves forward: if the case is already at or beyond 'state',
 * the function returns after updating the flags and publishes nothing.
 */
void
fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
{
	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
	fmd_case_item_t *cit;
	fmd_event_t *e;
	int resolved = 0;	/* set once the case is off mod_cases */
	int any_unusable_and_present = 0;

	ASSERT(state <= FMD_CASE_RESOLVED);
	(void) pthread_mutex_lock(&cip->ci_lock);

	/*
	 * ISOLATED and REPAIRED are only recorded for a case that is already
	 * flagged SOLVED or is being flagged SOLVED by this call.
	 */
	if (!(cip->ci_flags & FMD_CF_SOLVED) && !(flags & FMD_CF_SOLVED))
		flags &= ~(FMD_CF_ISOLATED | FMD_CF_REPAIRED);

	cip->ci_flags |= flags;

	if (cip->ci_state >= state) {
		(void) pthread_mutex_unlock(&cip->ci_lock);
		return; /* already in specified state */
	}

	TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid,
	    _fmd_case_snames[cip->ci_state], _fmd_case_snames[state]));

	cip->ci_state = state;
	cip->ci_flags |= FMD_CF_DIRTY;

	/* Proxied cases and cases owned by the root module are skipped here. */
	if (cip->ci_xprt == NULL && cip->ci_mod != fmd.d_rmod)
		fmd_module_setcdirty(cip->ci_mod);

	switch (state) {
	case FMD_CASE_SOLVED:
		/* Every event attached to the case is now diagnosed. */
		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
			fmd_event_transition(cit->cit_event, FMD_EVS_DIAGNOSED);

		if (cip->ci_principal != NULL) {
			fmd_event_transition(cip->ci_principal,
			    FMD_EVS_DIAGNOSED);
		}
		break;

	case FMD_CASE_CLOSE_WAIT:
		/*
		 * If the case was never solved, do not change ASRUs.
		 * If the case was never fmd_case_closed, do not change ASRUs.
		 * If the case was repaired, do not change ASRUs.
		 */
		if ((cip->ci_flags & (FMD_CF_SOLVED | FMD_CF_ISOLATED |
		    FMD_CF_REPAIRED)) == (FMD_CF_SOLVED | FMD_CF_ISOLATED))
			fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
			    fmd_case_unusable, NULL);

		/*
		 * If an orphaned case transitions to CLOSE_WAIT, the owning
		 * module is no longer loaded: continue on to CASE_CLOSED.
		 */
		if (fmd_case_orphaned(cp))
			state = cip->ci_state = FMD_CASE_CLOSED;
		break;

	case FMD_CASE_REPAIRED:
		ASSERT(fmd_case_orphaned(cp));

		/*
		 * If all suspects are already either usable or not present then
		 * transition straight to RESOLVED state, publishing both the
		 * list.repaired and list.resolved.
		 */
		fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
		    fmd_case_unusable_and_present, &any_unusable_and_present);
		if (any_unusable_and_present)
			break;

		fmd_module_lock(cip->ci_mod);
		fmd_list_delete(&cip->ci_mod->mod_cases, cip);
		fmd_module_unlock(cip->ci_mod);
		cip->ci_state = FMD_CASE_RESOLVED;
		/*
		 * ci_lock is dropped around fmd_case_publish().  'state' is
		 * still FMD_CASE_REPAIRED for this call; it is advanced to
		 * RESOLVED afterwards so that the common publish code at the
		 * bottom of the function handles the RESOLVED state.
		 */
		(void) pthread_mutex_unlock(&cip->ci_lock);
		fmd_case_publish(cp, state);
		TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid,
		    _fmd_case_snames[FMD_CASE_REPAIRED],
		    _fmd_case_snames[FMD_CASE_RESOLVED]));
		state = FMD_CASE_RESOLVED;
		resolved = 1;
		(void) pthread_mutex_lock(&cip->ci_lock);
		break;

	case FMD_CASE_RESOLVED:
		ASSERT(fmd_case_orphaned(cp));

		/*
		 * If all suspects are already either usable or not present then
		 * carry on, publish list.resolved and discard the case.
		 */
		fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
		    fmd_case_unusable_and_present, &any_unusable_and_present);
		if (any_unusable_and_present) {
			/*
			 * Nothing is published on this path.  Note that
			 * ci_state was already set to RESOLVED above.
			 */
			(void) pthread_mutex_unlock(&cip->ci_lock);
			return;
		}

		fmd_module_lock(cip->ci_mod);
		fmd_list_delete(&cip->ci_mod->mod_cases, cip);
		fmd_module_unlock(cip->ci_mod);
		resolved = 1;
		break;
	}

	(void) pthread_mutex_unlock(&cip->ci_lock);

	/*
	 * If the module has initialized, then publish the appropriate event
	 * for the new case state.  If not, we are being called from the
	 * checkpoint code during module load, in which case the module's
	 * _fmd_init() routine hasn't finished yet, and our event dictionaries
	 * may not be open yet, which will prevent us from computing the event
	 * code.  Defer the call to fmd_case_publish() by enqueuing a PUBLISH
	 * event in our queue: this won't be processed until _fmd_init is done.
	 */
	if (cip->ci_mod->mod_flags & FMD_MOD_INIT)
		fmd_case_publish(cp, state);
	else {
		/* The case is held before being attached to the event. */
		fmd_case_hold(cp);
		e = fmd_event_create(FMD_EVT_PUBLISH, FMD_HRT_NOW, NULL, cp);
		fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e);
	}

	if (resolved) {
		/*
		 * If we transitioned to RESOLVED, adjust the reference count to
		 * reflect our removal from fmd.d_rmod->mod_cases above.  If the
		 * caller has not placed an additional hold on the case, it
		 * will now be freed.
		 */
		(void) pthread_mutex_lock(&cip->ci_lock);
		fmd_asru_hash_delete_case(fmd.d_asrus, cp);
		(void) pthread_mutex_unlock(&cip->ci_lock);
		fmd_case_rele(cp);
	}
}
1371 
1372 /*
1373  * Transition the specified case to *at least* the specified state by first
1374  * re-validating the suspect list using the resource cache.  This function is
1375  * employed by the checkpoint code when restoring a saved, solved case to see
1376  * if the state of the case has effectively changed while fmd was not running
1377  * or the module was not loaded.
1378  */
1379 void
1380 fmd_case_transition_update(fmd_case_t *cp, uint_t state, uint_t flags)
1381 {
1382 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1383 
1384 	int usable = 0;		/* are any suspects usable? */
1385 
1386 	ASSERT(state >= FMD_CASE_SOLVED);
1387 	(void) pthread_mutex_lock(&cip->ci_lock);
1388 
1389 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_usable, &usable);
1390 
1391 	(void) pthread_mutex_unlock(&cip->ci_lock);
1392 
1393 	if (!usable) {
1394 		state = MAX(state, FMD_CASE_CLOSE_WAIT);
1395 		flags |= FMD_CF_ISOLATED;
1396 	}
1397 
1398 	fmd_case_transition(cp, state, flags);
1399 }
1400 
/*
 * Mark the case dirty by setting FMD_CF_DIRTY under ci_lock, and then mark
 * the owning module's case state dirty via fmd_module_setcdirty().
 */
void
fmd_case_setdirty(fmd_case_t *cp)
{
	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;

	(void) pthread_mutex_lock(&cip->ci_lock);
	cip->ci_flags |= FMD_CF_DIRTY;
	(void) pthread_mutex_unlock(&cip->ci_lock);

	fmd_module_setcdirty(cip->ci_mod);
}
1412 
/*
 * Clear the case's FMD_CF_DIRTY flag under ci_lock.  Unlike
 * fmd_case_commit(), nothing attached to the case is committed and the
 * owning module is not touched.
 */
void
fmd_case_clrdirty(fmd_case_t *cp)
{
	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;

	(void) pthread_mutex_lock(&cip->ci_lock);
	cip->ci_flags &= ~FMD_CF_DIRTY;
	(void) pthread_mutex_unlock(&cip->ci_lock);
}
1422 
1423 void
1424 fmd_case_commit(fmd_case_t *cp)
1425 {
1426 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1427 	fmd_case_item_t *cit;
1428 
1429 	(void) pthread_mutex_lock(&cip->ci_lock);
1430 
1431 	if (cip->ci_flags & FMD_CF_DIRTY) {
1432 		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
1433 			fmd_event_commit(cit->cit_event);
1434 
1435 		if (cip->ci_principal != NULL)
1436 			fmd_event_commit(cip->ci_principal);
1437 
1438 		fmd_buf_hash_commit(&cip->ci_bufs);
1439 		cip->ci_flags &= ~FMD_CF_DIRTY;
1440 	}
1441 
1442 	(void) pthread_mutex_unlock(&cip->ci_lock);
1443 }
1444 
1445 /*
1446  * Indicate that the case may need to change state because one or more of the
1447  * ASRUs named as a suspect has changed state.  We examine all the suspects
1448  * and if none are still faulty, we initiate a case close transition.
1449  */
1450 void
1451 fmd_case_update(fmd_case_t *cp)
1452 {
1453 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1454 	uint_t cstate;
1455 	int faulty = 0;
1456 
1457 	(void) pthread_mutex_lock(&cip->ci_lock);
1458 	cstate = cip->ci_state;
1459 
1460 	if (cip->ci_xprt != NULL || cip->ci_state < FMD_CASE_SOLVED) {
1461 		(void) pthread_mutex_unlock(&cip->ci_lock);
1462 		return; /* update is not appropriate */
1463 	}
1464 
1465 	if (cip->ci_flags & FMD_CF_REPAIRED) {
1466 		(void) pthread_mutex_unlock(&cip->ci_lock);
1467 		return; /* already repaired */
1468 	}
1469 
1470 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty);
1471 	(void) pthread_mutex_unlock(&cip->ci_lock);
1472 
1473 	if (faulty) {
1474 		nvlist_t *nvl;
1475 		fmd_event_t *e;
1476 		char *class;
1477 
1478 		nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS);
1479 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1480 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
1481 		(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
1482 		fmd_log_append(fmd.d_fltlog, e, cp);
1483 		(void) pthread_rwlock_unlock(&fmd.d_log_lock);
1484 		fmd_dispq_dispatch(fmd.d_disp, e, class);
1485 		return; /* one or more suspects are still marked faulty */
1486 	}
1487 
1488 	if (cstate == FMD_CASE_CLOSED)
1489 		fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
1490 	else
1491 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
1492 }
1493 
/*
 * Delete a closed case from the module's case list once the fmdo_close() entry
 * point has run to completion.  If the case is owned by a transport module,
 * tell the transport to proxy a case close on the other end of the transport.
 * If not, transition to the appropriate next state based on ci_flags.  This
 * function represents the end of CLOSE_WAIT and transitions the case to either
 * CLOSED or REPAIRED or discards it entirely because it was never solved;
 * refer to the topmost block comment explaining the state machine for details.
 *
 * The caller must hold the owning module's lock.  On return, ownership of the
 * case (ci_mod) has been switched to the root module, fmd.d_rmod.
 */
void
fmd_case_delete(fmd_case_t *cp)
{
	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
	fmd_modstat_t *msp;
	size_t buftotal;

	ASSERT(fmd_module_locked(cip->ci_mod));
	fmd_list_delete(&cip->ci_mod->mod_cases, cip);
	buftotal = fmd_buf_hash_destroy(&cip->ci_bufs);

	/*
	 * Update the old owner's statistics: one fewer open case, and the
	 * buffer total reduced by the value fmd_buf_hash_destroy() returned.
	 */
	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
	msp = cip->ci_mod->mod_stats;

	ASSERT(msp->ms_caseopen.fmds_value.ui64 != 0);
	msp->ms_caseopen.fmds_value.ui64--;

	ASSERT(msp->ms_buftotal.fmds_value.ui64 >= buftotal);
	msp->ms_buftotal.fmds_value.ui64 -= buftotal;

	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);

	if (cip->ci_xprt == NULL)
		fmd_module_setcdirty(cip->ci_mod);

	/* Swap the case's module hold from the old owner to the root module. */
	fmd_module_rele(cip->ci_mod);
	cip->ci_mod = fmd.d_rmod;
	fmd_module_hold(cip->ci_mod);

	/*
	 * If the case is not proxied and it has been solved, then retain it
	 * on the root module's case list at least until we're transitioned.
	 * Otherwise free the case with our final fmd_case_rele() below.
	 */
	if (cip->ci_xprt == NULL && (cip->ci_flags & FMD_CF_SOLVED)) {
		fmd_module_lock(cip->ci_mod);
		fmd_list_append(&cip->ci_mod->mod_cases, cip);
		fmd_module_unlock(cip->ci_mod);
		fmd_case_hold(cp);
	}

	/*
	 * If a proxied case finishes CLOSE_WAIT, then it can be discarded
	 * rather than orphaned because by definition it can have no entries
	 * in the resource cache of the current fault manager.
	 */
	if (cip->ci_xprt != NULL)
		fmd_xprt_uuclose(cip->ci_xprt, cip->ci_uuid);
	else if (cip->ci_flags & FMD_CF_REPAIRED)
		fmd_case_transition(cp, FMD_CASE_REPAIRED, 0);
	else if (cip->ci_flags & FMD_CF_ISOLATED)
		fmd_case_transition(cp, FMD_CASE_CLOSED, 0);

	/* Drop the reference that went with the old module's case list. */
	fmd_case_rele(cp);
}
1558 
/*
 * Remove a case from its owning module: decrement the module's open-case
 * statistic, unlink the case from the module's case list, and release one
 * reference on the case.  The caller must hold the owning module's lock.
 */
void
fmd_case_discard(fmd_case_t *cp)
{
	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;

	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
	cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64--;
	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);

	ASSERT(fmd_module_locked(cip->ci_mod));
	fmd_list_delete(&cip->ci_mod->mod_cases, cip);
	fmd_case_rele(cp);
}
1572 
1573 /*
1574  * Indicate that the problem corresponding to a case has been repaired by
1575  * clearing the faulty bit on each ASRU named as a suspect.  If the case hasn't
1576  * already been closed, this function initiates the transition to CLOSE_WAIT.
1577  * The caller must have the case held from fmd_case_hash_lookup(), so we can
1578  * grab and drop ci_lock without the case being able to be freed in between.
1579  */
1580 int
1581 fmd_case_repair(fmd_case_t *cp)
1582 {
1583 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1584 	uint_t cstate;
1585 
1586 	(void) pthread_mutex_lock(&cip->ci_lock);
1587 	cstate = cip->ci_state;
1588 
1589 	if (cip->ci_xprt != NULL) {
1590 		(void) pthread_mutex_unlock(&cip->ci_lock);
1591 		return (fmd_set_errno(EFMD_CASE_OWNER));
1592 	}
1593 
1594 	if (cstate < FMD_CASE_SOLVED) {
1595 		(void) pthread_mutex_unlock(&cip->ci_lock);
1596 		return (fmd_set_errno(EFMD_CASE_STATE));
1597 	}
1598 
1599 	if (cip->ci_flags & FMD_CF_REPAIRED) {
1600 		(void) pthread_mutex_unlock(&cip->ci_lock);
1601 		return (0); /* already repaired */
1602 	}
1603 
1604 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, NULL);
1605 	(void) pthread_mutex_unlock(&cip->ci_lock);
1606 
1607 	if (cstate == FMD_CASE_CLOSED)
1608 		fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
1609 	else
1610 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
1611 
1612 	return (0);
1613 }
1614 
1615 int
1616 fmd_case_acquit(fmd_case_t *cp)
1617 {
1618 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1619 	uint_t cstate;
1620 
1621 	(void) pthread_mutex_lock(&cip->ci_lock);
1622 	cstate = cip->ci_state;
1623 
1624 	if (cip->ci_xprt != NULL) {
1625 		(void) pthread_mutex_unlock(&cip->ci_lock);
1626 		return (fmd_set_errno(EFMD_CASE_OWNER));
1627 	}
1628 
1629 	if (cstate < FMD_CASE_SOLVED) {
1630 		(void) pthread_mutex_unlock(&cip->ci_lock);
1631 		return (fmd_set_errno(EFMD_CASE_STATE));
1632 	}
1633 
1634 	if (cip->ci_flags & FMD_CF_REPAIRED) {
1635 		(void) pthread_mutex_unlock(&cip->ci_lock);
1636 		return (0); /* already repaired */
1637 	}
1638 
1639 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_acquit, NULL);
1640 	(void) pthread_mutex_unlock(&cip->ci_lock);
1641 
1642 	if (cstate == FMD_CASE_CLOSED)
1643 		fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
1644 	else
1645 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
1646 
1647 	return (0);
1648 }
1649 
1650 int
1651 fmd_case_contains(fmd_case_t *cp, fmd_event_t *ep)
1652 {
1653 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1654 	fmd_case_item_t *cit;
1655 	uint_t state;
1656 	int rv = 0;
1657 
1658 	(void) pthread_mutex_lock(&cip->ci_lock);
1659 
1660 	if (cip->ci_state >= FMD_CASE_SOLVED)
1661 		state = FMD_EVS_DIAGNOSED;
1662 	else
1663 		state = FMD_EVS_ACCEPTED;
1664 
1665 	for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
1666 		if ((rv = fmd_event_equal(ep, cit->cit_event)) != 0)
1667 			break;
1668 	}
1669 
1670 	if (rv == 0 && cip->ci_principal != NULL)
1671 		rv = fmd_event_equal(ep, cip->ci_principal);
1672 
1673 	(void) pthread_mutex_unlock(&cip->ci_lock);
1674 
1675 	if (rv != 0)
1676 		fmd_event_transition(ep, state);
1677 
1678 	return (rv);
1679 }
1680 
1681 int
1682 fmd_case_orphaned(fmd_case_t *cp)
1683 {
1684 	return (((fmd_case_impl_t *)cp)->ci_mod == fmd.d_rmod);
1685 }
1686 
1687 void
1688 fmd_case_settime(fmd_case_t *cp, time_t tv_sec, suseconds_t tv_usec)
1689 {
1690 	((fmd_case_impl_t *)cp)->ci_tv.tv_sec = tv_sec;
1691 	((fmd_case_impl_t *)cp)->ci_tv.tv_usec = tv_usec;
1692 	((fmd_case_impl_t *)cp)->ci_tv_valid = 1;
1693 }
1694 
1695 /*ARGSUSED*/
1696 void
1697 fmd_case_repair_replay_case(fmd_case_t *cp, void *arg)
1698 {
1699 	int not_faulty = 0;
1700 	int faulty = 0;
1701 	nvlist_t *nvl;
1702 	fmd_event_t *e;
1703 	char *class;
1704 	int any_unusable_and_present = 0;
1705 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1706 
1707 	if (cip->ci_state < FMD_CASE_SOLVED)
1708 		return;
1709 
1710 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty);
1711 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_not_faulty,
1712 	    &not_faulty);
1713 
1714 	if (!faulty) {
1715 		/*
1716 		 * If none of the suspects is faulty, replay the list.repaired.
1717 		 * If all suspects are already either usable or not present then
1718 		 * also transition straight to RESOLVED state.
1719 		 */
1720 		fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
1721 		    fmd_case_unusable_and_present, &any_unusable_and_present);
1722 		if (!any_unusable_and_present) {
1723 			fmd_module_lock(cip->ci_mod);
1724 			fmd_list_delete(&cip->ci_mod->mod_cases, cip);
1725 			fmd_module_unlock(cip->ci_mod);
1726 			cip->ci_state = FMD_CASE_RESOLVED;
1727 
1728 			nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
1729 			(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1730 			e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl,
1731 			    class);
1732 			fmd_dispq_dispatch(fmd.d_disp, e, class);
1733 
1734 			fmd_case_publish(cp, FMD_CASE_RESOLVED);
1735 			(void) pthread_mutex_lock(&cip->ci_lock);
1736 			fmd_asru_hash_delete_case(fmd.d_asrus, cp);
1737 			(void) pthread_mutex_unlock(&cip->ci_lock);
1738 			fmd_case_rele(cp);
1739 		} else {
1740 			nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
1741 			(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1742 			e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl,
1743 			    class);
1744 			fmd_dispq_dispatch(fmd.d_disp, e, class);
1745 		}
1746 	} else if (not_faulty) {
1747 		/*
1748 		 * if some but not all of the suspects are not faulty, replay
1749 		 * the list.updated.
1750 		 */
1751 		nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS);
1752 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1753 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
1754 		fmd_dispq_dispatch(fmd.d_disp, e, class);
1755 	}
1756 }
1757 
1758 void
1759 fmd_case_repair_replay()
1760 {
1761 	fmd_case_hash_apply(fmd.d_cases, fmd_case_repair_replay_case, NULL);
1762 }
1763