xref: /titanic_50/usr/src/cmd/fm/fmd/common/fmd_case.c (revision 47f258d370fe585ec2f4768e76dcfc71031dcbab)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 
30 /*
31  * FMD Case Subsystem
32  *
33  * Diagnosis engines are expected to group telemetry events related to the
34  * diagnosis of a particular problem on the system into a set of cases.  The
35  * diagnosis engine may have any number of cases open at a given point in time.
36  * Some cases may eventually be *solved* by associating a suspect list of one
37  * or more problems with the case, at which point fmd publishes a list.suspect
38  * event for the case and it becomes visible to administrators and agents.
39  *
40  * Every case is named using a UUID, and is globally visible in the case hash.
41  * Cases are reference-counted, except for the reference from the case hash
42  * itself.  Consumers of case references include modules, which store active
43  * cases on the mod_cases list, ASRUs in the resource cache, and the RPC code.
44  *
45  * Cases obey the following state machine.  In states UNSOLVED, SOLVED, and
46  * CLOSE_WAIT, a case's module refers to the owning module (a diagnosis engine
47  * or transport) and the case is referenced by the mod_cases list.  Once the
48  * case reaches the CLOSED or REPAIRED states, a case's module changes to refer
49  * to the root module (fmd.d_rmod) and is deleted from the owner's mod_cases.
50  *
51  *			+------------+
52  *	     +----------|  UNSOLVED  |
53  *	     |		+------------+
54  *	   1 |	             4 |
55  *           |                 |
56  *	+----v---+ /-2->+------v-----+	  3	+--------+
57  *      | SOLVED |<     | CLOSE_WAIT |--------->| CLOSED |
58  *	+--------+ \-5->+------------+		+--------+
59  *	                       |                    |
60  *                           6 |                    | 7
61  *      		+------v-----+              |
62  *	                |  REPAIRED  |<-------------+
63  *			+------------+
64  *
65  * The state machine changes are triggered by calls to fmd_case_transition()
66  * from various locations inside of fmd, as described below:
67  *
68  * [1] Called by: fmd_case_solve()
69  *       Actions: FMD_CF_SOLVED flag is set in ci_flags
70  *                conviction policy is applied to suspect list
71  *                suspects convicted are marked faulty (F) in R$
72  *                list.suspect event logged and dispatched
73  *
74  * [2] Called by: fmd_case_close(), fmd_case_uuclose(), fmd_xprt_event_uuclose()
75  *       Actions: FMD_CF_ISOLATED flag is set in ci_flags
76  *                suspects convicted (F) are marked unusable (U) in R$
77  *                diagnosis engine fmdo_close() entry point scheduled
78  *                case transitions to CLOSED [3] upon exit from CLOSE_WAIT
79  *
80  * [3] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
81  *       Actions: list.isolated event dispatched
82  *                case deleted from module's list of open cases
83  *
84  * [4] Called by: fmd_case_close(), fmd_case_uuclose()
85  *       Actions: diagnosis engine fmdo_close() entry point scheduled
86  *                case is subsequently discarded by fmd_case_delete()
87  *
88  * [5] Called by: fmd_case_repair(), fmd_case_update()
89  *       Actions: FMD_CF_REPAIRED flag is set in ci_flags
90  *                diagnosis engine fmdo_close() entry point scheduled
91  *                case transitions to REPAIRED [6] upon exit from CLOSE_WAIT
92  *
93  * [6] Called by: fmd_case_repair(), fmd_case_update()
94  *       Actions: FMD_CF_REPAIRED flag is set in ci_flags
95  *                suspects convicted are marked non faulty (!F) in R$
96  *                list.repaired event dispatched
97  *
98  * [7] Called by: fmd_case_repair(), fmd_case_update()
99  *       Actions: FMD_CF_REPAIRED flag is set in ci_flags
100  *                suspects convicted are marked non faulty (!F) in R$
101  *                list.repaired event dispatched
102  */
103 
104 #include <sys/fm/protocol.h>
105 #include <uuid/uuid.h>
106 #include <alloca.h>
107 
108 #include <fmd_alloc.h>
109 #include <fmd_module.h>
110 #include <fmd_error.h>
111 #include <fmd_conf.h>
112 #include <fmd_case.h>
113 #include <fmd_string.h>
114 #include <fmd_subr.h>
115 #include <fmd_protocol.h>
116 #include <fmd_event.h>
117 #include <fmd_eventq.h>
118 #include <fmd_dispq.h>
119 #include <fmd_buf.h>
120 #include <fmd_log.h>
121 #include <fmd_asru.h>
122 #include <fmd_xprt.h>
123 
124 #include <fmd.h>
125 
/*
 * Printable names of the case states.  The table is indexed by the numeric
 * case state and is consulted when tracing state transitions, so the order
 * of the entries must follow the order of the FMD_CASE_* state values.
 */
static const char *const _fmd_case_snames[] = {
	"UNSOLVED",	/* name for state FMD_CASE_UNSOLVED */
	"SOLVED",	/* name for state FMD_CASE_SOLVED */
	"CLOSE_WAIT",	/* name for state FMD_CASE_CLOSE_WAIT */
	"CLOSED",	/* name for state FMD_CASE_CLOSED */
	"REPAIRED"	/* name for state FMD_CASE_REPAIRED */
};
133 
134 fmd_case_hash_t *
135 fmd_case_hash_create(void)
136 {
137 	fmd_case_hash_t *chp = fmd_alloc(sizeof (fmd_case_hash_t), FMD_SLEEP);
138 
139 	(void) pthread_rwlock_init(&chp->ch_lock, NULL);
140 	chp->ch_hashlen = fmd.d_str_buckets;
141 	chp->ch_hash = fmd_zalloc(sizeof (void *) * chp->ch_hashlen, FMD_SLEEP);
142 	chp->ch_count = 0;
143 
144 	return (chp);
145 }
146 
147 /*
148  * Destroy the case hash.  Unlike most of our hash tables, no active references
149  * are kept by the case hash itself; all references come from other subsystems.
150  * The hash must be destroyed after all modules are unloaded; if anything was
151  * present in the hash it would be by definition a reference count leak.
152  */
153 void
154 fmd_case_hash_destroy(fmd_case_hash_t *chp)
155 {
156 	fmd_free(chp->ch_hash, sizeof (void *) * chp->ch_hashlen);
157 	fmd_free(chp, sizeof (fmd_case_hash_t));
158 }
159 
160 /*
161  * Take a snapshot of the case hash by placing an additional hold on each
162  * member in an auxiliary array, and then call 'func' for each case.
163  */
164 void
165 fmd_case_hash_apply(fmd_case_hash_t *chp,
166     void (*func)(fmd_case_t *, void *), void *arg)
167 {
168 	fmd_case_impl_t *cp, **cps, **cpp;
169 	uint_t cpc, i;
170 
171 	(void) pthread_rwlock_rdlock(&chp->ch_lock);
172 
173 	cps = cpp = fmd_alloc(chp->ch_count * sizeof (fmd_case_t *), FMD_SLEEP);
174 	cpc = chp->ch_count;
175 
176 	for (i = 0; i < chp->ch_hashlen; i++) {
177 		for (cp = chp->ch_hash[i]; cp != NULL; cp = cp->ci_next) {
178 			fmd_case_hold((fmd_case_t *)cp);
179 			*cpp++ = cp;
180 		}
181 	}
182 
183 	ASSERT(cpp == cps + cpc);
184 	(void) pthread_rwlock_unlock(&chp->ch_lock);
185 
186 	for (i = 0; i < cpc; i++) {
187 		func((fmd_case_t *)cps[i], arg);
188 		fmd_case_rele((fmd_case_t *)cps[i]);
189 	}
190 
191 	fmd_free(cps, cpc * sizeof (fmd_case_t *));
192 }
193 
194 /*
195  * Look up the diagcode for this case and cache it in ci_code.  If no suspects
196  * were defined for this case or if the lookup fails, the event dictionary or
197  * module code is broken, and we set the event code to a precomputed default.
198  */
199 static const char *
200 fmd_case_mkcode(fmd_case_t *cp)
201 {
202 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
203 	fmd_case_susp_t *cis;
204 
205 	char **keys, **keyp;
206 	const char *s;
207 
208 	ASSERT(MUTEX_HELD(&cip->ci_lock));
209 	ASSERT(cip->ci_state >= FMD_CASE_SOLVED);
210 
211 	fmd_free(cip->ci_code, cip->ci_codelen);
212 	cip->ci_codelen = cip->ci_mod->mod_codelen;
213 	cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP);
214 	keys = keyp = alloca(sizeof (char *) * (cip->ci_nsuspects + 1));
215 
216 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
217 		if (nvlist_lookup_string(cis->cis_nvl, FM_CLASS, keyp) == 0)
218 			keyp++;
219 	}
220 
221 	*keyp = NULL; /* mark end of keys[] array for libdiagcode */
222 
223 	if (cip->ci_nsuspects == 0 || fmd_module_dc_key2code(
224 	    cip->ci_mod, keys, cip->ci_code, cip->ci_codelen) != 0) {
225 		(void) fmd_conf_getprop(fmd.d_conf, "nodiagcode", &s);
226 		fmd_free(cip->ci_code, cip->ci_codelen);
227 		cip->ci_codelen = strlen(s) + 1;
228 		cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP);
229 		(void) strcpy(cip->ci_code, s);
230 	}
231 
232 	return (cip->ci_code);
233 }
234 
235 nvlist_t *
236 fmd_case_mkevent(fmd_case_t *cp, const char *class)
237 {
238 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
239 	fmd_case_susp_t *cis;
240 
241 	fmd_asru_hash_t *ahp = fmd.d_asrus;
242 	fmd_asru_t *asru;
243 
244 	nvlist_t **nva, **nvp, *nvl, *fmri;
245 	uint8_t *ba, *bp;
246 
247 	int msg = B_TRUE;
248 	boolean_t b;
249 
250 	(void) pthread_mutex_lock(&cip->ci_lock);
251 	ASSERT(cip->ci_state >= FMD_CASE_SOLVED);
252 
253 	nva = nvp = alloca(sizeof (nvlist_t *) * cip->ci_nsuspects);
254 	ba = bp = alloca(sizeof (uint8_t) * cip->ci_nsuspects);
255 
256 	/*
257 	 * For each suspect associated with the case, store its fault event
258 	 * nvlist in 'nva'.  We also look to see if any of the suspect faults
259 	 * have asked not to be messaged.  If any of them have made such a
260 	 * request, propagate that attribute to the composite list.* event.
261 	 * Finally, store each suspect's faulty status into the bitmap 'ba'.
262 	 */
263 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
264 		if (nvlist_lookup_boolean_value(cis->cis_nvl,
265 		    FM_SUSPECT_MESSAGE, &b) == 0 && b == B_FALSE)
266 			msg = B_FALSE;
267 
268 		if (nvlist_lookup_nvlist(cis->cis_nvl,
269 		    FM_FAULT_ASRU, &fmri) == 0 && (asru =
270 		    fmd_asru_hash_lookup_nvl(ahp, fmri, FMD_B_FALSE)) != NULL) {
271 			*bp++ = (asru->asru_flags & FMD_ASRU_FAULTY) != 0;
272 			fmd_asru_hash_release(ahp, asru);
273 		} else
274 			*bp++ = 0;
275 
276 		*nvp++ = cis->cis_nvl;
277 	}
278 
279 	if (cip->ci_code == NULL)
280 		(void) fmd_case_mkcode(cp);
281 
282 	nvl = fmd_protocol_list(class, cip->ci_mod->mod_fmri,
283 	    cip->ci_uuid, cip->ci_code, cip->ci_nsuspects, nva, ba, msg);
284 
285 	(void) pthread_mutex_unlock(&cip->ci_lock);
286 	return (nvl);
287 }
288 
289 /*
290  * Convict suspects in a case by applying a conviction policy and updating the
291  * resource cache prior to emitting the list.suspect event for the given case.
292  * At present, our policy is very simple: convict every suspect in the case.
293  * In the future, this policy can be extended and made configurable to permit:
294  *
295  * - convicting the suspect with the highest FIT rate
296  * - convicting the suspect with the cheapest FRU
297  * - convicting the suspect with the FRU that is in a depot's inventory
298  * - convicting the suspect with the longest lifetime
299  *
300  * and so forth.  A word to the wise: this problem is significantly harder than
301  * it seems at first glance.  Future work should heed the following advice:
302  *
303  * Hacking the policy into C code here is a very bad idea.  The policy needs to
304  * be decided upon very carefully and fundamentally encodes knowledge of what
305  * suspect list combinations can be emitted by what diagnosis engines.  As such
306  * fmd's code is the wrong location, because that would require fmd itself to
307  * be updated for every diagnosis engine change, defeating the entire design.
308  * The FMA Event Registry knows the suspect list combinations: policy inputs
309  * can be derived from it and used to produce per-module policy configuration.
310  *
311  * If the policy needs to be dynamic and not statically fixed at either fmd
312  * startup or module load time, any implementation of dynamic policy retrieval
313  * must employ some kind of caching mechanism or be part of a built-in module.
314  * The fmd_case_convict() function is called with locks held inside of fmd and
315  * is not a place where unbounded blocking on some inter-process or inter-
316  * system communication to another service (e.g. another daemon) can occur.
317  */
318 static void
319 fmd_case_convict(fmd_case_t *cp)
320 {
321 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
322 	fmd_asru_hash_t *ahp = fmd.d_asrus;
323 
324 	fmd_case_susp_t *cis;
325 	fmd_asru_t *asru;
326 	nvlist_t *fmri;
327 
328 	(void) pthread_mutex_lock(&cip->ci_lock);
329 	(void) fmd_case_mkcode(cp);
330 
331 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
332 		if (nvlist_lookup_nvlist(cis->cis_nvl, FM_FAULT_ASRU, &fmri))
333 			continue; /* no ASRU provided by diagnosis engine */
334 
335 		if ((asru = fmd_asru_hash_lookup_nvl(ahp,
336 		    fmri, FMD_B_TRUE)) == NULL) {
337 			fmd_error(EFMD_CASE_EVENT, "cannot convict suspect in "
338 			    "%s: %s\n", cip->ci_uuid, fmd_strerror(errno));
339 			continue;
340 		}
341 
342 		(void) fmd_asru_clrflags(asru,
343 		    FMD_ASRU_UNUSABLE, cp, cis->cis_nvl);
344 		(void) fmd_asru_setflags(asru,
345 		    FMD_ASRU_FAULTY, cp, cis->cis_nvl);
346 
347 		fmd_asru_hash_release(ahp, asru);
348 	}
349 
350 	(void) pthread_mutex_unlock(&cip->ci_lock);
351 }
352 
353 void
354 fmd_case_publish(fmd_case_t *cp, uint_t state)
355 {
356 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
357 	fmd_event_t *e;
358 	nvlist_t *nvl;
359 	char *class;
360 
361 	if (state == FMD_CASE_CURRENT)
362 		state = cip->ci_state; /* use current state */
363 
364 	switch (state) {
365 	case FMD_CASE_SOLVED:
366 		fmd_case_convict(cp);
367 		nvl = fmd_case_mkevent(cp, FM_LIST_SUSPECT_CLASS);
368 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
369 
370 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
371 		(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
372 		fmd_log_append(fmd.d_fltlog, e, cp);
373 		(void) pthread_rwlock_unlock(&fmd.d_log_lock);
374 		fmd_dispq_dispatch(fmd.d_disp, e, class);
375 
376 		(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
377 		cip->ci_mod->mod_stats->ms_casesolved.fmds_value.ui64++;
378 		(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
379 
380 		break;
381 
382 	case FMD_CASE_CLOSE_WAIT:
383 		fmd_case_hold(cp);
384 		e = fmd_event_create(FMD_EVT_CLOSE, FMD_HRT_NOW, NULL, cp);
385 		fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e);
386 
387 		(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
388 		cip->ci_mod->mod_stats->ms_caseclosed.fmds_value.ui64++;
389 		(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
390 
391 		break;
392 
393 	case FMD_CASE_CLOSED:
394 		nvl = fmd_case_mkevent(cp, FM_LIST_ISOLATED_CLASS);
395 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
396 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
397 		fmd_dispq_dispatch(fmd.d_disp, e, class);
398 		break;
399 
400 	case FMD_CASE_REPAIRED:
401 		nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
402 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
403 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
404 		fmd_dispq_dispatch(fmd.d_disp, e, class);
405 		break;
406 	}
407 }
408 
409 fmd_case_t *
410 fmd_case_hash_lookup(fmd_case_hash_t *chp, const char *uuid)
411 {
412 	fmd_case_impl_t *cip;
413 	uint_t h;
414 
415 	(void) pthread_rwlock_rdlock(&chp->ch_lock);
416 	h = fmd_strhash(uuid) % chp->ch_hashlen;
417 
418 	for (cip = chp->ch_hash[h]; cip != NULL; cip = cip->ci_next) {
419 		if (strcmp(cip->ci_uuid, uuid) == 0)
420 			break;
421 	}
422 
423 	if (cip != NULL)
424 		fmd_case_hold((fmd_case_t *)cip);
425 	else
426 		(void) fmd_set_errno(EFMD_CASE_INVAL);
427 
428 	(void) pthread_rwlock_unlock(&chp->ch_lock);
429 	return ((fmd_case_t *)cip);
430 }
431 
432 static fmd_case_impl_t *
433 fmd_case_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
434 {
435 	fmd_case_impl_t *eip;
436 	uint_t h;
437 
438 	(void) pthread_rwlock_wrlock(&chp->ch_lock);
439 	h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen;
440 
441 	for (eip = chp->ch_hash[h]; eip != NULL; eip = eip->ci_next) {
442 		if (strcmp(cip->ci_uuid, eip->ci_uuid) == 0) {
443 			fmd_case_hold((fmd_case_t *)eip);
444 			(void) pthread_rwlock_unlock(&chp->ch_lock);
445 			return (eip); /* uuid already present */
446 		}
447 	}
448 
449 	cip->ci_next = chp->ch_hash[h];
450 	chp->ch_hash[h] = cip;
451 
452 	chp->ch_count++;
453 	ASSERT(chp->ch_count != 0);
454 
455 	(void) pthread_rwlock_unlock(&chp->ch_lock);
456 	return (cip);
457 }
458 
459 static void
460 fmd_case_hash_delete(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
461 {
462 	fmd_case_impl_t *cp, **pp;
463 	uint_t h;
464 
465 	(void) pthread_rwlock_wrlock(&chp->ch_lock);
466 
467 	h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen;
468 	pp = &chp->ch_hash[h];
469 
470 	for (cp = *pp; cp != NULL; cp = cp->ci_next) {
471 		if (cp != cip)
472 			pp = &cp->ci_next;
473 		else
474 			break;
475 	}
476 
477 	if (cp == NULL) {
478 		fmd_panic("case %p (%s) not found on hash chain %u\n",
479 		    (void *)cip, cip->ci_uuid, h);
480 	}
481 
482 	*pp = cp->ci_next;
483 	cp->ci_next = NULL;
484 
485 	ASSERT(chp->ch_count != 0);
486 	chp->ch_count--;
487 
488 	(void) pthread_rwlock_unlock(&chp->ch_lock);
489 }
490 
491 fmd_case_t *
492 fmd_case_create(fmd_module_t *mp, void *data)
493 {
494 	fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
495 	fmd_case_impl_t *eip = NULL;
496 	uuid_t uuid;
497 
498 	(void) pthread_mutex_init(&cip->ci_lock, NULL);
499 	fmd_buf_hash_create(&cip->ci_bufs);
500 
501 	fmd_module_hold(mp);
502 	cip->ci_mod = mp;
503 	cip->ci_refs = 1;
504 	cip->ci_state = FMD_CASE_UNSOLVED;
505 	cip->ci_flags = FMD_CF_DIRTY;
506 	cip->ci_data = data;
507 
508 	/*
509 	 * Calling libuuid: get a clue.  The library interfaces cleverly do not
510 	 * define any constant for the length of an unparse string, and do not
511 	 * permit the caller to specify a buffer length for safety.  The spec
512 	 * says it will be 36 bytes, but we make it tunable just in case.
513 	 */
514 	(void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &cip->ci_uuidlen);
515 	cip->ci_uuid = fmd_zalloc(cip->ci_uuidlen + 1, FMD_SLEEP);
516 
517 	/*
518 	 * We expect this loop to execute only once, but code it defensively
519 	 * against the possibility of libuuid bugs.  Keep generating uuids and
520 	 * attempting to do a hash insert until we get a unique one.
521 	 */
522 	do {
523 		if (eip != NULL)
524 			fmd_case_rele((fmd_case_t *)eip);
525 		uuid_generate(uuid);
526 		uuid_unparse(uuid, cip->ci_uuid);
527 	} while ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip);
528 
529 	ASSERT(fmd_module_locked(mp));
530 	fmd_list_append(&mp->mod_cases, cip);
531 	fmd_module_setcdirty(mp);
532 
533 	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
534 	cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++;
535 	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
536 
537 	return ((fmd_case_t *)cip);
538 }
539 
540 fmd_case_t *
541 fmd_case_recreate(fmd_module_t *mp, fmd_xprt_t *xp,
542     uint_t state, const char *uuid, const char *code)
543 {
544 	fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
545 	fmd_case_impl_t *eip;
546 
547 	ASSERT(state < FMD_CASE_REPAIRED);
548 
549 	(void) pthread_mutex_init(&cip->ci_lock, NULL);
550 	fmd_buf_hash_create(&cip->ci_bufs);
551 
552 	fmd_module_hold(mp);
553 	cip->ci_mod = mp;
554 	cip->ci_xprt = xp;
555 	cip->ci_refs = 1;
556 	cip->ci_state = state;
557 	cip->ci_uuid = fmd_strdup(uuid, FMD_SLEEP);
558 	cip->ci_uuidlen = strlen(cip->ci_uuid);
559 	cip->ci_code = fmd_strdup(code, FMD_SLEEP);
560 	cip->ci_codelen = cip->ci_code ? strlen(cip->ci_code) + 1 : 0;
561 
562 	if (state > FMD_CASE_CLOSE_WAIT)
563 		cip->ci_flags |= FMD_CF_SOLVED;
564 
565 	/*
566 	 * Insert the case into the global case hash.  If the specified UUID is
567 	 * already present, check to see if it is an orphan: if so, reclaim it;
568 	 * otherwise if it is owned by a different module then return NULL.
569 	 */
570 	if ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip) {
571 		(void) pthread_mutex_lock(&cip->ci_lock);
572 		cip->ci_refs--; /* decrement to zero */
573 		fmd_case_destroy((fmd_case_t *)cip, B_FALSE);
574 
575 		cip = eip; /* switch 'cip' to the existing case */
576 		(void) pthread_mutex_lock(&cip->ci_lock);
577 
578 		/*
579 		 * If the ASRU cache is trying to recreate an orphan, then just
580 		 * return the existing case that we found without changing it.
581 		 */
582 		if (mp == fmd.d_rmod) {
583 			(void) pthread_mutex_unlock(&cip->ci_lock);
584 			fmd_case_rele((fmd_case_t *)cip);
585 			return ((fmd_case_t *)cip);
586 		}
587 
588 		/*
589 		 * If the existing case isn't an orphan or is being proxied,
590 		 * then we have a UUID conflict: return failure to the caller.
591 		 */
592 		if (cip->ci_mod != fmd.d_rmod || xp != NULL) {
593 			(void) pthread_mutex_unlock(&cip->ci_lock);
594 			fmd_case_rele((fmd_case_t *)cip);
595 			return (NULL);
596 		}
597 
598 		/*
599 		 * If the new module is reclaiming an orphaned case, remove
600 		 * the case from the root module, switch ci_mod, and then fall
601 		 * through to adding the case to the new owner module 'mp'.
602 		 */
603 		fmd_module_lock(cip->ci_mod);
604 		fmd_list_delete(&cip->ci_mod->mod_cases, cip);
605 		fmd_module_unlock(cip->ci_mod);
606 
607 		fmd_module_rele(cip->ci_mod);
608 		cip->ci_mod = mp;
609 		fmd_module_hold(mp);
610 
611 		(void) pthread_mutex_unlock(&cip->ci_lock);
612 		fmd_case_rele((fmd_case_t *)cip);
613 	}
614 
615 	ASSERT(fmd_module_locked(mp));
616 	fmd_list_append(&mp->mod_cases, cip);
617 
618 	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
619 	cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++;
620 	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
621 
622 	return ((fmd_case_t *)cip);
623 }
624 
625 void
626 fmd_case_destroy(fmd_case_t *cp, int visible)
627 {
628 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
629 	fmd_case_item_t *cit, *ncit;
630 	fmd_case_susp_t *cis, *ncis;
631 
632 	ASSERT(MUTEX_HELD(&cip->ci_lock));
633 	ASSERT(cip->ci_refs == 0);
634 
635 	if (visible) {
636 		TRACE((FMD_DBG_CASE, "deleting case %s", cip->ci_uuid));
637 		fmd_case_hash_delete(fmd.d_cases, cip);
638 	}
639 
640 	for (cit = cip->ci_items; cit != NULL; cit = ncit) {
641 		ncit = cit->cit_next;
642 		fmd_event_rele(cit->cit_event);
643 		fmd_free(cit, sizeof (fmd_case_item_t));
644 	}
645 
646 	for (cis = cip->ci_suspects; cis != NULL; cis = ncis) {
647 		ncis = cis->cis_next;
648 		nvlist_free(cis->cis_nvl);
649 		fmd_free(cis, sizeof (fmd_case_susp_t));
650 	}
651 
652 	if (cip->ci_principal != NULL)
653 		fmd_event_rele(cip->ci_principal);
654 
655 	fmd_free(cip->ci_uuid, cip->ci_uuidlen + 1);
656 	fmd_free(cip->ci_code, cip->ci_codelen);
657 	fmd_buf_hash_destroy(&cip->ci_bufs);
658 
659 	fmd_module_rele(cip->ci_mod);
660 	fmd_free(cip, sizeof (fmd_case_impl_t));
661 }
662 
663 void
664 fmd_case_hold(fmd_case_t *cp)
665 {
666 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
667 
668 	(void) pthread_mutex_lock(&cip->ci_lock);
669 	cip->ci_refs++;
670 	ASSERT(cip->ci_refs != 0);
671 	(void) pthread_mutex_unlock(&cip->ci_lock);
672 }
673 
674 void
675 fmd_case_hold_locked(fmd_case_t *cp)
676 {
677 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
678 
679 	ASSERT(MUTEX_HELD(&cip->ci_lock));
680 	cip->ci_refs++;
681 	ASSERT(cip->ci_refs != 0);
682 }
683 
684 void
685 fmd_case_rele(fmd_case_t *cp)
686 {
687 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
688 
689 	(void) pthread_mutex_lock(&cip->ci_lock);
690 	ASSERT(cip->ci_refs != 0);
691 
692 	if (--cip->ci_refs == 0)
693 		fmd_case_destroy((fmd_case_t *)cip, B_TRUE);
694 	else
695 		(void) pthread_mutex_unlock(&cip->ci_lock);
696 }
697 
698 void
699 fmd_case_insert_principal(fmd_case_t *cp, fmd_event_t *ep)
700 {
701 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
702 	fmd_event_t *oep;
703 	uint_t state;
704 
705 	fmd_event_hold(ep);
706 	(void) pthread_mutex_lock(&cip->ci_lock);
707 
708 	if (cip->ci_flags & FMD_CF_SOLVED)
709 		state = FMD_EVS_DIAGNOSED;
710 	else
711 		state = FMD_EVS_ACCEPTED;
712 
713 	oep = cip->ci_principal;
714 	cip->ci_principal = ep;
715 
716 	cip->ci_flags |= FMD_CF_DIRTY;
717 	(void) pthread_mutex_unlock(&cip->ci_lock);
718 
719 	fmd_module_setcdirty(cip->ci_mod);
720 	fmd_event_transition(ep, state);
721 
722 	if (oep != NULL)
723 		fmd_event_rele(oep);
724 }
725 
726 void
727 fmd_case_insert_event(fmd_case_t *cp, fmd_event_t *ep)
728 {
729 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
730 	fmd_case_item_t *cit = fmd_alloc(sizeof (fmd_case_item_t), FMD_SLEEP);
731 	uint_t state;
732 
733 	fmd_event_hold(ep);
734 	(void) pthread_mutex_lock(&cip->ci_lock);
735 
736 	cit->cit_next = cip->ci_items;
737 	cit->cit_event = ep;
738 
739 	cip->ci_items = cit;
740 	cip->ci_nitems++;
741 
742 	if (cip->ci_flags & FMD_CF_SOLVED)
743 		state = FMD_EVS_DIAGNOSED;
744 	else
745 		state = FMD_EVS_ACCEPTED;
746 
747 	cip->ci_flags |= FMD_CF_DIRTY;
748 	(void) pthread_mutex_unlock(&cip->ci_lock);
749 
750 	fmd_module_setcdirty(cip->ci_mod);
751 	fmd_event_transition(ep, state);
752 }
753 
754 void
755 fmd_case_insert_suspect(fmd_case_t *cp, nvlist_t *nvl)
756 {
757 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
758 	fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP);
759 
760 	(void) pthread_mutex_lock(&cip->ci_lock);
761 	ASSERT(cip->ci_state < FMD_CASE_SOLVED);
762 	cip->ci_flags |= FMD_CF_DIRTY;
763 
764 	cis->cis_next = cip->ci_suspects;
765 	cis->cis_nvl = nvl;
766 
767 	cip->ci_suspects = cis;
768 	cip->ci_nsuspects++;
769 
770 	(void) pthread_mutex_unlock(&cip->ci_lock);
771 	fmd_module_setcdirty(cip->ci_mod);
772 }
773 
774 void
775 fmd_case_recreate_suspect(fmd_case_t *cp, nvlist_t *nvl)
776 {
777 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
778 	fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP);
779 
780 	(void) pthread_mutex_lock(&cip->ci_lock);
781 	ASSERT(cip->ci_state == FMD_CASE_CLOSED);
782 	ASSERT(cip->ci_mod == fmd.d_rmod);
783 
784 	cis->cis_next = cip->ci_suspects;
785 	cis->cis_nvl = nvl;
786 
787 	cip->ci_suspects = cis;
788 	cip->ci_nsuspects++;
789 
790 	(void) pthread_mutex_unlock(&cip->ci_lock);
791 }
792 
793 void
794 fmd_case_reset_suspects(fmd_case_t *cp)
795 {
796 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
797 	fmd_case_susp_t *cis, *ncis;
798 
799 	(void) pthread_mutex_lock(&cip->ci_lock);
800 	ASSERT(cip->ci_state < FMD_CASE_SOLVED);
801 
802 	for (cis = cip->ci_suspects; cis != NULL; cis = ncis) {
803 		ncis = cis->cis_next;
804 		nvlist_free(cis->cis_nvl);
805 		fmd_free(cis, sizeof (fmd_case_susp_t));
806 	}
807 
808 	cip->ci_flags |= FMD_CF_DIRTY;
809 	cip->ci_suspects = NULL;
810 	cip->ci_nsuspects = 0;
811 
812 	(void) pthread_mutex_unlock(&cip->ci_lock);
813 	fmd_module_setcdirty(cip->ci_mod);
814 }
815 
816 /*
817  * Grab ci_lock and update the case state and set the dirty bit.  Then perform
818  * whatever actions and emit whatever events are appropriate for the state.
819  * Refer to the topmost block comment explaining the state machine for details.
820  */
821 void
822 fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
823 {
824 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
825 
826 	uint_t old_state;
827 	fmd_case_susp_t *cis;
828 	fmd_case_item_t *cit;
829 	fmd_asru_t *asru;
830 	fmd_event_t *e;
831 	nvlist_t *nvl;
832 
833 	ASSERT(state <= FMD_CASE_REPAIRED);
834 	(void) pthread_mutex_lock(&cip->ci_lock);
835 	cip->ci_flags |= flags;
836 
837 	if (cip->ci_state >= state) {
838 		(void) pthread_mutex_unlock(&cip->ci_lock);
839 		return; /* already in specified state */
840 	}
841 
842 	TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid,
843 	    _fmd_case_snames[cip->ci_state], _fmd_case_snames[state]));
844 
845 	old_state = cip->ci_state;
846 	cip->ci_state = state;
847 	cip->ci_flags |= FMD_CF_DIRTY;
848 
849 	if (cip->ci_xprt == NULL && cip->ci_mod != fmd.d_rmod)
850 		fmd_module_setcdirty(cip->ci_mod);
851 
852 	switch (state) {
853 	case FMD_CASE_SOLVED:
854 		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
855 			fmd_event_transition(cit->cit_event, FMD_EVS_DIAGNOSED);
856 
857 		if (cip->ci_principal != NULL) {
858 			fmd_event_transition(cip->ci_principal,
859 			    FMD_EVS_DIAGNOSED);
860 		}
861 		break;
862 
863 	case FMD_CASE_CLOSE_WAIT:
864 		/*
865 		 * If the case was never solved, do not change ASRUs.
866 		 * If the case was never fmd_case_closed, do not change ASRUs.
867 		 * If the case was repaired, do not change ASRUs.
868 		 */
869 		if ((cip->ci_flags & (FMD_CF_SOLVED | FMD_CF_ISOLATED |
870 		    FMD_CF_REPAIRED)) != (FMD_CF_SOLVED | FMD_CF_ISOLATED))
871 			goto close_wait_finish;
872 
873 		/*
874 		 * For each fault event in the suspect list, attempt to look up
875 		 * the corresponding ASRU in the ASRU dictionary.  If the ASRU
876 		 * is found there and is marked faulty, we now mark it unusable
877 		 * and record the case meta-data and fault event with the ASRU.
878 		 */
879 		for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
880 			if (nvlist_lookup_nvlist(cis->cis_nvl, FM_FAULT_ASRU,
881 			    &nvl) == 0 && (asru = fmd_asru_hash_lookup_nvl(
882 			    fmd.d_asrus, nvl, FMD_B_FALSE)) != NULL) {
883 				(void) fmd_asru_setflags(asru,
884 				    FMD_ASRU_UNUSABLE, cp, cis->cis_nvl);
885 				fmd_asru_hash_release(fmd.d_asrus, asru);
886 			}
887 		}
888 
889 	close_wait_finish:
890 		if (!fmd_case_orphaned(cp))
891 			break; /* state transition complete */
892 
893 		/*
894 		 * If an orphaned case transitions to CLOSE_WAIT, the owning
895 		 * module is no longer loaded: continue on to CASE_CLOSED.
896 		 */
897 		state = cip->ci_state = FMD_CASE_CLOSED;
898 		/*FALLTHRU*/
899 
900 	case FMD_CASE_CLOSED:
901 		ASSERT(fmd_case_orphaned(cp));
902 		fmd_module_lock(cip->ci_mod);
903 		fmd_list_append(&cip->ci_mod->mod_cases, cip);
904 		fmd_module_unlock(cip->ci_mod);
905 		break;
906 
907 	case FMD_CASE_REPAIRED:
908 		ASSERT(fmd_case_orphaned(cp));
909 
910 		if (old_state == FMD_CASE_CLOSE_WAIT)
911 			break; /* case was never closed (transition 6 above) */
912 
913 		fmd_module_lock(cip->ci_mod);
914 		fmd_list_delete(&cip->ci_mod->mod_cases, cip);
915 		fmd_module_unlock(cip->ci_mod);
916 		break;
917 	}
918 
919 	(void) pthread_mutex_unlock(&cip->ci_lock);
920 
921 	/*
922 	 * If the module has initialized, then publish the appropriate event
923 	 * for the new case state.  If not, we are being called from the
924 	 * checkpoint code during module load, in which case the module's
925 	 * _fmd_init() routine hasn't finished yet, and our event dictionaries
926 	 * may not be open yet, which will prevent us from computing the event
927 	 * code.  Defer the call to fmd_case_publish() by enqueuing a PUBLISH
928 	 * event in our queue: this won't be processed until _fmd_init is done.
929 	 */
930 	if (cip->ci_mod->mod_flags & FMD_MOD_INIT)
931 		fmd_case_publish(cp, state);
932 	else {
933 		fmd_case_hold(cp);
934 		e = fmd_event_create(FMD_EVT_PUBLISH, FMD_HRT_NOW, NULL, cp);
935 		fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e);
936 	}
937 
938 	/*
939 	 * If we transitioned to CLOSED or REPAIRED, adjust the reference count
940 	 * to reflect our addition to or removal from fmd.d_rmod->mod_cases.
941 	 */
942 	if (state == FMD_CASE_CLOSED)
943 		fmd_case_hold(cp);
944 	else if (state == FMD_CASE_REPAIRED && old_state != FMD_CASE_CLOSE_WAIT)
945 		fmd_case_rele(cp);
946 }
947 
948 void
949 fmd_case_setdirty(fmd_case_t *cp)
950 {
951 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
952 
953 	(void) pthread_mutex_lock(&cip->ci_lock);
954 	cip->ci_flags |= FMD_CF_DIRTY;
955 	(void) pthread_mutex_unlock(&cip->ci_lock);
956 
957 	fmd_module_setcdirty(cip->ci_mod);
958 }
959 
960 void
961 fmd_case_clrdirty(fmd_case_t *cp)
962 {
963 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
964 
965 	(void) pthread_mutex_lock(&cip->ci_lock);
966 	cip->ci_flags &= ~FMD_CF_DIRTY;
967 	(void) pthread_mutex_unlock(&cip->ci_lock);
968 }
969 
970 void
971 fmd_case_commit(fmd_case_t *cp)
972 {
973 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
974 	fmd_case_item_t *cit;
975 
976 	(void) pthread_mutex_lock(&cip->ci_lock);
977 
978 	if (cip->ci_flags & FMD_CF_DIRTY) {
979 		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
980 			fmd_event_commit(cit->cit_event);
981 
982 		if (cip->ci_principal != NULL)
983 			fmd_event_commit(cip->ci_principal);
984 
985 		fmd_buf_hash_commit(&cip->ci_bufs);
986 		cip->ci_flags &= ~FMD_CF_DIRTY;
987 	}
988 
989 	(void) pthread_mutex_unlock(&cip->ci_lock);
990 }
991 
992 /*
993  * Indicate that the case may need to change state because one or more of the
994  * ASRUs named as a suspect has changed state.  We examine all the suspects
995  * and if none are still faulty, we initiate a case close transition.
996  */
997 void
998 fmd_case_update(fmd_case_t *cp)
999 {
1000 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1001 	fmd_case_susp_t *cis;
1002 	fmd_asru_t *asru;
1003 	nvlist_t *nvl;
1004 
1005 	int astate = 0;
1006 	uint_t cstate;
1007 
1008 	(void) pthread_mutex_lock(&cip->ci_lock);
1009 	cstate = cip->ci_state;
1010 
1011 	if (cip->ci_xprt != NULL || cip->ci_state < FMD_CASE_SOLVED) {
1012 		(void) pthread_mutex_unlock(&cip->ci_lock);
1013 		return; /* update is not appropriate */
1014 	}
1015 
1016 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
1017 		if (nvlist_lookup_nvlist(cis->cis_nvl, FM_FAULT_ASRU,
1018 		    &nvl) == 0 && (asru = fmd_asru_hash_lookup_nvl(
1019 		    fmd.d_asrus, nvl, FMD_B_FALSE)) != NULL) {
1020 			astate |= fmd_asru_getstate(asru);
1021 			fmd_asru_hash_release(fmd.d_asrus, asru);
1022 		}
1023 	}
1024 
1025 	(void) pthread_mutex_unlock(&cip->ci_lock);
1026 
1027 	if (astate & FMD_ASRU_FAULTY)
1028 		return; /* one or more suspects are still marked faulty */
1029 
1030 	if (cstate == FMD_CASE_CLOSED)
1031 		fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
1032 	else
1033 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
1034 }
1035 
1036 /*
1037  * Delete a closed case from the module's case list once the fmdo_close() entry
1038  * point has run to completion.  If the case is owned by a transport module,
1039  * tell the transport to proxy a case close on the other end of the transport.
1040  * If not, transition to the appropriate next state based on ci_flags.  This
1041  * function represents the end of CLOSE_WAIT and transitions the case to either
1042  * CLOSED or REPAIRED or discards it entirely because it was never solved;
1043  * refer to the topmost block comment explaining the state machine for details.
1044  */
1045 void
1046 fmd_case_delete(fmd_case_t *cp)
1047 {
1048 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1049 
1050 	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
1051 	cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64--;
1052 	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
1053 
1054 	ASSERT(fmd_module_locked(cip->ci_mod));
1055 	fmd_list_delete(&cip->ci_mod->mod_cases, cip);
1056 
1057 	if (cip->ci_xprt == NULL)
1058 		fmd_module_setcdirty(cip->ci_mod);
1059 
1060 	fmd_module_rele(cip->ci_mod);
1061 	cip->ci_mod = fmd.d_rmod;
1062 	fmd_module_hold(cip->ci_mod);
1063 
1064 	/*
1065 	 * If a proxied case finishes CLOSE_WAIT, then it can be discarded
1066 	 * rather than orphaned because by definition it can have no entries
1067 	 * in the resource cache of the current fault manager.
1068 	 */
1069 	if (cip->ci_xprt != NULL)
1070 		fmd_xprt_uuclose(cip->ci_xprt, cip->ci_uuid);
1071 	else if (cip->ci_flags & FMD_CF_REPAIRED)
1072 		fmd_case_transition(cp, FMD_CASE_REPAIRED, 0);
1073 	else if (cip->ci_flags & FMD_CF_ISOLATED)
1074 		fmd_case_transition(cp, FMD_CASE_CLOSED, 0);
1075 
1076 	fmd_case_rele(cp);
1077 }
1078 
1079 void
1080 fmd_case_discard(fmd_case_t *cp)
1081 {
1082 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1083 
1084 	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
1085 	cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64--;
1086 	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
1087 
1088 	ASSERT(fmd_module_locked(cip->ci_mod));
1089 	fmd_list_delete(&cip->ci_mod->mod_cases, cip);
1090 	fmd_case_rele(cp);
1091 }
1092 
1093 /*
1094  * Indicate that the problem corresponding to a case has been repaired by
1095  * clearing the faulty bit on each ASRU named as a suspect.  If the case hasn't
1096  * already been closed, this function initiates the transition to CLOSE_WAIT.
1097  * The caller must have the case held from fmd_case_hash_lookup(), so we can
1098  * grab and drop ci_lock without the case being able to be freed in between.
1099  */
1100 int
1101 fmd_case_repair(fmd_case_t *cp)
1102 {
1103 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1104 	fmd_case_susp_t *cis;
1105 	nvlist_t *nvl;
1106 	uint_t cstate;
1107 
1108 	fmd_asru_hash_t *ahp = fmd.d_asrus;
1109 	fmd_asru_t **aa;
1110 	uint_t i, an;
1111 
1112 	(void) pthread_mutex_lock(&cip->ci_lock);
1113 	cstate = cip->ci_state;
1114 
1115 	if (cip->ci_xprt != NULL) {
1116 		(void) pthread_mutex_unlock(&cip->ci_lock);
1117 		return (fmd_set_errno(EFMD_CASE_OWNER));
1118 	}
1119 
1120 	if (cstate < FMD_CASE_SOLVED) {
1121 		(void) pthread_mutex_unlock(&cip->ci_lock);
1122 		return (fmd_set_errno(EFMD_CASE_STATE));
1123 	}
1124 
1125 	/*
1126 	 * Take a snapshot of any ASRUs referenced by the case that are present
1127 	 * in the resource cache.  Then drop ci_lock and clear the faulty bit
1128 	 * on each ASRU (we can't call fmd_asru_clrflags() with ci_lock held).
1129 	 */
1130 	an = cip->ci_nsuspects;
1131 	aa = alloca(sizeof (fmd_asru_t *) * an);
1132 	bzero(aa, sizeof (fmd_asru_t *) * an);
1133 
1134 	for (i = 0, cis = cip->ci_suspects;
1135 	    cis != NULL; cis = cis->cis_next, i++) {
1136 		if (nvlist_lookup_nvlist(cis->cis_nvl,
1137 		    FM_FAULT_ASRU, &nvl) == 0)
1138 			aa[i] = fmd_asru_hash_lookup_nvl(ahp, nvl, FMD_B_FALSE);
1139 	}
1140 
1141 	(void) pthread_mutex_unlock(&cip->ci_lock);
1142 
1143 	for (i = 0; i < an; i++) {
1144 		if (aa[i] == NULL)
1145 			continue; /* no asru was found */
1146 		(void) fmd_asru_clrflags(aa[i], FMD_ASRU_FAULTY, NULL, NULL);
1147 		fmd_asru_hash_release(ahp, aa[i]);
1148 	}
1149 
1150 	if (cstate == FMD_CASE_CLOSED)
1151 		fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
1152 	else
1153 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
1154 
1155 	return (0);
1156 }
1157 
1158 int
1159 fmd_case_contains(fmd_case_t *cp, fmd_event_t *ep)
1160 {
1161 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1162 	fmd_case_item_t *cit;
1163 	uint_t state;
1164 	int rv = 0;
1165 
1166 	(void) pthread_mutex_lock(&cip->ci_lock);
1167 
1168 	if (cip->ci_state >= FMD_CASE_SOLVED)
1169 		state = FMD_EVS_DIAGNOSED;
1170 	else
1171 		state = FMD_EVS_ACCEPTED;
1172 
1173 	for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
1174 		if ((rv = fmd_event_equal(ep, cit->cit_event)) != 0)
1175 			break;
1176 	}
1177 
1178 	if (rv == 0 && cip->ci_principal != NULL)
1179 		rv = fmd_event_equal(ep, cip->ci_principal);
1180 
1181 	(void) pthread_mutex_unlock(&cip->ci_lock);
1182 
1183 	if (rv != 0)
1184 		fmd_event_transition(ep, state);
1185 
1186 	return (rv);
1187 }
1188 
1189 int
1190 fmd_case_orphaned(fmd_case_t *cp)
1191 {
1192 	return (((fmd_case_impl_t *)cp)->ci_mod == fmd.d_rmod);
1193 }
1194