xref: /titanic_52/usr/src/cmd/fm/fmd/common/fmd_asru.c (revision 33f2fefd46350ca5992567761c46a5b70f864340)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/fm/protocol.h>
28 #include <uuid/uuid.h>
29 
30 #include <dirent.h>
31 #include <limits.h>
32 #include <unistd.h>
33 #include <alloca.h>
34 #include <stddef.h>
35 #include <fm/libtopo.h>
36 
37 #include <fmd_alloc.h>
38 #include <fmd_string.h>
39 #include <fmd_error.h>
40 #include <fmd_subr.h>
41 #include <fmd_protocol.h>
42 #include <fmd_event.h>
43 #include <fmd_conf.h>
44 #include <fmd_fmri.h>
45 #include <fmd_dispq.h>
46 #include <fmd_case.h>
47 #include <fmd_module.h>
48 #include <fmd_asru.h>
49 
50 #include <fmd.h>
51 
52 static const char *const _fmd_asru_events[] = {
53 	FMD_RSRC_CLASS "asru.ok",		/* UNUSABLE=0 FAULTED=0 */
54 	FMD_RSRC_CLASS "asru.degraded",		/* UNUSABLE=0 FAULTED=1 */
55 	FMD_RSRC_CLASS "asru.unknown",		/* UNUSABLE=1 FAULTED=0 */
56 	FMD_RSRC_CLASS "asru.faulted"		/* UNUSABLE=1 FAULTED=1 */
57 };
58 
/*
 * Two-letter state names used in trace output, indexed by the ASRU state bits
 * (asru_flags & FMD_ASRU_STATE).  The entries are in the same order as
 * _fmd_asru_events: an upper-case letter means the corresponding bit is set.
 */
static const char *const _fmd_asru_snames[] = {
	"uf",		/* UNUSABLE=0 FAULTED=0 */
	"uF",		/* UNUSABLE=0 FAULTED=1 */
	"Uf",		/* UNUSABLE=1 FAULTED=0 */
	"UF"		/* UNUSABLE=1 FAULTED=1 */
};
62 
/*
 * Debugging override loaded from the "fakenotpresent" configuration property
 * by fmd_asru_hash_create().  While it is non-zero,
 * fmd_asru_replacement_state() returns this value instead of querying the
 * FMRI schemes.
 */
volatile uint32_t fmd_asru_fake_not_present = 0;
64 
65 static uint_t
66 fmd_asru_strhash(fmd_asru_hash_t *ahp, const char *val)
67 {
68 	return (topo_fmri_strhash(ahp->ah_topo->ft_hdl, val) % ahp->ah_hashlen);
69 }
70 
71 static boolean_t
72 fmd_asru_strcmp(fmd_asru_hash_t *ahp, const char *a, const char *b)
73 {
74 	return (topo_fmri_strcmp(ahp->ah_topo->ft_hdl, a, b));
75 }
76 
77 static fmd_asru_t *
78 fmd_asru_create(fmd_asru_hash_t *ahp, const char *uuid,
79     const char *name, nvlist_t *fmri)
80 {
81 	fmd_asru_t *ap = fmd_zalloc(sizeof (fmd_asru_t), FMD_SLEEP);
82 	char *s;
83 
84 	(void) pthread_mutex_init(&ap->asru_lock, NULL);
85 	(void) pthread_cond_init(&ap->asru_cv, NULL);
86 
87 	ap->asru_name = fmd_strdup(name, FMD_SLEEP);
88 	if (fmri)
89 		(void) nvlist_xdup(fmri, &ap->asru_fmri, &fmd.d_nva);
90 	ap->asru_root = fmd_strdup(ahp->ah_dirpath, FMD_SLEEP);
91 	ap->asru_uuid = fmd_strdup(uuid, FMD_SLEEP);
92 	ap->asru_uuidlen = ap->asru_uuid ? strlen(ap->asru_uuid) : 0;
93 	ap->asru_refs = 1;
94 
95 	if (fmri && nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &s) == 0 &&
96 	    strcmp(s, FM_FMRI_SCHEME_FMD) == 0)
97 		ap->asru_flags |= FMD_ASRU_INTERNAL;
98 
99 	return (ap);
100 }
101 
102 static void
103 fmd_asru_destroy(fmd_asru_t *ap)
104 {
105 	ASSERT(MUTEX_HELD(&ap->asru_lock));
106 	ASSERT(ap->asru_refs == 0);
107 
108 	nvlist_free(ap->asru_event);
109 	fmd_strfree(ap->asru_name);
110 	nvlist_free(ap->asru_fmri);
111 	fmd_strfree(ap->asru_root);
112 	fmd_free(ap->asru_uuid, ap->asru_uuidlen + 1);
113 	fmd_free(ap, sizeof (fmd_asru_t));
114 }
115 
116 static void
117 fmd_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_t *ap)
118 {
119 	uint_t h = fmd_asru_strhash(ahp, ap->asru_name);
120 
121 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
122 	ap->asru_next = ahp->ah_hash[h];
123 	ahp->ah_hash[h] = ap;
124 	ahp->ah_count++;
125 }
126 
127 static fmd_asru_t *
128 fmd_asru_hold(fmd_asru_t *ap)
129 {
130 	(void) pthread_mutex_lock(&ap->asru_lock);
131 	ap->asru_refs++;
132 	ASSERT(ap->asru_refs != 0);
133 	(void) pthread_mutex_unlock(&ap->asru_lock);
134 	return (ap);
135 }
136 
137 /*
138  * Lookup an asru in the hash by name and place a hold on it.  If the asru is
139  * not found, no entry is created and NULL is returned.  This internal function
140  * is for callers who have the ah_lock held and is used by lookup_name below.
141  */
142 fmd_asru_t *
143 fmd_asru_hash_lookup(fmd_asru_hash_t *ahp, const char *name)
144 {
145 	fmd_asru_t *ap;
146 	uint_t h;
147 
148 	ASSERT(RW_LOCK_HELD(&ahp->ah_lock));
149 	h = fmd_asru_strhash(ahp, name);
150 
151 	for (ap = ahp->ah_hash[h]; ap != NULL; ap = ap->asru_next) {
152 		if (fmd_asru_strcmp(ahp, ap->asru_name, name))
153 			break;
154 	}
155 
156 	if (ap != NULL)
157 		(void) fmd_asru_hold(ap);
158 	else
159 		(void) fmd_set_errno(EFMD_ASRU_NOENT);
160 
161 	return (ap);
162 }
163 
164 static int
165 fmd_asru_replacement_state(nvlist_t *event)
166 {
167 	int ps = -1;
168 	nvlist_t *asru, *fru, *rsrc;
169 
170 	/*
171 	 * Check if there is evidence that this object is no longer present.
172 	 * In general fmd_fmri_present() should be supported on resources and/or
173 	 * frus, as those are the things that are physically present or not
174 	 * present - an asru can be spread over a number of frus some of which
175 	 * are present and some not, so fmd_fmri_present() is not generally
176 	 * meaningful. However retain a check for asru first for compatibility.
177 	 * If we have checked all three and we still get -1 then nothing knows
178 	 * whether it's present or not, so err on the safe side and treat it
179 	 * as still present.
180 	 */
181 	if (fmd_asru_fake_not_present)
182 		return (fmd_asru_fake_not_present);
183 	if (nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0)
184 		ps = fmd_fmri_replaced(asru);
185 	if (ps == -1) {
186 		if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, &rsrc) == 0)
187 			ps = fmd_fmri_replaced(rsrc);
188 	} else if (ps == FMD_OBJ_STATE_UNKNOWN) {
189 		/* see if we can improve on UNKNOWN */
190 		if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE,
191 		    &rsrc) == 0) {
192 			int ps2 = fmd_fmri_replaced(rsrc);
193 			if (ps2 == FMD_OBJ_STATE_STILL_PRESENT ||
194 			    ps2 == FMD_OBJ_STATE_REPLACED)
195 				ps = ps2;
196 		}
197 	}
198 	if (ps == -1) {
199 		if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0)
200 			ps = fmd_fmri_replaced(fru);
201 	} else if (ps == FMD_OBJ_STATE_UNKNOWN) {
202 		/* see if we can improve on UNKNOWN */
203 		if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0) {
204 			int ps2 = fmd_fmri_replaced(fru);
205 			if (ps2 == FMD_OBJ_STATE_STILL_PRESENT ||
206 			    ps2 == FMD_OBJ_STATE_REPLACED)
207 				ps = ps2;
208 		}
209 	}
210 	if (ps == -1)
211 		ps = FMD_OBJ_STATE_UNKNOWN;
212 	return (ps);
213 }
214 
215 static void
216 fmd_asru_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
217     char *name)
218 {
219 	uint_t h = fmd_asru_strhash(ahp, name);
220 
221 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
222 	alp->al_asru_next = ahp->ah_asru_hash[h];
223 	ahp->ah_asru_hash[h] = alp;
224 	ahp->ah_al_count++;
225 }
226 
227 static void
228 fmd_asru_case_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
229     char *name)
230 {
231 	uint_t h = fmd_asru_strhash(ahp, name);
232 
233 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
234 	alp->al_case_next = ahp->ah_case_hash[h];
235 	ahp->ah_case_hash[h] = alp;
236 }
237 
238 static void
239 fmd_asru_fru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, char *name)
240 {
241 	uint_t h = fmd_asru_strhash(ahp, name);
242 
243 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
244 	alp->al_fru_next = ahp->ah_fru_hash[h];
245 	ahp->ah_fru_hash[h] = alp;
246 }
247 
248 static void
249 fmd_asru_label_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
250     char *name)
251 {
252 	uint_t h = fmd_asru_strhash(ahp, name);
253 
254 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
255 	alp->al_label_next = ahp->ah_label_hash[h];
256 	ahp->ah_label_hash[h] = alp;
257 }
258 
259 static void
260 fmd_asru_rsrc_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
261     char *name)
262 {
263 	uint_t h = fmd_asru_strhash(ahp, name);
264 
265 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
266 	alp->al_rsrc_next = ahp->ah_rsrc_hash[h];
267 	ahp->ah_rsrc_hash[h] = alp;
268 }
269 
270 static void
271 fmd_asru_al_destroy(fmd_asru_link_t *alp)
272 {
273 	ASSERT(alp->al_refs == 0);
274 	ASSERT(MUTEX_HELD(&alp->al_asru->asru_lock));
275 
276 	if (alp->al_log != NULL)
277 		fmd_log_rele(alp->al_log);
278 
279 	fmd_free(alp->al_uuid, alp->al_uuidlen + 1);
280 	nvlist_free(alp->al_event);
281 	fmd_strfree(alp->al_rsrc_name);
282 	fmd_strfree(alp->al_case_uuid);
283 	fmd_strfree(alp->al_fru_name);
284 	fmd_strfree(alp->al_asru_name);
285 	fmd_strfree(alp->al_label);
286 	nvlist_free(alp->al_asru_fmri);
287 	fmd_free(alp, sizeof (fmd_asru_link_t));
288 }
289 
290 static fmd_asru_link_t *
291 fmd_asru_al_hold(fmd_asru_link_t *alp)
292 {
293 	fmd_asru_t *ap = alp->al_asru;
294 
295 	(void) pthread_mutex_lock(&ap->asru_lock);
296 	ap->asru_refs++;
297 	alp->al_refs++;
298 	ASSERT(alp->al_refs != 0);
299 	(void) pthread_mutex_unlock(&ap->asru_lock);
300 	return (alp);
301 }
302 
303 static void fmd_asru_destroy(fmd_asru_t *ap);
304 
305 /*ARGSUSED*/
306 static void
307 fmd_asru_al_hash_release(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp)
308 {
309 	fmd_asru_t *ap = alp->al_asru;
310 
311 	(void) pthread_mutex_lock(&ap->asru_lock);
312 	ASSERT(alp->al_refs != 0);
313 	if (--alp->al_refs == 0)
314 		fmd_asru_al_destroy(alp);
315 	ASSERT(ap->asru_refs != 0);
316 	if (--ap->asru_refs == 0)
317 		fmd_asru_destroy(ap);
318 	else
319 		(void) pthread_mutex_unlock(&ap->asru_lock);
320 }
321 
322 static int
323 fmd_asru_get_namestr(nvlist_t *nvl, char **name, ssize_t *namelen)
324 {
325 	if ((*namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) == -1)
326 		return (EFMD_ASRU_FMRI);
327 	*name = fmd_alloc(*namelen + 1, FMD_SLEEP);
328 	if (fmd_fmri_nvl2str(nvl, *name, *namelen + 1) == -1) {
329 		if (*name != NULL)
330 			fmd_free(*name, *namelen + 1);
331 		return (EFMD_ASRU_FMRI);
332 	}
333 	return (0);
334 }
335 
336 static fmd_asru_link_t *
337 fmd_asru_al_create(fmd_asru_hash_t *ahp, nvlist_t *nvl, fmd_case_t *cp,
338     const char *al_uuid)
339 {
340 	nvlist_t *asru = NULL, *fru, *rsrc;
341 	int got_rsrc = 0, got_asru = 0, got_fru = 0;
342 	ssize_t fru_namelen, rsrc_namelen, asru_namelen;
343 	char *asru_name, *rsrc_name, *fru_name, *name, *label;
344 	fmd_asru_link_t *alp;
345 	fmd_asru_t *ap;
346 	boolean_t msg;
347 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
348 
349 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &asru) == 0 &&
350 	    fmd_asru_get_namestr(asru, &asru_name, &asru_namelen) == 0)
351 		got_asru = 1;
352 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &fru) == 0 &&
353 	    fmd_asru_get_namestr(fru, &fru_name, &fru_namelen) == 0)
354 		got_fru = 1;
355 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0 &&
356 	    fmd_asru_get_namestr(rsrc, &rsrc_name, &rsrc_namelen) == 0)
357 		got_rsrc = 1;
358 	if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, &label) != 0)
359 		label = "";
360 
361 	/*
362 	 * Grab the rwlock as a writer; Then create and insert the asru with
363 	 * ahp->ah_lock held and hash it in. We'll then drop the rwlock and
364 	 * proceed to initializing the asru.
365 	 */
366 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
367 
368 	/*
369 	 * Create and initialise the per-fault "link" structure.
370 	 */
371 	alp = fmd_zalloc(sizeof (fmd_asru_link_t), FMD_SLEEP);
372 	if (got_asru)
373 		(void) nvlist_xdup(asru, &alp->al_asru_fmri, &fmd.d_nva);
374 	alp->al_uuid = fmd_strdup(al_uuid, FMD_SLEEP);
375 	alp->al_uuidlen = strlen(alp->al_uuid);
376 	alp->al_refs = 1;
377 
378 	/*
379 	 * If this is the first fault for this asru, then create the per-asru
380 	 * structure and link into the hash.
381 	 */
382 	name = got_asru ? asru_name : "";
383 	if ((ap = fmd_asru_hash_lookup(ahp, name)) == NULL) {
384 		ap = fmd_asru_create(ahp, al_uuid, name, got_asru ? asru :
385 		    NULL);
386 		fmd_asru_hash_insert(ahp, ap);
387 	} else
388 		nvlist_free(ap->asru_event);
389 	(void) nvlist_xdup(nvl, &ap->asru_event, &fmd.d_nva);
390 
391 	/*
392 	 * Put the link structure on the list associated with the per-asru
393 	 * structure. Then put the link structure on the various hashes.
394 	 */
395 	fmd_list_append(&ap->asru_list, (fmd_list_t *)alp);
396 	alp->al_asru = ap;
397 	alp->al_asru_name = got_asru ? asru_name : fmd_strdup("", FMD_SLEEP);
398 	fmd_asru_asru_hash_insert(ahp, alp, alp->al_asru_name);
399 	alp->al_fru_name = got_fru ? fru_name : fmd_strdup("", FMD_SLEEP);
400 	fmd_asru_fru_hash_insert(ahp, alp, alp->al_fru_name);
401 	alp->al_rsrc_name = got_rsrc ? rsrc_name : fmd_strdup("", FMD_SLEEP);
402 	fmd_asru_rsrc_hash_insert(ahp, alp, alp->al_rsrc_name);
403 	alp->al_label = fmd_strdup(label, FMD_SLEEP);
404 	fmd_asru_label_hash_insert(ahp, alp, label);
405 	alp->al_case_uuid = fmd_strdup(cip->ci_uuid, FMD_SLEEP);
406 	fmd_asru_case_hash_insert(ahp, alp, cip->ci_uuid);
407 	(void) pthread_mutex_lock(&ap->asru_lock);
408 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
409 
410 	ap->asru_case = alp->al_case = cp;
411 	if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE, &msg) == 0 &&
412 	    msg == B_FALSE)
413 		ap->asru_flags |= FMD_ASRU_INVISIBLE;
414 	(void) nvlist_xdup(nvl, &alp->al_event, &fmd.d_nva);
415 	ap->asru_flags |= FMD_ASRU_VALID;
416 	(void) pthread_cond_broadcast(&ap->asru_cv);
417 	(void) pthread_mutex_unlock(&ap->asru_lock);
418 	return (alp);
419 }
420 
421 static void
422 fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp)
423 {
424 	nvlist_t *nvl = FMD_EVENT_NVL(ep);
425 	boolean_t faulty = FMD_B_FALSE, unusable = FMD_B_FALSE;
426 	int ps;
427 	boolean_t repaired = FMD_B_FALSE, replaced = FMD_B_FALSE;
428 	boolean_t acquitted = FMD_B_FALSE;
429 	nvlist_t *flt, *flt_copy, *asru;
430 	char *case_uuid = NULL, *case_code = NULL;
431 	fmd_asru_t *ap;
432 	fmd_asru_link_t *alp;
433 	fmd_case_t *cp;
434 	int64_t *diag_time;
435 	uint_t nelem;
436 	topo_hdl_t *thp;
437 	char *class;
438 	nvlist_t *rsrc;
439 	int err;
440 
441 	/*
442 	 * Extract the most recent values of 'faulty' from the event log.
443 	 */
444 	if (nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY,
445 	    &faulty) != 0) {
446 		fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: "
447 		    "invalid event log record\n", lp->log_name);
448 		ahp->ah_error = EFMD_ASRU_EVENT;
449 		return;
450 	}
451 	if (nvlist_lookup_nvlist(nvl, FM_RSRC_ASRU_EVENT, &flt) != 0) {
452 		fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: "
453 		    "invalid event log record\n", lp->log_name);
454 		ahp->ah_error = EFMD_ASRU_EVENT;
455 		return;
456 	}
457 	(void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_UUID, &case_uuid);
458 	(void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_CODE, &case_code);
459 	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_UNUSABLE,
460 	    &unusable);
461 	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPAIRED,
462 	    &repaired);
463 	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPLACED,
464 	    &replaced);
465 	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_ACQUITTED,
466 	    &acquitted);
467 
468 	/*
469 	 * Attempt to recreate the case in either the CLOSED or REPAIRED state
470 	 * (depending on whether the faulty bit is still set).
471 	 * If the case is already present, fmd_case_recreate() will return it.
472 	 * If not, we'll create a new orphaned case. Either way,  we use the
473 	 * ASRU event to insert a suspect into the partially-restored case.
474 	 */
475 	fmd_module_lock(fmd.d_rmod);
476 	cp = fmd_case_recreate(fmd.d_rmod, NULL, faulty ? FMD_CASE_CLOSED :
477 	    FMD_CASE_REPAIRED, case_uuid, case_code);
478 	fmd_case_hold(cp);
479 	fmd_module_unlock(fmd.d_rmod);
480 	if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time,
481 	    &nelem) == 0 && nelem >= 2)
482 		fmd_case_settime(cp, diag_time[0], diag_time[1]);
483 	else
484 		fmd_case_settime(cp, lp->log_stat.st_ctime, 0);
485 	(void) nvlist_xdup(flt, &flt_copy, &fmd.d_nva);
486 
487 	/*
488 	 * For faults with a resource, re-evaluate the asru from the resource.
489 	 */
490 	thp = fmd_fmri_topo_hold(TOPO_VERSION);
491 	if (nvlist_lookup_string(flt_copy, FM_CLASS, &class) == 0 &&
492 	    strncmp(class, "fault", 5) == 0 &&
493 	    nvlist_lookup_nvlist(flt_copy, FM_FAULT_RESOURCE, &rsrc) == 0 &&
494 	    rsrc != NULL && topo_fmri_asru(thp, rsrc, &asru, &err) == 0) {
495 		(void) nvlist_remove(flt_copy, FM_FAULT_ASRU, DATA_TYPE_NVLIST);
496 		(void) nvlist_add_nvlist(flt_copy, FM_FAULT_ASRU, asru);
497 		nvlist_free(asru);
498 	}
499 	fmd_fmri_topo_rele(thp);
500 
501 	(void) nvlist_xdup(flt_copy, &flt, &fmd.d_nva);
502 
503 	fmd_case_recreate_suspect(cp, flt_copy);
504 
505 	/*
506 	 * Now create the resource cache entries.
507 	 */
508 	alp = fmd_asru_al_create(ahp, flt, cp, fmd_strbasename(lp->log_name));
509 	ap = alp->al_asru;
510 
511 	/*
512 	 * Check to see if the resource is still present in the system.
513 	 */
514 	ps = fmd_asru_replacement_state(flt);
515 	if (ps == FMD_OBJ_STATE_REPLACED) {
516 		replaced = FMD_B_TRUE;
517 	} else if (ps == FMD_OBJ_STATE_STILL_PRESENT ||
518 	    ps == FMD_OBJ_STATE_UNKNOWN) {
519 		ap->asru_flags |= FMD_ASRU_PRESENT;
520 		if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU,
521 		    &asru) == 0) {
522 			int us;
523 
524 			switch (fmd_fmri_service_state(asru)) {
525 			case FMD_SERVICE_STATE_UNUSABLE:
526 				unusable = FMD_B_TRUE;
527 				break;
528 			case FMD_SERVICE_STATE_OK:
529 			case FMD_SERVICE_STATE_ISOLATE_PENDING:
530 			case FMD_SERVICE_STATE_DEGRADED:
531 				unusable = FMD_B_FALSE;
532 				break;
533 			case FMD_SERVICE_STATE_UNKNOWN:
534 			case -1:
535 				/* not supported by scheme */
536 				us = fmd_fmri_unusable(asru);
537 				if (us > 0)
538 					unusable = FMD_B_TRUE;
539 				else if (us == 0)
540 					unusable = FMD_B_FALSE;
541 				break;
542 			}
543 		}
544 	}
545 
546 	nvlist_free(flt);
547 
548 	ap->asru_flags |= FMD_ASRU_RECREATED;
549 	if (faulty) {
550 		alp->al_flags |= FMD_ASRU_FAULTY;
551 		ap->asru_flags |= FMD_ASRU_FAULTY;
552 	}
553 	if (unusable) {
554 		alp->al_flags |= FMD_ASRU_UNUSABLE;
555 		ap->asru_flags |= FMD_ASRU_UNUSABLE;
556 	}
557 	if (replaced)
558 		alp->al_reason = FMD_ASRU_REPLACED;
559 	else if (repaired)
560 		alp->al_reason = FMD_ASRU_REPAIRED;
561 	else if (acquitted)
562 		alp->al_reason = FMD_ASRU_ACQUITTED;
563 
564 	TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", alp->al_uuid,
565 	    (void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE]));
566 }
567 
568 static void
569 fmd_asru_hash_discard(fmd_asru_hash_t *ahp, const char *uuid, int err)
570 {
571 	char src[PATH_MAX], dst[PATH_MAX];
572 
573 	(void) snprintf(src, PATH_MAX, "%s/%s", ahp->ah_dirpath, uuid);
574 	(void) snprintf(dst, PATH_MAX, "%s/%s-", ahp->ah_dirpath, uuid);
575 
576 	if (err != 0)
577 		err = rename(src, dst);
578 	else
579 		err = unlink(src);
580 
581 	if (err != 0 && errno != ENOENT)
582 		fmd_error(EFMD_ASRU_EVENT, "failed to rename log %s", src);
583 }
584 
585 /*
586  * Open a saved log file and restore it into the ASRU hash.  If we can't even
587  * open the log, rename the log file to <uuid>- to indicate it is corrupt.  If
588  * fmd_log_replay() fails, we either delete the file (if it has reached the
589  * upper limit on cache age) or rename it for debugging if it was corrupted.
590  */
591 static void
592 fmd_asru_hash_logopen(fmd_asru_hash_t *ahp, const char *uuid)
593 {
594 	fmd_log_t *lp = fmd_log_tryopen(ahp->ah_dirpath, uuid, FMD_LOG_ASRU);
595 	uint_t n;
596 
597 	if (lp == NULL) {
598 		fmd_asru_hash_discard(ahp, uuid, errno);
599 		return;
600 	}
601 
602 	ahp->ah_error = 0;
603 	n = ahp->ah_al_count;
604 
605 	fmd_log_replay(lp, (fmd_log_f *)fmd_asru_hash_recreate, ahp);
606 	fmd_log_rele(lp);
607 
608 	if (ahp->ah_al_count == n)
609 		fmd_asru_hash_discard(ahp, uuid, ahp->ah_error);
610 }
611 
612 void
613 fmd_asru_hash_refresh(fmd_asru_hash_t *ahp)
614 {
615 	struct dirent *dp;
616 	DIR *dirp;
617 	int zero;
618 
619 	if ((dirp = opendir(ahp->ah_dirpath)) == NULL) {
620 		fmd_error(EFMD_ASRU_NODIR,
621 		    "failed to open asru cache directory %s", ahp->ah_dirpath);
622 		return;
623 	}
624 
625 	(void) fmd_conf_getprop(fmd.d_conf, "rsrc.zero", &zero);
626 
627 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
628 
629 	while ((dp = readdir(dirp)) != NULL) {
630 		if (dp->d_name[0] == '.')
631 			continue; /* skip "." and ".." */
632 
633 		if (zero)
634 			fmd_asru_hash_discard(ahp, dp->d_name, 0);
635 		else if (!fmd_strmatch(dp->d_name, "*-"))
636 			fmd_asru_hash_logopen(ahp, dp->d_name);
637 	}
638 
639 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
640 	(void) closedir(dirp);
641 }
642 
643 /*
644  * If the resource is present and faulty but not unusable, replay the fault
645  * event that caused it be marked faulty.  This will cause the agent
646  * subscribing to this fault class to again disable the resource.
647  */
648 /*ARGSUSED*/
649 static void
650 fmd_asru_hash_replay_asru(fmd_asru_t *ap, void *data)
651 {
652 	fmd_event_t *e;
653 	nvlist_t *nvl;
654 	char *class;
655 
656 	if (ap->asru_event != NULL && (ap->asru_flags & (FMD_ASRU_STATE |
657 	    FMD_ASRU_PRESENT)) == (FMD_ASRU_FAULTY | FMD_ASRU_PRESENT)) {
658 
659 		fmd_dprintf(FMD_DBG_ASRU,
660 		    "replaying fault event for %s", ap->asru_name);
661 
662 		(void) nvlist_xdup(ap->asru_event, &nvl, &fmd.d_nva);
663 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
664 
665 		(void) nvlist_add_string(nvl, FMD_EVN_UUID,
666 		    ((fmd_case_impl_t *)ap->asru_case)->ci_uuid);
667 
668 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
669 		fmd_dispq_dispatch(fmd.d_disp, e, class);
670 	}
671 }
672 
673 void
674 fmd_asru_hash_replay(fmd_asru_hash_t *ahp)
675 {
676 	fmd_asru_hash_apply(ahp, fmd_asru_hash_replay_asru, NULL);
677 }
678 
679 /*
680  * Check if the resource is still present. If not, and if the rsrc.age time
681  * has expired, then do an implicit repair on the resource.
682  */
683 /*ARGSUSED*/
684 static void
685 fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *arg)
686 {
687 	struct timeval tv;
688 	fmd_log_t *lp;
689 	hrtime_t hrt;
690 	int ps;
691 	int err;
692 
693 	ps = fmd_asru_replacement_state(alp->al_event);
694 	if (ps == FMD_OBJ_STATE_REPLACED) {
695 		fmd_asru_replaced(alp, &err);
696 	} else if (ps == FMD_OBJ_STATE_NOT_PRESENT) {
697 		fmd_time_gettimeofday(&tv);
698 		lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid,
699 		    FMD_LOG_ASRU);
700 		hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime);
701 		fmd_log_rele(lp);
702 		if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime)
703 			fmd_asru_removed(alp);
704 	}
705 }
706 
707 void
708 fmd_asru_clear_aged_rsrcs()
709 {
710 	fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, NULL);
711 }
712 
713 fmd_asru_hash_t *
714 fmd_asru_hash_create(const char *root, const char *dir)
715 {
716 	fmd_asru_hash_t *ahp;
717 	char path[PATH_MAX];
718 
719 	ahp = fmd_alloc(sizeof (fmd_asru_hash_t), FMD_SLEEP);
720 	(void) pthread_rwlock_init(&ahp->ah_lock, NULL);
721 	ahp->ah_hashlen = fmd.d_str_buckets;
722 	ahp->ah_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, FMD_SLEEP);
723 	ahp->ah_asru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
724 	    FMD_SLEEP);
725 	ahp->ah_case_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
726 	    FMD_SLEEP);
727 	ahp->ah_fru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
728 	    FMD_SLEEP);
729 	ahp->ah_label_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
730 	    FMD_SLEEP);
731 	ahp->ah_rsrc_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
732 	    FMD_SLEEP);
733 	(void) snprintf(path, sizeof (path), "%s/%s", root, dir);
734 	ahp->ah_dirpath = fmd_strdup(path, FMD_SLEEP);
735 	(void) fmd_conf_getprop(fmd.d_conf, "rsrc.age", &ahp->ah_lifetime);
736 	(void) fmd_conf_getprop(fmd.d_conf, "fakenotpresent",
737 	    (uint32_t *)&fmd_asru_fake_not_present);
738 	ahp->ah_al_count = 0;
739 	ahp->ah_count = 0;
740 	ahp->ah_error = 0;
741 	ahp->ah_topo = fmd_topo_hold();
742 
743 	return (ahp);
744 }
745 
746 void
747 fmd_asru_hash_destroy(fmd_asru_hash_t *ahp)
748 {
749 	fmd_asru_link_t *alp, *np;
750 	uint_t i;
751 
752 	for (i = 0; i < ahp->ah_hashlen; i++) {
753 		for (alp = ahp->ah_case_hash[i]; alp != NULL; alp = np) {
754 			np = alp->al_case_next;
755 			alp->al_case_next = NULL;
756 			fmd_case_rele(alp->al_case);
757 			alp->al_case = NULL;
758 			fmd_asru_al_hash_release(ahp, alp);
759 		}
760 	}
761 
762 	fmd_strfree(ahp->ah_dirpath);
763 	fmd_free(ahp->ah_hash, sizeof (void *) * ahp->ah_hashlen);
764 	fmd_free(ahp->ah_asru_hash, sizeof (void *) * ahp->ah_hashlen);
765 	fmd_free(ahp->ah_case_hash, sizeof (void *) * ahp->ah_hashlen);
766 	fmd_free(ahp->ah_fru_hash, sizeof (void *) * ahp->ah_hashlen);
767 	fmd_free(ahp->ah_label_hash, sizeof (void *) * ahp->ah_hashlen);
768 	fmd_free(ahp->ah_rsrc_hash, sizeof (void *) * ahp->ah_hashlen);
769 	fmd_topo_rele(ahp->ah_topo);
770 	fmd_free(ahp, sizeof (fmd_asru_hash_t));
771 }
772 
773 /*
774  * Take a snapshot of the ASRU database by placing an additional hold on each
775  * member in an auxiliary array, and then call 'func' for each ASRU.
776  */
777 void
778 fmd_asru_hash_apply(fmd_asru_hash_t *ahp,
779     void (*func)(fmd_asru_t *, void *), void *arg)
780 {
781 	fmd_asru_t *ap, **aps, **app;
782 	uint_t apc, i;
783 
784 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
785 
786 	aps = app = fmd_alloc(ahp->ah_count * sizeof (fmd_asru_t *), FMD_SLEEP);
787 	apc = ahp->ah_count;
788 
789 	for (i = 0; i < ahp->ah_hashlen; i++) {
790 		for (ap = ahp->ah_hash[i]; ap != NULL; ap = ap->asru_next)
791 			*app++ = fmd_asru_hold(ap);
792 	}
793 
794 	ASSERT(app == aps + apc);
795 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
796 
797 	for (i = 0; i < apc; i++) {
798 		if (aps[i]->asru_fmri != NULL)
799 			func(aps[i], arg);
800 		fmd_asru_hash_release(ahp, aps[i]);
801 	}
802 
803 	fmd_free(aps, apc * sizeof (fmd_asru_t *));
804 }
805 
806 void
807 fmd_asru_al_hash_apply(fmd_asru_hash_t *ahp,
808     void (*func)(fmd_asru_link_t *, void *), void *arg)
809 {
810 	fmd_asru_link_t *alp, **alps, **alpp;
811 	uint_t alpc, i;
812 
813 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
814 
815 	alps = alpp = fmd_alloc(ahp->ah_al_count * sizeof (fmd_asru_link_t *),
816 	    FMD_SLEEP);
817 	alpc = ahp->ah_al_count;
818 
819 	for (i = 0; i < ahp->ah_hashlen; i++) {
820 		for (alp = ahp->ah_case_hash[i]; alp != NULL;
821 		    alp = alp->al_case_next)
822 			*alpp++ = fmd_asru_al_hold(alp);
823 	}
824 
825 	ASSERT(alpp == alps + alpc);
826 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
827 
828 	for (i = 0; i < alpc; i++) {
829 		func(alps[i], arg);
830 		fmd_asru_al_hash_release(ahp, alps[i]);
831 	}
832 
833 	fmd_free(alps, alpc * sizeof (fmd_asru_link_t *));
834 }
835 
836 static void
837 fmd_asru_do_hash_apply(fmd_asru_hash_t *ahp, char *name,
838     void (*func)(fmd_asru_link_t *, void *), void *arg,
839     fmd_asru_link_t **hash, size_t match_offset, size_t next_offset)
840 {
841 	fmd_asru_link_t *alp, **alps, **alpp;
842 	uint_t alpc = 0, i;
843 	uint_t h;
844 
845 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
846 
847 	h = fmd_asru_strhash(ahp, name);
848 
849 	for (alp = hash[h]; alp != NULL; alp =
850 	    /* LINTED pointer alignment */
851 	    FMD_ASRU_AL_HASH_NEXT(alp, next_offset))
852 		if (fmd_asru_strcmp(ahp,
853 		    /* LINTED pointer alignment */
854 		    FMD_ASRU_AL_HASH_NAME(alp, match_offset), name))
855 			alpc++;
856 
857 	alps = alpp = fmd_alloc(alpc * sizeof (fmd_asru_link_t *), FMD_SLEEP);
858 
859 	for (alp = hash[h]; alp != NULL; alp =
860 	    /* LINTED pointer alignment */
861 	    FMD_ASRU_AL_HASH_NEXT(alp, next_offset))
862 		if (fmd_asru_strcmp(ahp,
863 		    /* LINTED pointer alignment */
864 		    FMD_ASRU_AL_HASH_NAME(alp, match_offset), name))
865 			*alpp++ = fmd_asru_al_hold(alp);
866 
867 	ASSERT(alpp == alps + alpc);
868 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
869 
870 	for (i = 0; i < alpc; i++) {
871 		func(alps[i], arg);
872 		fmd_asru_al_hash_release(ahp, alps[i]);
873 	}
874 
875 	fmd_free(alps, alpc * sizeof (fmd_asru_link_t *));
876 }
877 
878 void
879 fmd_asru_hash_apply_by_asru(fmd_asru_hash_t *ahp, char *name,
880     void (*func)(fmd_asru_link_t *, void *), void *arg)
881 {
882 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_asru_hash,
883 	    offsetof(fmd_asru_link_t, al_asru_name),
884 	    offsetof(fmd_asru_link_t, al_asru_next));
885 }
886 
887 void
888 fmd_asru_hash_apply_by_case(fmd_asru_hash_t *ahp, fmd_case_t *cp,
889 	void (*func)(fmd_asru_link_t *, void *), void *arg)
890 {
891 	fmd_asru_do_hash_apply(ahp, ((fmd_case_impl_t *)cp)->ci_uuid, func, arg,
892 	    ahp->ah_case_hash, offsetof(fmd_asru_link_t, al_case_uuid),
893 	    offsetof(fmd_asru_link_t, al_case_next));
894 }
895 
896 void
897 fmd_asru_hash_apply_by_fru(fmd_asru_hash_t *ahp, char *name,
898     void (*func)(fmd_asru_link_t *, void *), void *arg)
899 {
900 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_fru_hash,
901 	    offsetof(fmd_asru_link_t, al_fru_name),
902 	    offsetof(fmd_asru_link_t, al_fru_next));
903 }
904 
905 void
906 fmd_asru_hash_apply_by_rsrc(fmd_asru_hash_t *ahp, char *name,
907     void (*func)(fmd_asru_link_t *, void *), void *arg)
908 {
909 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_rsrc_hash,
910 	    offsetof(fmd_asru_link_t, al_rsrc_name),
911 	    offsetof(fmd_asru_link_t, al_rsrc_next));
912 }
913 
914 void
915 fmd_asru_hash_apply_by_label(fmd_asru_hash_t *ahp, char *name,
916     void (*func)(fmd_asru_link_t *, void *), void *arg)
917 {
918 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_label_hash,
919 	    offsetof(fmd_asru_link_t, al_label),
920 	    offsetof(fmd_asru_link_t, al_label_next));
921 }
922 
923 /*
924  * Lookup an asru in the hash by name and place a hold on it.  If the asru is
925  * not found, no entry is created and NULL is returned.
926  */
927 fmd_asru_t *
928 fmd_asru_hash_lookup_name(fmd_asru_hash_t *ahp, const char *name)
929 {
930 	fmd_asru_t *ap;
931 
932 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
933 	ap = fmd_asru_hash_lookup(ahp, name);
934 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
935 
936 	return (ap);
937 }
938 
939 /*
940  * Create a resource cache entry using the fault event "nvl" for one of the
941  * suspects from the case "cp".
942  *
943  * The fault event can have the following components :  FM_FAULT_ASRU,
944  * FM_FAULT_FRU, FM_FAULT_RESOURCE. These should be set by the Diagnosis Engine
945  * when calling fmd_nvl_create_fault(). In the general case, these are all
946  * optional and an entry will always be added into the cache even if one or all
947  * of these fields is missing.
948  *
949  * However, for hardware faults the recommended practice is that the fault
950  * event should always have the FM_FAULT_RESOURCE field present and that this
951  * should be represented in hc-scheme.
952  *
953  * Currently the DE should also add the FM_FAULT_ASRU and FM_FAULT_FRU fields
954  * where known, though at some future stage fmd might be able to fill these
955  * in automatically from the topology.
956  */
957 fmd_asru_link_t *
958 fmd_asru_hash_create_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, nvlist_t *nvl)
959 {
960 	char *parsed_uuid;
961 	uuid_t uuid;
962 	int uuidlen;
963 	fmd_asru_link_t *alp;
964 
965 	/*
966 	 * Generate a UUID for the ASRU.  libuuid cleverly gives us no
967 	 * interface for specifying or learning the buffer size.  Sigh.
968 	 * The spec says 36 bytes but we use a tunable just to be safe.
969 	 */
970 	(void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &uuidlen);
971 	parsed_uuid = fmd_zalloc(uuidlen + 1, FMD_SLEEP);
972 	uuid_generate(uuid);
973 	uuid_unparse(uuid, parsed_uuid);
974 
975 	/*
976 	 * Now create the resource cache entries.
977 	 */
978 	fmd_case_hold_locked(cp);
979 	alp = fmd_asru_al_create(ahp, nvl, cp, parsed_uuid);
980 	TRACE((FMD_DBG_ASRU, "asru %s created as %p",
981 	    alp->al_uuid, (void *)alp->al_asru));
982 
983 	fmd_free(parsed_uuid, uuidlen + 1);
984 	return (alp);
985 
986 }
987 
988 /*
989  * Release the reference count on an asru obtained using fmd_asru_hash_lookup.
990  * We take 'ahp' for symmetry and in case we need to use it in future work.
991  */
992 /*ARGSUSED*/
993 void
994 fmd_asru_hash_release(fmd_asru_hash_t *ahp, fmd_asru_t *ap)
995 {
996 	(void) pthread_mutex_lock(&ap->asru_lock);
997 
998 	ASSERT(ap->asru_refs != 0);
999 	if (--ap->asru_refs == 0)
1000 		fmd_asru_destroy(ap);
1001 	else
1002 		(void) pthread_mutex_unlock(&ap->asru_lock);
1003 }
1004 
1005 static void
1006 fmd_asru_do_delete_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp,
1007     fmd_asru_link_t **hash, size_t next_offset, char *name)
1008 {
1009 	uint_t h;
1010 	fmd_asru_link_t *alp, **pp, *alpnext, **alpnextp;
1011 
1012 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
1013 	h = fmd_asru_strhash(ahp, name);
1014 	pp = &hash[h];
1015 	for (alp = *pp; alp != NULL; alp = alpnext) {
1016 		/* LINTED pointer alignment */
1017 		alpnextp = FMD_ASRU_AL_HASH_NEXTP(alp, next_offset);
1018 		alpnext = *alpnextp;
1019 		if (alp->al_case == cp) {
1020 			*pp = *alpnextp;
1021 			*alpnextp = NULL;
1022 		} else
1023 			pp = alpnextp;
1024 	}
1025 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
1026 }
1027 
1028 static void
1029 fmd_asru_do_hash_delete(fmd_asru_hash_t *ahp, fmd_case_susp_t *cis,
1030     fmd_case_t *cp, fmd_asru_link_t **hash, size_t next_offset, char *nvname)
1031 {
1032 	nvlist_t *nvl;
1033 	char *name = NULL;
1034 	ssize_t namelen;
1035 
1036 	if (nvlist_lookup_nvlist(cis->cis_nvl, nvname, &nvl) == 0 &&
1037 	    (namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) != -1 &&
1038 	    (name = fmd_alloc(namelen + 1, FMD_SLEEP)) != NULL) {
1039 		if (fmd_fmri_nvl2str(nvl, name, namelen + 1) != -1)
1040 			fmd_asru_do_delete_entry(ahp, cp, hash, next_offset,
1041 			    name);
1042 		fmd_free(name, namelen + 1);
1043 	} else
1044 		fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, "");
1045 }
1046 
1047 void
1048 fmd_asru_hash_delete_case(fmd_asru_hash_t *ahp, fmd_case_t *cp)
1049 {
1050 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1051 	fmd_case_susp_t *cis;
1052 	fmd_asru_link_t *alp, **plp, *alpnext;
1053 	fmd_asru_t *ap;
1054 	char path[PATH_MAX];
1055 	char *label;
1056 	uint_t h;
1057 
1058 	/*
1059 	 * first delete hash entries for each suspect
1060 	 */
1061 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
1062 		fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_fru_hash,
1063 		    offsetof(fmd_asru_link_t, al_fru_next), FM_FAULT_FRU);
1064 		fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_rsrc_hash,
1065 		    offsetof(fmd_asru_link_t, al_rsrc_next), FM_FAULT_RESOURCE);
1066 		if (nvlist_lookup_string(cis->cis_nvl, FM_FAULT_LOCATION,
1067 		    &label) != 0)
1068 			label = "";
1069 		fmd_asru_do_delete_entry(ahp, cp, ahp->ah_label_hash,
1070 		    offsetof(fmd_asru_link_t, al_label_next), label);
1071 		fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_asru_hash,
1072 		    offsetof(fmd_asru_link_t, al_asru_next), FM_FAULT_ASRU);
1073 	}
1074 
1075 	/*
1076 	 * then delete associated case hash entries
1077 	 */
1078 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
1079 	h = fmd_asru_strhash(ahp, cip->ci_uuid);
1080 	plp = &ahp->ah_case_hash[h];
1081 	for (alp = *plp; alp != NULL; alp = alpnext) {
1082 		alpnext = alp->al_case_next;
1083 		if (alp->al_case == cp) {
1084 			*plp = alp->al_case_next;
1085 			alp->al_case_next = NULL;
1086 			ASSERT(ahp->ah_al_count != 0);
1087 			ahp->ah_al_count--;
1088 
1089 			/*
1090 			 * decrement case ref.
1091 			 */
1092 			fmd_case_rele_locked(cp);
1093 			alp->al_case = NULL;
1094 
1095 			/*
1096 			 * If we found a matching ASRU, unlink its log file and
1097 			 * then release the hash entry. Note that it may still
1098 			 * be referenced if another thread is manipulating it;
1099 			 * this is ok because once we unlink, the log file will
1100 			 * not be restored, and the log data will be freed when
1101 			 * all of the referencing threads release their
1102 			 * respective references.
1103 			 */
1104 			(void) snprintf(path, sizeof (path), "%s/%s",
1105 			    ahp->ah_dirpath, alp->al_uuid);
1106 			if (unlink(path) != 0)
1107 				fmd_error(EFMD_ASRU_UNLINK,
1108 				    "failed to unlink asru %s", path);
1109 
1110 			/*
1111 			 * Now unlink from the global per-resource cache
1112 			 * and if this is the last link then remove that from
1113 			 * it's own hash too.
1114 			 */
1115 			ap = alp->al_asru;
1116 			(void) pthread_mutex_lock(&ap->asru_lock);
1117 			fmd_list_delete(&ap->asru_list, alp);
1118 			if (ap->asru_list.l_next == NULL) {
1119 				uint_t h;
1120 				fmd_asru_t *ap2, **pp;
1121 				fmd_asru_t *apnext, **apnextp;
1122 
1123 				ASSERT(ahp->ah_count != 0);
1124 				ahp->ah_count--;
1125 				h = fmd_asru_strhash(ahp, ap->asru_name);
1126 				pp = &ahp->ah_hash[h];
1127 				for (ap2 = *pp; ap2 != NULL; ap2 = apnext) {
1128 					apnextp = &ap2->asru_next;
1129 					apnext = *apnextp;
1130 					if (ap2 == ap) {
1131 						*pp = *apnextp;
1132 						*apnextp = NULL;
1133 					} else
1134 						pp = apnextp;
1135 				}
1136 			}
1137 			(void) pthread_mutex_unlock(&ap->asru_lock);
1138 			fmd_asru_al_hash_release(ahp, alp);
1139 		} else
1140 			plp = &alp->al_case_next;
1141 	}
1142 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
1143 }
1144 
1145 static void
1146 fmd_asru_repair_containee(fmd_asru_link_t *alp, void *er)
1147 {
1148 	if (er && (alp->al_asru->asru_flags & FMD_ASRU_INVISIBLE) &&
1149 	    alp->al_asru_fmri && fmd_fmri_contains(er,
1150 	    alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
1151 	    FMD_ASRU_REPAIRED))
1152 		fmd_case_update(alp->al_case);
1153 }
1154 
1155 void
1156 fmd_asru_repaired(fmd_asru_link_t *alp, void *er)
1157 {
1158 	int flags;
1159 	int rval;
1160 
1161 	/*
1162 	 * repair this asru cache entry
1163 	 */
1164 	rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_REPAIRED);
1165 
1166 	/*
1167 	 * now check if all entries associated with this asru are repaired and
1168 	 * if so repair containees
1169 	 */
1170 	(void) pthread_mutex_lock(&alp->al_asru->asru_lock);
1171 	flags = alp->al_asru->asru_flags;
1172 	(void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
1173 	if (!(flags & (FMD_ASRU_FAULTY | FMD_ASRU_INVISIBLE)))
1174 		fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_containee,
1175 		    alp->al_asru_fmri);
1176 
1177 	/*
1178 	 * if called from fmd_adm_repair() and we really did clear the bit then
1179 	 * we need to do a case update to see if the associated case can be
1180 	 * repaired. No need to do this if called from fmd_case_repair() (ie
1181 	 * when er is NULL) as the case will be explicitly repaired anyway.
1182 	 */
1183 	if (er) {
1184 		*(int *)er = 0;
1185 		if (rval)
1186 			fmd_case_update(alp->al_case);
1187 	}
1188 }
1189 
1190 static void
1191 fmd_asru_acquit_containee(fmd_asru_link_t *alp, void *er)
1192 {
1193 	if (er && (alp->al_asru->asru_flags & FMD_ASRU_INVISIBLE) &&
1194 	    alp->al_asru_fmri && fmd_fmri_contains(er,
1195 	    alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
1196 	    FMD_ASRU_ACQUITTED))
1197 		fmd_case_update(alp->al_case);
1198 }
1199 
1200 void
1201 fmd_asru_acquit(fmd_asru_link_t *alp, void *er)
1202 {
1203 	int flags;
1204 	int rval;
1205 
1206 	/*
1207 	 * acquit this asru cache entry
1208 	 */
1209 	rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_ACQUITTED);
1210 
1211 	/*
1212 	 * now check if all entries associated with this asru are acquitted and
1213 	 * if so acquit containees
1214 	 */
1215 	(void) pthread_mutex_lock(&alp->al_asru->asru_lock);
1216 	flags = alp->al_asru->asru_flags;
1217 	(void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
1218 	if (!(flags & (FMD_ASRU_FAULTY | FMD_ASRU_INVISIBLE)))
1219 		fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_acquit_containee,
1220 		    alp->al_asru_fmri);
1221 
1222 	/*
1223 	 * if called from fmd_adm_acquit() and we really did clear the bit then
1224 	 * we need to do a case update to see if the associated case can be
1225 	 * repaired. No need to do this if called from fmd_case_acquit() (ie
1226 	 * when er is NULL) as the case will be explicitly repaired anyway.
1227 	 */
1228 	if (er) {
1229 		*(int *)er = 0;
1230 		if (rval)
1231 			fmd_case_update(alp->al_case);
1232 	}
1233 }
1234 
1235 static void
1236 fmd_asru_replaced_containee(fmd_asru_link_t *alp, void *er)
1237 {
1238 	if (er && (alp->al_asru->asru_flags & FMD_ASRU_INVISIBLE) &&
1239 	    alp->al_asru_fmri && fmd_fmri_contains(er,
1240 	    alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
1241 	    FMD_ASRU_REPLACED))
1242 		fmd_case_update(alp->al_case);
1243 }
1244 
1245 void
1246 fmd_asru_replaced(fmd_asru_link_t *alp, void *er)
1247 {
1248 	int flags;
1249 	int rval;
1250 	int ps;
1251 
1252 	ps = fmd_asru_replacement_state(alp->al_event);
1253 	if (ps == FMD_OBJ_STATE_STILL_PRESENT)
1254 		return;
1255 
1256 	/*
1257 	 * mark this cache entry as replaced
1258 	 */
1259 	rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_REPLACED);
1260 
1261 	/*
1262 	 * now check if all entries associated with this asru are replaced and
1263 	 * if so replace containees
1264 	 */
1265 	(void) pthread_mutex_lock(&alp->al_asru->asru_lock);
1266 	flags = alp->al_asru->asru_flags;
1267 	(void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
1268 	if (!(flags & (FMD_ASRU_FAULTY | FMD_ASRU_INVISIBLE)))
1269 		fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_replaced_containee,
1270 		    alp->al_asru_fmri);
1271 
1272 	*(int *)er = 0;
1273 	if (rval)
1274 		fmd_case_update(alp->al_case);
1275 }
1276 
1277 static void
1278 fmd_asru_removed_containee(fmd_asru_link_t *alp, void *er)
1279 {
1280 	if (er && (alp->al_asru->asru_flags & FMD_ASRU_INVISIBLE) &&
1281 	    alp->al_asru_fmri && fmd_fmri_contains(er,
1282 	    alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
1283 	    0))
1284 		fmd_case_update(alp->al_case);
1285 }
1286 
1287 void
1288 fmd_asru_removed(fmd_asru_link_t *alp)
1289 {
1290 	int flags;
1291 	int rval;
1292 
1293 	/*
1294 	 * mark this cache entry as replacded
1295 	 */
1296 	rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 0);
1297 
1298 	/*
1299 	 * now check if all entries associated with this asru are removed and
1300 	 * if so replace containees
1301 	 */
1302 	(void) pthread_mutex_lock(&alp->al_asru->asru_lock);
1303 	flags = alp->al_asru->asru_flags;
1304 	(void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
1305 	if (!(flags & (FMD_ASRU_FAULTY | FMD_ASRU_INVISIBLE)))
1306 		fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_removed_containee,
1307 		    alp->al_asru_fmri);
1308 	if (rval)
1309 		fmd_case_update(alp->al_case);
1310 }
1311 
1312 static void
1313 fmd_asru_logevent(fmd_asru_link_t *alp)
1314 {
1315 	fmd_asru_t *ap = alp->al_asru;
1316 	boolean_t faulty = (alp->al_flags & FMD_ASRU_FAULTY) != 0;
1317 	boolean_t unusable = (alp->al_flags & FMD_ASRU_UNUSABLE) != 0;
1318 	boolean_t message = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0;
1319 	boolean_t repaired = (alp->al_reason == FMD_ASRU_REPAIRED);
1320 	boolean_t replaced = (alp->al_reason == FMD_ASRU_REPLACED);
1321 	boolean_t acquitted = (alp->al_reason == FMD_ASRU_ACQUITTED);
1322 
1323 	fmd_case_impl_t *cip;
1324 	fmd_event_t *e;
1325 	fmd_log_t *lp;
1326 	nvlist_t *nvl;
1327 	char *class;
1328 
1329 	ASSERT(MUTEX_HELD(&ap->asru_lock));
1330 	cip = (fmd_case_impl_t *)alp->al_case;
1331 	ASSERT(cip != NULL);
1332 
1333 	if ((lp = alp->al_log) == NULL)
1334 		lp = fmd_log_open(ap->asru_root, alp->al_uuid, FMD_LOG_ASRU);
1335 
1336 	if (lp == NULL)
1337 		return; /* can't log events if we can't open the log */
1338 
1339 	nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[faulty | (unusable << 1)],
1340 	    alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, faulty, unusable,
1341 	    message, alp->al_event, &cip->ci_tv, repaired, replaced, acquitted);
1342 
1343 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1344 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
1345 
1346 	fmd_event_hold(e);
1347 	fmd_log_append(lp, e, NULL);
1348 	fmd_event_rele(e);
1349 
1350 	/*
1351 	 * For now, we close the log file after every update to conserve file
1352 	 * descriptors and daemon overhead.  If this becomes a performance
1353 	 * issue this code can change to keep a fixed-size LRU cache of logs.
1354 	 */
1355 	fmd_log_rele(lp);
1356 	alp->al_log = NULL;
1357 }
1358 
1359 int
1360 fmd_asru_setflags(fmd_asru_link_t *alp, uint_t sflag)
1361 {
1362 	fmd_asru_t *ap = alp->al_asru;
1363 	uint_t nstate, ostate;
1364 
1365 	ASSERT(!(sflag & ~FMD_ASRU_STATE));
1366 	ASSERT(sflag != FMD_ASRU_STATE);
1367 
1368 	(void) pthread_mutex_lock(&ap->asru_lock);
1369 
1370 	ostate = alp->al_flags & FMD_ASRU_STATE;
1371 	alp->al_flags |= sflag;
1372 	nstate = alp->al_flags & FMD_ASRU_STATE;
1373 
1374 	if (nstate == ostate) {
1375 		(void) pthread_mutex_unlock(&ap->asru_lock);
1376 		return (0);
1377 	}
1378 
1379 	ap->asru_flags |= sflag;
1380 	TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid,
1381 	    _fmd_asru_snames[ostate], _fmd_asru_snames[nstate]));
1382 
1383 	fmd_asru_logevent(alp);
1384 
1385 	(void) pthread_cond_broadcast(&ap->asru_cv);
1386 	(void) pthread_mutex_unlock(&ap->asru_lock);
1387 	return (1);
1388 }
1389 
1390 int
1391 fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag, uint8_t reason)
1392 {
1393 	fmd_asru_t *ap = alp->al_asru;
1394 	fmd_asru_link_t *nalp;
1395 	uint_t nstate, ostate, flags = 0;
1396 
1397 	ASSERT(!(sflag & ~FMD_ASRU_STATE));
1398 	ASSERT(sflag != FMD_ASRU_STATE);
1399 
1400 	(void) pthread_mutex_lock(&ap->asru_lock);
1401 
1402 	ostate = alp->al_flags & FMD_ASRU_STATE;
1403 	alp->al_flags &= ~sflag;
1404 	nstate = alp->al_flags & FMD_ASRU_STATE;
1405 
1406 	if (nstate == ostate) {
1407 		if (reason > alp->al_reason) {
1408 			alp->al_reason = reason;
1409 			fmd_asru_logevent(alp);
1410 			(void) pthread_cond_broadcast(&ap->asru_cv);
1411 		}
1412 		(void) pthread_mutex_unlock(&ap->asru_lock);
1413 		return (0);
1414 	}
1415 	if (reason > alp->al_reason)
1416 		alp->al_reason = reason;
1417 
1418 	if (sflag == FMD_ASRU_UNUSABLE)
1419 		ap->asru_flags &= ~sflag;
1420 	else if (sflag == FMD_ASRU_FAULTY) {
1421 		/*
1422 		 * only clear the faulty bit if all links are clear
1423 		 */
1424 		for (nalp = fmd_list_next(&ap->asru_list); nalp != NULL;
1425 		    nalp = fmd_list_next(nalp))
1426 			flags |= nalp->al_flags;
1427 		if (!(flags & FMD_ASRU_FAULTY))
1428 			ap->asru_flags &= ~sflag;
1429 	}
1430 
1431 	TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid,
1432 	    _fmd_asru_snames[ostate], _fmd_asru_snames[nstate]));
1433 
1434 	fmd_asru_logevent(alp);
1435 
1436 	(void) pthread_cond_broadcast(&ap->asru_cv);
1437 	(void) pthread_mutex_unlock(&ap->asru_lock);
1438 
1439 	return (1);
1440 }
1441 
1442 /*
1443  * Report the current known state of the link entry (ie this particular fault
1444  * affecting this particular ASRU).
1445  */
1446 int
1447 fmd_asru_al_getstate(fmd_asru_link_t *alp)
1448 {
1449 	int us, st;
1450 	nvlist_t *asru;
1451 	int ps;
1452 
1453 	ps = fmd_asru_replacement_state(alp->al_event);
1454 	if (ps == FMD_OBJ_STATE_NOT_PRESENT)
1455 		return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE);
1456 	if (ps == FMD_OBJ_STATE_REPLACED) {
1457 		if (alp->al_reason < FMD_ASRU_REPLACED)
1458 			alp->al_reason = FMD_ASRU_REPLACED;
1459 		return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE);
1460 	}
1461 
1462 	st = (alp->al_flags & FMD_ASRU_STATE) | FMD_ASRU_PRESENT;
1463 	if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0) {
1464 		us = fmd_fmri_service_state(asru);
1465 		if (us == -1 || us == FMD_SERVICE_STATE_UNKNOWN) {
1466 			/* not supported by scheme - try fmd_fmri_unusable */
1467 			us = fmd_fmri_unusable(asru);
1468 		} else if (us == FMD_SERVICE_STATE_UNUSABLE) {
1469 			st |= FMD_ASRU_UNUSABLE;
1470 			return (st);
1471 		} else if (us == FMD_SERVICE_STATE_OK) {
1472 			st &= ~FMD_ASRU_UNUSABLE;
1473 			return (st);
1474 		} else if (us == FMD_SERVICE_STATE_ISOLATE_PENDING) {
1475 			st &= ~FMD_ASRU_UNUSABLE;
1476 			return (st);
1477 		} else if (us == FMD_SERVICE_STATE_DEGRADED) {
1478 			st &= ~FMD_ASRU_UNUSABLE;
1479 			st |= FMD_ASRU_DEGRADED;
1480 			return (st);
1481 		}
1482 	} else
1483 		us = (alp->al_flags & FMD_ASRU_UNUSABLE);
1484 	if (us > 0)
1485 		st |= FMD_ASRU_UNUSABLE;
1486 	else if (us == 0)
1487 		st &= ~FMD_ASRU_UNUSABLE;
1488 	return (st);
1489 }
1490 
1491 /*
1492  * Report the current known state of the ASRU by refreshing its unusable status
1493  * based upon the routines provided by the scheme module.  If the unusable bit
1494  * is different, we do *not* generate a state change here because that change
1495  * may be unrelated to fmd activities and therefore we have no case or event.
1496  * The absence of the transition is harmless as this function is only provided
1497  * for RPC observability and fmd's clients are only concerned with ASRU_FAULTY.
1498  */
1499 int
1500 fmd_asru_getstate(fmd_asru_t *ap)
1501 {
1502 	int us, st;
1503 
1504 	if (!(ap->asru_flags & FMD_ASRU_INTERNAL) &&
1505 	    (fmd_asru_fake_not_present >= FMD_OBJ_STATE_REPLACED ||
1506 	    fmd_fmri_present(ap->asru_fmri) <= 0))
1507 		return (0); /* do not report non-fmd non-present resources */
1508 
1509 	us = fmd_fmri_unusable(ap->asru_fmri);
1510 	st = ap->asru_flags & FMD_ASRU_STATE;
1511 
1512 	if (us > 0)
1513 		st |= FMD_ASRU_UNUSABLE;
1514 	else if (us == 0)
1515 		st &= ~FMD_ASRU_UNUSABLE;
1516 
1517 	return (st);
1518 }
1519