xref: /titanic_44/usr/src/cmd/fm/fmd/common/fmd_asru.c (revision a38ddfee9c8c6b6c5a2947ff52fd2338362a4444)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/fm/protocol.h>
28 #include <uuid/uuid.h>
29 
30 #include <dirent.h>
31 #include <limits.h>
32 #include <unistd.h>
33 #include <alloca.h>
34 #include <stddef.h>
35 #include <fm/libtopo.h>
36 
37 #include <fmd_alloc.h>
38 #include <fmd_string.h>
39 #include <fmd_error.h>
40 #include <fmd_subr.h>
41 #include <fmd_protocol.h>
42 #include <fmd_event.h>
43 #include <fmd_conf.h>
44 #include <fmd_fmri.h>
45 #include <fmd_dispq.h>
46 #include <fmd_case.h>
47 #include <fmd_module.h>
48 #include <fmd_asru.h>
49 
50 #include <fmd.h>
51 
/*
 * Resource event class names, one for each combination of the UNUSABLE and
 * FAULTED state bits; the comment on each entry gives the bit values for
 * that slot.
 */
52 static const char *const _fmd_asru_events[] = {
53 	FMD_RSRC_CLASS "asru.ok",		/* UNUSABLE=0 FAULTED=0 */
54 	FMD_RSRC_CLASS "asru.degraded",		/* UNUSABLE=0 FAULTED=1 */
55 	FMD_RSRC_CLASS "asru.unknown",		/* UNUSABLE=1 FAULTED=0 */
56 	FMD_RSRC_CLASS "asru.faulted"		/* UNUSABLE=1 FAULTED=1 */
57 };
58 
/*
 * Short state names for trace output.  fmd_asru_hash_recreate() indexes this
 * table with (asru_flags & FMD_ASRU_STATE).
 */
59 static const char *const _fmd_asru_snames[] = {
60 	"uf", "uF", "Uf", "UF"			/* same order as above */
61 };
62 
/*
 * When non-zero, fmd_asru_replacement_state() returns this value directly
 * instead of querying any FMRI.  It is loaded from the "fakenotpresent"
 * configuration property in fmd_asru_hash_create().
 */
63 volatile uint32_t fmd_asru_fake_not_present = 0;
64 
65 static uint_t
66 fmd_asru_strhash(fmd_asru_hash_t *ahp, const char *val)
67 {
68 	return (topo_fmri_strhash(ahp->ah_topo->ft_hdl, val) % ahp->ah_hashlen);
69 }
70 
71 static boolean_t
72 fmd_asru_strcmp(fmd_asru_hash_t *ahp, const char *a, const char *b)
73 {
74 	return (topo_fmri_strcmp(ahp->ah_topo->ft_hdl, a, b));
75 }
76 
77 static fmd_asru_t *
78 fmd_asru_create(fmd_asru_hash_t *ahp, const char *uuid,
79     const char *name, nvlist_t *fmri)
80 {
81 	fmd_asru_t *ap = fmd_zalloc(sizeof (fmd_asru_t), FMD_SLEEP);
82 	char *s;
83 
84 	(void) pthread_mutex_init(&ap->asru_lock, NULL);
85 	(void) pthread_cond_init(&ap->asru_cv, NULL);
86 
87 	ap->asru_name = fmd_strdup(name, FMD_SLEEP);
88 	if (fmri)
89 		(void) nvlist_xdup(fmri, &ap->asru_fmri, &fmd.d_nva);
90 	ap->asru_root = fmd_strdup(ahp->ah_dirpath, FMD_SLEEP);
91 	ap->asru_uuid = fmd_strdup(uuid, FMD_SLEEP);
92 	ap->asru_uuidlen = ap->asru_uuid ? strlen(ap->asru_uuid) : 0;
93 	ap->asru_refs = 1;
94 
95 	if (fmri && nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &s) == 0 &&
96 	    strcmp(s, FM_FMRI_SCHEME_FMD) == 0)
97 		ap->asru_flags |= FMD_ASRU_INTERNAL;
98 
99 	return (ap);
100 }
101 
102 static void
103 fmd_asru_destroy(fmd_asru_t *ap)
104 {
105 	ASSERT(MUTEX_HELD(&ap->asru_lock));
106 	ASSERT(ap->asru_refs == 0);
107 
108 	nvlist_free(ap->asru_event);
109 	fmd_strfree(ap->asru_name);
110 	nvlist_free(ap->asru_fmri);
111 	fmd_strfree(ap->asru_root);
112 	fmd_free(ap->asru_uuid, ap->asru_uuidlen + 1);
113 	fmd_free(ap, sizeof (fmd_asru_t));
114 }
115 
116 static void
117 fmd_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_t *ap)
118 {
119 	uint_t h = fmd_asru_strhash(ahp, ap->asru_name);
120 
121 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
122 	ap->asru_next = ahp->ah_hash[h];
123 	ahp->ah_hash[h] = ap;
124 	ahp->ah_count++;
125 }
126 
127 static fmd_asru_t *
128 fmd_asru_hold(fmd_asru_t *ap)
129 {
130 	(void) pthread_mutex_lock(&ap->asru_lock);
131 	ap->asru_refs++;
132 	ASSERT(ap->asru_refs != 0);
133 	(void) pthread_mutex_unlock(&ap->asru_lock);
134 	return (ap);
135 }
136 
137 /*
138  * Lookup an asru in the hash by name and place a hold on it.  If the asru is
139  * not found, no entry is created and NULL is returned.  This internal function
140  * is for callers who have the ah_lock held and is used by lookup_name below.
141  */
142 fmd_asru_t *
143 fmd_asru_hash_lookup(fmd_asru_hash_t *ahp, const char *name)
144 {
145 	fmd_asru_t *ap;
146 	uint_t h;
147 
148 	ASSERT(RW_LOCK_HELD(&ahp->ah_lock));
149 	h = fmd_asru_strhash(ahp, name);
150 
151 	for (ap = ahp->ah_hash[h]; ap != NULL; ap = ap->asru_next) {
152 		if (fmd_asru_strcmp(ahp, ap->asru_name, name))
153 			break;
154 	}
155 
156 	if (ap != NULL)
157 		(void) fmd_asru_hold(ap);
158 	else
159 		(void) fmd_set_errno(EFMD_ASRU_NOENT);
160 
161 	return (ap);
162 }
163 
164 static int
165 fmd_asru_replacement_state(nvlist_t *event)
166 {
167 	int ps = -1;
168 	nvlist_t *asru, *fru, *rsrc;
169 
170 	/*
171 	 * Check if there is evidence that this object is no longer present.
172 	 * In general fmd_fmri_present() should be supported on resources and/or
173 	 * frus, as those are the things that are physically present or not
174 	 * present - an asru can be spread over a number of frus some of which
175 	 * are present and some not, so fmd_fmri_present() is not generally
176 	 * meaningful. However retain a check for asru first for compatibility.
177 	 * If we have checked all three and we still get -1 then nothing knows
178 	 * whether it's present or not, so err on the safe side and treat it
179 	 * as still present.
180 	 */
181 	if (fmd_asru_fake_not_present)
182 		return (fmd_asru_fake_not_present);
183 	if (nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0)
184 		ps = fmd_fmri_replaced(asru);
185 	if (ps == -1) {
186 		if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, &rsrc) == 0)
187 			ps = fmd_fmri_replaced(rsrc);
188 	} else if (ps == FMD_OBJ_STATE_UNKNOWN) {
189 		/* see if we can improve on UNKNOWN */
190 		if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE,
191 		    &rsrc) == 0) {
192 			int ps2 = fmd_fmri_replaced(rsrc);
193 			if (ps2 == FMD_OBJ_STATE_STILL_PRESENT ||
194 			    ps2 == FMD_OBJ_STATE_REPLACED)
195 				ps = ps2;
196 		}
197 	}
198 	if (ps == -1) {
199 		if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0)
200 			ps = fmd_fmri_replaced(fru);
201 	} else if (ps == FMD_OBJ_STATE_UNKNOWN) {
202 		/* see if we can improve on UNKNOWN */
203 		if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0) {
204 			int ps2 = fmd_fmri_replaced(fru);
205 			if (ps2 == FMD_OBJ_STATE_STILL_PRESENT ||
206 			    ps2 == FMD_OBJ_STATE_REPLACED)
207 				ps = ps2;
208 		}
209 	}
210 	if (ps == -1)
211 		ps = FMD_OBJ_STATE_UNKNOWN;
212 	return (ps);
213 }
214 
215 static void
216 fmd_asru_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
217     char *name)
218 {
219 	uint_t h = fmd_asru_strhash(ahp, name);
220 
221 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
222 	alp->al_asru_next = ahp->ah_asru_hash[h];
223 	ahp->ah_asru_hash[h] = alp;
224 	ahp->ah_al_count++;
225 }
226 
227 static void
228 fmd_asru_case_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
229     char *name)
230 {
231 	uint_t h = fmd_asru_strhash(ahp, name);
232 
233 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
234 	alp->al_case_next = ahp->ah_case_hash[h];
235 	ahp->ah_case_hash[h] = alp;
236 }
237 
238 static void
239 fmd_asru_fru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, char *name)
240 {
241 	uint_t h = fmd_asru_strhash(ahp, name);
242 
243 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
244 	alp->al_fru_next = ahp->ah_fru_hash[h];
245 	ahp->ah_fru_hash[h] = alp;
246 }
247 
248 static void
249 fmd_asru_label_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
250     char *name)
251 {
252 	uint_t h = fmd_asru_strhash(ahp, name);
253 
254 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
255 	alp->al_label_next = ahp->ah_label_hash[h];
256 	ahp->ah_label_hash[h] = alp;
257 }
258 
259 static void
260 fmd_asru_rsrc_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
261     char *name)
262 {
263 	uint_t h = fmd_asru_strhash(ahp, name);
264 
265 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
266 	alp->al_rsrc_next = ahp->ah_rsrc_hash[h];
267 	ahp->ah_rsrc_hash[h] = alp;
268 }
269 
270 static void
271 fmd_asru_al_destroy(fmd_asru_link_t *alp)
272 {
273 	ASSERT(alp->al_refs == 0);
274 	ASSERT(MUTEX_HELD(&alp->al_asru->asru_lock));
275 
276 	if (alp->al_log != NULL)
277 		fmd_log_rele(alp->al_log);
278 
279 	fmd_free(alp->al_uuid, alp->al_uuidlen + 1);
280 	nvlist_free(alp->al_event);
281 	fmd_strfree(alp->al_rsrc_name);
282 	fmd_strfree(alp->al_case_uuid);
283 	fmd_strfree(alp->al_fru_name);
284 	fmd_strfree(alp->al_asru_name);
285 	fmd_strfree(alp->al_label);
286 	nvlist_free(alp->al_asru_fmri);
287 	fmd_free(alp, sizeof (fmd_asru_link_t));
288 }
289 
290 static fmd_asru_link_t *
291 fmd_asru_al_hold(fmd_asru_link_t *alp)
292 {
293 	fmd_asru_t *ap = alp->al_asru;
294 
295 	(void) pthread_mutex_lock(&ap->asru_lock);
296 	ap->asru_refs++;
297 	alp->al_refs++;
298 	ASSERT(alp->al_refs != 0);
299 	(void) pthread_mutex_unlock(&ap->asru_lock);
300 	return (alp);
301 }
302 
303 static void fmd_asru_destroy(fmd_asru_t *ap);
304 
305 /*ARGSUSED*/
306 static void
307 fmd_asru_al_hash_release(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp)
308 {
309 	fmd_asru_t *ap = alp->al_asru;
310 
311 	(void) pthread_mutex_lock(&ap->asru_lock);
312 	ASSERT(alp->al_refs != 0);
313 	if (--alp->al_refs == 0)
314 		fmd_asru_al_destroy(alp);
315 	ASSERT(ap->asru_refs != 0);
316 	if (--ap->asru_refs == 0)
317 		fmd_asru_destroy(ap);
318 	else
319 		(void) pthread_mutex_unlock(&ap->asru_lock);
320 }
321 
322 static int
323 fmd_asru_get_namestr(nvlist_t *nvl, char **name, ssize_t *namelen)
324 {
325 	if ((*namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) == -1)
326 		return (EFMD_ASRU_FMRI);
327 	*name = fmd_alloc(*namelen + 1, FMD_SLEEP);
328 	if (fmd_fmri_nvl2str(nvl, *name, *namelen + 1) == -1) {
329 		if (*name != NULL)
330 			fmd_free(*name, *namelen + 1);
331 		return (EFMD_ASRU_FMRI);
332 	}
333 	return (0);
334 }
335 
/*
 * Create a per-fault link structure for the fault event 'nvl' belonging to
 * case 'cp', attach it to the per-asru structure named by the event's ASRU
 * FMRI (creating that structure when the lookup misses), and insert the link
 * into the asru, fru, rsrc, label and case hashes.  'al_uuid' is copied into
 * the link.  The event is duplicated into both the asru (asru_event) and the
 * link (al_event).  Returns the new link.
 */
336 static fmd_asru_link_t *
337 fmd_asru_al_create(fmd_asru_hash_t *ahp, nvlist_t *nvl, fmd_case_t *cp,
338     const char *al_uuid)
339 {
340 	nvlist_t *asru = NULL, *fru, *rsrc;
341 	int got_rsrc = 0, got_asru = 0, got_fru = 0;
342 	ssize_t fru_namelen, rsrc_namelen, asru_namelen;
343 	char *asru_name, *rsrc_name, *fru_name, *name, *label;
344 	fmd_asru_link_t *alp;
345 	fmd_asru_t *ap;
346 	boolean_t msg;
347 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
348 
	/*
	 * Convert each FMRI present in the event to its string form.  A
	 * got_* flag is set only when the member exists and the conversion
	 * succeeded, i.e. only when the matching *_name buffer is valid.
	 */
349 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &asru) == 0 &&
350 	    fmd_asru_get_namestr(asru, &asru_name, &asru_namelen) == 0)
351 		got_asru = 1;
352 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &fru) == 0 &&
353 	    fmd_asru_get_namestr(fru, &fru_name, &fru_namelen) == 0)
354 		got_fru = 1;
355 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0 &&
356 	    fmd_asru_get_namestr(rsrc, &rsrc_name, &rsrc_namelen) == 0)
357 		got_rsrc = 1;
358 	if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, &label) != 0)
359 		label = "";
360 
361 	/*
362 	 * Grab the rwlock as a writer; Then create and insert the asru with
363 	 * ahp->ah_lock held and hash it in. We'll then drop the rwlock and
364 	 * proceed to initializing the asru.
365 	 */
366 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
367 
368 	/*
369 	 * Create and initialise the per-fault "link" structure.
370 	 */
371 	alp = fmd_zalloc(sizeof (fmd_asru_link_t), FMD_SLEEP);
372 	if (got_asru)
373 		(void) nvlist_xdup(asru, &alp->al_asru_fmri, &fmd.d_nva);
374 	alp->al_uuid = fmd_strdup(al_uuid, FMD_SLEEP);
375 	alp->al_uuidlen = strlen(alp->al_uuid);
376 	alp->al_refs = 1;
377 
378 	/*
379 	 * If this is the first fault for this asru, then create the per-asru
380 	 * structure and link into the hash.
381 	 */
	/*
	 * Events without a usable ASRU all share the entry named "".  A hit
	 * in fmd_asru_hash_lookup() returns the asru with a hold placed on
	 * it; its previously saved event is freed and replaced below.
	 */
382 	name = got_asru ? asru_name : "";
383 	if ((ap = fmd_asru_hash_lookup(ahp, name)) == NULL) {
384 		ap = fmd_asru_create(ahp, al_uuid, name, got_asru ? asru :
385 		    NULL);
386 		fmd_asru_hash_insert(ahp, ap);
387 	} else
388 		nvlist_free(ap->asru_event);
389 	(void) nvlist_xdup(nvl, &ap->asru_event, &fmd.d_nva);
390 
391 	/*
392 	 * Put the link structure on the list associated with the per-asru
393 	 * structure. Then put the link structure on the various hashes.
394 	 */
	/*
	 * The name buffers produced by fmd_asru_get_namestr() are stored in
	 * the link as-is (no copy); a missing name is stored as a freshly
	 * duplicated empty string.  fmd_asru_al_destroy() frees them.
	 */
395 	fmd_list_append(&ap->asru_list, (fmd_list_t *)alp);
396 	alp->al_asru = ap;
397 	alp->al_asru_name = got_asru ? asru_name : fmd_strdup("", FMD_SLEEP);
398 	fmd_asru_asru_hash_insert(ahp, alp, alp->al_asru_name);
399 	alp->al_fru_name = got_fru ? fru_name : fmd_strdup("", FMD_SLEEP);
400 	fmd_asru_fru_hash_insert(ahp, alp, alp->al_fru_name);
401 	alp->al_rsrc_name = got_rsrc ? rsrc_name : fmd_strdup("", FMD_SLEEP);
402 	fmd_asru_rsrc_hash_insert(ahp, alp, alp->al_rsrc_name);
403 	alp->al_label = fmd_strdup(label, FMD_SLEEP);
404 	fmd_asru_label_hash_insert(ahp, alp, label);
405 	alp->al_case_uuid = fmd_strdup(cip->ci_uuid, FMD_SLEEP);
406 	fmd_asru_case_hash_insert(ahp, alp, cip->ci_uuid);
	/* asru_lock is taken before ah_lock is dropped. */
407 	(void) pthread_mutex_lock(&ap->asru_lock);
408 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
409 
410 	ap->asru_case = alp->al_case = cp;
	/* A suspect whose FM_SUSPECT_MESSAGE is explicitly false is hidden. */
411 	if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE, &msg) == 0 &&
412 	    msg == B_FALSE)
413 		ap->asru_flags |= FMD_ASRU_INVISIBLE;
414 	(void) nvlist_xdup(nvl, &alp->al_event, &fmd.d_nva);
	/* Mark the asru valid and wake any thread waiting on asru_cv. */
415 	ap->asru_flags |= FMD_ASRU_VALID;
416 	(void) pthread_cond_broadcast(&ap->asru_cv);
417 	(void) pthread_mutex_unlock(&ap->asru_lock);
418 	return (alp);
419 }
420 
/*
 * Log replay callback, passed to fmd_log_replay() by fmd_asru_hash_logopen().
 * Rebuilds the case, the suspect and the resource cache entry described by
 * one saved ASRU log record 'ep' read from log 'lp'.  If the record lacks the
 * faulty flag or the embedded fault event, an error is reported, ah_error is
 * set to EFMD_ASRU_EVENT and nothing is created.
 */
421 static void
422 fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp)
423 {
424 	nvlist_t *nvl = FMD_EVENT_NVL(ep);
425 	boolean_t faulty = FMD_B_FALSE, unusable = FMD_B_FALSE;
426 	int ps;
427 	boolean_t repaired = FMD_B_FALSE, replaced = FMD_B_FALSE;
428 	boolean_t acquitted = FMD_B_FALSE;
429 	nvlist_t *flt, *flt_copy, *asru;
430 	char *case_uuid = NULL, *case_code = NULL;
431 	fmd_asru_t *ap;
432 	fmd_asru_link_t *alp;
433 	fmd_case_t *cp;
434 	int64_t *diag_time;
435 	uint_t nelem;
436 	topo_hdl_t *thp;
437 	char *class;
438 	nvlist_t *rsrc;
439 	int err;
440 
441 	/*
442 	 * Extract the most recent values of 'faulty' from the event log.
443 	 */
444 	if (nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY,
445 	    &faulty) != 0) {
446 		fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: "
447 		    "invalid event log record\n", lp->log_name);
448 		ahp->ah_error = EFMD_ASRU_EVENT;
449 		return;
450 	}
451 	if (nvlist_lookup_nvlist(nvl, FM_RSRC_ASRU_EVENT, &flt) != 0) {
452 		fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: "
453 		    "invalid event log record\n", lp->log_name);
454 		ahp->ah_error = EFMD_ASRU_EVENT;
455 		return;
456 	}
	/*
	 * The remaining members are optional: when a lookup fails its result
	 * is ignored and the initialised default above is kept.
	 */
457 	(void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_UUID, &case_uuid);
458 	(void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_CODE, &case_code);
459 	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_UNUSABLE,
460 	    &unusable);
461 	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPAIRED,
462 	    &repaired);
463 	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPLACED,
464 	    &replaced);
465 	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_ACQUITTED,
466 	    &acquitted);
467 
468 	/*
469 	 * Attempt to recreate the case in either the CLOSED or REPAIRED state
470 	 * (depending on whether the faulty bit is still set).
471 	 * If the case is already present, fmd_case_recreate() will return it.
472 	 * If not, we'll create a new orphaned case. Either way,  we use the
473 	 * ASRU event to insert a suspect into the partially-restored case.
474 	 */
475 	fmd_module_lock(fmd.d_rmod);
476 	cp = fmd_case_recreate(fmd.d_rmod, NULL, faulty ? FMD_CASE_CLOSED :
477 	    FMD_CASE_REPAIRED, case_uuid, case_code);
478 	fmd_case_hold(cp);
479 	fmd_module_unlock(fmd.d_rmod);
	/*
	 * Prefer the diagnosis time saved in the record (at least two
	 * elements are required); otherwise fall back to the log's st_ctime.
	 */
480 	if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time,
481 	    &nelem) == 0 && nelem >= 2)
482 		fmd_case_settime(cp, diag_time[0], diag_time[1]);
483 	else
484 		fmd_case_settime(cp, lp->log_stat.st_ctime, 0);
485 	(void) nvlist_xdup(flt, &flt_copy, &fmd.d_nva);
486 
487 	/*
488 	 * For faults with a resource, re-evaluate the asru from the resource.
489 	 */
490 	thp = fmd_fmri_topo_hold(TOPO_VERSION);
491 	if (nvlist_lookup_string(flt_copy, FM_CLASS, &class) == 0 &&
492 	    strncmp(class, "fault", 5) == 0 &&
493 	    nvlist_lookup_nvlist(flt_copy, FM_FAULT_RESOURCE, &rsrc) == 0 &&
494 	    rsrc != NULL && topo_fmri_asru(thp, rsrc, &asru, &err) == 0) {
495 		(void) nvlist_remove(flt_copy, FM_FAULT_ASRU, DATA_TYPE_NVLIST);
496 		(void) nvlist_add_nvlist(flt_copy, FM_FAULT_ASRU, asru);
497 		nvlist_free(asru);
498 	}
499 	fmd_fmri_topo_rele(thp);
500 
	/*
	 * 'flt' is re-pointed at a private duplicate of the (possibly
	 * updated) fault; that duplicate is freed at the end of this
	 * function.  flt_copy is passed to fmd_case_recreate_suspect() and
	 * is not freed here -- presumably the case takes ownership of it;
	 * confirm against fmd_case_recreate_suspect().
	 */
501 	(void) nvlist_xdup(flt_copy, &flt, &fmd.d_nva);
502 
503 	fmd_case_recreate_suspect(cp, flt_copy);
504 
505 	/*
506 	 * Now create the resource cache entries.
507 	 */
508 	alp = fmd_asru_al_create(ahp, flt, cp, fmd_strbasename(lp->log_name));
509 	ap = alp->al_asru;
510 
511 	/*
512 	 * Check to see if the resource is still present in the system.
513 	 */
514 	ps = fmd_asru_replacement_state(flt);
515 	if (ps == FMD_OBJ_STATE_REPLACED) {
516 		replaced = FMD_B_TRUE;
517 	} else if (ps == FMD_OBJ_STATE_STILL_PRESENT ||
518 	    ps == FMD_OBJ_STATE_UNKNOWN) {
519 		ap->asru_flags |= FMD_ASRU_PRESENT;
		/*
		 * Refresh 'unusable' from the current service state of the
		 * asru, overriding the value read from the log record.
		 */
520 		if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU,
521 		    &asru) == 0) {
522 			int us;
523 
524 			switch (fmd_fmri_service_state(asru)) {
525 			case FMD_SERVICE_STATE_UNUSABLE:
526 				unusable = FMD_B_TRUE;
527 				break;
528 			case FMD_SERVICE_STATE_OK:
529 			case FMD_SERVICE_STATE_DEGRADED:
530 				unusable = FMD_B_FALSE;
531 				break;
532 			case FMD_SERVICE_STATE_UNKNOWN:
533 			case -1:
534 				/* not supported by scheme */
				/*
				 * Fall back to fmd_fmri_unusable(); a
				 * negative result leaves the logged value
				 * of 'unusable' untouched.
				 */
535 				us = fmd_fmri_unusable(asru);
536 				if (us > 0)
537 					unusable = FMD_B_TRUE;
538 				else if (us == 0)
539 					unusable = FMD_B_FALSE;
540 				break;
541 			}
542 		}
543 	}
544 
545 	nvlist_free(flt);
546 
	/* Record the restored state on both the asru and the link. */
547 	ap->asru_flags |= FMD_ASRU_RECREATED;
548 	if (faulty) {
549 		alp->al_flags |= FMD_ASRU_FAULTY;
550 		ap->asru_flags |= FMD_ASRU_FAULTY;
551 	}
552 	if (unusable) {
553 		alp->al_flags |= FMD_ASRU_UNUSABLE;
554 		ap->asru_flags |= FMD_ASRU_UNUSABLE;
555 	}
	/* At most one reason is recorded: replaced, repaired, acquitted. */
556 	if (replaced)
557 		alp->al_reason = FMD_ASRU_REPLACED;
558 	else if (repaired)
559 		alp->al_reason = FMD_ASRU_REPAIRED;
560 	else if (acquitted)
561 		alp->al_reason = FMD_ASRU_ACQUITTED;
562 
563 	TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", alp->al_uuid,
564 	    (void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE]));
565 }
566 
567 static void
568 fmd_asru_hash_discard(fmd_asru_hash_t *ahp, const char *uuid, int err)
569 {
570 	char src[PATH_MAX], dst[PATH_MAX];
571 
572 	(void) snprintf(src, PATH_MAX, "%s/%s", ahp->ah_dirpath, uuid);
573 	(void) snprintf(dst, PATH_MAX, "%s/%s-", ahp->ah_dirpath, uuid);
574 
575 	if (err != 0)
576 		err = rename(src, dst);
577 	else
578 		err = unlink(src);
579 
580 	if (err != 0 && errno != ENOENT)
581 		fmd_error(EFMD_ASRU_EVENT, "failed to rename log %s", src);
582 }
583 
584 /*
585  * Open a saved log file and restore it into the ASRU hash.  If we can't even
586  * open the log, rename the log file to <uuid>- to indicate it is corrupt.  If
587  * fmd_log_replay() fails, we either delete the file (if it has reached the
588  * upper limit on cache age) or rename it for debugging if it was corrupted.
589  */
590 static void
591 fmd_asru_hash_logopen(fmd_asru_hash_t *ahp, const char *uuid)
592 {
593 	fmd_log_t *lp = fmd_log_tryopen(ahp->ah_dirpath, uuid, FMD_LOG_ASRU);
594 	uint_t n;
595 
596 	if (lp == NULL) {
597 		fmd_asru_hash_discard(ahp, uuid, errno);
598 		return;
599 	}
600 
601 	ahp->ah_error = 0;
602 	n = ahp->ah_al_count;
603 
604 	fmd_log_replay(lp, (fmd_log_f *)fmd_asru_hash_recreate, ahp);
605 	fmd_log_rele(lp);
606 
607 	if (ahp->ah_al_count == n)
608 		fmd_asru_hash_discard(ahp, uuid, ahp->ah_error);
609 }
610 
611 void
612 fmd_asru_hash_refresh(fmd_asru_hash_t *ahp)
613 {
614 	struct dirent *dp;
615 	DIR *dirp;
616 	int zero;
617 
618 	if ((dirp = opendir(ahp->ah_dirpath)) == NULL) {
619 		fmd_error(EFMD_ASRU_NODIR,
620 		    "failed to open asru cache directory %s", ahp->ah_dirpath);
621 		return;
622 	}
623 
624 	(void) fmd_conf_getprop(fmd.d_conf, "rsrc.zero", &zero);
625 
626 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
627 
628 	while ((dp = readdir(dirp)) != NULL) {
629 		if (dp->d_name[0] == '.')
630 			continue; /* skip "." and ".." */
631 
632 		if (zero)
633 			fmd_asru_hash_discard(ahp, dp->d_name, 0);
634 		else if (!fmd_strmatch(dp->d_name, "*-"))
635 			fmd_asru_hash_logopen(ahp, dp->d_name);
636 	}
637 
638 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
639 	(void) closedir(dirp);
640 }
641 
642 /*
643  * If the resource is present and faulty but not unusable, replay the fault
644  * event that caused it be marked faulty.  This will cause the agent
645  * subscribing to this fault class to again disable the resource.
646  */
647 /*ARGSUSED*/
648 static void
649 fmd_asru_hash_replay_asru(fmd_asru_t *ap, void *data)
650 {
651 	fmd_event_t *e;
652 	nvlist_t *nvl;
653 	char *class;
654 
655 	if (ap->asru_event != NULL && (ap->asru_flags & (FMD_ASRU_STATE |
656 	    FMD_ASRU_PRESENT)) == (FMD_ASRU_FAULTY | FMD_ASRU_PRESENT)) {
657 
658 		fmd_dprintf(FMD_DBG_ASRU,
659 		    "replaying fault event for %s", ap->asru_name);
660 
661 		(void) nvlist_xdup(ap->asru_event, &nvl, &fmd.d_nva);
662 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
663 
664 		(void) nvlist_add_string(nvl, FMD_EVN_UUID,
665 		    ((fmd_case_impl_t *)ap->asru_case)->ci_uuid);
666 
667 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
668 		fmd_dispq_dispatch(fmd.d_disp, e, class);
669 	}
670 }
671 
/*
 * Run fmd_asru_hash_replay_asru() over the asrus in 'ahp' by way of
 * fmd_asru_hash_apply(), which only invokes the callback for entries that
 * have a non-NULL asru_fmri.
 */
672 void
673 fmd_asru_hash_replay(fmd_asru_hash_t *ahp)
674 {
675 	fmd_asru_hash_apply(ahp, fmd_asru_hash_replay_asru, NULL);
676 }
677 
678 /*
679  * Check if the resource is still present. If not, and if the rsrc.age time
680  * has expired, then do an implicit repair on the resource.
681  */
682 /*ARGSUSED*/
683 static void
684 fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *arg)
685 {
686 	struct timeval tv;
687 	fmd_log_t *lp;
688 	hrtime_t hrt;
689 	int ps;
690 	int err;
691 
692 	ps = fmd_asru_replacement_state(alp->al_event);
693 	if (ps == FMD_OBJ_STATE_REPLACED) {
694 		fmd_asru_replaced(alp, &err);
695 	} else if (ps == FMD_OBJ_STATE_NOT_PRESENT) {
696 		fmd_time_gettimeofday(&tv);
697 		lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid,
698 		    FMD_LOG_ASRU);
699 		hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime);
700 		fmd_log_rele(lp);
701 		if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime)
702 			fmd_asru_removed(alp);
703 	}
704 }
705 
706 void
707 fmd_asru_clear_aged_rsrcs()
708 {
709 	fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, NULL);
710 }
711 
712 fmd_asru_hash_t *
713 fmd_asru_hash_create(const char *root, const char *dir)
714 {
715 	fmd_asru_hash_t *ahp;
716 	char path[PATH_MAX];
717 
718 	ahp = fmd_alloc(sizeof (fmd_asru_hash_t), FMD_SLEEP);
719 	(void) pthread_rwlock_init(&ahp->ah_lock, NULL);
720 	ahp->ah_hashlen = fmd.d_str_buckets;
721 	ahp->ah_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, FMD_SLEEP);
722 	ahp->ah_asru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
723 	    FMD_SLEEP);
724 	ahp->ah_case_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
725 	    FMD_SLEEP);
726 	ahp->ah_fru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
727 	    FMD_SLEEP);
728 	ahp->ah_label_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
729 	    FMD_SLEEP);
730 	ahp->ah_rsrc_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
731 	    FMD_SLEEP);
732 	(void) snprintf(path, sizeof (path), "%s/%s", root, dir);
733 	ahp->ah_dirpath = fmd_strdup(path, FMD_SLEEP);
734 	(void) fmd_conf_getprop(fmd.d_conf, "rsrc.age", &ahp->ah_lifetime);
735 	(void) fmd_conf_getprop(fmd.d_conf, "fakenotpresent",
736 	    (uint32_t *)&fmd_asru_fake_not_present);
737 	ahp->ah_al_count = 0;
738 	ahp->ah_count = 0;
739 	ahp->ah_error = 0;
740 	ahp->ah_topo = fmd_topo_hold();
741 
742 	return (ahp);
743 }
744 
745 void
746 fmd_asru_hash_destroy(fmd_asru_hash_t *ahp)
747 {
748 	fmd_asru_link_t *alp, *np;
749 	uint_t i;
750 
751 	for (i = 0; i < ahp->ah_hashlen; i++) {
752 		for (alp = ahp->ah_case_hash[i]; alp != NULL; alp = np) {
753 			np = alp->al_case_next;
754 			alp->al_case_next = NULL;
755 			fmd_case_rele(alp->al_case);
756 			alp->al_case = NULL;
757 			fmd_asru_al_hash_release(ahp, alp);
758 		}
759 	}
760 
761 	fmd_strfree(ahp->ah_dirpath);
762 	fmd_free(ahp->ah_hash, sizeof (void *) * ahp->ah_hashlen);
763 	fmd_free(ahp->ah_asru_hash, sizeof (void *) * ahp->ah_hashlen);
764 	fmd_free(ahp->ah_case_hash, sizeof (void *) * ahp->ah_hashlen);
765 	fmd_free(ahp->ah_fru_hash, sizeof (void *) * ahp->ah_hashlen);
766 	fmd_free(ahp->ah_label_hash, sizeof (void *) * ahp->ah_hashlen);
767 	fmd_free(ahp->ah_rsrc_hash, sizeof (void *) * ahp->ah_hashlen);
768 	fmd_topo_rele(ahp->ah_topo);
769 	fmd_free(ahp, sizeof (fmd_asru_hash_t));
770 }
771 
772 /*
773  * Take a snapshot of the ASRU database by placing an additional hold on each
774  * member in an auxiliary array, and then call 'func' for each ASRU.
775  */
776 void
777 fmd_asru_hash_apply(fmd_asru_hash_t *ahp,
778     void (*func)(fmd_asru_t *, void *), void *arg)
779 {
780 	fmd_asru_t *ap, **aps, **app;
781 	uint_t apc, i;
782 
783 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
784 
785 	aps = app = fmd_alloc(ahp->ah_count * sizeof (fmd_asru_t *), FMD_SLEEP);
786 	apc = ahp->ah_count;
787 
788 	for (i = 0; i < ahp->ah_hashlen; i++) {
789 		for (ap = ahp->ah_hash[i]; ap != NULL; ap = ap->asru_next)
790 			*app++ = fmd_asru_hold(ap);
791 	}
792 
793 	ASSERT(app == aps + apc);
794 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
795 
796 	for (i = 0; i < apc; i++) {
797 		if (aps[i]->asru_fmri != NULL)
798 			func(aps[i], arg);
799 		fmd_asru_hash_release(ahp, aps[i]);
800 	}
801 
802 	fmd_free(aps, apc * sizeof (fmd_asru_t *));
803 }
804 
805 void
806 fmd_asru_al_hash_apply(fmd_asru_hash_t *ahp,
807     void (*func)(fmd_asru_link_t *, void *), void *arg)
808 {
809 	fmd_asru_link_t *alp, **alps, **alpp;
810 	uint_t alpc, i;
811 
812 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
813 
814 	alps = alpp = fmd_alloc(ahp->ah_al_count * sizeof (fmd_asru_link_t *),
815 	    FMD_SLEEP);
816 	alpc = ahp->ah_al_count;
817 
818 	for (i = 0; i < ahp->ah_hashlen; i++) {
819 		for (alp = ahp->ah_case_hash[i]; alp != NULL;
820 		    alp = alp->al_case_next)
821 			*alpp++ = fmd_asru_al_hold(alp);
822 	}
823 
824 	ASSERT(alpp == alps + alpc);
825 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
826 
827 	for (i = 0; i < alpc; i++) {
828 		func(alps[i], arg);
829 		fmd_asru_al_hash_release(ahp, alps[i]);
830 	}
831 
832 	fmd_free(alps, alpc * sizeof (fmd_asru_link_t *));
833 }
834 
/*
 * Common worker for the fmd_asru_hash_apply_by_*() routines.
 *
 * 'hash' is one of the link hashes of 'ahp'; 'match_offset' is the offset
 * within fmd_asru_link_t of the name field that keys that hash and
 * 'next_offset' the offset of its chain pointer.  With ah_lock held as
 * reader, the bucket for 'name' is walked twice: once to count the links
 * whose key matches 'name', and once to place a hold on each match and
 * collect it in a snapshot array of exactly that size.  The lock is then
 * dropped, 'func' is called on each collected link with 'arg', and each hold
 * is released.
 */
835 static void
836 fmd_asru_do_hash_apply(fmd_asru_hash_t *ahp, char *name,
837     void (*func)(fmd_asru_link_t *, void *), void *arg,
838     fmd_asru_link_t **hash, size_t match_offset, size_t next_offset)
839 {
840 	fmd_asru_link_t *alp, **alps, **alpp;
841 	uint_t alpc = 0, i;
842 	uint_t h;
843 
844 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
845 
846 	h = fmd_asru_strhash(ahp, name);
847 
	/* Pass 1: count the matching links so the snapshot can be sized. */
848 	for (alp = hash[h]; alp != NULL; alp =
849 	    /* LINTED pointer alignment */
850 	    FMD_ASRU_AL_HASH_NEXT(alp, next_offset))
851 		if (fmd_asru_strcmp(ahp,
852 		    /* LINTED pointer alignment */
853 		    FMD_ASRU_AL_HASH_NAME(alp, match_offset), name))
854 			alpc++;
855 
856 	alps = alpp = fmd_alloc(alpc * sizeof (fmd_asru_link_t *), FMD_SLEEP);
857 
	/* Pass 2: hold each matching link and record it in the snapshot. */
858 	for (alp = hash[h]; alp != NULL; alp =
859 	    /* LINTED pointer alignment */
860 	    FMD_ASRU_AL_HASH_NEXT(alp, next_offset))
861 		if (fmd_asru_strcmp(ahp,
862 		    /* LINTED pointer alignment */
863 		    FMD_ASRU_AL_HASH_NAME(alp, match_offset), name))
864 			*alpp++ = fmd_asru_al_hold(alp);
865 
866 	ASSERT(alpp == alps + alpc);
867 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
868 
	/* The callbacks run without ah_lock held. */
869 	for (i = 0; i < alpc; i++) {
870 		func(alps[i], arg);
871 		fmd_asru_al_hash_release(ahp, alps[i]);
872 	}
873 
874 	fmd_free(alps, alpc * sizeof (fmd_asru_link_t *));
875 }
876 
877 void
878 fmd_asru_hash_apply_by_asru(fmd_asru_hash_t *ahp, char *name,
879     void (*func)(fmd_asru_link_t *, void *), void *arg)
880 {
881 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_asru_hash,
882 	    offsetof(fmd_asru_link_t, al_asru_name),
883 	    offsetof(fmd_asru_link_t, al_asru_next));
884 }
885 
886 void
887 fmd_asru_hash_apply_by_case(fmd_asru_hash_t *ahp, fmd_case_t *cp,
888 	void (*func)(fmd_asru_link_t *, void *), void *arg)
889 {
890 	fmd_asru_do_hash_apply(ahp, ((fmd_case_impl_t *)cp)->ci_uuid, func, arg,
891 	    ahp->ah_case_hash, offsetof(fmd_asru_link_t, al_case_uuid),
892 	    offsetof(fmd_asru_link_t, al_case_next));
893 }
894 
895 void
896 fmd_asru_hash_apply_by_fru(fmd_asru_hash_t *ahp, char *name,
897     void (*func)(fmd_asru_link_t *, void *), void *arg)
898 {
899 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_fru_hash,
900 	    offsetof(fmd_asru_link_t, al_fru_name),
901 	    offsetof(fmd_asru_link_t, al_fru_next));
902 }
903 
904 void
905 fmd_asru_hash_apply_by_rsrc(fmd_asru_hash_t *ahp, char *name,
906     void (*func)(fmd_asru_link_t *, void *), void *arg)
907 {
908 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_rsrc_hash,
909 	    offsetof(fmd_asru_link_t, al_rsrc_name),
910 	    offsetof(fmd_asru_link_t, al_rsrc_next));
911 }
912 
913 void
914 fmd_asru_hash_apply_by_label(fmd_asru_hash_t *ahp, char *name,
915     void (*func)(fmd_asru_link_t *, void *), void *arg)
916 {
917 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_label_hash,
918 	    offsetof(fmd_asru_link_t, al_label),
919 	    offsetof(fmd_asru_link_t, al_label_next));
920 }
921 
922 /*
923  * Lookup an asru in the hash by name and place a hold on it.  If the asru is
924  * not found, no entry is created and NULL is returned.
925  */
926 fmd_asru_t *
927 fmd_asru_hash_lookup_name(fmd_asru_hash_t *ahp, const char *name)
928 {
929 	fmd_asru_t *ap;
930 
931 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
932 	ap = fmd_asru_hash_lookup(ahp, name);
933 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
934 
935 	return (ap);
936 }
937 
938 /*
939  * Create a resource cache entry using the fault event "nvl" for one of the
940  * suspects from the case "cp".
941  *
942  * The fault event can have the following components :  FM_FAULT_ASRU,
943  * FM_FAULT_FRU, FM_FAULT_RESOURCE. These should be set by the Diagnosis Engine
944  * when calling fmd_nvl_create_fault(). In the general case, these are all
945  * optional and an entry will always be added into the cache even if one or all
946  * of these fields is missing.
947  *
948  * However, for hardware faults the recommended practice is that the fault
949  * event should always have the FM_FAULT_RESOURCE field present and that this
950  * should be represented in hc-scheme.
951  *
952  * Currently the DE should also add the FM_FAULT_ASRU and FM_FAULT_FRU fields
953  * where known, though at some future stage fmd might be able to fill these
954  * in automatically from the topology.
955  */
956 fmd_asru_link_t *
957 fmd_asru_hash_create_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, nvlist_t *nvl)
958 {
959 	char *parsed_uuid;
960 	uuid_t uuid;
961 	int uuidlen;
962 	fmd_asru_link_t *alp;
963 
964 	/*
965 	 * Generate a UUID for the ASRU.  libuuid cleverly gives us no
966 	 * interface for specifying or learning the buffer size.  Sigh.
967 	 * The spec says 36 bytes but we use a tunable just to be safe.
968 	 */
969 	(void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &uuidlen);
970 	parsed_uuid = fmd_zalloc(uuidlen + 1, FMD_SLEEP);
971 	uuid_generate(uuid);
972 	uuid_unparse(uuid, parsed_uuid);
973 
974 	/*
975 	 * Now create the resource cache entries.
976 	 */
977 	fmd_case_hold_locked(cp);
978 	alp = fmd_asru_al_create(ahp, nvl, cp, parsed_uuid);
979 	TRACE((FMD_DBG_ASRU, "asru %s created as %p",
980 	    alp->al_uuid, (void *)alp->al_asru));
981 
982 	fmd_free(parsed_uuid, uuidlen + 1);
983 	return (alp);
984 
985 }
986 
987 /*
988  * Release the reference count on an asru obtained using fmd_asru_hash_lookup.
989  * We take 'ahp' for symmetry and in case we need to use it in future work.
990  */
991 /*ARGSUSED*/
992 void
993 fmd_asru_hash_release(fmd_asru_hash_t *ahp, fmd_asru_t *ap)
994 {
995 	(void) pthread_mutex_lock(&ap->asru_lock);
996 
997 	ASSERT(ap->asru_refs != 0);
998 	if (--ap->asru_refs == 0)
999 		fmd_asru_destroy(ap);
1000 	else
1001 		(void) pthread_mutex_unlock(&ap->asru_lock);
1002 }
1003 
1004 static void
1005 fmd_asru_do_delete_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp,
1006     fmd_asru_link_t **hash, size_t next_offset, char *name)
1007 {
1008 	uint_t h;
1009 	fmd_asru_link_t *alp, **pp, *alpnext, **alpnextp;
1010 
1011 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
1012 	h = fmd_asru_strhash(ahp, name);
1013 	pp = &hash[h];
1014 	for (alp = *pp; alp != NULL; alp = alpnext) {
1015 		/* LINTED pointer alignment */
1016 		alpnextp = FMD_ASRU_AL_HASH_NEXTP(alp, next_offset);
1017 		alpnext = *alpnextp;
1018 		if (alp->al_case == cp) {
1019 			*pp = *alpnextp;
1020 			*alpnextp = NULL;
1021 		} else
1022 			pp = alpnextp;
1023 	}
1024 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
1025 }
1026 
1027 static void
1028 fmd_asru_do_hash_delete(fmd_asru_hash_t *ahp, fmd_case_susp_t *cis,
1029     fmd_case_t *cp, fmd_asru_link_t **hash, size_t next_offset, char *nvname)
1030 {
1031 	nvlist_t *nvl;
1032 	char *name = NULL;
1033 	ssize_t namelen;
1034 
1035 	if (nvlist_lookup_nvlist(cis->cis_nvl, nvname, &nvl) == 0 &&
1036 	    (namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) != -1 &&
1037 	    (name = fmd_alloc(namelen + 1, FMD_SLEEP)) != NULL) {
1038 		if (fmd_fmri_nvl2str(nvl, name, namelen + 1) != -1)
1039 			fmd_asru_do_delete_entry(ahp, cp, hash, next_offset,
1040 			    name);
1041 		fmd_free(name, namelen + 1);
1042 	} else
1043 		fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, "");
1044 }
1045 
1046 void
1047 fmd_asru_hash_delete_case(fmd_asru_hash_t *ahp, fmd_case_t *cp)
1048 {
1049 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1050 	fmd_case_susp_t *cis;
1051 	fmd_asru_link_t *alp, **plp, *alpnext;
1052 	fmd_asru_t *ap;
1053 	char path[PATH_MAX];
1054 	char *label;
1055 	uint_t h;
1056 
1057 	/*
1058 	 * first delete hash entries for each suspect
1059 	 */
1060 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
1061 		fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_fru_hash,
1062 		    offsetof(fmd_asru_link_t, al_fru_next), FM_FAULT_FRU);
1063 		fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_rsrc_hash,
1064 		    offsetof(fmd_asru_link_t, al_rsrc_next), FM_FAULT_RESOURCE);
1065 		if (nvlist_lookup_string(cis->cis_nvl, FM_FAULT_LOCATION,
1066 		    &label) != 0)
1067 			label = "";
1068 		fmd_asru_do_delete_entry(ahp, cp, ahp->ah_label_hash,
1069 		    offsetof(fmd_asru_link_t, al_label_next), label);
1070 		fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_asru_hash,
1071 		    offsetof(fmd_asru_link_t, al_asru_next), FM_FAULT_ASRU);
1072 	}
1073 
1074 	/*
1075 	 * then delete associated case hash entries
1076 	 */
1077 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
1078 	h = fmd_asru_strhash(ahp, cip->ci_uuid);
1079 	plp = &ahp->ah_case_hash[h];
1080 	for (alp = *plp; alp != NULL; alp = alpnext) {
1081 		alpnext = alp->al_case_next;
1082 		if (alp->al_case == cp) {
1083 			*plp = alp->al_case_next;
1084 			alp->al_case_next = NULL;
1085 			ASSERT(ahp->ah_al_count != 0);
1086 			ahp->ah_al_count--;
1087 
1088 			/*
1089 			 * decrement case ref.
1090 			 */
1091 			fmd_case_rele_locked(cp);
1092 			alp->al_case = NULL;
1093 
1094 			/*
1095 			 * If we found a matching ASRU, unlink its log file and
1096 			 * then release the hash entry. Note that it may still
1097 			 * be referenced if another thread is manipulating it;
1098 			 * this is ok because once we unlink, the log file will
1099 			 * not be restored, and the log data will be freed when
1100 			 * all of the referencing threads release their
1101 			 * respective references.
1102 			 */
1103 			(void) snprintf(path, sizeof (path), "%s/%s",
1104 			    ahp->ah_dirpath, alp->al_uuid);
1105 			if (unlink(path) != 0)
1106 				fmd_error(EFMD_ASRU_UNLINK,
1107 				    "failed to unlink asru %s", path);
1108 
1109 			/*
1110 			 * Now unlink from the global per-resource cache
1111 			 * and if this is the last link then remove that from
1112 			 * it's own hash too.
1113 			 */
1114 			ap = alp->al_asru;
1115 			(void) pthread_mutex_lock(&ap->asru_lock);
1116 			fmd_list_delete(&ap->asru_list, alp);
1117 			if (ap->asru_list.l_next == NULL) {
1118 				uint_t h;
1119 				fmd_asru_t *ap2, **pp;
1120 				fmd_asru_t *apnext, **apnextp;
1121 
1122 				ASSERT(ahp->ah_count != 0);
1123 				ahp->ah_count--;
1124 				h = fmd_asru_strhash(ahp, ap->asru_name);
1125 				pp = &ahp->ah_hash[h];
1126 				for (ap2 = *pp; ap2 != NULL; ap2 = apnext) {
1127 					apnextp = &ap2->asru_next;
1128 					apnext = *apnextp;
1129 					if (ap2 == ap) {
1130 						*pp = *apnextp;
1131 						*apnextp = NULL;
1132 					} else
1133 						pp = apnextp;
1134 				}
1135 			}
1136 			(void) pthread_mutex_unlock(&ap->asru_lock);
1137 			fmd_asru_al_hash_release(ahp, alp);
1138 		} else
1139 			plp = &alp->al_case_next;
1140 	}
1141 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
1142 }
1143 
1144 static void
1145 fmd_asru_repair_containee(fmd_asru_link_t *alp, void *er)
1146 {
1147 	if (er && alp->al_asru_fmri && fmd_fmri_contains(er,
1148 	    alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
1149 	    FMD_ASRU_REPAIRED))
1150 		fmd_case_update(alp->al_case);
1151 }
1152 
1153 void
1154 fmd_asru_repaired(fmd_asru_link_t *alp, void *er)
1155 {
1156 	int flags;
1157 	int rval;
1158 
1159 	/*
1160 	 * repair this asru cache entry
1161 	 */
1162 	rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_REPAIRED);
1163 
1164 	/*
1165 	 * now check if all entries associated with this asru are repaired and
1166 	 * if so repair containees
1167 	 */
1168 	(void) pthread_mutex_lock(&alp->al_asru->asru_lock);
1169 	flags = alp->al_asru->asru_flags;
1170 	(void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
1171 	if (!(flags & FMD_ASRU_FAULTY))
1172 		fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_containee,
1173 		    alp->al_asru_fmri);
1174 
1175 	/*
1176 	 * if called from fmd_adm_repair() and we really did clear the bit then
1177 	 * we need to do a case update to see if the associated case can be
1178 	 * repaired. No need to do this if called from fmd_case_repair() (ie
1179 	 * when er is NULL) as the case will be explicitly repaired anyway.
1180 	 */
1181 	if (er) {
1182 		*(int *)er = 0;
1183 		if (rval)
1184 			fmd_case_update(alp->al_case);
1185 	}
1186 }
1187 
1188 static void
1189 fmd_asru_acquit_containee(fmd_asru_link_t *alp, void *er)
1190 {
1191 	if (er && alp->al_asru_fmri && fmd_fmri_contains(er,
1192 	    alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
1193 	    FMD_ASRU_ACQUITTED))
1194 		fmd_case_update(alp->al_case);
1195 }
1196 
1197 void
1198 fmd_asru_acquit(fmd_asru_link_t *alp, void *er)
1199 {
1200 	int flags;
1201 	int rval;
1202 
1203 	/*
1204 	 * acquit this asru cache entry
1205 	 */
1206 	rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_ACQUITTED);
1207 
1208 	/*
1209 	 * now check if all entries associated with this asru are acquitted and
1210 	 * if so acquit containees
1211 	 */
1212 	(void) pthread_mutex_lock(&alp->al_asru->asru_lock);
1213 	flags = alp->al_asru->asru_flags;
1214 	(void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
1215 	if (!(flags & FMD_ASRU_FAULTY))
1216 		fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_acquit_containee,
1217 		    alp->al_asru_fmri);
1218 
1219 	/*
1220 	 * if called from fmd_adm_acquit() and we really did clear the bit then
1221 	 * we need to do a case update to see if the associated case can be
1222 	 * repaired. No need to do this if called from fmd_case_acquit() (ie
1223 	 * when er is NULL) as the case will be explicitly repaired anyway.
1224 	 */
1225 	if (er) {
1226 		*(int *)er = 0;
1227 		if (rval)
1228 			fmd_case_update(alp->al_case);
1229 	}
1230 }
1231 
1232 static void
1233 fmd_asru_replaced_containee(fmd_asru_link_t *alp, void *er)
1234 {
1235 	if (er && alp->al_asru_fmri && fmd_fmri_contains(er,
1236 	    alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
1237 	    FMD_ASRU_REPLACED))
1238 		fmd_case_update(alp->al_case);
1239 }
1240 
1241 void
1242 fmd_asru_replaced(fmd_asru_link_t *alp, void *er)
1243 {
1244 	int flags;
1245 	int rval;
1246 	int ps;
1247 
1248 	ps = fmd_asru_replacement_state(alp->al_event);
1249 	if (ps == FMD_OBJ_STATE_STILL_PRESENT)
1250 		return;
1251 
1252 	/*
1253 	 * mark this cache entry as replaced
1254 	 */
1255 	rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_REPLACED);
1256 
1257 	/*
1258 	 * now check if all entries associated with this asru are replaced and
1259 	 * if so replace containees
1260 	 */
1261 	(void) pthread_mutex_lock(&alp->al_asru->asru_lock);
1262 	flags = alp->al_asru->asru_flags;
1263 	(void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
1264 	if (!(flags & FMD_ASRU_FAULTY))
1265 		fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_replaced_containee,
1266 		    alp->al_asru_fmri);
1267 
1268 	*(int *)er = 0;
1269 	if (rval)
1270 		fmd_case_update(alp->al_case);
1271 }
1272 
1273 static void
1274 fmd_asru_removed_containee(fmd_asru_link_t *alp, void *er)
1275 {
1276 	if (er && alp->al_asru_fmri && fmd_fmri_contains(er,
1277 	    alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
1278 	    0))
1279 		fmd_case_update(alp->al_case);
1280 }
1281 
1282 void
1283 fmd_asru_removed(fmd_asru_link_t *alp)
1284 {
1285 	int flags;
1286 	int rval;
1287 
1288 	/*
1289 	 * mark this cache entry as replacded
1290 	 */
1291 	rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 0);
1292 
1293 	/*
1294 	 * now check if all entries associated with this asru are removed and
1295 	 * if so replace containees
1296 	 */
1297 	(void) pthread_mutex_lock(&alp->al_asru->asru_lock);
1298 	flags = alp->al_asru->asru_flags;
1299 	(void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
1300 	if (!(flags & FMD_ASRU_FAULTY))
1301 		fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_removed_containee,
1302 		    alp->al_asru_fmri);
1303 	if (rval)
1304 		fmd_case_update(alp->al_case);
1305 }
1306 
1307 static void
1308 fmd_asru_logevent(fmd_asru_link_t *alp)
1309 {
1310 	fmd_asru_t *ap = alp->al_asru;
1311 	boolean_t faulty = (alp->al_flags & FMD_ASRU_FAULTY) != 0;
1312 	boolean_t unusable = (alp->al_flags & FMD_ASRU_UNUSABLE) != 0;
1313 	boolean_t message = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0;
1314 	boolean_t repaired = (alp->al_reason == FMD_ASRU_REPAIRED);
1315 	boolean_t replaced = (alp->al_reason == FMD_ASRU_REPLACED);
1316 	boolean_t acquitted = (alp->al_reason == FMD_ASRU_ACQUITTED);
1317 
1318 	fmd_case_impl_t *cip;
1319 	fmd_event_t *e;
1320 	fmd_log_t *lp;
1321 	nvlist_t *nvl;
1322 	char *class;
1323 
1324 	ASSERT(MUTEX_HELD(&ap->asru_lock));
1325 	cip = (fmd_case_impl_t *)alp->al_case;
1326 	ASSERT(cip != NULL);
1327 
1328 	if ((lp = alp->al_log) == NULL)
1329 		lp = fmd_log_open(ap->asru_root, alp->al_uuid, FMD_LOG_ASRU);
1330 
1331 	if (lp == NULL)
1332 		return; /* can't log events if we can't open the log */
1333 
1334 	nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[faulty | (unusable << 1)],
1335 	    alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, faulty, unusable,
1336 	    message, alp->al_event, &cip->ci_tv, repaired, replaced, acquitted);
1337 
1338 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1339 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
1340 
1341 	fmd_event_hold(e);
1342 	fmd_log_append(lp, e, NULL);
1343 	fmd_event_rele(e);
1344 
1345 	/*
1346 	 * For now, we close the log file after every update to conserve file
1347 	 * descriptors and daemon overhead.  If this becomes a performance
1348 	 * issue this code can change to keep a fixed-size LRU cache of logs.
1349 	 */
1350 	fmd_log_rele(lp);
1351 	alp->al_log = NULL;
1352 }
1353 
1354 int
1355 fmd_asru_setflags(fmd_asru_link_t *alp, uint_t sflag)
1356 {
1357 	fmd_asru_t *ap = alp->al_asru;
1358 	uint_t nstate, ostate;
1359 
1360 	ASSERT(!(sflag & ~FMD_ASRU_STATE));
1361 	ASSERT(sflag != FMD_ASRU_STATE);
1362 
1363 	(void) pthread_mutex_lock(&ap->asru_lock);
1364 
1365 	ostate = alp->al_flags & FMD_ASRU_STATE;
1366 	alp->al_flags |= sflag;
1367 	nstate = alp->al_flags & FMD_ASRU_STATE;
1368 
1369 	if (nstate == ostate) {
1370 		(void) pthread_mutex_unlock(&ap->asru_lock);
1371 		return (0);
1372 	}
1373 
1374 	ap->asru_flags |= sflag;
1375 	TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid,
1376 	    _fmd_asru_snames[ostate], _fmd_asru_snames[nstate]));
1377 
1378 	fmd_asru_logevent(alp);
1379 
1380 	(void) pthread_cond_broadcast(&ap->asru_cv);
1381 	(void) pthread_mutex_unlock(&ap->asru_lock);
1382 	return (1);
1383 }
1384 
1385 int
1386 fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag, uint8_t reason)
1387 {
1388 	fmd_asru_t *ap = alp->al_asru;
1389 	fmd_asru_link_t *nalp;
1390 	uint_t nstate, ostate, flags = 0;
1391 
1392 	ASSERT(!(sflag & ~FMD_ASRU_STATE));
1393 	ASSERT(sflag != FMD_ASRU_STATE);
1394 
1395 	(void) pthread_mutex_lock(&ap->asru_lock);
1396 
1397 	ostate = alp->al_flags & FMD_ASRU_STATE;
1398 	alp->al_flags &= ~sflag;
1399 	nstate = alp->al_flags & FMD_ASRU_STATE;
1400 
1401 	if (nstate == ostate) {
1402 		if (reason > alp->al_reason) {
1403 			alp->al_reason = reason;
1404 			fmd_asru_logevent(alp);
1405 			(void) pthread_cond_broadcast(&ap->asru_cv);
1406 		}
1407 		(void) pthread_mutex_unlock(&ap->asru_lock);
1408 		return (0);
1409 	}
1410 	if (reason > alp->al_reason)
1411 		alp->al_reason = reason;
1412 
1413 	if (sflag == FMD_ASRU_UNUSABLE)
1414 		ap->asru_flags &= ~sflag;
1415 	else if (sflag == FMD_ASRU_FAULTY) {
1416 		/*
1417 		 * only clear the faulty bit if all links are clear
1418 		 */
1419 		for (nalp = fmd_list_next(&ap->asru_list); nalp != NULL;
1420 		    nalp = fmd_list_next(nalp))
1421 			flags |= nalp->al_flags;
1422 		if (!(flags & FMD_ASRU_FAULTY))
1423 			ap->asru_flags &= ~sflag;
1424 	}
1425 
1426 	TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid,
1427 	    _fmd_asru_snames[ostate], _fmd_asru_snames[nstate]));
1428 
1429 	fmd_asru_logevent(alp);
1430 
1431 	(void) pthread_cond_broadcast(&ap->asru_cv);
1432 	(void) pthread_mutex_unlock(&ap->asru_lock);
1433 
1434 	return (1);
1435 }
1436 
1437 /*
1438  * Report the current known state of the link entry (ie this particular fault
1439  * affecting this particular ASRU).
1440  */
1441 int
1442 fmd_asru_al_getstate(fmd_asru_link_t *alp)
1443 {
1444 	int us, st;
1445 	nvlist_t *asru;
1446 	int ps;
1447 
1448 	ps = fmd_asru_replacement_state(alp->al_event);
1449 	if (ps == FMD_OBJ_STATE_NOT_PRESENT)
1450 		return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE);
1451 	if (ps == FMD_OBJ_STATE_REPLACED) {
1452 		if (alp->al_reason < FMD_ASRU_REPLACED)
1453 			alp->al_reason = FMD_ASRU_REPLACED;
1454 		return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE);
1455 	}
1456 
1457 	st = (alp->al_flags & FMD_ASRU_STATE) | FMD_ASRU_PRESENT;
1458 	if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0) {
1459 		us = fmd_fmri_service_state(asru);
1460 		if (us == -1 || us == FMD_SERVICE_STATE_UNKNOWN) {
1461 			/* not supported by scheme - try fmd_fmri_unusable */
1462 			us = fmd_fmri_unusable(asru);
1463 		} else if (us == FMD_SERVICE_STATE_UNUSABLE) {
1464 			st |= FMD_ASRU_UNUSABLE;
1465 			return (st);
1466 		} else if (us == FMD_SERVICE_STATE_OK) {
1467 			st &= ~FMD_ASRU_UNUSABLE;
1468 			return (st);
1469 		} else if (us == FMD_SERVICE_STATE_DEGRADED) {
1470 			st &= ~FMD_ASRU_UNUSABLE;
1471 			st |= FMD_ASRU_DEGRADED;
1472 			return (st);
1473 		}
1474 	} else
1475 		us = (alp->al_flags & FMD_ASRU_UNUSABLE);
1476 	if (us > 0)
1477 		st |= FMD_ASRU_UNUSABLE;
1478 	else if (us == 0)
1479 		st &= ~FMD_ASRU_UNUSABLE;
1480 	return (st);
1481 }
1482 
1483 /*
1484  * Report the current known state of the ASRU by refreshing its unusable status
1485  * based upon the routines provided by the scheme module.  If the unusable bit
1486  * is different, we do *not* generate a state change here because that change
1487  * may be unrelated to fmd activities and therefore we have no case or event.
1488  * The absence of the transition is harmless as this function is only provided
1489  * for RPC observability and fmd's clients are only concerned with ASRU_FAULTY.
1490  */
1491 int
1492 fmd_asru_getstate(fmd_asru_t *ap)
1493 {
1494 	int us, st;
1495 
1496 	if (!(ap->asru_flags & FMD_ASRU_INTERNAL) &&
1497 	    (fmd_asru_fake_not_present >= FMD_OBJ_STATE_REPLACED ||
1498 	    fmd_fmri_present(ap->asru_fmri) <= 0))
1499 		return (0); /* do not report non-fmd non-present resources */
1500 
1501 	us = fmd_fmri_unusable(ap->asru_fmri);
1502 	st = ap->asru_flags & FMD_ASRU_STATE;
1503 
1504 	if (us > 0)
1505 		st |= FMD_ASRU_UNUSABLE;
1506 	else if (us == 0)
1507 		st &= ~FMD_ASRU_UNUSABLE;
1508 
1509 	return (st);
1510 }
1511