xref: /titanic_50/usr/src/cmd/fm/fmd/common/fmd_asru.c (revision ab1eb80a7237fca06df43e2adb4776f0316547a4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/fm/protocol.h>
30 #include <uuid/uuid.h>
31 
32 #include <dirent.h>
33 #include <limits.h>
34 #include <unistd.h>
35 #include <alloca.h>
36 #include <stddef.h>
37 #include <fm/libtopo.h>
38 
39 #include <fmd_alloc.h>
40 #include <fmd_string.h>
41 #include <fmd_error.h>
42 #include <fmd_subr.h>
43 #include <fmd_protocol.h>
44 #include <fmd_event.h>
45 #include <fmd_conf.h>
46 #include <fmd_fmri.h>
47 #include <fmd_dispq.h>
48 #include <fmd_case.h>
49 #include <fmd_module.h>
50 #include <fmd_asru.h>
51 
52 #include <fmd.h>
53 
/*
 * Resource event class names, one per combination of the UNUSABLE and FAULTED
 * state bits (see the per-entry annotations).
 * NOTE(review): presumably indexed by (flags & FMD_ASRU_STATE) like the
 * short-name table below -- the indexing site is not in this part of the file.
 */
static const char *const _fmd_asru_events[] = {
	FMD_RSRC_CLASS "asru.ok",		/* UNUSABLE=0 FAULTED=0 */
	FMD_RSRC_CLASS "asru.degraded",		/* UNUSABLE=0 FAULTED=1 */
	FMD_RSRC_CLASS "asru.unknown",		/* UNUSABLE=1 FAULTED=0 */
	FMD_RSRC_CLASS "asru.faulted"		/* UNUSABLE=1 FAULTED=1 */
};

/*
 * Two-letter state abbreviations used in trace output.  The table is indexed
 * by (asru_flags & FMD_ASRU_STATE); an upper-case letter means the
 * corresponding bit (U = unusable, F = faulted) is set.
 */
static const char *const _fmd_asru_snames[] = {
	"uf", "uF", "Uf", "UF"			/* same order as above */
};

/*
 * Debugging override: when non-zero, fmd_asru_replacement_state() returns this
 * value instead of querying the FMRIs.  It is loaded from the "fakenotpresent"
 * configuration property by fmd_asru_hash_create().
 */
volatile uint32_t fmd_asru_fake_not_present = 0;
66 
67 static uint_t
68 fmd_asru_strhash(fmd_asru_hash_t *ahp, const char *val)
69 {
70 	return (topo_fmri_strhash(ahp->ah_topo->ft_hdl, val) % ahp->ah_hashlen);
71 }
72 
73 static boolean_t
74 fmd_asru_strcmp(fmd_asru_hash_t *ahp, const char *a, const char *b)
75 {
76 	return (topo_fmri_strcmp(ahp->ah_topo->ft_hdl, a, b));
77 }
78 
79 static fmd_asru_t *
80 fmd_asru_create(fmd_asru_hash_t *ahp, const char *uuid,
81     const char *name, nvlist_t *fmri)
82 {
83 	fmd_asru_t *ap = fmd_zalloc(sizeof (fmd_asru_t), FMD_SLEEP);
84 	char *s;
85 
86 	(void) pthread_mutex_init(&ap->asru_lock, NULL);
87 	(void) pthread_cond_init(&ap->asru_cv, NULL);
88 
89 	ap->asru_name = fmd_strdup(name, FMD_SLEEP);
90 	if (fmri)
91 		(void) nvlist_xdup(fmri, &ap->asru_fmri, &fmd.d_nva);
92 	ap->asru_root = fmd_strdup(ahp->ah_dirpath, FMD_SLEEP);
93 	ap->asru_uuid = fmd_strdup(uuid, FMD_SLEEP);
94 	ap->asru_uuidlen = ap->asru_uuid ? strlen(ap->asru_uuid) : 0;
95 	ap->asru_refs = 1;
96 
97 	if (fmri && nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &s) == 0 &&
98 	    strcmp(s, FM_FMRI_SCHEME_FMD) == 0)
99 		ap->asru_flags |= FMD_ASRU_INTERNAL;
100 
101 	return (ap);
102 }
103 
104 static void
105 fmd_asru_destroy(fmd_asru_t *ap)
106 {
107 	ASSERT(MUTEX_HELD(&ap->asru_lock));
108 	ASSERT(ap->asru_refs == 0);
109 
110 	nvlist_free(ap->asru_event);
111 	fmd_strfree(ap->asru_name);
112 	nvlist_free(ap->asru_fmri);
113 	fmd_strfree(ap->asru_root);
114 	fmd_free(ap->asru_uuid, ap->asru_uuidlen + 1);
115 	fmd_free(ap, sizeof (fmd_asru_t));
116 }
117 
118 static void
119 fmd_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_t *ap)
120 {
121 	uint_t h = fmd_asru_strhash(ahp, ap->asru_name);
122 
123 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
124 	ap->asru_next = ahp->ah_hash[h];
125 	ahp->ah_hash[h] = ap;
126 	ahp->ah_count++;
127 }
128 
129 static fmd_asru_t *
130 fmd_asru_hold(fmd_asru_t *ap)
131 {
132 	(void) pthread_mutex_lock(&ap->asru_lock);
133 	ap->asru_refs++;
134 	ASSERT(ap->asru_refs != 0);
135 	(void) pthread_mutex_unlock(&ap->asru_lock);
136 	return (ap);
137 }
138 
139 /*
140  * Lookup an asru in the hash by name and place a hold on it.  If the asru is
141  * not found, no entry is created and NULL is returned.  This internal function
142  * is for callers who have the ah_lock held and is used by lookup_name below.
143  */
144 fmd_asru_t *
145 fmd_asru_hash_lookup(fmd_asru_hash_t *ahp, const char *name)
146 {
147 	fmd_asru_t *ap;
148 	uint_t h;
149 
150 	ASSERT(RW_LOCK_HELD(&ahp->ah_lock));
151 	h = fmd_asru_strhash(ahp, name);
152 
153 	for (ap = ahp->ah_hash[h]; ap != NULL; ap = ap->asru_next) {
154 		if (fmd_asru_strcmp(ahp, ap->asru_name, name))
155 			break;
156 	}
157 
158 	if (ap != NULL)
159 		(void) fmd_asru_hold(ap);
160 	else
161 		(void) fmd_set_errno(EFMD_ASRU_NOENT);
162 
163 	return (ap);
164 }
165 
166 static int
167 fmd_asru_replacement_state(nvlist_t *event)
168 {
169 	int ps = -1;
170 	nvlist_t *asru, *fru, *rsrc;
171 
172 	/*
173 	 * Check if there is evidence that this object is no longer present.
174 	 * In general fmd_fmri_present() should be supported on resources and/or
175 	 * frus, as those are the things that are physically present or not
176 	 * present - an asru can be spread over a number of frus some of which
177 	 * are present and some not, so fmd_fmri_present() is not generally
178 	 * meaningful. However retain a check for asru first for compatibility.
179 	 * If we have checked all three and we still get -1 then nothing knows
180 	 * whether it's present or not, so err on the safe side and treat it
181 	 * as still present.
182 	 */
183 	if (fmd_asru_fake_not_present)
184 		return (fmd_asru_fake_not_present);
185 	if (nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0)
186 		ps = fmd_fmri_replaced(asru);
187 	if (ps == -1) {
188 		if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE, &rsrc) == 0)
189 			ps = fmd_fmri_replaced(rsrc);
190 	} else if (ps == FMD_OBJ_STATE_UNKNOWN) {
191 		/* see if we can improve on UNKNOWN */
192 		if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE,
193 		    &rsrc) == 0) {
194 			int ps2 = fmd_fmri_replaced(rsrc);
195 			if (ps2 == FMD_OBJ_STATE_STILL_PRESENT ||
196 			    ps2 == FMD_OBJ_STATE_REPLACED)
197 				ps = ps2;
198 		}
199 	}
200 	if (ps == -1) {
201 		if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0)
202 			ps = fmd_fmri_replaced(fru);
203 	} else if (ps == FMD_OBJ_STATE_UNKNOWN) {
204 		/* see if we can improve on UNKNOWN */
205 		if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0) {
206 			int ps2 = fmd_fmri_replaced(fru);
207 			if (ps2 == FMD_OBJ_STATE_STILL_PRESENT ||
208 			    ps2 == FMD_OBJ_STATE_REPLACED)
209 				ps = ps2;
210 		}
211 	}
212 	if (ps == -1)
213 		ps = FMD_OBJ_STATE_UNKNOWN;
214 	return (ps);
215 }
216 
217 static void
218 fmd_asru_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
219     char *name)
220 {
221 	uint_t h = fmd_asru_strhash(ahp, name);
222 
223 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
224 	alp->al_asru_next = ahp->ah_asru_hash[h];
225 	ahp->ah_asru_hash[h] = alp;
226 	ahp->ah_al_count++;
227 }
228 
229 static void
230 fmd_asru_case_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
231     char *name)
232 {
233 	uint_t h = fmd_asru_strhash(ahp, name);
234 
235 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
236 	alp->al_case_next = ahp->ah_case_hash[h];
237 	ahp->ah_case_hash[h] = alp;
238 }
239 
240 static void
241 fmd_asru_fru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, char *name)
242 {
243 	uint_t h = fmd_asru_strhash(ahp, name);
244 
245 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
246 	alp->al_fru_next = ahp->ah_fru_hash[h];
247 	ahp->ah_fru_hash[h] = alp;
248 }
249 
250 static void
251 fmd_asru_label_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
252     char *name)
253 {
254 	uint_t h = fmd_asru_strhash(ahp, name);
255 
256 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
257 	alp->al_label_next = ahp->ah_label_hash[h];
258 	ahp->ah_label_hash[h] = alp;
259 }
260 
261 static void
262 fmd_asru_rsrc_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
263     char *name)
264 {
265 	uint_t h = fmd_asru_strhash(ahp, name);
266 
267 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
268 	alp->al_rsrc_next = ahp->ah_rsrc_hash[h];
269 	ahp->ah_rsrc_hash[h] = alp;
270 }
271 
272 static void
273 fmd_asru_al_destroy(fmd_asru_link_t *alp)
274 {
275 	ASSERT(alp->al_refs == 0);
276 	ASSERT(MUTEX_HELD(&alp->al_asru->asru_lock));
277 
278 	if (alp->al_log != NULL)
279 		fmd_log_rele(alp->al_log);
280 
281 	fmd_free(alp->al_uuid, alp->al_uuidlen + 1);
282 	nvlist_free(alp->al_event);
283 	fmd_strfree(alp->al_rsrc_name);
284 	fmd_strfree(alp->al_case_uuid);
285 	fmd_strfree(alp->al_fru_name);
286 	fmd_strfree(alp->al_asru_name);
287 	fmd_strfree(alp->al_label);
288 	nvlist_free(alp->al_asru_fmri);
289 	fmd_free(alp, sizeof (fmd_asru_link_t));
290 }
291 
292 static fmd_asru_link_t *
293 fmd_asru_al_hold(fmd_asru_link_t *alp)
294 {
295 	fmd_asru_t *ap = alp->al_asru;
296 
297 	(void) pthread_mutex_lock(&ap->asru_lock);
298 	ap->asru_refs++;
299 	alp->al_refs++;
300 	ASSERT(alp->al_refs != 0);
301 	(void) pthread_mutex_unlock(&ap->asru_lock);
302 	return (alp);
303 }
304 
305 static void fmd_asru_destroy(fmd_asru_t *ap);
306 
307 /*ARGSUSED*/
308 static void
309 fmd_asru_al_hash_release(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp)
310 {
311 	fmd_asru_t *ap = alp->al_asru;
312 
313 	(void) pthread_mutex_lock(&ap->asru_lock);
314 	ASSERT(alp->al_refs != 0);
315 	if (--alp->al_refs == 0)
316 		fmd_asru_al_destroy(alp);
317 	ASSERT(ap->asru_refs != 0);
318 	if (--ap->asru_refs == 0)
319 		fmd_asru_destroy(ap);
320 	else
321 		(void) pthread_mutex_unlock(&ap->asru_lock);
322 }
323 
324 static int
325 fmd_asru_get_namestr(nvlist_t *nvl, char **name, ssize_t *namelen)
326 {
327 	if ((*namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) == -1)
328 		return (EFMD_ASRU_FMRI);
329 	*name = fmd_alloc(*namelen + 1, FMD_SLEEP);
330 	if (fmd_fmri_nvl2str(nvl, *name, *namelen + 1) == -1) {
331 		if (*name != NULL)
332 			fmd_free(*name, *namelen + 1);
333 		return (EFMD_ASRU_FMRI);
334 	}
335 	return (0);
336 }
337 
/*
 * Create the per-fault "link" structure for the fault event 'nvl' belonging
 * to case 'cp', attach it to the per-asru structure (creating that too if
 * this is the first fault seen for the asru) and enter it in every link hash.
 *
 * ahp     - resource cache to insert into.
 * nvl     - fault event; its asru, fru, resource and location members are all
 *           optional.  A missing or unconvertible FMRI is hashed as "".
 * cp      - case the suspect belongs to; recorded in the link and the asru.
 *           No case hold is taken here.
 * al_uuid - uuid string for the link (copied).
 *
 * Returns the new link, which starts with one reference.
 */
static fmd_asru_link_t *
fmd_asru_al_create(fmd_asru_hash_t *ahp, nvlist_t *nvl, fmd_case_t *cp,
    const char *al_uuid)
{
	nvlist_t *asru = NULL, *fru, *rsrc;
	int got_rsrc = 0, got_asru = 0, got_fru = 0;
	ssize_t fru_namelen, rsrc_namelen, asru_namelen;
	char *asru_name, *rsrc_name, *fru_name, *name, *label;
	fmd_asru_link_t *alp;
	fmd_asru_t *ap;
	boolean_t msg;
	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;

	/*
	 * Convert whichever FMRIs are present to strings.  Each *_name is
	 * only valid when the matching got_* flag is set; ownership of those
	 * buffers passes to the link structure below.
	 */
	if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &asru) == 0 &&
	    fmd_asru_get_namestr(asru, &asru_name, &asru_namelen) == 0)
		got_asru = 1;
	if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &fru) == 0 &&
	    fmd_asru_get_namestr(fru, &fru_name, &fru_namelen) == 0)
		got_fru = 1;
	if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0 &&
	    fmd_asru_get_namestr(rsrc, &rsrc_name, &rsrc_namelen) == 0)
		got_rsrc = 1;
	if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, &label) != 0)
		label = "";

	/*
	 * Grab the rwlock as a writer, then create the asru and hash it in
	 * with ahp->ah_lock held.  We'll then drop the rwlock and proceed to
	 * initializing the asru.
	 */
	(void) pthread_rwlock_wrlock(&ahp->ah_lock);

	/*
	 * Create and initialise the per-fault "link" structure.
	 */
	alp = fmd_zalloc(sizeof (fmd_asru_link_t), FMD_SLEEP);
	if (got_asru)
		(void) nvlist_xdup(asru, &alp->al_asru_fmri, &fmd.d_nva);
	alp->al_uuid = fmd_strdup(al_uuid, FMD_SLEEP);
	alp->al_uuidlen = strlen(alp->al_uuid);
	alp->al_refs = 1;

	/*
	 * If this is the first fault for this asru, then create the per-asru
	 * structure and link into the hash.  Otherwise the existing asru is
	 * reused (the lookup has placed a hold on it) and its previous event
	 * is discarded; either way the asru records a copy of this event.
	 */
	name = got_asru ? asru_name : "";
	if ((ap = fmd_asru_hash_lookup(ahp, name)) == NULL) {
		ap = fmd_asru_create(ahp, al_uuid, name, got_asru ? asru :
		    NULL);
		fmd_asru_hash_insert(ahp, ap);
	} else
		nvlist_free(ap->asru_event);
	(void) nvlist_xdup(nvl, &ap->asru_event, &fmd.d_nva);

	/*
	 * Put the link structure on the list associated with the per-asru
	 * structure. Then put the link structure on the various hashes.
	 * Names that could not be derived are stored as freshly allocated
	 * empty strings, so every name in the link can be freed the same way.
	 */
	fmd_list_append(&ap->asru_list, (fmd_list_t *)alp);
	alp->al_asru = ap;
	alp->al_asru_name = got_asru ? asru_name : fmd_strdup("", FMD_SLEEP);
	fmd_asru_asru_hash_insert(ahp, alp, alp->al_asru_name);
	alp->al_fru_name = got_fru ? fru_name : fmd_strdup("", FMD_SLEEP);
	fmd_asru_fru_hash_insert(ahp, alp, alp->al_fru_name);
	alp->al_rsrc_name = got_rsrc ? rsrc_name : fmd_strdup("", FMD_SLEEP);
	fmd_asru_rsrc_hash_insert(ahp, alp, alp->al_rsrc_name);
	alp->al_label = fmd_strdup(label, FMD_SLEEP);
	fmd_asru_label_hash_insert(ahp, alp, label);
	alp->al_case_uuid = fmd_strdup(cip->ci_uuid, FMD_SLEEP);
	fmd_asru_case_hash_insert(ahp, alp, cip->ci_uuid);

	/*
	 * Take the asru's own lock before releasing the hash lock, so the
	 * remaining initialisation is done under asru_lock.
	 */
	(void) pthread_mutex_lock(&ap->asru_lock);
	(void) pthread_rwlock_unlock(&ahp->ah_lock);

	ap->asru_case = alp->al_case = cp;
	/* A suspect whose message member is B_FALSE makes the asru invisible */
	if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE, &msg) == 0 &&
	    msg == B_FALSE)
		ap->asru_flags |= FMD_ASRU_INVISIBLE;
	(void) nvlist_xdup(nvl, &alp->al_event, &fmd.d_nva);
	/* Mark the asru valid and wake any threads waiting on asru_cv. */
	ap->asru_flags |= FMD_ASRU_VALID;
	(void) pthread_cond_broadcast(&ap->asru_cv);
	(void) pthread_mutex_unlock(&ap->asru_lock);
	return (alp);
}
422 
/*
 * Log replay callback (see fmd_asru_hash_logopen()): rebuild one resource
 * cache entry, and the case suspect it belongs to, from a saved asru event.
 *
 * lp  - the asru log being replayed; its name supplies the link uuid.
 * ep  - the replayed event, whose nvlist carries the saved state.
 * ahp - resource cache to insert into.
 *
 * If the record lacks the faulty flag or the embedded fault event, an error
 * is reported, ahp->ah_error is set to EFMD_ASRU_EVENT and nothing is created.
 */
static void
fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp)
{
	nvlist_t *nvl = FMD_EVENT_NVL(ep);
	boolean_t faulty = FMD_B_FALSE, unusable = FMD_B_FALSE;
	int ps;
	boolean_t repaired = FMD_B_FALSE, replaced = FMD_B_FALSE;
	boolean_t acquitted = FMD_B_FALSE;
	nvlist_t *flt, *flt_copy, *asru;
	char *case_uuid = NULL, *case_code = NULL;
	fmd_asru_t *ap;
	fmd_asru_link_t *alp;
	fmd_case_t *cp;
	int64_t *diag_time;
	uint_t nelem;
	topo_hdl_t *thp;
	char *class;
	nvlist_t *rsrc;
	int err;

	/*
	 * Extract the most recent values of 'faulty' from the event log.
	 */
	if (nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY,
	    &faulty) != 0) {
		fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: "
		    "invalid event log record\n", lp->log_name);
		ahp->ah_error = EFMD_ASRU_EVENT;
		return;
	}
	if (nvlist_lookup_nvlist(nvl, FM_RSRC_ASRU_EVENT, &flt) != 0) {
		fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: "
		    "invalid event log record\n", lp->log_name);
		ahp->ah_error = EFMD_ASRU_EVENT;
		return;
	}

	/*
	 * The remaining members are optional: when a lookup fails the
	 * corresponding variable keeps its initial value (NULL / FMD_B_FALSE).
	 */
	(void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_UUID, &case_uuid);
	(void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_CODE, &case_code);
	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_UNUSABLE,
	    &unusable);
	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPAIRED,
	    &repaired);
	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPLACED,
	    &replaced);
	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_ACQUITTED,
	    &acquitted);

	/*
	 * Attempt to recreate the case in either the CLOSED or REPAIRED state
	 * (depending on whether the faulty bit is still set).
	 * If the case is already present, fmd_case_recreate() will return it.
	 * If not, we'll create a new orphaned case. Either way,  we use the
	 * ASRU event to insert a suspect into the partially-restored case.
	 */
	fmd_module_lock(fmd.d_rmod);
	cp = fmd_case_recreate(fmd.d_rmod, NULL, faulty ? FMD_CASE_CLOSED :
	    FMD_CASE_REPAIRED, case_uuid, case_code);
	fmd_case_hold(cp);
	fmd_module_unlock(fmd.d_rmod);

	/*
	 * Use the recorded diagnosis time when the event carries one (at
	 * least two array elements); otherwise fall back to the log file's
	 * st_ctime.
	 */
	if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time,
	    &nelem) == 0 && nelem >= 2)
		fmd_case_settime(cp, diag_time[0], diag_time[1]);
	else
		fmd_case_settime(cp, lp->log_stat.st_ctime, 0);
	(void) nvlist_xdup(flt, &flt_copy, &fmd.d_nva);

	/*
	 * For faults with a resource, re-evaluate the asru from the resource.
	 * The asru member of our private copy is replaced only when topo can
	 * compute one; otherwise the logged asru (if any) is kept.
	 */
	thp = fmd_fmri_topo_hold(TOPO_VERSION);
	if (nvlist_lookup_string(flt_copy, FM_CLASS, &class) == 0 &&
	    strncmp(class, "fault", 5) == 0 &&
	    nvlist_lookup_nvlist(flt_copy, FM_FAULT_RESOURCE, &rsrc) == 0 &&
	    rsrc != NULL && topo_fmri_asru(thp, rsrc, &asru, &err) == 0) {
		(void) nvlist_remove(flt_copy, FM_FAULT_ASRU, DATA_TYPE_NVLIST);
		(void) nvlist_add_nvlist(flt_copy, FM_FAULT_ASRU, asru);
		nvlist_free(asru);
	}
	fmd_fmri_topo_rele(thp);

	/*
	 * From here on 'flt' is a second private copy of the (possibly
	 * updated) fault, freed below once the cache entry has been built;
	 * 'flt_copy' is handed to the case and not referenced again here.
	 */
	(void) nvlist_xdup(flt_copy, &flt, &fmd.d_nva);

	fmd_case_recreate_suspect(cp, flt_copy);

	/*
	 * Now create the resource cache entries.
	 */
	alp = fmd_asru_al_create(ahp, flt, cp, fmd_strbasename(lp->log_name));
	ap = alp->al_asru;

	/*
	 * Check to see if the resource is still present in the system.
	 * An unknown state is treated as still present.
	 */
	ps = fmd_asru_replacement_state(flt);
	if (ps == FMD_OBJ_STATE_STILL_PRESENT || ps == FMD_OBJ_STATE_UNKNOWN)
		ap->asru_flags |= FMD_ASRU_PRESENT;
	else if (ps == FMD_OBJ_STATE_REPLACED)
		replaced = FMD_B_TRUE;

	nvlist_free(flt);

	/*
	 * Restore the saved state bits on both the link and the asru, then
	 * record a single reason, preferring replaced over repaired over
	 * acquitted.
	 */
	ap->asru_flags |= FMD_ASRU_RECREATED;
	if (faulty) {
		alp->al_flags |= FMD_ASRU_FAULTY;
		ap->asru_flags |= FMD_ASRU_FAULTY;
	}
	if (unusable) {
		alp->al_flags |= FMD_ASRU_UNUSABLE;
		ap->asru_flags |= FMD_ASRU_UNUSABLE;
	}
	if (replaced)
		alp->al_reason = FMD_ASRU_REPLACED;
	else if (repaired)
		alp->al_reason = FMD_ASRU_REPAIRED;
	else if (acquitted)
		alp->al_reason = FMD_ASRU_ACQUITTED;

	TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", alp->al_uuid,
	    (void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE]));
}
543 
544 static void
545 fmd_asru_hash_discard(fmd_asru_hash_t *ahp, const char *uuid, int err)
546 {
547 	char src[PATH_MAX], dst[PATH_MAX];
548 
549 	(void) snprintf(src, PATH_MAX, "%s/%s", ahp->ah_dirpath, uuid);
550 	(void) snprintf(dst, PATH_MAX, "%s/%s-", ahp->ah_dirpath, uuid);
551 
552 	if (err != 0)
553 		err = rename(src, dst);
554 	else
555 		err = unlink(src);
556 
557 	if (err != 0 && errno != ENOENT)
558 		fmd_error(EFMD_ASRU_EVENT, "failed to rename log %s", src);
559 }
560 
561 /*
562  * Open a saved log file and restore it into the ASRU hash.  If we can't even
563  * open the log, rename the log file to <uuid>- to indicate it is corrupt.  If
564  * fmd_log_replay() fails, we either delete the file (if it has reached the
565  * upper limit on cache age) or rename it for debugging if it was corrupted.
566  */
567 static void
568 fmd_asru_hash_logopen(fmd_asru_hash_t *ahp, const char *uuid)
569 {
570 	fmd_log_t *lp = fmd_log_tryopen(ahp->ah_dirpath, uuid, FMD_LOG_ASRU);
571 	uint_t n;
572 
573 	if (lp == NULL) {
574 		fmd_asru_hash_discard(ahp, uuid, errno);
575 		return;
576 	}
577 
578 	ahp->ah_error = 0;
579 	n = ahp->ah_al_count;
580 
581 	fmd_log_replay(lp, (fmd_log_f *)fmd_asru_hash_recreate, ahp);
582 	fmd_log_rele(lp);
583 
584 	if (ahp->ah_al_count == n)
585 		fmd_asru_hash_discard(ahp, uuid, ahp->ah_error);
586 }
587 
588 void
589 fmd_asru_hash_refresh(fmd_asru_hash_t *ahp)
590 {
591 	struct dirent *dp;
592 	DIR *dirp;
593 	int zero;
594 
595 	if ((dirp = opendir(ahp->ah_dirpath)) == NULL) {
596 		fmd_error(EFMD_ASRU_NODIR,
597 		    "failed to open asru cache directory %s", ahp->ah_dirpath);
598 		return;
599 	}
600 
601 	(void) fmd_conf_getprop(fmd.d_conf, "rsrc.zero", &zero);
602 
603 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
604 
605 	while ((dp = readdir(dirp)) != NULL) {
606 		if (dp->d_name[0] == '.')
607 			continue; /* skip "." and ".." */
608 
609 		if (zero)
610 			fmd_asru_hash_discard(ahp, dp->d_name, 0);
611 		else if (!fmd_strmatch(dp->d_name, "*-"))
612 			fmd_asru_hash_logopen(ahp, dp->d_name);
613 	}
614 
615 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
616 	(void) closedir(dirp);
617 }
618 
619 /*
620  * If the resource is present and faulty but not unusable, replay the fault
621  * event that caused it be marked faulty.  This will cause the agent
622  * subscribing to this fault class to again disable the resource.
623  */
624 /*ARGSUSED*/
625 static void
626 fmd_asru_hash_replay_asru(fmd_asru_t *ap, void *data)
627 {
628 	fmd_event_t *e;
629 	nvlist_t *nvl;
630 	char *class;
631 
632 	if (ap->asru_event != NULL && (ap->asru_flags & (FMD_ASRU_STATE |
633 	    FMD_ASRU_PRESENT)) == (FMD_ASRU_FAULTY | FMD_ASRU_PRESENT)) {
634 
635 		fmd_dprintf(FMD_DBG_ASRU,
636 		    "replaying fault event for %s", ap->asru_name);
637 
638 		(void) nvlist_xdup(ap->asru_event, &nvl, &fmd.d_nva);
639 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
640 
641 		(void) nvlist_add_string(nvl, FMD_EVN_UUID,
642 		    ((fmd_case_impl_t *)ap->asru_case)->ci_uuid);
643 
644 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
645 		fmd_dispq_dispatch(fmd.d_disp, e, class);
646 	}
647 }
648 
649 void
650 fmd_asru_hash_replay(fmd_asru_hash_t *ahp)
651 {
652 	fmd_asru_hash_apply(ahp, fmd_asru_hash_replay_asru, NULL);
653 }
654 
655 /*
656  * Check if the resource is still present. If not, and if the rsrc.age time
657  * has expired, then do an implicit repair on the resource.
658  */
659 /*ARGSUSED*/
660 static void
661 fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *arg)
662 {
663 	struct timeval tv;
664 	fmd_log_t *lp;
665 	hrtime_t hrt;
666 	int ps;
667 	int err;
668 
669 	ps = fmd_asru_replacement_state(alp->al_event);
670 	if (ps == FMD_OBJ_STATE_REPLACED) {
671 		fmd_asru_replaced(alp, &err);
672 	} else if (ps == FMD_OBJ_STATE_NOT_PRESENT) {
673 		fmd_time_gettimeofday(&tv);
674 		lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid,
675 		    FMD_LOG_ASRU);
676 		hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime);
677 		fmd_log_rele(lp);
678 		if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime)
679 			fmd_asru_removed(alp);
680 	}
681 }
682 
683 void
684 fmd_asru_clear_aged_rsrcs()
685 {
686 	fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, NULL);
687 }
688 
689 fmd_asru_hash_t *
690 fmd_asru_hash_create(const char *root, const char *dir)
691 {
692 	fmd_asru_hash_t *ahp;
693 	char path[PATH_MAX];
694 
695 	ahp = fmd_alloc(sizeof (fmd_asru_hash_t), FMD_SLEEP);
696 	(void) pthread_rwlock_init(&ahp->ah_lock, NULL);
697 	ahp->ah_hashlen = fmd.d_str_buckets;
698 	ahp->ah_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, FMD_SLEEP);
699 	ahp->ah_asru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
700 	    FMD_SLEEP);
701 	ahp->ah_case_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
702 	    FMD_SLEEP);
703 	ahp->ah_fru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
704 	    FMD_SLEEP);
705 	ahp->ah_label_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
706 	    FMD_SLEEP);
707 	ahp->ah_rsrc_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
708 	    FMD_SLEEP);
709 	(void) snprintf(path, sizeof (path), "%s/%s", root, dir);
710 	ahp->ah_dirpath = fmd_strdup(path, FMD_SLEEP);
711 	(void) fmd_conf_getprop(fmd.d_conf, "rsrc.age", &ahp->ah_lifetime);
712 	(void) fmd_conf_getprop(fmd.d_conf, "fakenotpresent",
713 	    (uint32_t *)&fmd_asru_fake_not_present);
714 	ahp->ah_al_count = 0;
715 	ahp->ah_count = 0;
716 	ahp->ah_error = 0;
717 	ahp->ah_topo = fmd_topo_hold();
718 
719 	return (ahp);
720 }
721 
722 void
723 fmd_asru_hash_destroy(fmd_asru_hash_t *ahp)
724 {
725 	fmd_asru_link_t *alp, *np;
726 	uint_t i;
727 
728 	for (i = 0; i < ahp->ah_hashlen; i++) {
729 		for (alp = ahp->ah_case_hash[i]; alp != NULL; alp = np) {
730 			np = alp->al_case_next;
731 			alp->al_case_next = NULL;
732 			fmd_case_rele(alp->al_case);
733 			alp->al_case = NULL;
734 			fmd_asru_al_hash_release(ahp, alp);
735 		}
736 	}
737 
738 	fmd_strfree(ahp->ah_dirpath);
739 	fmd_free(ahp->ah_hash, sizeof (void *) * ahp->ah_hashlen);
740 	fmd_free(ahp->ah_asru_hash, sizeof (void *) * ahp->ah_hashlen);
741 	fmd_free(ahp->ah_case_hash, sizeof (void *) * ahp->ah_hashlen);
742 	fmd_free(ahp->ah_fru_hash, sizeof (void *) * ahp->ah_hashlen);
743 	fmd_free(ahp->ah_label_hash, sizeof (void *) * ahp->ah_hashlen);
744 	fmd_free(ahp->ah_rsrc_hash, sizeof (void *) * ahp->ah_hashlen);
745 	fmd_topo_rele(ahp->ah_topo);
746 	fmd_free(ahp, sizeof (fmd_asru_hash_t));
747 }
748 
749 /*
750  * Take a snapshot of the ASRU database by placing an additional hold on each
751  * member in an auxiliary array, and then call 'func' for each ASRU.
752  */
753 void
754 fmd_asru_hash_apply(fmd_asru_hash_t *ahp,
755     void (*func)(fmd_asru_t *, void *), void *arg)
756 {
757 	fmd_asru_t *ap, **aps, **app;
758 	uint_t apc, i;
759 
760 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
761 
762 	aps = app = fmd_alloc(ahp->ah_count * sizeof (fmd_asru_t *), FMD_SLEEP);
763 	apc = ahp->ah_count;
764 
765 	for (i = 0; i < ahp->ah_hashlen; i++) {
766 		for (ap = ahp->ah_hash[i]; ap != NULL; ap = ap->asru_next)
767 			*app++ = fmd_asru_hold(ap);
768 	}
769 
770 	ASSERT(app == aps + apc);
771 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
772 
773 	for (i = 0; i < apc; i++) {
774 		if (aps[i]->asru_fmri != NULL)
775 			func(aps[i], arg);
776 		fmd_asru_hash_release(ahp, aps[i]);
777 	}
778 
779 	fmd_free(aps, apc * sizeof (fmd_asru_t *));
780 }
781 
782 void
783 fmd_asru_al_hash_apply(fmd_asru_hash_t *ahp,
784     void (*func)(fmd_asru_link_t *, void *), void *arg)
785 {
786 	fmd_asru_link_t *alp, **alps, **alpp;
787 	uint_t alpc, i;
788 
789 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
790 
791 	alps = alpp = fmd_alloc(ahp->ah_al_count * sizeof (fmd_asru_link_t *),
792 	    FMD_SLEEP);
793 	alpc = ahp->ah_al_count;
794 
795 	for (i = 0; i < ahp->ah_hashlen; i++) {
796 		for (alp = ahp->ah_case_hash[i]; alp != NULL;
797 		    alp = alp->al_case_next)
798 			*alpp++ = fmd_asru_al_hold(alp);
799 	}
800 
801 	ASSERT(alpp == alps + alpc);
802 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
803 
804 	for (i = 0; i < alpc; i++) {
805 		func(alps[i], arg);
806 		fmd_asru_al_hash_release(ahp, alps[i]);
807 	}
808 
809 	fmd_free(alps, alpc * sizeof (fmd_asru_link_t *));
810 }
811 
/*
 * Common engine for the fmd_asru_hash_apply_by_*() functions: call 'func' on
 * every link in one of the link hashes whose key matches 'name'.
 *
 * ahp          - resource cache.
 * name         - key to match (compared with fmd_asru_strcmp()).
 * func, arg    - callback and its opaque argument.
 * hash         - the link hash table to search.
 * match_offset - offset within fmd_asru_link_t of the key string member.
 * next_offset  - offset within fmd_asru_link_t of that hash's chain pointer.
 *
 * The matching links are held and collected under the read lock; 'func' is
 * called after the lock has been dropped, and each hold is then released.
 */
static void
fmd_asru_do_hash_apply(fmd_asru_hash_t *ahp, char *name,
    void (*func)(fmd_asru_link_t *, void *), void *arg,
    fmd_asru_link_t **hash, size_t match_offset, size_t next_offset)
{
	fmd_asru_link_t *alp, **alps, **alpp;
	uint_t alpc = 0, i;
	uint_t h;

	(void) pthread_rwlock_rdlock(&ahp->ah_lock);

	h = fmd_asru_strhash(ahp, name);

	/* First pass: count the matches so the array can be sized exactly. */
	for (alp = hash[h]; alp != NULL; alp =
	    /* LINTED pointer alignment */
	    FMD_ASRU_AL_HASH_NEXT(alp, next_offset))
		if (fmd_asru_strcmp(ahp,
		    /* LINTED pointer alignment */
		    FMD_ASRU_AL_HASH_NAME(alp, match_offset), name))
			alpc++;

	alps = alpp = fmd_alloc(alpc * sizeof (fmd_asru_link_t *), FMD_SLEEP);

	/* Second pass: place a hold on each match and record it. */
	for (alp = hash[h]; alp != NULL; alp =
	    /* LINTED pointer alignment */
	    FMD_ASRU_AL_HASH_NEXT(alp, next_offset))
		if (fmd_asru_strcmp(ahp,
		    /* LINTED pointer alignment */
		    FMD_ASRU_AL_HASH_NAME(alp, match_offset), name))
			*alpp++ = fmd_asru_al_hold(alp);

	/* Both passes ran under the same lock, so the counts must agree. */
	ASSERT(alpp == alps + alpc);
	(void) pthread_rwlock_unlock(&ahp->ah_lock);

	for (i = 0; i < alpc; i++) {
		func(alps[i], arg);
		fmd_asru_al_hash_release(ahp, alps[i]);
	}

	fmd_free(alps, alpc * sizeof (fmd_asru_link_t *));
}
853 
854 void
855 fmd_asru_hash_apply_by_asru(fmd_asru_hash_t *ahp, char *name,
856     void (*func)(fmd_asru_link_t *, void *), void *arg)
857 {
858 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_asru_hash,
859 	    offsetof(fmd_asru_link_t, al_asru_name),
860 	    offsetof(fmd_asru_link_t, al_asru_next));
861 }
862 
863 void
864 fmd_asru_hash_apply_by_case(fmd_asru_hash_t *ahp, fmd_case_t *cp,
865 	void (*func)(fmd_asru_link_t *, void *), void *arg)
866 {
867 	fmd_asru_do_hash_apply(ahp, ((fmd_case_impl_t *)cp)->ci_uuid, func, arg,
868 	    ahp->ah_case_hash, offsetof(fmd_asru_link_t, al_case_uuid),
869 	    offsetof(fmd_asru_link_t, al_case_next));
870 }
871 
872 void
873 fmd_asru_hash_apply_by_fru(fmd_asru_hash_t *ahp, char *name,
874     void (*func)(fmd_asru_link_t *, void *), void *arg)
875 {
876 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_fru_hash,
877 	    offsetof(fmd_asru_link_t, al_fru_name),
878 	    offsetof(fmd_asru_link_t, al_fru_next));
879 }
880 
881 void
882 fmd_asru_hash_apply_by_rsrc(fmd_asru_hash_t *ahp, char *name,
883     void (*func)(fmd_asru_link_t *, void *), void *arg)
884 {
885 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_rsrc_hash,
886 	    offsetof(fmd_asru_link_t, al_rsrc_name),
887 	    offsetof(fmd_asru_link_t, al_rsrc_next));
888 }
889 
890 void
891 fmd_asru_hash_apply_by_label(fmd_asru_hash_t *ahp, char *name,
892     void (*func)(fmd_asru_link_t *, void *), void *arg)
893 {
894 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_label_hash,
895 	    offsetof(fmd_asru_link_t, al_label),
896 	    offsetof(fmd_asru_link_t, al_label_next));
897 }
898 
899 /*
900  * Lookup an asru in the hash by name and place a hold on it.  If the asru is
901  * not found, no entry is created and NULL is returned.
902  */
903 fmd_asru_t *
904 fmd_asru_hash_lookup_name(fmd_asru_hash_t *ahp, const char *name)
905 {
906 	fmd_asru_t *ap;
907 
908 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
909 	ap = fmd_asru_hash_lookup(ahp, name);
910 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
911 
912 	return (ap);
913 }
914 
915 /*
916  * Create a resource cache entry using the fault event "nvl" for one of the
917  * suspects from the case "cp".
918  *
919  * The fault event can have the following components :  FM_FAULT_ASRU,
920  * FM_FAULT_FRU, FM_FAULT_RESOURCE. These should be set by the Diagnosis Engine
921  * when calling fmd_nvl_create_fault(). In the general case, these are all
922  * optional and an entry will always be added into the cache even if one or all
923  * of these fields is missing.
924  *
925  * However, for hardware faults the recommended practice is that the fault
926  * event should always have the FM_FAULT_RESOURCE field present and that this
927  * should be represented in hc-scheme.
928  *
929  * Currently the DE should also add the FM_FAULT_ASRU and FM_FAULT_FRU fields
930  * where known, though at some future stage fmd might be able to fill these
931  * in automatically from the topology.
932  */
933 fmd_asru_link_t *
934 fmd_asru_hash_create_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, nvlist_t *nvl)
935 {
936 	char *parsed_uuid;
937 	uuid_t uuid;
938 	int uuidlen;
939 	fmd_asru_link_t *alp;
940 
941 	/*
942 	 * Generate a UUID for the ASRU.  libuuid cleverly gives us no
943 	 * interface for specifying or learning the buffer size.  Sigh.
944 	 * The spec says 36 bytes but we use a tunable just to be safe.
945 	 */
946 	(void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &uuidlen);
947 	parsed_uuid = fmd_zalloc(uuidlen + 1, FMD_SLEEP);
948 	uuid_generate(uuid);
949 	uuid_unparse(uuid, parsed_uuid);
950 
951 	/*
952 	 * Now create the resource cache entries.
953 	 */
954 	fmd_case_hold_locked(cp);
955 	alp = fmd_asru_al_create(ahp, nvl, cp, parsed_uuid);
956 	TRACE((FMD_DBG_ASRU, "asru %s created as %p",
957 	    alp->al_uuid, (void *)alp->al_asru));
958 
959 	fmd_free(parsed_uuid, uuidlen + 1);
960 	return (alp);
961 
962 }
963 
964 /*
965  * Release the reference count on an asru obtained using fmd_asru_hash_lookup.
966  * We take 'ahp' for symmetry and in case we need to use it in future work.
967  */
968 /*ARGSUSED*/
969 void
970 fmd_asru_hash_release(fmd_asru_hash_t *ahp, fmd_asru_t *ap)
971 {
972 	(void) pthread_mutex_lock(&ap->asru_lock);
973 
974 	ASSERT(ap->asru_refs != 0);
975 	if (--ap->asru_refs == 0)
976 		fmd_asru_destroy(ap);
977 	else
978 		(void) pthread_mutex_unlock(&ap->asru_lock);
979 }
980 
981 static void
982 fmd_asru_do_delete_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp,
983     fmd_asru_link_t **hash, size_t next_offset, char *name)
984 {
985 	uint_t h;
986 	fmd_asru_link_t *alp, **pp, *alpnext, **alpnextp;
987 
988 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
989 	h = fmd_asru_strhash(ahp, name);
990 	pp = &hash[h];
991 	for (alp = *pp; alp != NULL; alp = alpnext) {
992 		/* LINTED pointer alignment */
993 		alpnextp = FMD_ASRU_AL_HASH_NEXTP(alp, next_offset);
994 		alpnext = *alpnextp;
995 		if (alp->al_case == cp) {
996 			*pp = *alpnextp;
997 			*alpnextp = NULL;
998 		} else
999 			pp = alpnextp;
1000 	}
1001 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
1002 }
1003 
1004 static void
1005 fmd_asru_do_hash_delete(fmd_asru_hash_t *ahp, fmd_case_susp_t *cis,
1006     fmd_case_t *cp, fmd_asru_link_t **hash, size_t next_offset, char *nvname)
1007 {
1008 	nvlist_t *nvl;
1009 	char *name = NULL;
1010 	ssize_t namelen;
1011 
1012 	if (nvlist_lookup_nvlist(cis->cis_nvl, nvname, &nvl) == 0 &&
1013 	    (namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) != -1 &&
1014 	    (name = fmd_alloc(namelen + 1, FMD_SLEEP)) != NULL) {
1015 		if (fmd_fmri_nvl2str(nvl, name, namelen + 1) != -1)
1016 			fmd_asru_do_delete_entry(ahp, cp, hash, next_offset,
1017 			    name);
1018 		fmd_free(name, namelen + 1);
1019 	} else
1020 		fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, "");
1021 }
1022 
1023 void
1024 fmd_asru_hash_delete_case(fmd_asru_hash_t *ahp, fmd_case_t *cp)
1025 {
1026 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1027 	fmd_case_susp_t *cis;
1028 	fmd_asru_link_t *alp, **plp, *alpnext;
1029 	fmd_asru_t *ap;
1030 	char path[PATH_MAX];
1031 	char *label;
1032 	uint_t h;
1033 
1034 	/*
1035 	 * first delete hash entries for each suspect
1036 	 */
1037 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
1038 		fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_fru_hash,
1039 		    offsetof(fmd_asru_link_t, al_fru_next), FM_FAULT_FRU);
1040 		fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_rsrc_hash,
1041 		    offsetof(fmd_asru_link_t, al_rsrc_next), FM_FAULT_RESOURCE);
1042 		if (nvlist_lookup_string(cis->cis_nvl, FM_FAULT_LOCATION,
1043 		    &label) != 0)
1044 			label = "";
1045 		fmd_asru_do_delete_entry(ahp, cp, ahp->ah_label_hash,
1046 		    offsetof(fmd_asru_link_t, al_label_next), label);
1047 		fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_asru_hash,
1048 		    offsetof(fmd_asru_link_t, al_asru_next), FM_FAULT_ASRU);
1049 	}
1050 
1051 	/*
1052 	 * then delete associated case hash entries
1053 	 */
1054 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
1055 	h = fmd_asru_strhash(ahp, cip->ci_uuid);
1056 	plp = &ahp->ah_case_hash[h];
1057 	for (alp = *plp; alp != NULL; alp = alpnext) {
1058 		alpnext = alp->al_case_next;
1059 		if (alp->al_case == cp) {
1060 			*plp = alp->al_case_next;
1061 			alp->al_case_next = NULL;
1062 			ASSERT(ahp->ah_al_count != 0);
1063 			ahp->ah_al_count--;
1064 
1065 			/*
1066 			 * decrement case ref.
1067 			 */
1068 			fmd_case_rele_locked(cp);
1069 			alp->al_case = NULL;
1070 
1071 			/*
1072 			 * If we found a matching ASRU, unlink its log file and
1073 			 * then release the hash entry. Note that it may still
1074 			 * be referenced if another thread is manipulating it;
1075 			 * this is ok because once we unlink, the log file will
1076 			 * not be restored, and the log data will be freed when
1077 			 * all of the referencing threads release their
1078 			 * respective references.
1079 			 */
1080 			(void) snprintf(path, sizeof (path), "%s/%s",
1081 			    ahp->ah_dirpath, alp->al_uuid);
1082 			if (unlink(path) != 0)
1083 				fmd_error(EFMD_ASRU_UNLINK,
1084 				    "failed to unlink asru %s", path);
1085 
1086 			/*
1087 			 * Now unlink from the global per-resource cache
1088 			 * and if this is the last link then remove that from
1089 			 * it's own hash too.
1090 			 */
1091 			ap = alp->al_asru;
1092 			(void) pthread_mutex_lock(&ap->asru_lock);
1093 			fmd_list_delete(&ap->asru_list, alp);
1094 			if (ap->asru_list.l_next == NULL) {
1095 				uint_t h;
1096 				fmd_asru_t *ap2, **pp;
1097 				fmd_asru_t *apnext, **apnextp;
1098 
1099 				ASSERT(ahp->ah_count != 0);
1100 				ahp->ah_count--;
1101 				h = fmd_asru_strhash(ahp, ap->asru_name);
1102 				pp = &ahp->ah_hash[h];
1103 				for (ap2 = *pp; ap2 != NULL; ap2 = apnext) {
1104 					apnextp = &ap2->asru_next;
1105 					apnext = *apnextp;
1106 					if (ap2 == ap) {
1107 						*pp = *apnextp;
1108 						*apnextp = NULL;
1109 					} else
1110 						pp = apnextp;
1111 				}
1112 			}
1113 			(void) pthread_mutex_unlock(&ap->asru_lock);
1114 			fmd_asru_al_hash_release(ahp, alp);
1115 		} else
1116 			plp = &alp->al_case_next;
1117 	}
1118 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
1119 }
1120 
1121 static void
1122 fmd_asru_repair_containee(fmd_asru_link_t *alp, void *er)
1123 {
1124 	if (er && alp->al_asru_fmri && fmd_fmri_contains(er,
1125 	    alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
1126 	    FMD_ASRU_REPAIRED))
1127 		fmd_case_update(alp->al_case);
1128 }
1129 
1130 void
1131 fmd_asru_repaired(fmd_asru_link_t *alp, void *er)
1132 {
1133 	int flags;
1134 	int rval;
1135 
1136 	/*
1137 	 * repair this asru cache entry
1138 	 */
1139 	rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_REPAIRED);
1140 
1141 	/*
1142 	 * now check if all entries associated with this asru are repaired and
1143 	 * if so repair containees
1144 	 */
1145 	(void) pthread_mutex_lock(&alp->al_asru->asru_lock);
1146 	flags = alp->al_asru->asru_flags;
1147 	(void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
1148 	if (!(flags & FMD_ASRU_FAULTY))
1149 		fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_containee,
1150 		    alp->al_asru_fmri);
1151 
1152 	/*
1153 	 * if called from fmd_adm_repair() and we really did clear the bit then
1154 	 * we need to do a case update to see if the associated case can be
1155 	 * repaired. No need to do this if called from fmd_case_repair() (ie
1156 	 * when er is NULL) as the case will be explicitly repaired anyway.
1157 	 */
1158 	if (er) {
1159 		*(int *)er = 0;
1160 		if (rval)
1161 			fmd_case_update(alp->al_case);
1162 	}
1163 }
1164 
1165 static void
1166 fmd_asru_acquit_containee(fmd_asru_link_t *alp, void *er)
1167 {
1168 	if (er && alp->al_asru_fmri && fmd_fmri_contains(er,
1169 	    alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
1170 	    FMD_ASRU_ACQUITTED))
1171 		fmd_case_update(alp->al_case);
1172 }
1173 
1174 void
1175 fmd_asru_acquit(fmd_asru_link_t *alp, void *er)
1176 {
1177 	int flags;
1178 	int rval;
1179 
1180 	/*
1181 	 * acquit this asru cache entry
1182 	 */
1183 	rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_ACQUITTED);
1184 
1185 	/*
1186 	 * now check if all entries associated with this asru are acquitted and
1187 	 * if so acquit containees
1188 	 */
1189 	(void) pthread_mutex_lock(&alp->al_asru->asru_lock);
1190 	flags = alp->al_asru->asru_flags;
1191 	(void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
1192 	if (!(flags & FMD_ASRU_FAULTY))
1193 		fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_acquit_containee,
1194 		    alp->al_asru_fmri);
1195 
1196 	/*
1197 	 * if called from fmd_adm_acquit() and we really did clear the bit then
1198 	 * we need to do a case update to see if the associated case can be
1199 	 * repaired. No need to do this if called from fmd_case_acquit() (ie
1200 	 * when er is NULL) as the case will be explicitly repaired anyway.
1201 	 */
1202 	if (er) {
1203 		*(int *)er = 0;
1204 		if (rval)
1205 			fmd_case_update(alp->al_case);
1206 	}
1207 }
1208 
1209 static void
1210 fmd_asru_replaced_containee(fmd_asru_link_t *alp, void *er)
1211 {
1212 	if (er && alp->al_asru_fmri && fmd_fmri_contains(er,
1213 	    alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
1214 	    FMD_ASRU_REPLACED))
1215 		fmd_case_update(alp->al_case);
1216 }
1217 
1218 void
1219 fmd_asru_replaced(fmd_asru_link_t *alp, void *er)
1220 {
1221 	int flags;
1222 	int rval;
1223 	int ps;
1224 
1225 	ps = fmd_asru_replacement_state(alp->al_event);
1226 	if (ps == FMD_OBJ_STATE_STILL_PRESENT)
1227 		return;
1228 
1229 	/*
1230 	 * mark this cache entry as replaced
1231 	 */
1232 	rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, FMD_ASRU_REPLACED);
1233 
1234 	/*
1235 	 * now check if all entries associated with this asru are replaced and
1236 	 * if so replace containees
1237 	 */
1238 	(void) pthread_mutex_lock(&alp->al_asru->asru_lock);
1239 	flags = alp->al_asru->asru_flags;
1240 	(void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
1241 	if (!(flags & FMD_ASRU_FAULTY))
1242 		fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_replaced_containee,
1243 		    alp->al_asru_fmri);
1244 
1245 	*(int *)er = 0;
1246 	if (rval)
1247 		fmd_case_update(alp->al_case);
1248 }
1249 
1250 static void
1251 fmd_asru_removed_containee(fmd_asru_link_t *alp, void *er)
1252 {
1253 	if (er && alp->al_asru_fmri && fmd_fmri_contains(er,
1254 	    alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
1255 	    0))
1256 		fmd_case_update(alp->al_case);
1257 }
1258 
1259 void
1260 fmd_asru_removed(fmd_asru_link_t *alp)
1261 {
1262 	int flags;
1263 	int rval;
1264 
1265 	/*
1266 	 * mark this cache entry as replacded
1267 	 */
1268 	rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, 0);
1269 
1270 	/*
1271 	 * now check if all entries associated with this asru are removed and
1272 	 * if so replace containees
1273 	 */
1274 	(void) pthread_mutex_lock(&alp->al_asru->asru_lock);
1275 	flags = alp->al_asru->asru_flags;
1276 	(void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
1277 	if (!(flags & FMD_ASRU_FAULTY))
1278 		fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_removed_containee,
1279 		    alp->al_asru_fmri);
1280 	if (rval)
1281 		fmd_case_update(alp->al_case);
1282 }
1283 
1284 static void
1285 fmd_asru_logevent(fmd_asru_link_t *alp)
1286 {
1287 	fmd_asru_t *ap = alp->al_asru;
1288 	boolean_t faulty = (alp->al_flags & FMD_ASRU_FAULTY) != 0;
1289 	boolean_t unusable = (alp->al_flags & FMD_ASRU_UNUSABLE) != 0;
1290 	boolean_t message = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0;
1291 	boolean_t repaired = (alp->al_reason == FMD_ASRU_REPAIRED);
1292 	boolean_t replaced = (alp->al_reason == FMD_ASRU_REPLACED);
1293 	boolean_t acquitted = (alp->al_reason == FMD_ASRU_ACQUITTED);
1294 
1295 	fmd_case_impl_t *cip;
1296 	fmd_event_t *e;
1297 	fmd_log_t *lp;
1298 	nvlist_t *nvl;
1299 	char *class;
1300 
1301 	ASSERT(MUTEX_HELD(&ap->asru_lock));
1302 	cip = (fmd_case_impl_t *)alp->al_case;
1303 	ASSERT(cip != NULL);
1304 
1305 	if ((lp = alp->al_log) == NULL)
1306 		lp = fmd_log_open(ap->asru_root, alp->al_uuid, FMD_LOG_ASRU);
1307 
1308 	if (lp == NULL)
1309 		return; /* can't log events if we can't open the log */
1310 
1311 	nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[faulty | (unusable << 1)],
1312 	    alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, faulty, unusable,
1313 	    message, alp->al_event, &cip->ci_tv, repaired, replaced, acquitted);
1314 
1315 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1316 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
1317 
1318 	fmd_event_hold(e);
1319 	fmd_log_append(lp, e, NULL);
1320 	fmd_event_rele(e);
1321 
1322 	/*
1323 	 * For now, we close the log file after every update to conserve file
1324 	 * descriptors and daemon overhead.  If this becomes a performance
1325 	 * issue this code can change to keep a fixed-size LRU cache of logs.
1326 	 */
1327 	fmd_log_rele(lp);
1328 	alp->al_log = NULL;
1329 }
1330 
1331 int
1332 fmd_asru_setflags(fmd_asru_link_t *alp, uint_t sflag)
1333 {
1334 	fmd_asru_t *ap = alp->al_asru;
1335 	uint_t nstate, ostate;
1336 
1337 	ASSERT(!(sflag & ~FMD_ASRU_STATE));
1338 	ASSERT(sflag != FMD_ASRU_STATE);
1339 
1340 	(void) pthread_mutex_lock(&ap->asru_lock);
1341 
1342 	ostate = alp->al_flags & FMD_ASRU_STATE;
1343 	alp->al_flags |= sflag;
1344 	nstate = alp->al_flags & FMD_ASRU_STATE;
1345 
1346 	if (nstate == ostate) {
1347 		(void) pthread_mutex_unlock(&ap->asru_lock);
1348 		return (0);
1349 	}
1350 
1351 	ap->asru_flags |= sflag;
1352 	TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid,
1353 	    _fmd_asru_snames[ostate], _fmd_asru_snames[nstate]));
1354 
1355 	fmd_asru_logevent(alp);
1356 
1357 	(void) pthread_cond_broadcast(&ap->asru_cv);
1358 	(void) pthread_mutex_unlock(&ap->asru_lock);
1359 	return (1);
1360 }
1361 
1362 int
1363 fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag, uint8_t reason)
1364 {
1365 	fmd_asru_t *ap = alp->al_asru;
1366 	fmd_asru_link_t *nalp;
1367 	uint_t nstate, ostate, flags = 0;
1368 
1369 	ASSERT(!(sflag & ~FMD_ASRU_STATE));
1370 	ASSERT(sflag != FMD_ASRU_STATE);
1371 
1372 	(void) pthread_mutex_lock(&ap->asru_lock);
1373 
1374 	ostate = alp->al_flags & FMD_ASRU_STATE;
1375 	alp->al_flags &= ~sflag;
1376 	nstate = alp->al_flags & FMD_ASRU_STATE;
1377 
1378 	if (nstate == ostate) {
1379 		if (reason > alp->al_reason) {
1380 			alp->al_reason = reason;
1381 			fmd_asru_logevent(alp);
1382 			(void) pthread_cond_broadcast(&ap->asru_cv);
1383 		}
1384 		(void) pthread_mutex_unlock(&ap->asru_lock);
1385 		return (0);
1386 	}
1387 	if (reason > alp->al_reason)
1388 		alp->al_reason = reason;
1389 
1390 	if (sflag == FMD_ASRU_UNUSABLE)
1391 		ap->asru_flags &= ~sflag;
1392 	else if (sflag == FMD_ASRU_FAULTY) {
1393 		/*
1394 		 * only clear the faulty bit if all links are clear
1395 		 */
1396 		for (nalp = fmd_list_next(&ap->asru_list); nalp != NULL;
1397 		    nalp = fmd_list_next(nalp))
1398 			flags |= nalp->al_flags;
1399 		if (!(flags & FMD_ASRU_FAULTY))
1400 			ap->asru_flags &= ~sflag;
1401 	}
1402 
1403 	TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid,
1404 	    _fmd_asru_snames[ostate], _fmd_asru_snames[nstate]));
1405 
1406 	fmd_asru_logevent(alp);
1407 
1408 	(void) pthread_cond_broadcast(&ap->asru_cv);
1409 	(void) pthread_mutex_unlock(&ap->asru_lock);
1410 
1411 	return (1);
1412 }
1413 
1414 /*
1415  * Report the current known state of the link entry (ie this particular fault
1416  * affecting this particular ASRU).
1417  */
1418 int
1419 fmd_asru_al_getstate(fmd_asru_link_t *alp)
1420 {
1421 	int us, st;
1422 	nvlist_t *asru;
1423 	int ps;
1424 
1425 	ps = fmd_asru_replacement_state(alp->al_event);
1426 	if (ps == FMD_OBJ_STATE_NOT_PRESENT)
1427 		return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE);
1428 	if (ps == FMD_OBJ_STATE_REPLACED) {
1429 		if (alp->al_reason < FMD_ASRU_REPLACED)
1430 			alp->al_reason = FMD_ASRU_REPLACED;
1431 		return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE);
1432 	}
1433 
1434 	st = (alp->al_flags & FMD_ASRU_STATE) | FMD_ASRU_PRESENT;
1435 	if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0) {
1436 		us = fmd_fmri_service_state(asru);
1437 		if (us == -1 || us == FMD_SERVICE_STATE_UNKNOWN) {
1438 			/* not supported by scheme - try fmd_fmri_unusable */
1439 			us = fmd_fmri_unusable(asru);
1440 		} else if (us == FMD_SERVICE_STATE_UNUSABLE) {
1441 			st |= FMD_ASRU_UNUSABLE;
1442 			return (st);
1443 		} else if (us == FMD_SERVICE_STATE_OK) {
1444 			st &= ~FMD_ASRU_UNUSABLE;
1445 			return (st);
1446 		} else if (us == FMD_SERVICE_STATE_DEGRADED) {
1447 			st &= ~FMD_ASRU_UNUSABLE;
1448 			st |= FMD_ASRU_DEGRADED;
1449 			return (st);
1450 		}
1451 	} else
1452 		us = (alp->al_flags & FMD_ASRU_UNUSABLE);
1453 	if (us > 0)
1454 		st |= FMD_ASRU_UNUSABLE;
1455 	else if (us == 0)
1456 		st &= ~FMD_ASRU_UNUSABLE;
1457 	return (st);
1458 }
1459 
1460 /*
1461  * Report the current known state of the ASRU by refreshing its unusable status
1462  * based upon the routines provided by the scheme module.  If the unusable bit
1463  * is different, we do *not* generate a state change here because that change
1464  * may be unrelated to fmd activities and therefore we have no case or event.
1465  * The absence of the transition is harmless as this function is only provided
1466  * for RPC observability and fmd's clients are only concerned with ASRU_FAULTY.
1467  */
1468 int
1469 fmd_asru_getstate(fmd_asru_t *ap)
1470 {
1471 	int us, st;
1472 
1473 	if (!(ap->asru_flags & FMD_ASRU_INTERNAL) &&
1474 	    (fmd_asru_fake_not_present >= FMD_OBJ_STATE_REPLACED ||
1475 	    fmd_fmri_present(ap->asru_fmri) <= 0))
1476 		return (0); /* do not report non-fmd non-present resources */
1477 
1478 	us = fmd_fmri_unusable(ap->asru_fmri);
1479 	st = ap->asru_flags & FMD_ASRU_STATE;
1480 
1481 	if (us > 0)
1482 		st |= FMD_ASRU_UNUSABLE;
1483 	else if (us == 0)
1484 		st &= ~FMD_ASRU_UNUSABLE;
1485 
1486 	return (st);
1487 }
1488