xref: /titanic_50/usr/src/cmd/fm/fmd/common/fmd_asru.c (revision ff3124eff995e6cd8ebd8c6543648e0670920034)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/fm/protocol.h>
30 #include <uuid/uuid.h>
31 
32 #include <dirent.h>
33 #include <limits.h>
34 #include <unistd.h>
35 #include <alloca.h>
36 #include <stddef.h>
37 #include <fm/libtopo.h>
38 
39 #include <fmd_alloc.h>
40 #include <fmd_string.h>
41 #include <fmd_error.h>
42 #include <fmd_subr.h>
43 #include <fmd_protocol.h>
44 #include <fmd_event.h>
45 #include <fmd_conf.h>
46 #include <fmd_fmri.h>
47 #include <fmd_dispq.h>
48 #include <fmd_case.h>
49 #include <fmd_module.h>
50 #include <fmd_asru.h>
51 
52 #include <fmd.h>
53 
54 static const char *const _fmd_asru_events[] = {
55 	FMD_RSRC_CLASS "asru.ok",		/* UNUSABLE=0 FAULTED=0 */
56 	FMD_RSRC_CLASS "asru.degraded",		/* UNUSABLE=0 FAULTED=1 */
57 	FMD_RSRC_CLASS "asru.unknown",		/* UNUSABLE=1 FAULTED=0 */
58 	FMD_RSRC_CLASS "asru.faulted"		/* UNUSABLE=1 FAULTED=1 */
59 };
60 
61 static const char *const _fmd_asru_snames[] = {
62 	"uf", "uF", "Uf", "UF"			/* same order as above */
63 };
64 
65 volatile uint32_t fmd_asru_fake_not_present = 0;
66 
67 static uint_t
68 fmd_asru_strhash(fmd_asru_hash_t *ahp, const char *val)
69 {
70 	return (topo_fmri_strhash(ahp->ah_topo->ft_hdl, val) % ahp->ah_hashlen);
71 }
72 
73 static boolean_t
74 fmd_asru_strcmp(fmd_asru_hash_t *ahp, const char *a, const char *b)
75 {
76 	return (topo_fmri_strcmp(ahp->ah_topo->ft_hdl, a, b));
77 }
78 
79 static fmd_asru_t *
80 fmd_asru_create(fmd_asru_hash_t *ahp, const char *uuid,
81     const char *name, nvlist_t *fmri)
82 {
83 	fmd_asru_t *ap = fmd_zalloc(sizeof (fmd_asru_t), FMD_SLEEP);
84 	char *s;
85 
86 	(void) pthread_mutex_init(&ap->asru_lock, NULL);
87 	(void) pthread_cond_init(&ap->asru_cv, NULL);
88 
89 	ap->asru_name = fmd_strdup(name, FMD_SLEEP);
90 	if (fmri)
91 		(void) nvlist_xdup(fmri, &ap->asru_fmri, &fmd.d_nva);
92 	ap->asru_root = fmd_strdup(ahp->ah_dirpath, FMD_SLEEP);
93 	ap->asru_uuid = fmd_strdup(uuid, FMD_SLEEP);
94 	ap->asru_uuidlen = ap->asru_uuid ? strlen(ap->asru_uuid) : 0;
95 	ap->asru_refs = 1;
96 
97 	if (fmri && nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &s) == 0 &&
98 	    strcmp(s, FM_FMRI_SCHEME_FMD) == 0)
99 		ap->asru_flags |= FMD_ASRU_INTERNAL;
100 
101 	return (ap);
102 }
103 
104 static void
105 fmd_asru_destroy(fmd_asru_t *ap)
106 {
107 	ASSERT(MUTEX_HELD(&ap->asru_lock));
108 	ASSERT(ap->asru_refs == 0);
109 
110 	nvlist_free(ap->asru_event);
111 	fmd_strfree(ap->asru_name);
112 	nvlist_free(ap->asru_fmri);
113 	fmd_strfree(ap->asru_root);
114 	fmd_free(ap->asru_uuid, ap->asru_uuidlen + 1);
115 	fmd_free(ap, sizeof (fmd_asru_t));
116 }
117 
118 static void
119 fmd_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_t *ap)
120 {
121 	uint_t h = fmd_asru_strhash(ahp, ap->asru_name);
122 
123 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
124 	ap->asru_next = ahp->ah_hash[h];
125 	ahp->ah_hash[h] = ap;
126 	ahp->ah_count++;
127 }
128 
129 static fmd_asru_t *
130 fmd_asru_hold(fmd_asru_t *ap)
131 {
132 	(void) pthread_mutex_lock(&ap->asru_lock);
133 	ap->asru_refs++;
134 	ASSERT(ap->asru_refs != 0);
135 	(void) pthread_mutex_unlock(&ap->asru_lock);
136 	return (ap);
137 }
138 
139 /*
140  * Lookup an asru in the hash by name and place a hold on it.  If the asru is
141  * not found, no entry is created and NULL is returned.  This internal function
142  * is for callers who have the ah_lock held and is used by lookup_name below.
143  */
144 fmd_asru_t *
145 fmd_asru_hash_lookup(fmd_asru_hash_t *ahp, const char *name)
146 {
147 	fmd_asru_t *ap;
148 	uint_t h;
149 
150 	ASSERT(RW_LOCK_HELD(&ahp->ah_lock));
151 	h = fmd_asru_strhash(ahp, name);
152 
153 	for (ap = ahp->ah_hash[h]; ap != NULL; ap = ap->asru_next) {
154 		if (fmd_asru_strcmp(ahp, ap->asru_name, name))
155 			break;
156 	}
157 
158 	if (ap != NULL)
159 		(void) fmd_asru_hold(ap);
160 	else
161 		(void) fmd_set_errno(EFMD_ASRU_NOENT);
162 
163 	return (ap);
164 }
165 
166 static int
167 fmd_asru_is_present(nvlist_t *event)
168 {
169 	int ps = -1;
170 	nvlist_t *asru, *fru, *rsrc;
171 
172 	/*
173 	 * Check if there is evidence that this object is no longer present.
174 	 * In general fmd_fmri_present() should be supported on resources and/or
175 	 * frus, as those are the things that are physically present or not
176 	 * present - an asru can be spread over a number of frus some of which
177 	 * are present and some not, so fmd_fmri_present() is not generally
178 	 * meaningful. However retain a check for asru first for compatibility.
179 	 * If we have checked all three and we still get -1 then nothing knows
180 	 * whether it's present or not, so err on the safe side and treat it
181 	 * as still present.
182 	 */
183 	if (fmd_asru_fake_not_present)
184 		ps = 0;
185 	if (ps == -1 && nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0)
186 		ps = fmd_fmri_present(asru);
187 	if (ps == -1 && nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE,
188 	    &rsrc) == 0)
189 		ps = fmd_fmri_present(rsrc);
190 	if (ps == -1 && nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0)
191 		ps = fmd_fmri_present(fru);
192 	if (ps == -1)
193 		ps = 1;
194 	return (ps);
195 }
196 
197 static void
198 fmd_asru_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
199     char *name)
200 {
201 	uint_t h = fmd_asru_strhash(ahp, name);
202 
203 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
204 	alp->al_asru_next = ahp->ah_asru_hash[h];
205 	ahp->ah_asru_hash[h] = alp;
206 	ahp->ah_al_count++;
207 }
208 
209 static void
210 fmd_asru_case_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
211     char *name)
212 {
213 	uint_t h = fmd_asru_strhash(ahp, name);
214 
215 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
216 	alp->al_case_next = ahp->ah_case_hash[h];
217 	ahp->ah_case_hash[h] = alp;
218 }
219 
220 static void
221 fmd_asru_fru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, char *name)
222 {
223 	uint_t h = fmd_asru_strhash(ahp, name);
224 
225 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
226 	alp->al_fru_next = ahp->ah_fru_hash[h];
227 	ahp->ah_fru_hash[h] = alp;
228 }
229 
230 static void
231 fmd_asru_label_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
232     char *name)
233 {
234 	uint_t h = fmd_asru_strhash(ahp, name);
235 
236 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
237 	alp->al_label_next = ahp->ah_label_hash[h];
238 	ahp->ah_label_hash[h] = alp;
239 }
240 
241 static void
242 fmd_asru_rsrc_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
243     char *name)
244 {
245 	uint_t h = fmd_asru_strhash(ahp, name);
246 
247 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
248 	alp->al_rsrc_next = ahp->ah_rsrc_hash[h];
249 	ahp->ah_rsrc_hash[h] = alp;
250 }
251 
252 static void
253 fmd_asru_al_destroy(fmd_asru_link_t *alp)
254 {
255 	ASSERT(alp->al_refs == 0);
256 	ASSERT(MUTEX_HELD(&alp->al_asru->asru_lock));
257 
258 	if (alp->al_log != NULL)
259 		fmd_log_rele(alp->al_log);
260 
261 	fmd_free(alp->al_uuid, alp->al_uuidlen + 1);
262 	nvlist_free(alp->al_event);
263 	fmd_strfree(alp->al_rsrc_name);
264 	fmd_strfree(alp->al_case_uuid);
265 	fmd_strfree(alp->al_fru_name);
266 	fmd_strfree(alp->al_asru_name);
267 	fmd_strfree(alp->al_label);
268 	nvlist_free(alp->al_asru_fmri);
269 	fmd_free(alp, sizeof (fmd_asru_link_t));
270 }
271 
272 static fmd_asru_link_t *
273 fmd_asru_al_hold(fmd_asru_link_t *alp)
274 {
275 	fmd_asru_t *ap = alp->al_asru;
276 
277 	(void) pthread_mutex_lock(&ap->asru_lock);
278 	ap->asru_refs++;
279 	alp->al_refs++;
280 	ASSERT(alp->al_refs != 0);
281 	(void) pthread_mutex_unlock(&ap->asru_lock);
282 	return (alp);
283 }
284 
285 static void fmd_asru_destroy(fmd_asru_t *ap);
286 
287 /*ARGSUSED*/
288 static void
289 fmd_asru_al_hash_release(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp)
290 {
291 	fmd_asru_t *ap = alp->al_asru;
292 
293 	(void) pthread_mutex_lock(&ap->asru_lock);
294 	ASSERT(alp->al_refs != 0);
295 	if (--alp->al_refs == 0)
296 		fmd_asru_al_destroy(alp);
297 	ASSERT(ap->asru_refs != 0);
298 	if (--ap->asru_refs == 0)
299 		fmd_asru_destroy(ap);
300 	else
301 		(void) pthread_mutex_unlock(&ap->asru_lock);
302 }
303 
304 static int
305 fmd_asru_get_namestr(nvlist_t *nvl, char **name, ssize_t *namelen)
306 {
307 	if ((*namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) == -1)
308 		return (EFMD_ASRU_FMRI);
309 	*name = fmd_alloc(*namelen + 1, FMD_SLEEP);
310 	if (fmd_fmri_nvl2str(nvl, *name, *namelen + 1) == -1) {
311 		if (*name != NULL)
312 			fmd_free(*name, *namelen + 1);
313 		return (EFMD_ASRU_FMRI);
314 	}
315 	return (0);
316 }
317 
318 static fmd_asru_link_t *
319 fmd_asru_al_create(fmd_asru_hash_t *ahp, nvlist_t *nvl, fmd_case_t *cp,
320     const char *al_uuid)
321 {
322 	nvlist_t *asru = NULL, *fru, *rsrc;
323 	int got_rsrc = 0, got_asru = 0, got_fru = 0;
324 	ssize_t fru_namelen, rsrc_namelen, asru_namelen;
325 	char *asru_name, *rsrc_name, *fru_name, *name, *label;
326 	fmd_asru_link_t *alp;
327 	fmd_asru_t *ap;
328 	boolean_t msg;
329 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
330 
331 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &asru) == 0 &&
332 	    fmd_asru_get_namestr(asru, &asru_name, &asru_namelen) == 0)
333 		got_asru = 1;
334 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &fru) == 0 &&
335 	    fmd_asru_get_namestr(fru, &fru_name, &fru_namelen) == 0)
336 		got_fru = 1;
337 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0 &&
338 	    fmd_asru_get_namestr(rsrc, &rsrc_name, &rsrc_namelen) == 0)
339 		got_rsrc = 1;
340 	if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, &label) != 0)
341 		label = "";
342 
343 	/*
344 	 * Grab the rwlock as a writer; Then create and insert the asru with
345 	 * ahp->ah_lock held and hash it in. We'll then drop the rwlock and
346 	 * proceed to initializing the asru.
347 	 */
348 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
349 
350 	/*
351 	 * Create and initialise the per-fault "link" structure.
352 	 */
353 	alp = fmd_zalloc(sizeof (fmd_asru_link_t), FMD_SLEEP);
354 	if (got_asru)
355 		(void) nvlist_xdup(asru, &alp->al_asru_fmri, &fmd.d_nva);
356 	alp->al_uuid = fmd_strdup(al_uuid, FMD_SLEEP);
357 	alp->al_uuidlen = strlen(alp->al_uuid);
358 	alp->al_refs = 1;
359 
360 	/*
361 	 * If this is the first fault for this asru, then create the per-asru
362 	 * structure and link into the hash.
363 	 */
364 	name = got_asru ? asru_name : "";
365 	if ((ap = fmd_asru_hash_lookup(ahp, name)) == NULL) {
366 		ap = fmd_asru_create(ahp, al_uuid, name, got_asru ? asru :
367 		    NULL);
368 		fmd_asru_hash_insert(ahp, ap);
369 	} else
370 		nvlist_free(ap->asru_event);
371 	(void) nvlist_xdup(nvl, &ap->asru_event, &fmd.d_nva);
372 
373 	/*
374 	 * Put the link structure on the list associated with the per-asru
375 	 * structure. Then put the link structure on the various hashes.
376 	 */
377 	fmd_list_append(&ap->asru_list, (fmd_list_t *)alp);
378 	alp->al_asru = ap;
379 	alp->al_asru_name = got_asru ? asru_name : fmd_strdup("", FMD_SLEEP);
380 	fmd_asru_asru_hash_insert(ahp, alp, alp->al_asru_name);
381 	alp->al_fru_name = got_fru ? fru_name : fmd_strdup("", FMD_SLEEP);
382 	fmd_asru_fru_hash_insert(ahp, alp, alp->al_fru_name);
383 	alp->al_rsrc_name = got_rsrc ? rsrc_name : fmd_strdup("", FMD_SLEEP);
384 	fmd_asru_rsrc_hash_insert(ahp, alp, alp->al_rsrc_name);
385 	alp->al_label = fmd_strdup(label, FMD_SLEEP);
386 	fmd_asru_label_hash_insert(ahp, alp, label);
387 	alp->al_case_uuid = fmd_strdup(cip->ci_uuid, FMD_SLEEP);
388 	fmd_asru_case_hash_insert(ahp, alp, cip->ci_uuid);
389 	(void) pthread_mutex_lock(&ap->asru_lock);
390 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
391 
392 	ap->asru_case = alp->al_case = cp;
393 	if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE, &msg) == 0 &&
394 	    msg == B_FALSE)
395 		ap->asru_flags |= FMD_ASRU_INVISIBLE;
396 	(void) nvlist_xdup(nvl, &alp->al_event, &fmd.d_nva);
397 	ap->asru_flags |= FMD_ASRU_VALID;
398 	(void) pthread_cond_broadcast(&ap->asru_cv);
399 	(void) pthread_mutex_unlock(&ap->asru_lock);
400 	return (alp);
401 }
402 
403 static void
404 fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp)
405 {
406 	nvlist_t *nvl = FMD_EVENT_NVL(ep);
407 	boolean_t f, u, ps, us;
408 	nvlist_t *flt, *flt_copy, *asru;
409 	char *case_uuid = NULL, *case_code = NULL;
410 	fmd_asru_t *ap;
411 	fmd_asru_link_t *alp;
412 	fmd_case_t *cp;
413 	int64_t *diag_time;
414 	uint_t nelem;
415 	topo_hdl_t *thp;
416 	char *class;
417 	nvlist_t *rsrc;
418 	int err;
419 
420 	/*
421 	 * Extract the most recent values of 'faulty' from the event log.
422 	 */
423 	if (nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY, &f) != 0) {
424 		fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: "
425 		    "invalid event log record\n", lp->log_name);
426 		ahp->ah_error = EFMD_ASRU_EVENT;
427 		return;
428 	}
429 	if (nvlist_lookup_nvlist(nvl, FM_RSRC_ASRU_EVENT, &flt) != 0) {
430 		fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: "
431 		    "invalid event log record\n", lp->log_name);
432 		ahp->ah_error = EFMD_ASRU_EVENT;
433 		return;
434 	}
435 	(void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_UUID, &case_uuid);
436 	(void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_CODE, &case_code);
437 
438 	/*
439 	 * Attempt to recreate the case in the CLOSED state.
440 	 * If the case is already present, fmd_case_recreate() will return it.
441 	 * If not, we'll create a new orphaned case. Either way,  we use the
442 	 * ASRU event to insert a suspect into the partially-restored case.
443 	 */
444 	fmd_module_lock(fmd.d_rmod);
445 	cp = fmd_case_recreate(fmd.d_rmod, NULL, FMD_CASE_CLOSED, case_uuid,
446 	    case_code);
447 	fmd_case_hold(cp);
448 	fmd_module_unlock(fmd.d_rmod);
449 	if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time,
450 	    &nelem) == 0 && nelem >= 2)
451 		fmd_case_settime(cp, diag_time[0], diag_time[1]);
452 	else
453 		fmd_case_settime(cp, lp->log_stat.st_ctime, 0);
454 	(void) nvlist_xdup(flt, &flt_copy, &fmd.d_nva);
455 
456 	/*
457 	 * For faults with a resource, re-evaluate the asru from the resource.
458 	 */
459 	thp = fmd_fmri_topo_hold(TOPO_VERSION);
460 	if (nvlist_lookup_string(flt_copy, FM_CLASS, &class) == 0 &&
461 	    strncmp(class, "fault", 5) == 0 &&
462 	    nvlist_lookup_nvlist(flt_copy, FM_FAULT_RESOURCE, &rsrc) == 0 &&
463 	    rsrc != NULL && topo_fmri_asru(thp, rsrc, &asru, &err) == 0) {
464 		(void) nvlist_remove(flt_copy, FM_FAULT_ASRU, DATA_TYPE_NVLIST);
465 		(void) nvlist_add_nvlist(flt_copy, FM_FAULT_ASRU, asru);
466 		nvlist_free(asru);
467 	}
468 	fmd_fmri_topo_rele(thp);
469 
470 	(void) nvlist_xdup(flt_copy, &flt, &fmd.d_nva);
471 
472 	fmd_case_recreate_suspect(cp, flt_copy);
473 
474 	/*
475 	 * Now create the resource cache entries.
476 	 */
477 	alp = fmd_asru_al_create(ahp, flt, cp, fmd_strbasename(lp->log_name));
478 	ap = alp->al_asru;
479 
480 	/*
481 	 * Check to see if the resource is still present in the system.  If
482 	 * so, then update the value of the unusable bit based on the current
483 	 * system configuration.  If not, then consider unusable.
484 	 */
485 	ps = fmd_asru_is_present(flt);
486 	if (ps) {
487 		if (nvlist_lookup_nvlist(flt, FM_FAULT_ASRU, &asru) != 0)
488 			u = FMD_B_FALSE;
489 		else if ((us = fmd_fmri_unusable(asru)) == -1) {
490 			fmd_error(EFMD_ASRU_FMRI, "failed to update "
491 			    "status of asru %s", lp->log_name);
492 			u = FMD_B_FALSE;
493 		} else
494 			u = us != 0;
495 
496 	} else
497 		u = FMD_B_TRUE;	/* not present; set unusable */
498 
499 	nvlist_free(flt);
500 
501 	ap->asru_flags |= FMD_ASRU_RECREATED;
502 	if (ps)
503 		ap->asru_flags |= FMD_ASRU_PRESENT;
504 	if (f) {
505 		alp->al_flags |= FMD_ASRU_FAULTY;
506 		ap->asru_flags |= FMD_ASRU_FAULTY;
507 	}
508 	if (u) {
509 		alp->al_flags |= FMD_ASRU_UNUSABLE;
510 		ap->asru_flags |= FMD_ASRU_UNUSABLE;
511 	}
512 
513 	TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", alp->al_uuid,
514 	    (void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE]));
515 }
516 
517 static void
518 fmd_asru_hash_discard(fmd_asru_hash_t *ahp, const char *uuid, int err)
519 {
520 	char src[PATH_MAX], dst[PATH_MAX];
521 
522 	(void) snprintf(src, PATH_MAX, "%s/%s", ahp->ah_dirpath, uuid);
523 	(void) snprintf(dst, PATH_MAX, "%s/%s-", ahp->ah_dirpath, uuid);
524 
525 	if (err != 0)
526 		err = rename(src, dst);
527 	else
528 		err = unlink(src);
529 
530 	if (err != 0 && errno != ENOENT)
531 		fmd_error(EFMD_ASRU_EVENT, "failed to rename log %s", src);
532 }
533 
534 /*
535  * Open a saved log file and restore it into the ASRU hash.  If we can't even
536  * open the log, rename the log file to <uuid>- to indicate it is corrupt.  If
537  * fmd_log_replay() fails, we either delete the file (if it has reached the
538  * upper limit on cache age) or rename it for debugging if it was corrupted.
539  */
540 static void
541 fmd_asru_hash_logopen(fmd_asru_hash_t *ahp, const char *uuid)
542 {
543 	fmd_log_t *lp = fmd_log_tryopen(ahp->ah_dirpath, uuid, FMD_LOG_ASRU);
544 	uint_t n;
545 
546 	if (lp == NULL) {
547 		fmd_asru_hash_discard(ahp, uuid, errno);
548 		return;
549 	}
550 
551 	ahp->ah_error = 0;
552 	n = ahp->ah_al_count;
553 
554 	fmd_log_replay(lp, (fmd_log_f *)fmd_asru_hash_recreate, ahp);
555 	fmd_log_rele(lp);
556 
557 	if (ahp->ah_al_count == n)
558 		fmd_asru_hash_discard(ahp, uuid, ahp->ah_error);
559 }
560 
561 void
562 fmd_asru_hash_refresh(fmd_asru_hash_t *ahp)
563 {
564 	struct dirent *dp;
565 	DIR *dirp;
566 	int zero;
567 
568 	if ((dirp = opendir(ahp->ah_dirpath)) == NULL) {
569 		fmd_error(EFMD_ASRU_NODIR,
570 		    "failed to open asru cache directory %s", ahp->ah_dirpath);
571 		return;
572 	}
573 
574 	(void) fmd_conf_getprop(fmd.d_conf, "rsrc.zero", &zero);
575 
576 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
577 
578 	while ((dp = readdir(dirp)) != NULL) {
579 		if (dp->d_name[0] == '.')
580 			continue; /* skip "." and ".." */
581 
582 		if (zero)
583 			fmd_asru_hash_discard(ahp, dp->d_name, 0);
584 		else if (!fmd_strmatch(dp->d_name, "*-"))
585 			fmd_asru_hash_logopen(ahp, dp->d_name);
586 	}
587 
588 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
589 	(void) closedir(dirp);
590 }
591 
592 /*
593  * If the resource is present and faulty but not unusable, replay the fault
594  * event that caused it be marked faulty.  This will cause the agent
595  * subscribing to this fault class to again disable the resource.
596  */
597 /*ARGSUSED*/
598 static void
599 fmd_asru_hash_replay_asru(fmd_asru_t *ap, void *data)
600 {
601 	fmd_event_t *e;
602 	nvlist_t *nvl;
603 	char *class;
604 
605 	if (ap->asru_event != NULL && (ap->asru_flags & (FMD_ASRU_STATE |
606 	    FMD_ASRU_PRESENT)) == (FMD_ASRU_FAULTY | FMD_ASRU_PRESENT)) {
607 
608 		fmd_dprintf(FMD_DBG_ASRU,
609 		    "replaying fault event for %s", ap->asru_name);
610 
611 		(void) nvlist_xdup(ap->asru_event, &nvl, &fmd.d_nva);
612 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
613 
614 		(void) nvlist_add_string(nvl, FMD_EVN_UUID,
615 		    ((fmd_case_impl_t *)ap->asru_case)->ci_uuid);
616 
617 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
618 		fmd_dispq_dispatch(fmd.d_disp, e, class);
619 	}
620 }
621 
622 void
623 fmd_asru_hash_replay(fmd_asru_hash_t *ahp)
624 {
625 	fmd_asru_hash_apply(ahp, fmd_asru_hash_replay_asru, NULL);
626 }
627 
628 /*
629  * Check if the resource is still present. If not, and if the rsrc.age time
630  * has expired, then do an implicit repair on the resource.
631  */
632 static void
633 fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *er)
634 {
635 	struct timeval tv;
636 	fmd_log_t *lp;
637 	hrtime_t hrt;
638 
639 	if (fmd_asru_is_present(alp->al_event))
640 		return;
641 	fmd_time_gettimeofday(&tv);
642 	lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, FMD_LOG_ASRU);
643 	hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime);
644 	fmd_log_rele(lp);
645 	if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime)
646 		fmd_asru_repair(alp, er);
647 }
648 
649 void
650 fmd_asru_clear_aged_rsrcs()
651 {
652 	int err;
653 
654 	fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, &err);
655 }
656 
657 fmd_asru_hash_t *
658 fmd_asru_hash_create(const char *root, const char *dir)
659 {
660 	fmd_asru_hash_t *ahp;
661 	char path[PATH_MAX];
662 
663 	ahp = fmd_alloc(sizeof (fmd_asru_hash_t), FMD_SLEEP);
664 	(void) pthread_rwlock_init(&ahp->ah_lock, NULL);
665 	ahp->ah_hashlen = fmd.d_str_buckets;
666 	ahp->ah_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, FMD_SLEEP);
667 	ahp->ah_asru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
668 	    FMD_SLEEP);
669 	ahp->ah_case_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
670 	    FMD_SLEEP);
671 	ahp->ah_fru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
672 	    FMD_SLEEP);
673 	ahp->ah_label_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
674 	    FMD_SLEEP);
675 	ahp->ah_rsrc_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
676 	    FMD_SLEEP);
677 	(void) snprintf(path, sizeof (path), "%s/%s", root, dir);
678 	ahp->ah_dirpath = fmd_strdup(path, FMD_SLEEP);
679 	(void) fmd_conf_getprop(fmd.d_conf, "rsrc.age", &ahp->ah_lifetime);
680 	(void) fmd_conf_getprop(fmd.d_conf, "fakenotpresent",
681 	    (uint32_t *)&fmd_asru_fake_not_present);
682 	ahp->ah_al_count = 0;
683 	ahp->ah_count = 0;
684 	ahp->ah_error = 0;
685 	ahp->ah_topo = fmd_topo_hold();
686 
687 	return (ahp);
688 }
689 
690 void
691 fmd_asru_hash_destroy(fmd_asru_hash_t *ahp)
692 {
693 	fmd_asru_link_t *alp, *np;
694 	uint_t i;
695 
696 	for (i = 0; i < ahp->ah_hashlen; i++) {
697 		for (alp = ahp->ah_case_hash[i]; alp != NULL; alp = np) {
698 			np = alp->al_case_next;
699 			alp->al_case_next = NULL;
700 			fmd_case_rele(alp->al_case);
701 			alp->al_case = NULL;
702 			fmd_asru_al_hash_release(ahp, alp);
703 		}
704 	}
705 
706 	fmd_strfree(ahp->ah_dirpath);
707 	fmd_free(ahp->ah_hash, sizeof (void *) * ahp->ah_hashlen);
708 	fmd_free(ahp->ah_asru_hash, sizeof (void *) * ahp->ah_hashlen);
709 	fmd_free(ahp->ah_case_hash, sizeof (void *) * ahp->ah_hashlen);
710 	fmd_free(ahp->ah_fru_hash, sizeof (void *) * ahp->ah_hashlen);
711 	fmd_free(ahp->ah_label_hash, sizeof (void *) * ahp->ah_hashlen);
712 	fmd_free(ahp->ah_rsrc_hash, sizeof (void *) * ahp->ah_hashlen);
713 	fmd_topo_rele(ahp->ah_topo);
714 	fmd_free(ahp, sizeof (fmd_asru_hash_t));
715 }
716 
717 /*
718  * Take a snapshot of the ASRU database by placing an additional hold on each
719  * member in an auxiliary array, and then call 'func' for each ASRU.
720  */
721 void
722 fmd_asru_hash_apply(fmd_asru_hash_t *ahp,
723     void (*func)(fmd_asru_t *, void *), void *arg)
724 {
725 	fmd_asru_t *ap, **aps, **app;
726 	uint_t apc, i;
727 
728 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
729 
730 	aps = app = fmd_alloc(ahp->ah_count * sizeof (fmd_asru_t *), FMD_SLEEP);
731 	apc = ahp->ah_count;
732 
733 	for (i = 0; i < ahp->ah_hashlen; i++) {
734 		for (ap = ahp->ah_hash[i]; ap != NULL; ap = ap->asru_next)
735 			*app++ = fmd_asru_hold(ap);
736 	}
737 
738 	ASSERT(app == aps + apc);
739 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
740 
741 	for (i = 0; i < apc; i++) {
742 		if (aps[i]->asru_fmri != NULL)
743 			func(aps[i], arg);
744 		fmd_asru_hash_release(ahp, aps[i]);
745 	}
746 
747 	fmd_free(aps, apc * sizeof (fmd_asru_t *));
748 }
749 
750 void
751 fmd_asru_al_hash_apply(fmd_asru_hash_t *ahp,
752     void (*func)(fmd_asru_link_t *, void *), void *arg)
753 {
754 	fmd_asru_link_t *alp, **alps, **alpp;
755 	uint_t alpc, i;
756 
757 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
758 
759 	alps = alpp = fmd_alloc(ahp->ah_al_count * sizeof (fmd_asru_link_t *),
760 	    FMD_SLEEP);
761 	alpc = ahp->ah_al_count;
762 
763 	for (i = 0; i < ahp->ah_hashlen; i++) {
764 		for (alp = ahp->ah_case_hash[i]; alp != NULL;
765 		    alp = alp->al_case_next)
766 			*alpp++ = fmd_asru_al_hold(alp);
767 	}
768 
769 	ASSERT(alpp == alps + alpc);
770 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
771 
772 	for (i = 0; i < alpc; i++) {
773 		func(alps[i], arg);
774 		fmd_asru_al_hash_release(ahp, alps[i]);
775 	}
776 
777 	fmd_free(alps, alpc * sizeof (fmd_asru_link_t *));
778 }
779 
780 static void
781 fmd_asru_do_hash_apply(fmd_asru_hash_t *ahp, char *name,
782     void (*func)(fmd_asru_link_t *, void *), void *arg,
783     fmd_asru_link_t **hash, size_t match_offset, size_t next_offset)
784 {
785 	fmd_asru_link_t *alp, **alps, **alpp;
786 	uint_t alpc = 0, i;
787 	uint_t h;
788 
789 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
790 
791 	h = fmd_asru_strhash(ahp, name);
792 
793 	for (alp = hash[h]; alp != NULL; alp =
794 	    /* LINTED pointer alignment */
795 	    FMD_ASRU_AL_HASH_NEXT(alp, next_offset))
796 		if (fmd_asru_strcmp(ahp,
797 		    /* LINTED pointer alignment */
798 		    FMD_ASRU_AL_HASH_NAME(alp, match_offset), name))
799 			alpc++;
800 
801 	alps = alpp = fmd_alloc(alpc * sizeof (fmd_asru_link_t *), FMD_SLEEP);
802 
803 	for (alp = hash[h]; alp != NULL; alp =
804 	    /* LINTED pointer alignment */
805 	    FMD_ASRU_AL_HASH_NEXT(alp, next_offset))
806 		if (fmd_asru_strcmp(ahp,
807 		    /* LINTED pointer alignment */
808 		    FMD_ASRU_AL_HASH_NAME(alp, match_offset), name))
809 			*alpp++ = fmd_asru_al_hold(alp);
810 
811 	ASSERT(alpp == alps + alpc);
812 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
813 
814 	for (i = 0; i < alpc; i++) {
815 		func(alps[i], arg);
816 		fmd_asru_al_hash_release(ahp, alps[i]);
817 	}
818 
819 	fmd_free(alps, alpc * sizeof (fmd_asru_link_t *));
820 }
821 
822 void
823 fmd_asru_hash_apply_by_asru(fmd_asru_hash_t *ahp, char *name,
824     void (*func)(fmd_asru_link_t *, void *), void *arg)
825 {
826 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_asru_hash,
827 	    offsetof(fmd_asru_link_t, al_asru_name),
828 	    offsetof(fmd_asru_link_t, al_asru_next));
829 }
830 
831 void
832 fmd_asru_hash_apply_by_case(fmd_asru_hash_t *ahp, fmd_case_t *cp,
833 	void (*func)(fmd_asru_link_t *, void *), void *arg)
834 {
835 	fmd_asru_do_hash_apply(ahp, ((fmd_case_impl_t *)cp)->ci_uuid, func, arg,
836 	    ahp->ah_case_hash, offsetof(fmd_asru_link_t, al_case_uuid),
837 	    offsetof(fmd_asru_link_t, al_case_next));
838 }
839 
840 void
841 fmd_asru_hash_apply_by_fru(fmd_asru_hash_t *ahp, char *name,
842     void (*func)(fmd_asru_link_t *, void *), void *arg)
843 {
844 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_fru_hash,
845 	    offsetof(fmd_asru_link_t, al_fru_name),
846 	    offsetof(fmd_asru_link_t, al_fru_next));
847 }
848 
849 void
850 fmd_asru_hash_apply_by_rsrc(fmd_asru_hash_t *ahp, char *name,
851     void (*func)(fmd_asru_link_t *, void *), void *arg)
852 {
853 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_rsrc_hash,
854 	    offsetof(fmd_asru_link_t, al_rsrc_name),
855 	    offsetof(fmd_asru_link_t, al_rsrc_next));
856 }
857 
858 void
859 fmd_asru_hash_apply_by_label(fmd_asru_hash_t *ahp, char *name,
860     void (*func)(fmd_asru_link_t *, void *), void *arg)
861 {
862 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_label_hash,
863 	    offsetof(fmd_asru_link_t, al_label),
864 	    offsetof(fmd_asru_link_t, al_label_next));
865 }
866 
867 /*
868  * Lookup an asru in the hash by name and place a hold on it.  If the asru is
869  * not found, no entry is created and NULL is returned.
870  */
871 fmd_asru_t *
872 fmd_asru_hash_lookup_name(fmd_asru_hash_t *ahp, const char *name)
873 {
874 	fmd_asru_t *ap;
875 
876 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
877 	ap = fmd_asru_hash_lookup(ahp, name);
878 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
879 
880 	return (ap);
881 }
882 
883 /*
884  * Lookup an asru in the hash and place a hold on it.
885  */
886 fmd_asru_t *
887 fmd_asru_hash_lookup_nvl(fmd_asru_hash_t *ahp, nvlist_t *fmri)
888 {
889 	fmd_asru_t *ap;
890 	char *name = NULL;
891 	ssize_t namelen;
892 
893 	if (fmd_asru_get_namestr(fmri, &name, &namelen) != 0)
894 		return (NULL);
895 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
896 	ap = fmd_asru_hash_lookup(ahp, name);
897 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
898 	fmd_free(name, namelen + 1);
899 	return (ap);
900 }
901 
902 /*
903  * Create a resource cache entry using the fault event "nvl" for one of the
904  * suspects from the case "cp".
905  *
906  * The fault event can have the following components :  FM_FAULT_ASRU,
907  * FM_FAULT_FRU, FM_FAULT_RESOURCE. These should be set by the Diagnosis Engine
908  * when calling fmd_nvl_create_fault(). In the general case, these are all
909  * optional and an entry will always be added into the cache even if one or all
910  * of these fields is missing.
911  *
912  * However, for hardware faults the recommended practice is that the fault
913  * event should always have the FM_FAULT_RESOURCE field present and that this
914  * should be represented in hc-scheme.
915  *
916  * Currently the DE should also add the FM_FAULT_ASRU and FM_FAULT_FRU fields
917  * where known, though at some future stage fmd might be able to fill these
918  * in automatically from the topology.
919  */
920 fmd_asru_link_t *
921 fmd_asru_hash_create_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, nvlist_t *nvl)
922 {
923 	char *parsed_uuid;
924 	uuid_t uuid;
925 	int uuidlen;
926 	fmd_asru_link_t *alp;
927 
928 	/*
929 	 * Generate a UUID for the ASRU.  libuuid cleverly gives us no
930 	 * interface for specifying or learning the buffer size.  Sigh.
931 	 * The spec says 36 bytes but we use a tunable just to be safe.
932 	 */
933 	(void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &uuidlen);
934 	parsed_uuid = fmd_zalloc(uuidlen + 1, FMD_SLEEP);
935 	uuid_generate(uuid);
936 	uuid_unparse(uuid, parsed_uuid);
937 
938 	/*
939 	 * Now create the resource cache entries.
940 	 */
941 	fmd_case_hold_locked(cp);
942 	alp = fmd_asru_al_create(ahp, nvl, cp, parsed_uuid);
943 	TRACE((FMD_DBG_ASRU, "asru %s created as %p",
944 	    alp->al_uuid, (void *)alp->al_asru));
945 
946 	fmd_free(parsed_uuid, uuidlen + 1);
947 	return (alp);
948 
949 }
950 
951 /*
952  * Release the reference count on an asru obtained using fmd_asru_hash_lookup.
953  * We take 'ahp' for symmetry and in case we need to use it in future work.
954  */
955 /*ARGSUSED*/
956 void
957 fmd_asru_hash_release(fmd_asru_hash_t *ahp, fmd_asru_t *ap)
958 {
959 	(void) pthread_mutex_lock(&ap->asru_lock);
960 
961 	ASSERT(ap->asru_refs != 0);
962 	if (--ap->asru_refs == 0)
963 		fmd_asru_destroy(ap);
964 	else
965 		(void) pthread_mutex_unlock(&ap->asru_lock);
966 }
967 
968 static void
969 fmd_asru_do_delete_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp,
970     fmd_asru_link_t **hash, size_t next_offset, char *name)
971 {
972 	uint_t h;
973 	fmd_asru_link_t *alp, **pp, *alpnext, **alpnextp;
974 
975 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
976 	h = fmd_asru_strhash(ahp, name);
977 	pp = &hash[h];
978 	for (alp = *pp; alp != NULL; alp = alpnext) {
979 		/* LINTED pointer alignment */
980 		alpnextp = FMD_ASRU_AL_HASH_NEXTP(alp, next_offset);
981 		alpnext = *alpnextp;
982 		if (alp->al_case == cp) {
983 			*pp = *alpnextp;
984 			*alpnextp = NULL;
985 		} else
986 			pp = alpnextp;
987 	}
988 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
989 }
990 
991 static void
992 fmd_asru_do_hash_delete(fmd_asru_hash_t *ahp, fmd_case_susp_t *cis,
993     fmd_case_t *cp, fmd_asru_link_t **hash, size_t next_offset, char *nvname)
994 {
995 	nvlist_t *nvl;
996 	char *name = NULL;
997 	ssize_t namelen;
998 
999 	if (nvlist_lookup_nvlist(cis->cis_nvl, nvname, &nvl) == 0 &&
1000 	    (namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) != -1 &&
1001 	    (name = fmd_alloc(namelen + 1, FMD_SLEEP)) != NULL) {
1002 		if (fmd_fmri_nvl2str(nvl, name, namelen + 1) != -1)
1003 			fmd_asru_do_delete_entry(ahp, cp, hash, next_offset,
1004 			    name);
1005 		fmd_free(name, namelen + 1);
1006 	} else
1007 		fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, "");
1008 }
1009 
1010 void
1011 fmd_asru_hash_delete_case(fmd_asru_hash_t *ahp, fmd_case_t *cp)
1012 {
1013 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1014 	fmd_case_susp_t *cis;
1015 	fmd_asru_link_t *alp, **plp, *alpnext;
1016 	fmd_asru_t *ap;
1017 	char path[PATH_MAX];
1018 	char *label;
1019 	uint_t h;
1020 
1021 	/*
1022 	 * first delete hash entries for each suspect
1023 	 */
1024 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
1025 		fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_fru_hash,
1026 		    offsetof(fmd_asru_link_t, al_fru_next), FM_FAULT_FRU);
1027 		fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_rsrc_hash,
1028 		    offsetof(fmd_asru_link_t, al_rsrc_next), FM_FAULT_RESOURCE);
1029 		if (nvlist_lookup_string(cis->cis_nvl, FM_FAULT_LOCATION,
1030 		    &label) != 0)
1031 			label = "";
1032 		fmd_asru_do_delete_entry(ahp, cp, ahp->ah_label_hash,
1033 		    offsetof(fmd_asru_link_t, al_label_next), label);
1034 		fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_asru_hash,
1035 		    offsetof(fmd_asru_link_t, al_asru_next), FM_FAULT_ASRU);
1036 	}
1037 
1038 	/*
1039 	 * then delete associated case hash entries
1040 	 */
1041 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
1042 	h = fmd_asru_strhash(ahp, cip->ci_uuid);
1043 	plp = &ahp->ah_case_hash[h];
1044 	for (alp = *plp; alp != NULL; alp = alpnext) {
1045 		alpnext = alp->al_case_next;
1046 		if (alp->al_case == cp) {
1047 			*plp = alp->al_case_next;
1048 			alp->al_case_next = NULL;
1049 			ASSERT(ahp->ah_al_count != 0);
1050 			ahp->ah_al_count--;
1051 
1052 			/*
1053 			 * decrement case ref.
1054 			 */
1055 			fmd_case_rele_locked(cp);
1056 			alp->al_case = NULL;
1057 
1058 			/*
1059 			 * If we found a matching ASRU, unlink its log file and
1060 			 * then release the hash entry. Note that it may still
1061 			 * be referenced if another thread is manipulating it;
1062 			 * this is ok because once we unlink, the log file will
1063 			 * not be restored, and the log data will be freed when
1064 			 * all of the referencing threads release their
1065 			 * respective references.
1066 			 */
1067 			(void) snprintf(path, sizeof (path), "%s/%s",
1068 			    ahp->ah_dirpath, alp->al_uuid);
1069 			if (unlink(path) != 0)
1070 				fmd_error(EFMD_ASRU_UNLINK,
1071 				    "failed to unlink asru %s", path);
1072 
1073 			/*
1074 			 * Now unlink from the global per-resource cache
1075 			 * and if this is the last link then remove that from
1076 			 * it's own hash too.
1077 			 */
1078 			ap = alp->al_asru;
1079 			(void) pthread_mutex_lock(&ap->asru_lock);
1080 			fmd_list_delete(&ap->asru_list, alp);
1081 			if (ap->asru_list.l_next == NULL) {
1082 				uint_t h;
1083 				fmd_asru_t *ap2, **pp;
1084 				fmd_asru_t *apnext, **apnextp;
1085 
1086 				ASSERT(ahp->ah_count != 0);
1087 				ahp->ah_count--;
1088 				h = fmd_asru_strhash(ahp, ap->asru_name);
1089 				pp = &ahp->ah_hash[h];
1090 				for (ap2 = *pp; ap2 != NULL; ap2 = apnext) {
1091 					apnextp = &ap2->asru_next;
1092 					apnext = *apnextp;
1093 					if (ap2 == ap) {
1094 						*pp = *apnextp;
1095 						*apnextp = NULL;
1096 					} else
1097 						pp = apnextp;
1098 				}
1099 			}
1100 			(void) pthread_mutex_unlock(&ap->asru_lock);
1101 			fmd_asru_al_hash_release(ahp, alp);
1102 		} else
1103 			plp = &alp->al_case_next;
1104 	}
1105 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
1106 }
1107 
1108 static void
1109 fmd_asru_repair_containee(fmd_asru_link_t *alp, void *er)
1110 {
1111 	if (er && alp->al_asru_fmri && fmd_fmri_contains(er,
1112 	    alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY))
1113 		fmd_case_update(alp->al_case);
1114 }
1115 
1116 void
1117 fmd_asru_repair(fmd_asru_link_t *alp, void *er)
1118 {
1119 	int flags;
1120 	int rval;
1121 
1122 	/*
1123 	 * repair this asru cache entry
1124 	 */
1125 	rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY);
1126 
1127 	/*
1128 	 * now check if all entries associated with this asru are repaired and
1129 	 * if so repair containees
1130 	 */
1131 	(void) pthread_mutex_lock(&alp->al_asru->asru_lock);
1132 	flags = alp->al_asru->asru_flags;
1133 	(void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
1134 	if (!(flags & FMD_ASRU_FAULTY))
1135 		fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_containee,
1136 		    alp->al_asru_fmri);
1137 
1138 	/*
1139 	 * if called from fmd_adm_repair() and we really did clear the bit then
1140 	 * we need to do a case update to see if the associated case can be
1141 	 * repaired. No need to do this if called from fmd_case_repair() (ie
1142 	 * when er is NULL) as the case will be explicitly repaired anyway.
1143 	 */
1144 	if (er) {
1145 		*(int *)er = 0;
1146 		if (rval)
1147 			fmd_case_update(alp->al_case);
1148 	}
1149 }
1150 
1151 static void
1152 fmd_asru_logevent(fmd_asru_link_t *alp)
1153 {
1154 	fmd_asru_t *ap = alp->al_asru;
1155 	boolean_t f = (ap->asru_flags & FMD_ASRU_FAULTY) != 0;
1156 	boolean_t u = (ap->asru_flags & FMD_ASRU_UNUSABLE) != 0;
1157 	boolean_t m = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0;
1158 
1159 	fmd_case_impl_t *cip;
1160 	fmd_event_t *e;
1161 	fmd_log_t *lp;
1162 	nvlist_t *nvl;
1163 	char *class;
1164 
1165 	ASSERT(MUTEX_HELD(&ap->asru_lock));
1166 	cip = (fmd_case_impl_t *)alp->al_case;
1167 	ASSERT(cip != NULL);
1168 
1169 	if ((lp = alp->al_log) == NULL)
1170 		lp = fmd_log_open(ap->asru_root, alp->al_uuid, FMD_LOG_ASRU);
1171 
1172 	if (lp == NULL)
1173 		return; /* can't log events if we can't open the log */
1174 
1175 	nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[f | (u << 1)],
1176 	    alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, f, u, m,
1177 	    alp->al_event, &cip->ci_tv);
1178 
1179 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1180 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
1181 
1182 	fmd_event_hold(e);
1183 	fmd_log_append(lp, e, NULL);
1184 	fmd_event_rele(e);
1185 
1186 	/*
1187 	 * For now, we close the log file after every update to conserve file
1188 	 * descriptors and daemon overhead.  If this becomes a performance
1189 	 * issue this code can change to keep a fixed-size LRU cache of logs.
1190 	 */
1191 	fmd_log_rele(lp);
1192 	alp->al_log = NULL;
1193 }
1194 
1195 int
1196 fmd_asru_setflags(fmd_asru_link_t *alp, uint_t sflag)
1197 {
1198 	fmd_asru_t *ap = alp->al_asru;
1199 	uint_t nstate, ostate;
1200 
1201 	ASSERT(!(sflag & ~FMD_ASRU_STATE));
1202 	ASSERT(sflag != FMD_ASRU_STATE);
1203 
1204 	(void) pthread_mutex_lock(&ap->asru_lock);
1205 
1206 	ostate = alp->al_flags & FMD_ASRU_STATE;
1207 	alp->al_flags |= sflag;
1208 	nstate = alp->al_flags & FMD_ASRU_STATE;
1209 
1210 	if (nstate == ostate) {
1211 		(void) pthread_mutex_unlock(&ap->asru_lock);
1212 		return (0);
1213 	}
1214 
1215 	ap->asru_flags |= sflag;
1216 	TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid,
1217 	    _fmd_asru_snames[ostate], _fmd_asru_snames[nstate]));
1218 
1219 	fmd_asru_logevent(alp);
1220 
1221 	(void) pthread_cond_broadcast(&ap->asru_cv);
1222 	(void) pthread_mutex_unlock(&ap->asru_lock);
1223 	return (1);
1224 }
1225 
1226 int
1227 fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag)
1228 {
1229 	fmd_asru_t *ap = alp->al_asru;
1230 	fmd_asru_link_t *nalp;
1231 	uint_t nstate, ostate, flags = 0;
1232 
1233 	ASSERT(!(sflag & ~FMD_ASRU_STATE));
1234 	ASSERT(sflag != FMD_ASRU_STATE);
1235 
1236 	(void) pthread_mutex_lock(&ap->asru_lock);
1237 
1238 	ostate = alp->al_flags & FMD_ASRU_STATE;
1239 	alp->al_flags &= ~sflag;
1240 	nstate = alp->al_flags & FMD_ASRU_STATE;
1241 
1242 	if (nstate == ostate) {
1243 		(void) pthread_mutex_unlock(&ap->asru_lock);
1244 		return (0);
1245 	}
1246 
1247 	if (sflag == FMD_ASRU_UNUSABLE)
1248 		ap->asru_flags &= ~sflag;
1249 	else if (sflag == FMD_ASRU_FAULTY) {
1250 		/*
1251 		 * only clear the faulty bit if all links are clear
1252 		 */
1253 		for (nalp = fmd_list_next(&ap->asru_list); nalp != NULL;
1254 		    nalp = fmd_list_next(nalp))
1255 			flags |= nalp->al_flags;
1256 		if (!(flags & FMD_ASRU_FAULTY))
1257 			ap->asru_flags &= ~sflag;
1258 	}
1259 
1260 	TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid,
1261 	    _fmd_asru_snames[ostate], _fmd_asru_snames[nstate]));
1262 
1263 	fmd_asru_logevent(alp);
1264 
1265 	(void) pthread_cond_broadcast(&ap->asru_cv);
1266 	(void) pthread_mutex_unlock(&ap->asru_lock);
1267 
1268 	return (1);
1269 }
1270 
1271 /*
1272  * Report the current known state of the link entry (ie this particular fault
1273  * affecting this particular ASRU).
1274  */
1275 int
1276 fmd_asru_al_getstate(fmd_asru_link_t *alp)
1277 {
1278 	int us, st;
1279 	nvlist_t *asru;
1280 
1281 	if (fmd_asru_is_present(alp->al_event) == 0)
1282 		return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE);
1283 
1284 	if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0)
1285 		us = fmd_fmri_unusable(asru);
1286 	else
1287 		us = (alp->al_flags & FMD_ASRU_UNUSABLE);
1288 	st = (alp->al_flags & FMD_ASRU_STATE) | FMD_ASRU_PRESENT;
1289 	if (us > 0)
1290 		st |= FMD_ASRU_UNUSABLE;
1291 	else if (us == 0)
1292 		st &= ~FMD_ASRU_UNUSABLE;
1293 	return (st);
1294 }
1295 
1296 /*
1297  * Report the current known state of the ASRU by refreshing its unusable status
1298  * based upon the routines provided by the scheme module.  If the unusable bit
1299  * is different, we do *not* generate a state change here because that change
1300  * may be unrelated to fmd activities and therefore we have no case or event.
1301  * The absence of the transition is harmless as this function is only provided
1302  * for RPC observability and fmd's clients are only concerned with ASRU_FAULTY.
1303  */
1304 int
1305 fmd_asru_getstate(fmd_asru_t *ap)
1306 {
1307 	int us, st;
1308 
1309 	if (!(ap->asru_flags & FMD_ASRU_INTERNAL) &&
1310 	    (fmd_asru_fake_not_present || fmd_fmri_present(ap->asru_fmri) <= 0))
1311 		return (0); /* do not report non-fmd non-present resources */
1312 
1313 	us = fmd_fmri_unusable(ap->asru_fmri);
1314 	st = ap->asru_flags & FMD_ASRU_STATE;
1315 
1316 	if (us > 0)
1317 		st |= FMD_ASRU_UNUSABLE;
1318 	else if (us == 0)
1319 		st &= ~FMD_ASRU_UNUSABLE;
1320 
1321 	return (st);
1322 }
1323