xref: /illumos-gate/usr/src/cmd/fm/fmd/common/fmd_asru.c (revision dcafa541382944b24abd3a40c357b47e04f314e2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/fm/protocol.h>
30 #include <uuid/uuid.h>
31 
32 #include <dirent.h>
33 #include <limits.h>
34 #include <unistd.h>
35 #include <alloca.h>
36 #include <stddef.h>
37 #include <fm/libtopo.h>
38 
39 #include <fmd_alloc.h>
40 #include <fmd_string.h>
41 #include <fmd_error.h>
42 #include <fmd_subr.h>
43 #include <fmd_protocol.h>
44 #include <fmd_event.h>
45 #include <fmd_conf.h>
46 #include <fmd_fmri.h>
47 #include <fmd_dispq.h>
48 #include <fmd_case.h>
49 #include <fmd_module.h>
50 #include <fmd_asru.h>
51 
52 #include <fmd.h>
53 
/*
 * Resource event class names, one per ASRU state.  The trailing comment on
 * each entry gives the UNUSABLE/FAULTED bit combination the slot stands for.
 */
static const char *const _fmd_asru_events[] = {
	FMD_RSRC_CLASS "asru.ok",		/* UNUSABLE=0 FAULTED=0 */
	FMD_RSRC_CLASS "asru.degraded",		/* UNUSABLE=0 FAULTED=1 */
	FMD_RSRC_CLASS "asru.unknown",		/* UNUSABLE=1 FAULTED=0 */
	FMD_RSRC_CLASS "asru.faulted"		/* UNUSABLE=1 FAULTED=1 */
};

/*
 * Short state names in the same order as _fmd_asru_events[].  Indexed by
 * (asru_flags & FMD_ASRU_STATE) when tracing in fmd_asru_hash_recreate().
 */
static const char *const _fmd_asru_snames[] = {
	"uf", "uF", "Uf", "UF"			/* same order as above */
};

/*
 * When non-zero, fmd_asru_is_present() reports every resource as not present.
 * The value is loaded from the "fakenotpresent" configuration property in
 * fmd_asru_hash_create().
 */
volatile uint32_t fmd_asru_fake_not_present = 0;
66 
/*
 * Map the FMRI string 'val' to a bucket index for the tables in 'ahp': the
 * libtopo FMRI string hash reduced modulo the number of buckets (ah_hashlen).
 */
static uint_t
fmd_asru_strhash(fmd_asru_hash_t *ahp, const char *val)
{
	return (topo_fmri_strhash(ahp->ah_topo->ft_hdl, val) % ahp->ah_hashlen);
}
72 
73 static boolean_t
74 fmd_asru_strcmp(fmd_asru_hash_t *ahp, const char *a, const char *b)
75 {
76 	return (topo_fmri_strcmp(ahp->ah_topo->ft_hdl, a, b));
77 }
78 
79 static fmd_asru_t *
80 fmd_asru_create(fmd_asru_hash_t *ahp, const char *uuid,
81     const char *name, nvlist_t *fmri)
82 {
83 	fmd_asru_t *ap = fmd_zalloc(sizeof (fmd_asru_t), FMD_SLEEP);
84 	char *s;
85 
86 	(void) pthread_mutex_init(&ap->asru_lock, NULL);
87 	(void) pthread_cond_init(&ap->asru_cv, NULL);
88 
89 	ap->asru_name = fmd_strdup(name, FMD_SLEEP);
90 	if (fmri)
91 		(void) nvlist_xdup(fmri, &ap->asru_fmri, &fmd.d_nva);
92 	ap->asru_root = fmd_strdup(ahp->ah_dirpath, FMD_SLEEP);
93 	ap->asru_uuid = fmd_strdup(uuid, FMD_SLEEP);
94 	ap->asru_uuidlen = ap->asru_uuid ? strlen(ap->asru_uuid) : 0;
95 	ap->asru_refs = 1;
96 
97 	if (fmri && nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &s) == 0 &&
98 	    strcmp(s, FM_FMRI_SCHEME_FMD) == 0)
99 		ap->asru_flags |= FMD_ASRU_INTERNAL;
100 
101 	return (ap);
102 }
103 
104 static void
105 fmd_asru_destroy(fmd_asru_t *ap)
106 {
107 	ASSERT(MUTEX_HELD(&ap->asru_lock));
108 	ASSERT(ap->asru_refs == 0);
109 
110 	nvlist_free(ap->asru_event);
111 	fmd_strfree(ap->asru_name);
112 	nvlist_free(ap->asru_fmri);
113 	fmd_strfree(ap->asru_root);
114 	fmd_free(ap->asru_uuid, ap->asru_uuidlen + 1);
115 	fmd_free(ap, sizeof (fmd_asru_t));
116 }
117 
118 static void
119 fmd_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_t *ap)
120 {
121 	uint_t h = fmd_asru_strhash(ahp, ap->asru_name);
122 
123 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
124 	ap->asru_next = ahp->ah_hash[h];
125 	ahp->ah_hash[h] = ap;
126 	ahp->ah_count++;
127 }
128 
129 static fmd_asru_t *
130 fmd_asru_hold(fmd_asru_t *ap)
131 {
132 	(void) pthread_mutex_lock(&ap->asru_lock);
133 	ap->asru_refs++;
134 	ASSERT(ap->asru_refs != 0);
135 	(void) pthread_mutex_unlock(&ap->asru_lock);
136 	return (ap);
137 }
138 
139 /*
140  * Lookup an asru in the hash by name and place a hold on it.  If the asru is
141  * not found, no entry is created and NULL is returned.  This internal function
142  * is for callers who have the ah_lock held and is used by lookup_name below.
143  */
144 fmd_asru_t *
145 fmd_asru_hash_lookup(fmd_asru_hash_t *ahp, const char *name)
146 {
147 	fmd_asru_t *ap;
148 	uint_t h;
149 
150 	ASSERT(RW_LOCK_HELD(&ahp->ah_lock));
151 	h = fmd_asru_strhash(ahp, name);
152 
153 	for (ap = ahp->ah_hash[h]; ap != NULL; ap = ap->asru_next) {
154 		if (fmd_asru_strcmp(ahp, ap->asru_name, name))
155 			break;
156 	}
157 
158 	if (ap != NULL)
159 		(void) fmd_asru_hold(ap);
160 	else
161 		(void) fmd_set_errno(EFMD_ASRU_NOENT);
162 
163 	return (ap);
164 }
165 
166 static int
167 fmd_asru_is_present(nvlist_t *event)
168 {
169 	int ps = -1;
170 	nvlist_t *asru, *fru, *rsrc;
171 
172 	/*
173 	 * Check if there is evidence that this object is no longer present.
174 	 * In general fmd_fmri_present() should be supported on resources and/or
175 	 * frus, as those are the things that are physically present or not
176 	 * present - an asru can be spread over a number of frus some of which
177 	 * are present and some not, so fmd_fmri_present() is not generally
178 	 * meaningful. However retain a check for asru first for compatibility.
179 	 * If we have checked all three and we still get -1 then nothing knows
180 	 * whether it's present or not, so err on the safe side and treat it
181 	 * as still present.
182 	 */
183 	if (fmd_asru_fake_not_present)
184 		ps = 0;
185 	if (ps == -1 && nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0)
186 		ps = fmd_fmri_present(asru);
187 	if (ps == -1 && nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE,
188 	    &rsrc) == 0)
189 		ps = fmd_fmri_present(rsrc);
190 	if (ps == -1 && nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0)
191 		ps = fmd_fmri_present(fru);
192 	if (ps == -1)
193 		ps = 1;
194 	return (ps);
195 }
196 
197 static void
198 fmd_asru_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
199     char *name)
200 {
201 	uint_t h = fmd_asru_strhash(ahp, name);
202 
203 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
204 	alp->al_asru_next = ahp->ah_asru_hash[h];
205 	ahp->ah_asru_hash[h] = alp;
206 	ahp->ah_al_count++;
207 }
208 
209 static void
210 fmd_asru_case_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
211     char *name)
212 {
213 	uint_t h = fmd_asru_strhash(ahp, name);
214 
215 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
216 	alp->al_case_next = ahp->ah_case_hash[h];
217 	ahp->ah_case_hash[h] = alp;
218 }
219 
220 static void
221 fmd_asru_fru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, char *name)
222 {
223 	uint_t h = fmd_asru_strhash(ahp, name);
224 
225 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
226 	alp->al_fru_next = ahp->ah_fru_hash[h];
227 	ahp->ah_fru_hash[h] = alp;
228 }
229 
230 static void
231 fmd_asru_label_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
232     char *name)
233 {
234 	uint_t h = fmd_asru_strhash(ahp, name);
235 
236 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
237 	alp->al_label_next = ahp->ah_label_hash[h];
238 	ahp->ah_label_hash[h] = alp;
239 }
240 
241 static void
242 fmd_asru_rsrc_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
243     char *name)
244 {
245 	uint_t h = fmd_asru_strhash(ahp, name);
246 
247 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
248 	alp->al_rsrc_next = ahp->ah_rsrc_hash[h];
249 	ahp->ah_rsrc_hash[h] = alp;
250 }
251 
252 static void
253 fmd_asru_al_destroy(fmd_asru_link_t *alp)
254 {
255 	ASSERT(alp->al_refs == 0);
256 	ASSERT(MUTEX_HELD(&alp->al_asru->asru_lock));
257 
258 	if (alp->al_log != NULL)
259 		fmd_log_rele(alp->al_log);
260 
261 	fmd_free(alp->al_uuid, alp->al_uuidlen + 1);
262 	nvlist_free(alp->al_event);
263 	fmd_strfree(alp->al_rsrc_name);
264 	fmd_strfree(alp->al_case_uuid);
265 	fmd_strfree(alp->al_fru_name);
266 	fmd_strfree(alp->al_asru_name);
267 	fmd_strfree(alp->al_label);
268 	nvlist_free(alp->al_asru_fmri);
269 	fmd_free(alp, sizeof (fmd_asru_link_t));
270 }
271 
272 static fmd_asru_link_t *
273 fmd_asru_al_hold(fmd_asru_link_t *alp)
274 {
275 	fmd_asru_t *ap = alp->al_asru;
276 
277 	(void) pthread_mutex_lock(&ap->asru_lock);
278 	ap->asru_refs++;
279 	alp->al_refs++;
280 	ASSERT(alp->al_refs != 0);
281 	(void) pthread_mutex_unlock(&ap->asru_lock);
282 	return (alp);
283 }
284 
285 static void fmd_asru_destroy(fmd_asru_t *ap);
286 
287 /*ARGSUSED*/
288 static void
289 fmd_asru_al_hash_release(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp)
290 {
291 	fmd_asru_t *ap = alp->al_asru;
292 
293 	(void) pthread_mutex_lock(&ap->asru_lock);
294 	ASSERT(alp->al_refs != 0);
295 	if (--alp->al_refs == 0)
296 		fmd_asru_al_destroy(alp);
297 	ASSERT(ap->asru_refs != 0);
298 	if (--ap->asru_refs == 0)
299 		fmd_asru_destroy(ap);
300 	else
301 		(void) pthread_mutex_unlock(&ap->asru_lock);
302 }
303 
304 static int
305 fmd_asru_get_namestr(nvlist_t *nvl, char **name, ssize_t *namelen)
306 {
307 	if ((*namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) == -1)
308 		return (EFMD_ASRU_FMRI);
309 	*name = fmd_alloc(*namelen + 1, FMD_SLEEP);
310 	if (fmd_fmri_nvl2str(nvl, *name, *namelen + 1) == -1) {
311 		if (*name != NULL)
312 			fmd_free(*name, *namelen + 1);
313 		return (EFMD_ASRU_FMRI);
314 	}
315 	return (0);
316 }
317 
/*
 * Create the per-fault "link" structure for fault event 'nvl' belonging to
 * case 'cp', attach it to the per-asru structure for the fault's asru
 * (creating that structure if this is its first fault), and enter the link
 * into the asru, fru, resource, label and case hashes.
 *
 * 'al_uuid' becomes the link's uuid and, when a new asru is created, the
 * asru's uuid as well.  Any of the ASRU, FRU and RESOURCE members may be
 * missing from 'nvl' (or fail to convert to a string); the corresponding
 * hash key is then the empty string.
 *
 * ah_lock is taken as a writer here.  No hold is placed on 'cp' by this
 * function; both callers in this file hold the case before calling.
 * Returns the new link, which starts with one reference.
 */
static fmd_asru_link_t *
fmd_asru_al_create(fmd_asru_hash_t *ahp, nvlist_t *nvl, fmd_case_t *cp,
    const char *al_uuid)
{
	nvlist_t *asru = NULL, *fru, *rsrc;
	int got_rsrc = 0, got_asru = 0, got_fru = 0;
	ssize_t fru_namelen, rsrc_namelen, asru_namelen;
	char *asru_name, *rsrc_name, *fru_name, *name, *label;
	fmd_asru_link_t *alp;
	fmd_asru_t *ap;
	boolean_t msg;
	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;

	/*
	 * Convert whichever FMRIs are present into strings.  A got_* flag is
	 * set only when both the lookup and the conversion succeed; the
	 * matching *_name buffer is valid only in that case.
	 */
	if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &asru) == 0 &&
	    fmd_asru_get_namestr(asru, &asru_name, &asru_namelen) == 0)
		got_asru = 1;
	if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &fru) == 0 &&
	    fmd_asru_get_namestr(fru, &fru_name, &fru_namelen) == 0)
		got_fru = 1;
	if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0 &&
	    fmd_asru_get_namestr(rsrc, &rsrc_name, &rsrc_namelen) == 0)
		got_rsrc = 1;
	if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, &label) != 0)
		label = "";

	/*
	 * Grab the rwlock as a writer; Then create and insert the asru with
	 * ahp->ah_lock held and hash it in. We'll then drop the rwlock and
	 * proceed to initializing the asru.
	 */
	(void) pthread_rwlock_wrlock(&ahp->ah_lock);

	/*
	 * Create and initialise the per-fault "link" structure.
	 */
	alp = fmd_zalloc(sizeof (fmd_asru_link_t), FMD_SLEEP);
	if (got_asru)
		(void) nvlist_xdup(asru, &alp->al_asru_fmri, &fmd.d_nva);
	alp->al_uuid = fmd_strdup(al_uuid, FMD_SLEEP);
	alp->al_uuidlen = strlen(alp->al_uuid);
	alp->al_refs = 1;

	/*
	 * If this is the first fault for this asru, then create the per-asru
	 * structure and link into the hash.
	 *
	 * When the asru already exists, the hold placed on it by
	 * fmd_asru_hash_lookup() is not released here, and its saved event
	 * is replaced by a copy of this fault.
	 */
	name = got_asru ? asru_name : "";
	if ((ap = fmd_asru_hash_lookup(ahp, name)) == NULL) {
		ap = fmd_asru_create(ahp, al_uuid, name, got_asru ? asru :
		    NULL);
		fmd_asru_hash_insert(ahp, ap);
	} else
		nvlist_free(ap->asru_event);
	(void) nvlist_xdup(nvl, &ap->asru_event, &fmd.d_nva);

	/*
	 * Put the link structure on the list associated with the per-asru
	 * structure. Then put the link structure on the various hashes.
	 *
	 * The *_name buffers obtained above are handed over to the link
	 * (not copied); a missing name is replaced by a duplicated "".
	 */
	fmd_list_append(&ap->asru_list, (fmd_list_t *)alp);
	alp->al_asru = ap;
	alp->al_asru_name = got_asru ? asru_name : fmd_strdup("", FMD_SLEEP);
	fmd_asru_asru_hash_insert(ahp, alp, alp->al_asru_name);
	alp->al_fru_name = got_fru ? fru_name : fmd_strdup("", FMD_SLEEP);
	fmd_asru_fru_hash_insert(ahp, alp, alp->al_fru_name);
	alp->al_rsrc_name = got_rsrc ? rsrc_name : fmd_strdup("", FMD_SLEEP);
	fmd_asru_rsrc_hash_insert(ahp, alp, alp->al_rsrc_name);
	alp->al_label = fmd_strdup(label, FMD_SLEEP);
	fmd_asru_label_hash_insert(ahp, alp, label);
	alp->al_case_uuid = fmd_strdup(cip->ci_uuid, FMD_SLEEP);
	fmd_asru_case_hash_insert(ahp, alp, cip->ci_uuid);

	/* Take the asru's own lock before letting go of the hash lock. */
	(void) pthread_mutex_lock(&ap->asru_lock);
	(void) pthread_rwlock_unlock(&ahp->ah_lock);

	ap->asru_case = alp->al_case = cp;
	/* A fault whose FM_SUSPECT_MESSAGE is B_FALSE marks the asru invisible. */
	if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE, &msg) == 0 &&
	    msg == B_FALSE)
		ap->asru_flags |= FMD_ASRU_INVISIBLE;
	(void) nvlist_xdup(nvl, &alp->al_event, &fmd.d_nva);
	/* Mark the asru valid and wake anything waiting on asru_cv. */
	ap->asru_flags |= FMD_ASRU_VALID;
	(void) pthread_cond_broadcast(&ap->asru_cv);
	(void) pthread_mutex_unlock(&ap->asru_lock);
	return (alp);
}
402 
/*
 * Log replay callback (passed to fmd_log_replay() by fmd_asru_hash_logopen())
 * that rebuilds one resource cache entry from a saved ASRU log event.
 *
 * 'lp' is the log being replayed, 'ep' the event read from it and 'ahp' the
 * hash to populate.  The event must carry FM_RSRC_ASRU_FAULTY and
 * FM_RSRC_ASRU_EVENT; otherwise an error is reported, ah_error is set to
 * EFMD_ASRU_EVENT and nothing is created.  On success the owning case is
 * recreated in the CLOSED state, the saved fault is added to it as a suspect,
 * and a link/asru pair is created whose flags reflect the saved 'faulty'
 * value and the current presence and usability of the resource.
 */
static void
fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp)
{
	nvlist_t *nvl = FMD_EVENT_NVL(ep);
	/*
	 * NOTE(review): ps and us are declared boolean_t but receive the int
	 * results of fmd_asru_is_present() and fmd_fmri_unusable(), and us
	 * is compared against -1 below -- confirm boolean_t can hold -1.
	 */
	boolean_t f, u, ps, us;
	nvlist_t *flt, *flt_copy, *asru;
	char *case_uuid = NULL, *case_code = NULL;
	fmd_asru_t *ap;
	fmd_asru_link_t *alp;
	fmd_case_t *cp;
	int64_t *diag_time;
	uint_t nelem;

	/*
	 * Extract the most recent values of 'faulty' from the event log.
	 */
	if (nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY, &f) != 0) {
		fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: "
		    "invalid event log record\n", lp->log_name);
		ahp->ah_error = EFMD_ASRU_EVENT;
		return;
	}
	if (nvlist_lookup_nvlist(nvl, FM_RSRC_ASRU_EVENT, &flt) != 0) {
		fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: "
		    "invalid event log record\n", lp->log_name);
		ahp->ah_error = EFMD_ASRU_EVENT;
		return;
	}
	/* The uuid and code are optional; they stay NULL if absent. */
	(void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_UUID, &case_uuid);
	(void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_CODE, &case_code);

	/*
	 * Attempt to recreate the case in the CLOSED state.
	 * If the case is already present, fmd_case_recreate() will return it.
	 * If not, we'll create a new orphaned case. Either way,  we use the
	 * ASRU event to insert a suspect into the partially-restored case.
	 *
	 * NOTE(review): cp is used without a NULL check -- confirm that
	 * fmd_case_recreate() cannot fail here.
	 */
	fmd_module_lock(fmd.d_rmod);
	cp = fmd_case_recreate(fmd.d_rmod, NULL, FMD_CASE_CLOSED, case_uuid,
	    case_code);
	fmd_case_hold(cp);
	fmd_module_unlock(fmd.d_rmod);
	/*
	 * Use the saved diagnosis time when the event has one (at least two
	 * elements); otherwise fall back to the log file's ctime.
	 */
	if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time,
	    &nelem) == 0 && nelem >= 2)
		fmd_case_settime(cp, diag_time[0], diag_time[1]);
	else
		fmd_case_settime(cp, lp->log_stat.st_ctime, 0);
	/* The case is given its own copy of the fault as a suspect. */
	(void) nvlist_xdup(flt, &flt_copy, &fmd.d_nva);
	fmd_case_recreate_suspect(cp, flt_copy);

	/*
	 * Now create the resource cache entries.
	 * The log's base name is used as the link uuid.
	 */
	alp = fmd_asru_al_create(ahp, flt, cp, fmd_strbasename(lp->log_name));
	ap = alp->al_asru;

	/*
	 * Check to see if the resource is still present in the system.  If
	 * so, then update the value of the unusable bit based on the current
	 * system configuration.  If not, then consider unusable.
	 */
	ps = fmd_asru_is_present(flt);
	if (ps) {
		if (nvlist_lookup_nvlist(flt, FM_FAULT_ASRU, &asru) != 0)
			u = FMD_B_FALSE;
		else if ((us = fmd_fmri_unusable(asru)) == -1) {
			fmd_error(EFMD_ASRU_FMRI, "failed to update "
			    "status of asru %s", lp->log_name);
			u = FMD_B_FALSE;
		} else
			u = us != 0;

	} else
		u = FMD_B_TRUE;	/* not present; set unusable */

	/* Faulty/unusable are recorded on both the link and the asru. */
	ap->asru_flags |= FMD_ASRU_RECREATED;
	if (ps)
		ap->asru_flags |= FMD_ASRU_PRESENT;
	if (f) {
		alp->al_flags |= FMD_ASRU_FAULTY;
		ap->asru_flags |= FMD_ASRU_FAULTY;
	}
	if (u) {
		alp->al_flags |= FMD_ASRU_UNUSABLE;
		ap->asru_flags |= FMD_ASRU_UNUSABLE;
	}

	TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", alp->al_uuid,
	    (void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE]));
}
493 
494 static void
495 fmd_asru_hash_discard(fmd_asru_hash_t *ahp, const char *uuid, int err)
496 {
497 	char src[PATH_MAX], dst[PATH_MAX];
498 
499 	(void) snprintf(src, PATH_MAX, "%s/%s", ahp->ah_dirpath, uuid);
500 	(void) snprintf(dst, PATH_MAX, "%s/%s-", ahp->ah_dirpath, uuid);
501 
502 	if (err != 0)
503 		err = rename(src, dst);
504 	else
505 		err = unlink(src);
506 
507 	if (err != 0 && errno != ENOENT)
508 		fmd_error(EFMD_ASRU_EVENT, "failed to rename log %s", src);
509 }
510 
511 /*
512  * Open a saved log file and restore it into the ASRU hash.  If we can't even
513  * open the log, rename the log file to <uuid>- to indicate it is corrupt.  If
514  * fmd_log_replay() fails, we either delete the file (if it has reached the
515  * upper limit on cache age) or rename it for debugging if it was corrupted.
516  */
517 static void
518 fmd_asru_hash_logopen(fmd_asru_hash_t *ahp, const char *uuid)
519 {
520 	fmd_log_t *lp = fmd_log_tryopen(ahp->ah_dirpath, uuid, FMD_LOG_ASRU);
521 	uint_t n;
522 
523 	if (lp == NULL) {
524 		fmd_asru_hash_discard(ahp, uuid, errno);
525 		return;
526 	}
527 
528 	ahp->ah_error = 0;
529 	n = ahp->ah_al_count;
530 
531 	fmd_log_replay(lp, (fmd_log_f *)fmd_asru_hash_recreate, ahp);
532 	fmd_log_rele(lp);
533 
534 	if (ahp->ah_al_count == n)
535 		fmd_asru_hash_discard(ahp, uuid, ahp->ah_error);
536 }
537 
538 void
539 fmd_asru_hash_refresh(fmd_asru_hash_t *ahp)
540 {
541 	struct dirent *dp;
542 	DIR *dirp;
543 	int zero;
544 
545 	if ((dirp = opendir(ahp->ah_dirpath)) == NULL) {
546 		fmd_error(EFMD_ASRU_NODIR,
547 		    "failed to open asru cache directory %s", ahp->ah_dirpath);
548 		return;
549 	}
550 
551 	(void) fmd_conf_getprop(fmd.d_conf, "rsrc.zero", &zero);
552 
553 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
554 
555 	while ((dp = readdir(dirp)) != NULL) {
556 		if (dp->d_name[0] == '.')
557 			continue; /* skip "." and ".." */
558 
559 		if (zero)
560 			fmd_asru_hash_discard(ahp, dp->d_name, 0);
561 		else if (!fmd_strmatch(dp->d_name, "*-"))
562 			fmd_asru_hash_logopen(ahp, dp->d_name);
563 	}
564 
565 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
566 	(void) closedir(dirp);
567 }
568 
569 /*
570  * If the resource is present and faulty but not unusable, replay the fault
571  * event that caused it be marked faulty.  This will cause the agent
572  * subscribing to this fault class to again disable the resource.
573  */
574 /*ARGSUSED*/
575 static void
576 fmd_asru_hash_replay_asru(fmd_asru_t *ap, void *data)
577 {
578 	fmd_event_t *e;
579 	nvlist_t *nvl;
580 	char *class;
581 
582 	if (ap->asru_event != NULL && (ap->asru_flags & (FMD_ASRU_STATE |
583 	    FMD_ASRU_PRESENT)) == (FMD_ASRU_FAULTY | FMD_ASRU_PRESENT)) {
584 
585 		fmd_dprintf(FMD_DBG_ASRU,
586 		    "replaying fault event for %s", ap->asru_name);
587 
588 		(void) nvlist_xdup(ap->asru_event, &nvl, &fmd.d_nva);
589 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
590 
591 		(void) nvlist_add_string(nvl, FMD_EVN_UUID,
592 		    ((fmd_case_impl_t *)ap->asru_case)->ci_uuid);
593 
594 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
595 		fmd_dispq_dispatch(fmd.d_disp, e, class);
596 	}
597 }
598 
/*
 * Run fmd_asru_hash_replay_asru() over the asrus in 'ahp' by way of
 * fmd_asru_hash_apply(), which calls it only for asrus that have an FMRI.
 */
void
fmd_asru_hash_replay(fmd_asru_hash_t *ahp)
{
	fmd_asru_hash_apply(ahp, fmd_asru_hash_replay_asru, NULL);
}
604 
605 /*
606  * Check if the resource is still present. If not, and if the rsrc.age time
607  * has expired, then do an implicit repair on the resource.
608  */
609 static void
610 fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *er)
611 {
612 	struct timeval tv;
613 	fmd_log_t *lp;
614 	hrtime_t hrt;
615 
616 	if (fmd_asru_is_present(alp->al_event))
617 		return;
618 	fmd_time_gettimeofday(&tv);
619 	lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, FMD_LOG_ASRU);
620 	hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime);
621 	fmd_log_rele(lp);
622 	if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime)
623 		fmd_asru_repair(alp, er);
624 }
625 
626 void
627 fmd_asru_clear_aged_rsrcs()
628 {
629 	int err;
630 
631 	fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, &err);
632 }
633 
634 fmd_asru_hash_t *
635 fmd_asru_hash_create(const char *root, const char *dir)
636 {
637 	fmd_asru_hash_t *ahp;
638 	char path[PATH_MAX];
639 
640 	ahp = fmd_alloc(sizeof (fmd_asru_hash_t), FMD_SLEEP);
641 	(void) pthread_rwlock_init(&ahp->ah_lock, NULL);
642 	ahp->ah_hashlen = fmd.d_str_buckets;
643 	ahp->ah_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, FMD_SLEEP);
644 	ahp->ah_asru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
645 	    FMD_SLEEP);
646 	ahp->ah_case_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
647 	    FMD_SLEEP);
648 	ahp->ah_fru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
649 	    FMD_SLEEP);
650 	ahp->ah_label_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
651 	    FMD_SLEEP);
652 	ahp->ah_rsrc_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
653 	    FMD_SLEEP);
654 	(void) snprintf(path, sizeof (path), "%s/%s", root, dir);
655 	ahp->ah_dirpath = fmd_strdup(path, FMD_SLEEP);
656 	(void) fmd_conf_getprop(fmd.d_conf, "rsrc.age", &ahp->ah_lifetime);
657 	(void) fmd_conf_getprop(fmd.d_conf, "fakenotpresent",
658 	    (uint32_t *)&fmd_asru_fake_not_present);
659 	ahp->ah_al_count = 0;
660 	ahp->ah_count = 0;
661 	ahp->ah_error = 0;
662 	ahp->ah_topo = fmd_topo_hold();
663 
664 	return (ahp);
665 }
666 
667 void
668 fmd_asru_hash_destroy(fmd_asru_hash_t *ahp)
669 {
670 	fmd_asru_link_t *alp, *np;
671 	uint_t i;
672 
673 	for (i = 0; i < ahp->ah_hashlen; i++) {
674 		for (alp = ahp->ah_case_hash[i]; alp != NULL; alp = np) {
675 			np = alp->al_case_next;
676 			alp->al_case_next = NULL;
677 			fmd_case_rele(alp->al_case);
678 			alp->al_case = NULL;
679 			fmd_asru_al_hash_release(ahp, alp);
680 		}
681 	}
682 
683 	fmd_strfree(ahp->ah_dirpath);
684 	fmd_free(ahp->ah_hash, sizeof (void *) * ahp->ah_hashlen);
685 	fmd_free(ahp->ah_asru_hash, sizeof (void *) * ahp->ah_hashlen);
686 	fmd_free(ahp->ah_case_hash, sizeof (void *) * ahp->ah_hashlen);
687 	fmd_free(ahp->ah_fru_hash, sizeof (void *) * ahp->ah_hashlen);
688 	fmd_free(ahp->ah_label_hash, sizeof (void *) * ahp->ah_hashlen);
689 	fmd_free(ahp->ah_rsrc_hash, sizeof (void *) * ahp->ah_hashlen);
690 	fmd_topo_rele(ahp->ah_topo);
691 	fmd_free(ahp, sizeof (fmd_asru_hash_t));
692 }
693 
694 /*
695  * Take a snapshot of the ASRU database by placing an additional hold on each
696  * member in an auxiliary array, and then call 'func' for each ASRU.
697  */
698 void
699 fmd_asru_hash_apply(fmd_asru_hash_t *ahp,
700     void (*func)(fmd_asru_t *, void *), void *arg)
701 {
702 	fmd_asru_t *ap, **aps, **app;
703 	uint_t apc, i;
704 
705 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
706 
707 	aps = app = fmd_alloc(ahp->ah_count * sizeof (fmd_asru_t *), FMD_SLEEP);
708 	apc = ahp->ah_count;
709 
710 	for (i = 0; i < ahp->ah_hashlen; i++) {
711 		for (ap = ahp->ah_hash[i]; ap != NULL; ap = ap->asru_next)
712 			*app++ = fmd_asru_hold(ap);
713 	}
714 
715 	ASSERT(app == aps + apc);
716 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
717 
718 	for (i = 0; i < apc; i++) {
719 		if (aps[i]->asru_fmri != NULL)
720 			func(aps[i], arg);
721 		fmd_asru_hash_release(ahp, aps[i]);
722 	}
723 
724 	fmd_free(aps, apc * sizeof (fmd_asru_t *));
725 }
726 
727 void
728 fmd_asru_al_hash_apply(fmd_asru_hash_t *ahp,
729     void (*func)(fmd_asru_link_t *, void *), void *arg)
730 {
731 	fmd_asru_link_t *alp, **alps, **alpp;
732 	uint_t alpc, i;
733 
734 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
735 
736 	alps = alpp = fmd_alloc(ahp->ah_al_count * sizeof (fmd_asru_link_t *),
737 	    FMD_SLEEP);
738 	alpc = ahp->ah_al_count;
739 
740 	for (i = 0; i < ahp->ah_hashlen; i++) {
741 		for (alp = ahp->ah_case_hash[i]; alp != NULL;
742 		    alp = alp->al_case_next)
743 			*alpp++ = fmd_asru_al_hold(alp);
744 	}
745 
746 	ASSERT(alpp == alps + alpc);
747 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
748 
749 	for (i = 0; i < alpc; i++) {
750 		func(alps[i], arg);
751 		fmd_asru_al_hash_release(ahp, alps[i]);
752 	}
753 
754 	fmd_free(alps, alpc * sizeof (fmd_asru_link_t *));
755 }
756 
757 static void
758 fmd_asru_do_hash_apply(fmd_asru_hash_t *ahp, char *name,
759     void (*func)(fmd_asru_link_t *, void *), void *arg,
760     fmd_asru_link_t **hash, size_t match_offset, size_t next_offset)
761 {
762 	fmd_asru_link_t *alp, **alps, **alpp;
763 	uint_t alpc = 0, i;
764 	uint_t h;
765 
766 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
767 
768 	h = fmd_asru_strhash(ahp, name);
769 
770 	for (alp = hash[h]; alp != NULL; alp =
771 	    /* LINTED pointer alignment */
772 	    FMD_ASRU_AL_HASH_NEXT(alp, next_offset))
773 		if (fmd_asru_strcmp(ahp,
774 		    /* LINTED pointer alignment */
775 		    FMD_ASRU_AL_HASH_NAME(alp, match_offset), name))
776 			alpc++;
777 
778 	alps = alpp = fmd_alloc(alpc * sizeof (fmd_asru_link_t *), FMD_SLEEP);
779 
780 	for (alp = hash[h]; alp != NULL; alp =
781 	    /* LINTED pointer alignment */
782 	    FMD_ASRU_AL_HASH_NEXT(alp, next_offset))
783 		if (fmd_asru_strcmp(ahp,
784 		    /* LINTED pointer alignment */
785 		    FMD_ASRU_AL_HASH_NAME(alp, match_offset), name))
786 			*alpp++ = fmd_asru_al_hold(alp);
787 
788 	ASSERT(alpp == alps + alpc);
789 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
790 
791 	for (i = 0; i < alpc; i++) {
792 		func(alps[i], arg);
793 		fmd_asru_al_hash_release(ahp, alps[i]);
794 	}
795 
796 	fmd_free(alps, alpc * sizeof (fmd_asru_link_t *));
797 }
798 
799 void
800 fmd_asru_hash_apply_by_asru(fmd_asru_hash_t *ahp, char *name,
801     void (*func)(fmd_asru_link_t *, void *), void *arg)
802 {
803 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_asru_hash,
804 	    offsetof(fmd_asru_link_t, al_asru_name),
805 	    offsetof(fmd_asru_link_t, al_asru_next));
806 }
807 
808 void
809 fmd_asru_hash_apply_by_case(fmd_asru_hash_t *ahp, fmd_case_t *cp,
810 	void (*func)(fmd_asru_link_t *, void *), void *arg)
811 {
812 	fmd_asru_do_hash_apply(ahp, ((fmd_case_impl_t *)cp)->ci_uuid, func, arg,
813 	    ahp->ah_case_hash, offsetof(fmd_asru_link_t, al_case_uuid),
814 	    offsetof(fmd_asru_link_t, al_case_next));
815 }
816 
817 void
818 fmd_asru_hash_apply_by_fru(fmd_asru_hash_t *ahp, char *name,
819     void (*func)(fmd_asru_link_t *, void *), void *arg)
820 {
821 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_fru_hash,
822 	    offsetof(fmd_asru_link_t, al_fru_name),
823 	    offsetof(fmd_asru_link_t, al_fru_next));
824 }
825 
826 void
827 fmd_asru_hash_apply_by_rsrc(fmd_asru_hash_t *ahp, char *name,
828     void (*func)(fmd_asru_link_t *, void *), void *arg)
829 {
830 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_rsrc_hash,
831 	    offsetof(fmd_asru_link_t, al_rsrc_name),
832 	    offsetof(fmd_asru_link_t, al_rsrc_next));
833 }
834 
835 void
836 fmd_asru_hash_apply_by_label(fmd_asru_hash_t *ahp, char *name,
837     void (*func)(fmd_asru_link_t *, void *), void *arg)
838 {
839 	fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_label_hash,
840 	    offsetof(fmd_asru_link_t, al_label),
841 	    offsetof(fmd_asru_link_t, al_label_next));
842 }
843 
844 /*
845  * Lookup an asru in the hash by name and place a hold on it.  If the asru is
846  * not found, no entry is created and NULL is returned.
847  */
848 fmd_asru_t *
849 fmd_asru_hash_lookup_name(fmd_asru_hash_t *ahp, const char *name)
850 {
851 	fmd_asru_t *ap;
852 
853 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
854 	ap = fmd_asru_hash_lookup(ahp, name);
855 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
856 
857 	return (ap);
858 }
859 
860 /*
861  * Lookup an asru in the hash and place a hold on it.
862  */
863 fmd_asru_t *
864 fmd_asru_hash_lookup_nvl(fmd_asru_hash_t *ahp, nvlist_t *fmri)
865 {
866 	fmd_asru_t *ap;
867 	char *name = NULL;
868 	ssize_t namelen;
869 
870 	if (fmd_asru_get_namestr(fmri, &name, &namelen) != 0)
871 		return (NULL);
872 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
873 	ap = fmd_asru_hash_lookup(ahp, name);
874 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
875 	fmd_free(name, namelen + 1);
876 	return (ap);
877 }
878 
879 /*
880  * Create a resource cache entry using the fault event "nvl" for one of the
881  * suspects from the case "cp".
882  *
883  * The fault event can have the following components :  FM_FAULT_ASRU,
884  * FM_FAULT_FRU, FM_FAULT_RESOURCE. These should be set by the Diagnosis Engine
885  * when calling fmd_nvl_create_fault(). In the general case, these are all
886  * optional and an entry will always be added into the cache even if one or all
887  * of these fields is missing.
888  *
889  * However, for hardware faults the recommended practice is that the fault
890  * event should always have the FM_FAULT_RESOURCE field present and that this
891  * should be represented in hc-scheme.
892  *
893  * Currently the DE should also add the FM_FAULT_ASRU and FM_FAULT_FRU fields
894  * where known, though at some future stage fmd might be able to fill these
895  * in automatically from the topology.
896  */
897 fmd_asru_link_t *
898 fmd_asru_hash_create_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, nvlist_t *nvl)
899 {
900 	char *parsed_uuid;
901 	uuid_t uuid;
902 	int uuidlen;
903 	fmd_asru_link_t *alp;
904 
905 	/*
906 	 * Generate a UUID for the ASRU.  libuuid cleverly gives us no
907 	 * interface for specifying or learning the buffer size.  Sigh.
908 	 * The spec says 36 bytes but we use a tunable just to be safe.
909 	 */
910 	(void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &uuidlen);
911 	parsed_uuid = fmd_zalloc(uuidlen + 1, FMD_SLEEP);
912 	uuid_generate(uuid);
913 	uuid_unparse(uuid, parsed_uuid);
914 
915 	/*
916 	 * Now create the resource cache entries.
917 	 */
918 	fmd_case_hold_locked(cp);
919 	alp = fmd_asru_al_create(ahp, nvl, cp, parsed_uuid);
920 	TRACE((FMD_DBG_ASRU, "asru %s created as %p",
921 	    alp->al_uuid, (void *)alp->al_asru));
922 
923 	fmd_free(parsed_uuid, uuidlen + 1);
924 	return (alp);
925 
926 }
927 
928 /*
929  * Release the reference count on an asru obtained using fmd_asru_hash_lookup.
930  * We take 'ahp' for symmetry and in case we need to use it in future work.
931  */
932 /*ARGSUSED*/
933 void
934 fmd_asru_hash_release(fmd_asru_hash_t *ahp, fmd_asru_t *ap)
935 {
936 	(void) pthread_mutex_lock(&ap->asru_lock);
937 
938 	ASSERT(ap->asru_refs != 0);
939 	if (--ap->asru_refs == 0)
940 		fmd_asru_destroy(ap);
941 	else
942 		(void) pthread_mutex_unlock(&ap->asru_lock);
943 }
944 
945 static void
946 fmd_asru_do_delete_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp,
947     fmd_asru_link_t **hash, size_t next_offset, char *name)
948 {
949 	uint_t h;
950 	fmd_asru_link_t *alp, **pp, *alpnext, **alpnextp;
951 
952 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
953 	h = fmd_asru_strhash(ahp, name);
954 	pp = &hash[h];
955 	for (alp = *pp; alp != NULL; alp = alpnext) {
956 		/* LINTED pointer alignment */
957 		alpnextp = FMD_ASRU_AL_HASH_NEXTP(alp, next_offset);
958 		alpnext = *alpnextp;
959 		if (alp->al_case == cp) {
960 			*pp = *alpnextp;
961 			*alpnextp = NULL;
962 		} else
963 			pp = alpnextp;
964 	}
965 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
966 }
967 
968 static void
969 fmd_asru_do_hash_delete(fmd_asru_hash_t *ahp, fmd_case_susp_t *cis,
970     fmd_case_t *cp, fmd_asru_link_t **hash, size_t next_offset, char *nvname)
971 {
972 	nvlist_t *nvl;
973 	char *name = NULL;
974 	ssize_t namelen;
975 
976 	if (nvlist_lookup_nvlist(cis->cis_nvl, nvname, &nvl) == 0 &&
977 	    (namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) != -1 &&
978 	    (name = fmd_alloc(namelen + 1, FMD_SLEEP)) != NULL) {
979 		if (fmd_fmri_nvl2str(nvl, name, namelen + 1) != -1)
980 			fmd_asru_do_delete_entry(ahp, cp, hash, next_offset,
981 			    name);
982 		fmd_free(name, namelen + 1);
983 	} else
984 		fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, "");
985 }
986 
987 void
988 fmd_asru_hash_delete_case(fmd_asru_hash_t *ahp, fmd_case_t *cp)
989 {
990 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
991 	fmd_case_susp_t *cis;
992 	fmd_asru_link_t *alp, **plp, *alpnext;
993 	fmd_asru_t *ap;
994 	char path[PATH_MAX];
995 	char *label;
996 	uint_t h;
997 
998 	/*
999 	 * first delete hash entries for each suspect
1000 	 */
1001 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
1002 		fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_fru_hash,
1003 		    offsetof(fmd_asru_link_t, al_fru_next), FM_FAULT_FRU);
1004 		fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_rsrc_hash,
1005 		    offsetof(fmd_asru_link_t, al_rsrc_next), FM_FAULT_RESOURCE);
1006 		if (nvlist_lookup_string(cis->cis_nvl, FM_FAULT_LOCATION,
1007 		    &label) != 0)
1008 			label = "";
1009 		fmd_asru_do_delete_entry(ahp, cp, ahp->ah_label_hash,
1010 		    offsetof(fmd_asru_link_t, al_label_next), label);
1011 		fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_asru_hash,
1012 		    offsetof(fmd_asru_link_t, al_asru_next), FM_FAULT_ASRU);
1013 	}
1014 
1015 	/*
1016 	 * then delete associated case hash entries
1017 	 */
1018 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
1019 	h = fmd_asru_strhash(ahp, cip->ci_uuid);
1020 	plp = &ahp->ah_case_hash[h];
1021 	for (alp = *plp; alp != NULL; alp = alpnext) {
1022 		alpnext = alp->al_case_next;
1023 		if (alp->al_case == cp) {
1024 			*plp = alp->al_case_next;
1025 			alp->al_case_next = NULL;
1026 			ASSERT(ahp->ah_al_count != 0);
1027 			ahp->ah_al_count--;
1028 
1029 			/*
1030 			 * decrement case ref.
1031 			 */
1032 			fmd_case_rele_locked(cp);
1033 			alp->al_case = NULL;
1034 
1035 			/*
1036 			 * If we found a matching ASRU, unlink its log file and
1037 			 * then release the hash entry. Note that it may still
1038 			 * be referenced if another thread is manipulating it;
1039 			 * this is ok because once we unlink, the log file will
1040 			 * not be restored, and the log data will be freed when
1041 			 * all of the referencing threads release their
1042 			 * respective references.
1043 			 */
1044 			(void) snprintf(path, sizeof (path), "%s/%s",
1045 			    ahp->ah_dirpath, alp->al_uuid);
1046 			if (unlink(path) != 0)
1047 				fmd_error(EFMD_ASRU_UNLINK,
1048 				    "failed to unlink asru %s", path);
1049 
1050 			/*
1051 			 * Now unlink from the global per-resource cache
1052 			 * and if this is the last link then remove that from
1053 			 * it's own hash too.
1054 			 */
1055 			ap = alp->al_asru;
1056 			(void) pthread_mutex_lock(&ap->asru_lock);
1057 			fmd_list_delete(&ap->asru_list, alp);
1058 			if (ap->asru_list.l_next == NULL) {
1059 				uint_t h;
1060 				fmd_asru_t *ap2, **pp;
1061 				fmd_asru_t *apnext, **apnextp;
1062 
1063 				ASSERT(ahp->ah_count != 0);
1064 				ahp->ah_count--;
1065 				h = fmd_asru_strhash(ahp, ap->asru_name);
1066 				pp = &ahp->ah_hash[h];
1067 				for (ap2 = *pp; ap2 != NULL; ap2 = apnext) {
1068 					apnextp = &ap2->asru_next;
1069 					apnext = *apnextp;
1070 					if (ap2 == ap) {
1071 						*pp = *apnextp;
1072 						*apnextp = NULL;
1073 					} else
1074 						pp = apnextp;
1075 				}
1076 			}
1077 			(void) pthread_mutex_unlock(&ap->asru_lock);
1078 			fmd_asru_al_hash_release(ahp, alp);
1079 		} else
1080 			plp = &alp->al_case_next;
1081 	}
1082 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
1083 }
1084 
1085 static void
1086 fmd_asru_repair_containee(fmd_asru_link_t *alp, void *er)
1087 {
1088 	if (er && alp->al_asru_fmri && fmd_fmri_contains(er,
1089 	    alp->al_asru_fmri) > 0 && fmd_asru_clrflags(alp, FMD_ASRU_FAULTY))
1090 		fmd_case_update(alp->al_case);
1091 }
1092 
1093 void
1094 fmd_asru_repair(fmd_asru_link_t *alp, void *er)
1095 {
1096 	int flags;
1097 	int rval;
1098 
1099 	/*
1100 	 * repair this asru cache entry
1101 	 */
1102 	rval = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY);
1103 
1104 	/*
1105 	 * now check if all entries associated with this asru are repaired and
1106 	 * if so repair containees
1107 	 */
1108 	(void) pthread_mutex_lock(&alp->al_asru->asru_lock);
1109 	flags = alp->al_asru->asru_flags;
1110 	(void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
1111 	if (!(flags & FMD_ASRU_FAULTY))
1112 		fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_containee,
1113 		    alp->al_asru_fmri);
1114 
1115 	/*
1116 	 * if called from fmd_adm_repair() and we really did clear the bit then
1117 	 * we need to do a case update to see if the associated case can be
1118 	 * repaired. No need to do this if called from fmd_case_repair() (ie
1119 	 * when er is NULL) as the case will be explicitly repaired anyway.
1120 	 */
1121 	if (er) {
1122 		*(int *)er = 0;
1123 		if (rval)
1124 			fmd_case_update(alp->al_case);
1125 	}
1126 }
1127 
1128 static void
1129 fmd_asru_logevent(fmd_asru_link_t *alp)
1130 {
1131 	fmd_asru_t *ap = alp->al_asru;
1132 	boolean_t f = (ap->asru_flags & FMD_ASRU_FAULTY) != 0;
1133 	boolean_t u = (ap->asru_flags & FMD_ASRU_UNUSABLE) != 0;
1134 	boolean_t m = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0;
1135 
1136 	fmd_case_impl_t *cip;
1137 	fmd_event_t *e;
1138 	fmd_log_t *lp;
1139 	nvlist_t *nvl;
1140 	char *class;
1141 
1142 	ASSERT(MUTEX_HELD(&ap->asru_lock));
1143 	cip = (fmd_case_impl_t *)alp->al_case;
1144 	ASSERT(cip != NULL);
1145 
1146 	if ((lp = alp->al_log) == NULL)
1147 		lp = fmd_log_open(ap->asru_root, alp->al_uuid, FMD_LOG_ASRU);
1148 
1149 	if (lp == NULL)
1150 		return; /* can't log events if we can't open the log */
1151 
1152 	nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[f | (u << 1)],
1153 	    alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, f, u, m,
1154 	    alp->al_event, &cip->ci_tv);
1155 
1156 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1157 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
1158 
1159 	fmd_event_hold(e);
1160 	fmd_log_append(lp, e, NULL);
1161 	fmd_event_rele(e);
1162 
1163 	/*
1164 	 * For now, we close the log file after every update to conserve file
1165 	 * descriptors and daemon overhead.  If this becomes a performance
1166 	 * issue this code can change to keep a fixed-size LRU cache of logs.
1167 	 */
1168 	fmd_log_rele(lp);
1169 	alp->al_log = NULL;
1170 }
1171 
1172 int
1173 fmd_asru_setflags(fmd_asru_link_t *alp, uint_t sflag)
1174 {
1175 	fmd_asru_t *ap = alp->al_asru;
1176 	uint_t nstate, ostate;
1177 
1178 	ASSERT(!(sflag & ~FMD_ASRU_STATE));
1179 	ASSERT(sflag != FMD_ASRU_STATE);
1180 
1181 	(void) pthread_mutex_lock(&ap->asru_lock);
1182 
1183 	ostate = alp->al_flags & FMD_ASRU_STATE;
1184 	alp->al_flags |= sflag;
1185 	nstate = alp->al_flags & FMD_ASRU_STATE;
1186 
1187 	if (nstate == ostate) {
1188 		(void) pthread_mutex_unlock(&ap->asru_lock);
1189 		return (0);
1190 	}
1191 
1192 	ap->asru_flags |= sflag;
1193 	TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid,
1194 	    _fmd_asru_snames[ostate], _fmd_asru_snames[nstate]));
1195 
1196 	fmd_asru_logevent(alp);
1197 
1198 	(void) pthread_cond_broadcast(&ap->asru_cv);
1199 	(void) pthread_mutex_unlock(&ap->asru_lock);
1200 	return (1);
1201 }
1202 
1203 int
1204 fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag)
1205 {
1206 	fmd_asru_t *ap = alp->al_asru;
1207 	fmd_asru_link_t *nalp;
1208 	uint_t nstate, ostate, flags = 0;
1209 
1210 	ASSERT(!(sflag & ~FMD_ASRU_STATE));
1211 	ASSERT(sflag != FMD_ASRU_STATE);
1212 
1213 	(void) pthread_mutex_lock(&ap->asru_lock);
1214 
1215 	ostate = alp->al_flags & FMD_ASRU_STATE;
1216 	alp->al_flags &= ~sflag;
1217 	nstate = alp->al_flags & FMD_ASRU_STATE;
1218 
1219 	if (nstate == ostate) {
1220 		(void) pthread_mutex_unlock(&ap->asru_lock);
1221 		return (0);
1222 	}
1223 
1224 	if (sflag == FMD_ASRU_UNUSABLE)
1225 		ap->asru_flags &= ~sflag;
1226 	else if (sflag == FMD_ASRU_FAULTY) {
1227 		/*
1228 		 * only clear the faulty bit if all links are clear
1229 		 */
1230 		for (nalp = fmd_list_next(&ap->asru_list); nalp != NULL;
1231 		    nalp = fmd_list_next(nalp))
1232 			flags |= nalp->al_flags;
1233 		if (!(flags & FMD_ASRU_FAULTY))
1234 			ap->asru_flags &= ~sflag;
1235 	}
1236 
1237 	TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid,
1238 	    _fmd_asru_snames[ostate], _fmd_asru_snames[nstate]));
1239 
1240 	fmd_asru_logevent(alp);
1241 
1242 	(void) pthread_cond_broadcast(&ap->asru_cv);
1243 	(void) pthread_mutex_unlock(&ap->asru_lock);
1244 
1245 	return (1);
1246 }
1247 
1248 /*
1249  * Report the current known state of the link entry (ie this particular fault
1250  * affecting this particular ASRU).
1251  */
1252 int
1253 fmd_asru_al_getstate(fmd_asru_link_t *alp)
1254 {
1255 	int us, st;
1256 	nvlist_t *asru;
1257 
1258 	if (fmd_asru_is_present(alp->al_event) == 0)
1259 		return ((alp->al_flags & FMD_ASRU_FAULTY) | FMD_ASRU_UNUSABLE);
1260 
1261 	if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0)
1262 		us = fmd_fmri_unusable(asru);
1263 	else
1264 		us = (alp->al_flags & FMD_ASRU_UNUSABLE);
1265 	st = (alp->al_flags & FMD_ASRU_STATE) | FMD_ASRU_PRESENT;
1266 	if (us > 0)
1267 		st |= FMD_ASRU_UNUSABLE;
1268 	else if (us == 0)
1269 		st &= ~FMD_ASRU_UNUSABLE;
1270 	return (st);
1271 }
1272 
1273 /*
1274  * Report the current known state of the ASRU by refreshing its unusable status
1275  * based upon the routines provided by the scheme module.  If the unusable bit
1276  * is different, we do *not* generate a state change here because that change
1277  * may be unrelated to fmd activities and therefore we have no case or event.
1278  * The absence of the transition is harmless as this function is only provided
1279  * for RPC observability and fmd's clients are only concerned with ASRU_FAULTY.
1280  */
1281 int
1282 fmd_asru_getstate(fmd_asru_t *ap)
1283 {
1284 	int us, st;
1285 
1286 	if (!(ap->asru_flags & FMD_ASRU_INTERNAL) &&
1287 	    (fmd_asru_fake_not_present || fmd_fmri_present(ap->asru_fmri) <= 0))
1288 		return (0); /* do not report non-fmd non-present resources */
1289 
1290 	us = fmd_fmri_unusable(ap->asru_fmri);
1291 	st = ap->asru_flags & FMD_ASRU_STATE;
1292 
1293 	if (us > 0)
1294 		st |= FMD_ASRU_UNUSABLE;
1295 	else if (us == 0)
1296 		st &= ~FMD_ASRU_UNUSABLE;
1297 
1298 	return (st);
1299 }
1300