xref: /titanic_44/usr/src/cmd/fm/fmd/common/fmd_asru.c (revision ab4a9beb2e4d596be0b3288c7d92919e27781b57)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/fm/protocol.h>
30 #include <uuid/uuid.h>
31 
32 #include <dirent.h>
33 #include <limits.h>
34 #include <unistd.h>
35 #include <alloca.h>
36 
37 #include <fmd_alloc.h>
38 #include <fmd_string.h>
39 #include <fmd_error.h>
40 #include <fmd_subr.h>
41 #include <fmd_protocol.h>
42 #include <fmd_event.h>
43 #include <fmd_conf.h>
44 #include <fmd_fmri.h>
45 #include <fmd_dispq.h>
46 #include <fmd_case.h>
47 #include <fmd_module.h>
48 #include <fmd_asru.h>
49 
50 #include <fmd.h>
51 
/*
 * Resource event class names for the four combinations of the faulty and
 * unusable state bits.  fmd_asru_logevent() indexes this table with
 * (faulty | (unusable << 1)), so the order of the entries matters.
 */
static const char *const _fmd_asru_events[] = {
	FMD_RSRC_CLASS "asru.ok",		/* UNUSABLE=0 FAULTED=0 */
	FMD_RSRC_CLASS "asru.degraded",		/* UNUSABLE=0 FAULTED=1 */
	FMD_RSRC_CLASS "asru.unknown",		/* UNUSABLE=1 FAULTED=0 */
	FMD_RSRC_CLASS "asru.faulted"		/* UNUSABLE=1 FAULTED=1 */
};

/*
 * Short state names used only in TRACE output.  This table is indexed by
 * (asru_flags & FMD_ASRU_STATE); lower-case means the bit is clear.
 */
static const char *const _fmd_asru_snames[] = {
	"uf", "uF", "Uf", "UF"			/* same order as above */
};

/*
 * Debugging knob loaded from the "fakenotpresent" configuration property in
 * fmd_asru_hash_create().  When non-zero, the code in this file treats
 * resources as not present without consulting fmd_fmri_present().
 */
volatile uint32_t fmd_asru_fake_not_present = 0;
64 
65 static fmd_asru_t *
66 fmd_asru_create(fmd_asru_hash_t *ahp, const char *uuid,
67     const char *name, nvlist_t *fmri)
68 {
69 	fmd_asru_t *ap = fmd_alloc(sizeof (fmd_asru_t), FMD_SLEEP);
70 	char *s;
71 
72 	(void) pthread_mutex_init(&ap->asru_lock, NULL);
73 	(void) pthread_cond_init(&ap->asru_cv, NULL);
74 
75 	ap->asru_next = NULL;
76 	ap->asru_name = fmd_strdup(name, FMD_SLEEP);
77 	(void) nvlist_xdup(fmri, &ap->asru_fmri, &fmd.d_nva);
78 	ap->asru_root = fmd_strdup(ahp->ah_dirpath, FMD_SLEEP);
79 	ap->asru_uuid = fmd_strdup(uuid, FMD_SLEEP);
80 	ap->asru_uuidlen = ap->asru_uuid ? strlen(ap->asru_uuid) : 0;
81 	ap->asru_log = NULL;
82 	ap->asru_refs = 1;
83 	ap->asru_flags = 0;
84 	ap->asru_case = NULL;
85 	ap->asru_event = NULL;
86 
87 	if (nvlist_lookup_string(ap->asru_fmri, FM_FMRI_SCHEME, &s) == 0 &&
88 	    strcmp(s, FM_FMRI_SCHEME_FMD) == 0)
89 		ap->asru_flags |= FMD_ASRU_INTERNAL;
90 
91 	return (ap);
92 }
93 
94 static void
95 fmd_asru_destroy(fmd_asru_t *ap)
96 {
97 	ASSERT(MUTEX_HELD(&ap->asru_lock));
98 	ASSERT(ap->asru_refs == 0);
99 
100 	if (ap->asru_log != NULL)
101 		fmd_log_rele(ap->asru_log);
102 
103 	if (ap->asru_case != NULL)
104 		fmd_case_rele(ap->asru_case);
105 
106 	nvlist_free(ap->asru_event);
107 	fmd_strfree(ap->asru_name);
108 	nvlist_free(ap->asru_fmri);
109 	fmd_strfree(ap->asru_root);
110 	fmd_free(ap->asru_uuid, ap->asru_uuidlen + 1);
111 	fmd_free(ap, sizeof (fmd_asru_t));
112 }
113 
114 static void
115 fmd_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_t *ap)
116 {
117 	uint_t h = fmd_strhash(ap->asru_name) % ahp->ah_hashlen;
118 
119 	ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
120 	ap->asru_next = ahp->ah_hash[h];
121 	ahp->ah_hash[h] = ap;
122 	ahp->ah_count++;
123 }
124 
125 static fmd_asru_t *
126 fmd_asru_hold(fmd_asru_t *ap)
127 {
128 	(void) pthread_mutex_lock(&ap->asru_lock);
129 	ap->asru_refs++;
130 	ASSERT(ap->asru_refs != 0);
131 	(void) pthread_mutex_unlock(&ap->asru_lock);
132 	return (ap);
133 }
134 
135 /*
136  * Lookup an asru in the hash by name and place a hold on it.  If the asru is
137  * not found, no entry is created and NULL is returned.  This internal function
138  * is for callers who have the ah_lock held and is used by lookup_name below.
139  */
140 fmd_asru_t *
141 fmd_asru_hash_lookup(fmd_asru_hash_t *ahp, const char *name)
142 {
143 	fmd_asru_t *ap;
144 	uint_t h;
145 
146 	ASSERT(RW_LOCK_HELD(&ahp->ah_lock));
147 	h = fmd_strhash(name) % ahp->ah_hashlen;
148 
149 	for (ap = ahp->ah_hash[h]; ap != NULL; ap = ap->asru_next) {
150 		if (strcmp(ap->asru_name, name) == 0)
151 			break;
152 	}
153 
154 	if (ap != NULL)
155 		(void) fmd_asru_hold(ap);
156 	else
157 		(void) fmd_set_errno(EFMD_ASRU_NOENT);
158 
159 	return (ap);
160 }
161 
162 static void
163 fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp)
164 {
165 	nvlist_t *nvl = FMD_EVENT_NVL(ep);
166 	char *case_uuid = NULL, *case_code = NULL;
167 	char *name = NULL;
168 	ssize_t namelen;
169 
170 	nvlist_t *fmri, *flt, *flt_copy;
171 	boolean_t f, u, m;
172 	fmd_asru_t *ap;
173 	int ps, us;
174 	int64_t *diag_time;
175 	uint_t nelem;
176 
177 	/*
178 	 * Extract the resource FMRI and most recent values of 'faulty' and
179 	 * 'unusable' from the event log.  If the event is malformed, return.
180 	 */
181 	if (nvlist_lookup_nvlist(nvl, FM_RSRC_RESOURCE, &fmri) != 0 ||
182 	    nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY, &f) != 0 ||
183 	    nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_UNUSABLE, &u) != 0) {
184 		fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: "
185 		    "invalid event log record\n", lp->log_name);
186 		ahp->ah_error = EFMD_ASRU_EVENT;
187 		return;
188 	}
189 
190 	/*
191 	 * If this resource has been explicitly repaired, then return and
192 	 * discard the log. This is consistant with the behaviour when rsrc.age
193 	 * expires below.
194 	 */
195 	if (!f)
196 		return;
197 
198 	/*
199 	 * Check to see if the resource is still present in the system.  If
200 	 * so, then update the value of the unusable bit based on the current
201 	 * system configuration.  If not, then either keep the entry in our
202 	 * cache if it is recent, or return and discard it if it is too old.
203 	 */
204 	if (fmd_asru_fake_not_present)
205 		ps = 0;
206 	else if ((ps = fmd_fmri_present(fmri)) == -1) {
207 		fmd_error(EFMD_ASRU_FMRI, "failed to locate %s", lp->log_name);
208 		ahp->ah_error = EFMD_ASRU_FMRI;
209 		return;
210 	}
211 
212 	if (ps) {
213 		if ((us = fmd_fmri_unusable(fmri)) == -1) {
214 			fmd_error(EFMD_ASRU_FMRI, "failed to update "
215 			    "status of asru %s", lp->log_name);
216 			u = FMD_B_FALSE;
217 		} else
218 			u = us != 0;
219 
220 	} else {
221 		struct timeval tv;
222 
223 		fmd_time_gettimeofday(&tv);
224 		if ((hrtime_t)(tv.tv_sec -
225 		    lp->log_stat.st_mtime) * NANOSEC < ahp->ah_lifetime) {
226 			u = FMD_B_TRUE; /* not present; set unusable */
227 		} else
228 			return;	 /* too old; discard this log */
229 	}
230 
231 	/*
232 	 * In order to insert the ASRU into our hash, convert the FMRI from
233 	 * nvlist form into a string form and assign this name to the ASRU.
234 	 */
235 	if ((namelen = fmd_fmri_nvl2str(fmri, NULL, 0)) == -1 ||
236 	    (name = fmd_alloc(namelen + 1, FMD_NOSLEEP)) == NULL ||
237 	    fmd_fmri_nvl2str(fmri, name, namelen + 1) == -1) {
238 		fmd_error(EFMD_ASRU_FMRI,
239 		    "failed to reload asru %s", lp->log_name);
240 		if (name != NULL)
241 			fmd_free(name, namelen + 1);
242 		ahp->ah_error = EFMD_ASRU_FMRI;
243 		return;
244 	}
245 
246 	/*
247 	 * Look to see if the ASRU already exists in the hash: if it does and
248 	 * the existing ASRU entry is unusable but the duplicate is not, then
249 	 * delete the existing entry and continue on using the new entry; if
250 	 * the new entry is no "better", return an error and ignore it.
251 	 */
252 	if ((ap = fmd_asru_hash_lookup(ahp, name)) != NULL) {
253 		if (!u && (ap->asru_flags & FMD_ASRU_UNUSABLE)) {
254 			(void) fmd_asru_hash_delete_name(ahp, name);
255 			fmd_asru_hash_release(ahp, ap);
256 		} else {
257 			fmd_error(EFMD_ASRU_DUP, "removing duplicate asru "
258 			    "log %s for %s\n", lp->log_name, name);
259 			fmd_free(name, namelen + 1);
260 			fmd_asru_hash_release(ahp, ap);
261 			ahp->ah_error = EFMD_ASRU_DUP;
262 			return;
263 		}
264 	}
265 
266 	ap = fmd_asru_create(ahp, fmd_strbasename(lp->log_name), name, fmri);
267 	fmd_free(name, namelen + 1);
268 	ap->asru_flags |= FMD_ASRU_RECREATED;
269 
270 	if (ps)
271 		ap->asru_flags |= FMD_ASRU_PRESENT;
272 	if (f)
273 		ap->asru_flags |= FMD_ASRU_FAULTY;
274 	if (u)
275 		ap->asru_flags |= FMD_ASRU_UNUSABLE;
276 
277 	if (nvlist_lookup_boolean_value(nvl,
278 	    FM_SUSPECT_MESSAGE, &m) == 0 && m == B_FALSE)
279 		ap->asru_flags |= FMD_ASRU_INVISIBLE;
280 
281 	/*
282 	 * Recreate the case in the CLOSED state. If the case is not closed,
283 	 * fmd_case_transition_update() will set it to the correct state later.
284 	 * If the case is already present, fmd_case_recreate() will return
285 	 * as an orphaned case. If not, it will create a new orphaned case.
286 	 * Either way we use the ASRU event to insert a suspect into the
287 	 * restored case.
288 	 */
289 	(void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_UUID, &case_uuid);
290 	(void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_CODE, &case_code);
291 	(void) nvlist_lookup_nvlist(nvl, FM_RSRC_ASRU_EVENT, &flt);
292 
293 	fmd_module_lock(fmd.d_rmod);
294 
295 	ap->asru_case = fmd_case_recreate(fmd.d_rmod, NULL,
296 	    FMD_CASE_CLOSED, case_uuid, case_code);
297 	ASSERT(ap->asru_case != NULL);
298 
299 	ASSERT(fmd_case_orphaned(ap->asru_case));
300 
301 	fmd_case_hold(ap->asru_case);
302 	fmd_module_unlock(fmd.d_rmod);
303 
304 	if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time,
305 	    &nelem) == 0 && nelem >= 2)
306 		fmd_case_settime(ap->asru_case, diag_time[0], diag_time[1]);
307 	else
308 		fmd_case_settime(ap->asru_case, lp->log_stat.st_ctime, 0);
309 
310 	(void) nvlist_xdup(flt, &ap->asru_event, &fmd.d_nva);
311 	(void) nvlist_xdup(flt, &flt_copy, &fmd.d_nva);
312 	fmd_case_recreate_suspect(ap->asru_case, flt_copy);
313 
314 	ASSERT(!(ap->asru_flags & FMD_ASRU_VALID));
315 	ap->asru_flags |= FMD_ASRU_VALID;
316 	fmd_asru_hash_insert(ahp, ap);
317 
318 	TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", ap->asru_uuid,
319 	    (void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE]));
320 }
321 
322 static void
323 fmd_asru_hash_discard(fmd_asru_hash_t *ahp, const char *uuid, int err)
324 {
325 	char src[PATH_MAX], dst[PATH_MAX];
326 
327 	(void) snprintf(src, PATH_MAX, "%s/%s", ahp->ah_dirpath, uuid);
328 	(void) snprintf(dst, PATH_MAX, "%s/%s-", ahp->ah_dirpath, uuid);
329 
330 	if (err != 0)
331 		err = rename(src, dst);
332 	else
333 		err = unlink(src);
334 
335 	if (err != 0 && errno != ENOENT)
336 		fmd_error(EFMD_ASRU_EVENT, "failed to rename log %s", src);
337 }
338 
339 /*
340  * Open a saved log file and restore it into the ASRU hash.  If we can't even
341  * open the log, rename the log file to <uuid>- to indicate it is corrupt.  If
342  * fmd_log_replay() fails, we either delete the file (if it has reached the
343  * upper limit on cache age) or rename it for debugging if it was corrupted.
344  */
345 static void
346 fmd_asru_hash_logopen(fmd_asru_hash_t *ahp, const char *uuid)
347 {
348 	fmd_log_t *lp = fmd_log_tryopen(ahp->ah_dirpath, uuid, FMD_LOG_ASRU);
349 	uint_t n;
350 
351 	if (lp == NULL) {
352 		fmd_asru_hash_discard(ahp, uuid, errno);
353 		return;
354 	}
355 
356 	ahp->ah_error = 0;
357 	n = ahp->ah_count;
358 
359 	fmd_log_replay(lp, (fmd_log_f *)fmd_asru_hash_recreate, ahp);
360 	fmd_log_rele(lp);
361 
362 	if (ahp->ah_count == n)
363 		fmd_asru_hash_discard(ahp, uuid, ahp->ah_error);
364 }
365 
366 void
367 fmd_asru_hash_refresh(fmd_asru_hash_t *ahp)
368 {
369 	struct dirent *dp;
370 	DIR *dirp;
371 	int zero;
372 
373 	if ((dirp = opendir(ahp->ah_dirpath)) == NULL) {
374 		fmd_error(EFMD_ASRU_NODIR,
375 		    "failed to open asru cache directory %s", ahp->ah_dirpath);
376 		return;
377 	}
378 
379 	(void) fmd_conf_getprop(fmd.d_conf, "rsrc.zero", &zero);
380 
381 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
382 
383 	while ((dp = readdir(dirp)) != NULL) {
384 		if (dp->d_name[0] == '.')
385 			continue; /* skip "." and ".." */
386 
387 		if (zero)
388 			fmd_asru_hash_discard(ahp, dp->d_name, 0);
389 		else if (!fmd_strmatch(dp->d_name, "*-"))
390 			fmd_asru_hash_logopen(ahp, dp->d_name);
391 	}
392 
393 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
394 	(void) closedir(dirp);
395 }
396 
397 /*
398  * If the resource is present and faulty but not unusable, replay the fault
399  * event that caused it be marked faulty.  This will cause the agent
400  * subscribing to this fault class to again disable the resource.
401  */
402 /*ARGSUSED*/
403 static void
404 fmd_asru_hash_replay_asru(fmd_asru_t *ap, void *data)
405 {
406 	fmd_event_t *e;
407 	nvlist_t *nvl;
408 	char *class;
409 
410 	if (ap->asru_event != NULL && (ap->asru_flags & (FMD_ASRU_STATE |
411 	    FMD_ASRU_PRESENT)) == (FMD_ASRU_FAULTY | FMD_ASRU_PRESENT)) {
412 
413 		fmd_dprintf(FMD_DBG_ASRU,
414 		    "replaying fault event for %s", ap->asru_name);
415 
416 		(void) nvlist_xdup(ap->asru_event, &nvl, &fmd.d_nva);
417 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
418 
419 		(void) nvlist_add_string(nvl, FMD_EVN_UUID,
420 		    ((fmd_case_impl_t *)ap->asru_case)->ci_uuid);
421 
422 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
423 		fmd_dispq_dispatch(fmd.d_disp, e, class);
424 	}
425 }
426 
/*
 * Walk every asru in the hash and replay its saved fault event where
 * fmd_asru_hash_replay_asru() decides that is appropriate.
 */
void
fmd_asru_hash_replay(fmd_asru_hash_t *ahp)
{
	fmd_asru_hash_apply(ahp, fmd_asru_hash_replay_asru, NULL);
}
432 
433 fmd_asru_hash_t *
434 fmd_asru_hash_create(const char *root, const char *dir)
435 {
436 	fmd_asru_hash_t *ahp;
437 	char path[PATH_MAX];
438 
439 	ahp = fmd_alloc(sizeof (fmd_asru_hash_t), FMD_SLEEP);
440 	(void) pthread_rwlock_init(&ahp->ah_lock, NULL);
441 	ahp->ah_hashlen = fmd.d_str_buckets;
442 	ahp->ah_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, FMD_SLEEP);
443 	(void) snprintf(path, sizeof (path), "%s/%s", root, dir);
444 	ahp->ah_dirpath = fmd_strdup(path, FMD_SLEEP);
445 	(void) fmd_conf_getprop(fmd.d_conf, "rsrc.age", &ahp->ah_lifetime);
446 	(void) fmd_conf_getprop(fmd.d_conf, "fakenotpresent",
447 	    (uint32_t *)&fmd_asru_fake_not_present);
448 	ahp->ah_count = 0;
449 	ahp->ah_error = 0;
450 
451 	return (ahp);
452 }
453 
454 void
455 fmd_asru_hash_destroy(fmd_asru_hash_t *ahp)
456 {
457 	fmd_asru_t *ap, *np;
458 	uint_t i;
459 
460 	for (i = 0; i < ahp->ah_hashlen; i++) {
461 		for (ap = ahp->ah_hash[i]; ap != NULL; ap = np) {
462 			np = ap->asru_next;
463 			ap->asru_next = NULL;
464 			fmd_asru_hash_release(ahp, ap);
465 		}
466 	}
467 
468 	fmd_strfree(ahp->ah_dirpath);
469 	fmd_free(ahp->ah_hash, sizeof (void *) * ahp->ah_hashlen);
470 	fmd_free(ahp, sizeof (fmd_asru_hash_t));
471 }
472 
473 /*
474  * Take a snapshot of the ASRU database by placing an additional hold on each
475  * member in an auxiliary array, and then call 'func' for each ASRU.
476  */
477 void
478 fmd_asru_hash_apply(fmd_asru_hash_t *ahp,
479     void (*func)(fmd_asru_t *, void *), void *arg)
480 {
481 	fmd_asru_t *ap, **aps, **app;
482 	uint_t apc, i;
483 
484 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
485 
486 	aps = app = fmd_alloc(ahp->ah_count * sizeof (fmd_asru_t *), FMD_SLEEP);
487 	apc = ahp->ah_count;
488 
489 	for (i = 0; i < ahp->ah_hashlen; i++) {
490 		for (ap = ahp->ah_hash[i]; ap != NULL; ap = ap->asru_next)
491 			*app++ = fmd_asru_hold(ap);
492 	}
493 
494 	ASSERT(app == aps + apc);
495 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
496 
497 	for (i = 0; i < apc; i++) {
498 		func(aps[i], arg);
499 		fmd_asru_hash_release(ahp, aps[i]);
500 	}
501 
502 	fmd_free(aps, apc * sizeof (fmd_asru_t *));
503 }
504 
505 /*
506  * Lookup an asru in the hash by name and place a hold on it.  If the asru is
507  * not found, no entry is created and NULL is returned.
508  */
509 fmd_asru_t *
510 fmd_asru_hash_lookup_name(fmd_asru_hash_t *ahp, const char *name)
511 {
512 	fmd_asru_t *ap;
513 
514 	(void) pthread_rwlock_rdlock(&ahp->ah_lock);
515 	ap = fmd_asru_hash_lookup(ahp, name);
516 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
517 
518 	return (ap);
519 }
520 
/*
 * Lookup an asru in the hash and place a hold on it.  If 'create' is true, an
 * absent entry will be created for the caller; otherwise NULL is returned.
 * NULL is also returned if the FMRI cannot be converted to its string form.
 * A newly created asru is returned with FMD_ASRU_VALID set and a freshly
 * generated uuid, but with no log, case, or event attached.
 */
fmd_asru_t *
fmd_asru_hash_lookup_nvl(fmd_asru_hash_t *ahp, nvlist_t *fmri, int create)
{
	fmd_asru_t *ap;
	char *name = NULL;
	ssize_t namelen;
	uint_t h;

	/*
	 * In order to lookup the ASRU in our hash, convert the FMRI from
	 * nvlist form into a string form using the scheme module.  The first
	 * call only computes the length; the second fills in the buffer.
	 */
	if ((namelen = fmd_fmri_nvl2str(fmri, NULL, 0)) == -1 ||
	    (name = fmd_alloc(namelen + 1, FMD_NOSLEEP)) == NULL ||
	    fmd_fmri_nvl2str(fmri, name, namelen + 1) == -1) {
		if (name != NULL)
			fmd_free(name, namelen + 1);
		return (NULL);
	}

	/*
	 * If we must create the asru, grab the rwlock as a writer; otherwise
	 * reader is sufficient.  Then search the hash for the given asru name.
	 * If we didn't find the asru in the hash and we need to create it,
	 * create and insert the asru with ahp->ah_lock held and hash it in.
	 * We'll then drop the rwlock and proceed to initializing the asru.
	 */
	if (create)
		(void) pthread_rwlock_wrlock(&ahp->ah_lock);
	else
		(void) pthread_rwlock_rdlock(&ahp->ah_lock);

	h = fmd_strhash(name) % ahp->ah_hashlen;

	for (ap = ahp->ah_hash[h]; ap != NULL; ap = ap->asru_next) {
		if (strcmp(ap->asru_name, name) == 0)
			break;
	}

	/*
	 * From here on 'create' means "we created the asru and hold its
	 * asru_lock"; it is cleared if the asru already existed or if the
	 * caller did not ask for creation.  The new asru's lock is taken
	 * before ah_lock is dropped so that other threads finding it in the
	 * hash block until FMD_ASRU_VALID has been set below.
	 */
	if (ap == NULL && create == FMD_B_TRUE) {
		ap = fmd_asru_create(ahp, NULL, name, fmri);
		fmd_asru_hash_insert(ahp, ap);
		(void) pthread_mutex_lock(&ap->asru_lock);
	} else
		create = FMD_B_FALSE;

	(void) pthread_rwlock_unlock(&ahp->ah_lock);
	fmd_free(name, namelen + 1);

	/*
	 * If 'create' is still true, then we need to initialize the asru log;
	 * If 'create' is false and an asru was found, we must cond_wait for
	 * the FMD_ASRU_VALID bit to be set before returning.  In both cases,
	 * we increment asru_refs for the caller.
	 */
	if (create == FMD_B_TRUE) {
		uuid_t uuid;

		ASSERT(MUTEX_HELD(&ap->asru_lock));
		ASSERT(ap->asru_uuid == NULL && ap->asru_log == NULL);

		/*
		 * Generate a UUID for the ASRU.  libuuid cleverly gives us no
		 * interface for specifying or learning the buffer size.  Sigh.
		 * The spec says 36 bytes but we use a tunable just to be safe.
		 */
		(void) fmd_conf_getprop(fmd.d_conf,
		    "uuidlen", &ap->asru_uuidlen);

		ap->asru_uuid = fmd_zalloc(ap->asru_uuidlen + 1, FMD_SLEEP);
		uuid_generate(uuid);
		uuid_unparse(uuid, ap->asru_uuid);

		ASSERT(!(ap->asru_flags & FMD_ASRU_VALID));
		ap->asru_flags |= FMD_ASRU_VALID;

		/*
		 * fmd_asru_create() returned the asru with one reference,
		 * which stays with the hash; this one is for the caller.
		 * Wake any threads waiting for FMD_ASRU_VALID.
		 */
		ap->asru_refs++;
		ASSERT(ap->asru_refs != 0);
		(void) pthread_cond_broadcast(&ap->asru_cv);
		(void) pthread_mutex_unlock(&ap->asru_lock);

		TRACE((FMD_DBG_ASRU, "asru %s created as %p",
		    ap->asru_uuid, (void *)ap));

	} else if (ap != NULL) {
		/*
		 * NOTE(review): ah_lock has already been dropped and no hold
		 * was taken on 'ap' while it was held -- confirm that a
		 * concurrent fmd_asru_hash_delete_name() cannot release the
		 * last reference before we acquire asru_lock here.
		 */
		(void) pthread_mutex_lock(&ap->asru_lock);

		while (!(ap->asru_flags & FMD_ASRU_VALID))
			(void) pthread_cond_wait(&ap->asru_cv, &ap->asru_lock);

		ap->asru_refs++;
		ASSERT(ap->asru_refs != 0);
		(void) pthread_mutex_unlock(&ap->asru_lock);
	}

	return (ap);
}
622 
623 /*
624  * Release the reference count on an asru obtained using fmd_asru_hash_lookup.
625  * We take 'ahp' for symmetry and in case we need to use it in future work.
626  */
627 /*ARGSUSED*/
628 void
629 fmd_asru_hash_release(fmd_asru_hash_t *ahp, fmd_asru_t *ap)
630 {
631 	(void) pthread_mutex_lock(&ap->asru_lock);
632 
633 	ASSERT(ap->asru_refs != 0);
634 	if (--ap->asru_refs == 0)
635 		fmd_asru_destroy(ap);
636 	else
637 		(void) pthread_mutex_unlock(&ap->asru_lock);
638 }
639 
640 int
641 fmd_asru_hash_delete_name(fmd_asru_hash_t *ahp, const char *name)
642 {
643 	fmd_asru_t *ap, **pp;
644 	char path[PATH_MAX];
645 	uint_t h;
646 
647 	(void) pthread_rwlock_wrlock(&ahp->ah_lock);
648 
649 	h = fmd_strhash(name) % ahp->ah_hashlen;
650 	pp = &ahp->ah_hash[h];
651 
652 	for (ap = *pp; ap != NULL; ap = ap->asru_next) {
653 		if (strcmp(ap->asru_name, name) == 0)
654 			break;
655 		else
656 			pp = &ap->asru_next;
657 	}
658 
659 	if (ap != NULL) {
660 		*pp = ap->asru_next;
661 		ap->asru_next = NULL;
662 		ASSERT(ahp->ah_count != 0);
663 		ahp->ah_count--;
664 	}
665 
666 	(void) pthread_rwlock_unlock(&ahp->ah_lock);
667 
668 	if (ap == NULL)
669 		return (fmd_set_errno(EFMD_ASRU_NOENT));
670 
671 	/*
672 	 * If we found a matching ASRU, unlink its log file and then release
673 	 * the hash entry.  Note that it may still be referenced if another
674 	 * thread is manipulating it; this is ok because once we unlink, the
675 	 * log file will not be restored, and the log data will be freed when
676 	 * all of the referencing threads release their respective references.
677 	 */
678 	(void) snprintf(path, sizeof (path),
679 	    "%s/%s", ahp->ah_dirpath, ap->asru_uuid);
680 
681 	if (unlink(path) != 0)
682 		fmd_error(EFMD_ASRU_UNLINK, "failed to unlink asru %s", path);
683 
684 	fmd_asru_hash_release(ahp, ap);
685 	return (0);
686 }
687 
688 static void
689 fmd_asru_logevent(fmd_asru_t *ap)
690 {
691 	boolean_t f = (ap->asru_flags & FMD_ASRU_FAULTY) != 0;
692 	boolean_t u = (ap->asru_flags & FMD_ASRU_UNUSABLE) != 0;
693 	boolean_t m = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0;
694 
695 	fmd_case_impl_t *cip;
696 	fmd_event_t *e;
697 	fmd_log_t *lp;
698 	nvlist_t *nvl;
699 	char *class;
700 
701 	ASSERT(MUTEX_HELD(&ap->asru_lock));
702 	cip = (fmd_case_impl_t *)ap->asru_case;
703 	ASSERT(cip != NULL);
704 
705 	if ((lp = ap->asru_log) == NULL)
706 		lp = fmd_log_open(ap->asru_root, ap->asru_uuid, FMD_LOG_ASRU);
707 
708 	if (lp == NULL)
709 		return; /* can't log events if we can't open the log */
710 
711 	nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[f | (u << 1)],
712 	    ap->asru_fmri, cip->ci_uuid, cip->ci_code, f, u, m, ap->asru_event,
713 	    &cip->ci_tv);
714 
715 	(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
716 	e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
717 
718 	fmd_event_hold(e);
719 	fmd_log_append(lp, e, NULL);
720 	fmd_event_rele(e);
721 
722 	/*
723 	 * For now, we close the log file after every update to conserve file
724 	 * descriptors and daemon overhead.  If this becomes a performance
725 	 * issue this code can change to keep a fixed-size LRU cache of logs.
726 	 */
727 	fmd_log_rele(lp);
728 	ap->asru_log = NULL;
729 }
730 
731 int
732 fmd_asru_setflags(fmd_asru_t *ap, uint_t sflag, fmd_case_t *cp, nvlist_t *nvl)
733 {
734 	fmd_case_t *old_case = NULL;
735 	nvlist_t *old_nvl = NULL;
736 	uint_t nstate, ostate;
737 	boolean_t msg;
738 
739 	ASSERT(!(sflag & ~FMD_ASRU_STATE));
740 	ASSERT(sflag != FMD_ASRU_STATE);
741 
742 	(void) pthread_mutex_lock(&ap->asru_lock);
743 
744 	ostate = ap->asru_flags & FMD_ASRU_STATE;
745 	ap->asru_flags |= sflag;
746 	nstate = ap->asru_flags & FMD_ASRU_STATE;
747 
748 	if (nstate == ostate) {
749 		(void) pthread_mutex_unlock(&ap->asru_lock);
750 		return (0);
751 	}
752 
753 	if (cp != NULL && cp != ap->asru_case) {
754 		old_case = ap->asru_case;
755 		fmd_case_hold_locked(cp);
756 		ap->asru_case = cp;
757 		old_nvl = ap->asru_event;
758 		(void) nvlist_xdup(nvl, &ap->asru_event, &fmd.d_nva);
759 	}
760 
761 	if (nvl != NULL && nvlist_lookup_boolean_value(nvl,
762 	    FM_SUSPECT_MESSAGE, &msg) == 0 && msg == B_FALSE)
763 		ap->asru_flags |= FMD_ASRU_INVISIBLE;
764 
765 	TRACE((FMD_DBG_ASRU, "asru %s %s->%s", ap->asru_uuid,
766 	    _fmd_asru_snames[ostate], _fmd_asru_snames[nstate]));
767 
768 	fmd_asru_logevent(ap);
769 
770 	(void) pthread_cond_broadcast(&ap->asru_cv);
771 	(void) pthread_mutex_unlock(&ap->asru_lock);
772 
773 	if (old_case != NULL)
774 		fmd_case_rele(old_case);
775 
776 	if (old_nvl != NULL)
777 		nvlist_free(old_nvl);
778 
779 	return (1);
780 }
781 
782 int
783 fmd_asru_clrflags(fmd_asru_t *ap, uint_t sflag, fmd_case_t *cp, nvlist_t *nvl)
784 {
785 	fmd_case_t *old_case = NULL;
786 	nvlist_t *old_nvl = NULL;
787 	uint_t nstate, ostate;
788 
789 	ASSERT(!(sflag & ~FMD_ASRU_STATE));
790 	ASSERT(sflag != FMD_ASRU_STATE);
791 
792 	(void) pthread_mutex_lock(&ap->asru_lock);
793 
794 	ostate = ap->asru_flags & FMD_ASRU_STATE;
795 	ap->asru_flags &= ~sflag;
796 	nstate = ap->asru_flags & FMD_ASRU_STATE;
797 
798 	if (nstate == ostate) {
799 		(void) pthread_mutex_unlock(&ap->asru_lock);
800 		return (0);
801 	}
802 
803 	if (cp != NULL && cp != ap->asru_case) {
804 		old_case = ap->asru_case;
805 		fmd_case_hold_locked(cp);
806 		ap->asru_case = cp;
807 		old_nvl = ap->asru_event;
808 		(void) nvlist_xdup(nvl, &ap->asru_event, &fmd.d_nva);
809 	}
810 
811 	TRACE((FMD_DBG_ASRU, "asru %s %s->%s", ap->asru_uuid,
812 	    _fmd_asru_snames[ostate], _fmd_asru_snames[nstate]));
813 
814 	fmd_asru_logevent(ap);
815 
816 	if (cp == NULL && (sflag & FMD_ASRU_FAULTY)) {
817 		old_case = ap->asru_case;
818 		ap->asru_case = NULL;
819 		old_nvl = ap->asru_event;
820 		ap->asru_event = NULL;
821 	}
822 
823 	(void) pthread_cond_broadcast(&ap->asru_cv);
824 	(void) pthread_mutex_unlock(&ap->asru_lock);
825 
826 	if (old_case != NULL) {
827 		if (cp == NULL && (sflag & FMD_ASRU_FAULTY))
828 			fmd_case_update(old_case);
829 		fmd_case_rele(old_case);
830 	}
831 
832 	if (old_nvl != NULL)
833 		nvlist_free(old_nvl);
834 
835 	return (1);
836 }
837 
838 /*
839  * Report the current known state of the ASRU by refreshing its unusable status
840  * based upon the routines provided by the scheme module.  If the unusable bit
841  * is different, we do *not* generate a state change here because that change
842  * may be unrelated to fmd activities and therefore we have no case or event.
843  * The absence of the transition is harmless as this function is only provided
844  * for RPC observability and fmd's clients are only concerned with ASRU_FAULTY.
845  */
846 int
847 fmd_asru_getstate(fmd_asru_t *ap)
848 {
849 	int us, st;
850 
851 	if (!(ap->asru_flags & FMD_ASRU_INTERNAL) &&
852 	    (fmd_asru_fake_not_present || fmd_fmri_present(ap->asru_fmri) <= 0))
853 		return (0); /* do not report non-fmd non-present resources */
854 
855 	us = fmd_fmri_unusable(ap->asru_fmri);
856 	st = ap->asru_flags & FMD_ASRU_STATE;
857 
858 	if (us > 0)
859 		st |= FMD_ASRU_UNUSABLE;
860 	else if (us == 0)
861 		st &= ~FMD_ASRU_UNUSABLE;
862 
863 	return (st);
864 }
865