1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/fm/protocol.h>
28 #include <uuid/uuid.h>
29
30 #include <dirent.h>
31 #include <limits.h>
32 #include <unistd.h>
33 #include <alloca.h>
34 #include <stddef.h>
35 #include <fm/libtopo.h>
36
37 #include <fmd_alloc.h>
38 #include <fmd_string.h>
39 #include <fmd_error.h>
40 #include <fmd_subr.h>
41 #include <fmd_protocol.h>
42 #include <fmd_event.h>
43 #include <fmd_conf.h>
44 #include <fmd_fmri.h>
45 #include <fmd_dispq.h>
46 #include <fmd_case.h>
47 #include <fmd_module.h>
48 #include <fmd_asru.h>
49
50 #include <fmd.h>
51
/*
 * Resource event class names, one per combination of the UNUSABLE and
 * FAULTED state bits.  The per-entry comments give the combination each
 * slot stands for; presumably the table is indexed by
 * (asru_flags & FMD_ASRU_STATE) like _fmd_asru_snames below -- confirm
 * against the users of this table.
 */
static const char *const _fmd_asru_events[] = {
	FMD_RSRC_CLASS "asru.ok",		/* UNUSABLE=0 FAULTED=0 */
	FMD_RSRC_CLASS "asru.degraded",		/* UNUSABLE=0 FAULTED=1 */
	FMD_RSRC_CLASS "asru.unknown",		/* UNUSABLE=1 FAULTED=0 */
	FMD_RSRC_CLASS "asru.faulted"		/* UNUSABLE=1 FAULTED=1 */
};

/*
 * Two-letter state tags used in trace output.  fmd_asru_hash_recreate()
 * indexes this table with (asru_flags & FMD_ASRU_STATE); an upper-case
 * letter means the corresponding bit (U = unusable, F = faulted) is set.
 */
static const char *const _fmd_asru_snames[] = {
	"uf", "uF", "Uf", "UF"			/* same order as above */
};

/*
 * Test override.  When non-zero, fmd_asru_replacement_state() returns this
 * value directly instead of querying any FMRI.  It is loaded from the
 * "fakenotpresent" configuration property in fmd_asru_hash_create().
 */
volatile uint32_t fmd_asru_fake_not_present = 0;
64
65 static uint_t
fmd_asru_strhash(fmd_asru_hash_t * ahp,const char * val)66 fmd_asru_strhash(fmd_asru_hash_t *ahp, const char *val)
67 {
68 return (topo_fmri_strhash(ahp->ah_topo->ft_hdl, val) % ahp->ah_hashlen);
69 }
70
71 static boolean_t
fmd_asru_strcmp(fmd_asru_hash_t * ahp,const char * a,const char * b)72 fmd_asru_strcmp(fmd_asru_hash_t *ahp, const char *a, const char *b)
73 {
74 return (topo_fmri_strcmp(ahp->ah_topo->ft_hdl, a, b));
75 }
76
77 static fmd_asru_t *
fmd_asru_create(fmd_asru_hash_t * ahp,const char * uuid,const char * name,nvlist_t * fmri)78 fmd_asru_create(fmd_asru_hash_t *ahp, const char *uuid,
79 const char *name, nvlist_t *fmri)
80 {
81 fmd_asru_t *ap = fmd_zalloc(sizeof (fmd_asru_t), FMD_SLEEP);
82 char *s;
83
84 (void) pthread_mutex_init(&ap->asru_lock, NULL);
85 (void) pthread_cond_init(&ap->asru_cv, NULL);
86
87 ap->asru_name = fmd_strdup(name, FMD_SLEEP);
88 if (fmri)
89 (void) nvlist_xdup(fmri, &ap->asru_fmri, &fmd.d_nva);
90 ap->asru_root = fmd_strdup(ahp->ah_dirpath, FMD_SLEEP);
91 ap->asru_uuid = fmd_strdup(uuid, FMD_SLEEP);
92 ap->asru_uuidlen = ap->asru_uuid ? strlen(ap->asru_uuid) : 0;
93 ap->asru_refs = 1;
94
95 if (fmri && nvlist_lookup_string(fmri, FM_FMRI_SCHEME, &s) == 0 &&
96 strcmp(s, FM_FMRI_SCHEME_FMD) == 0)
97 ap->asru_flags |= FMD_ASRU_INTERNAL;
98
99 return (ap);
100 }
101
102 static void
fmd_asru_destroy(fmd_asru_t * ap)103 fmd_asru_destroy(fmd_asru_t *ap)
104 {
105 ASSERT(MUTEX_HELD(&ap->asru_lock));
106 ASSERT(ap->asru_refs == 0);
107
108 nvlist_free(ap->asru_event);
109 fmd_strfree(ap->asru_name);
110 nvlist_free(ap->asru_fmri);
111 fmd_strfree(ap->asru_root);
112 fmd_free(ap->asru_uuid, ap->asru_uuidlen + 1);
113 fmd_free(ap, sizeof (fmd_asru_t));
114 }
115
116 static void
fmd_asru_hash_insert(fmd_asru_hash_t * ahp,fmd_asru_t * ap)117 fmd_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_t *ap)
118 {
119 uint_t h = fmd_asru_strhash(ahp, ap->asru_name);
120
121 ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
122 ap->asru_next = ahp->ah_hash[h];
123 ahp->ah_hash[h] = ap;
124 ahp->ah_count++;
125 }
126
127 static fmd_asru_t *
fmd_asru_hold(fmd_asru_t * ap)128 fmd_asru_hold(fmd_asru_t *ap)
129 {
130 (void) pthread_mutex_lock(&ap->asru_lock);
131 ap->asru_refs++;
132 ASSERT(ap->asru_refs != 0);
133 (void) pthread_mutex_unlock(&ap->asru_lock);
134 return (ap);
135 }
136
137 /*
138 * Lookup an asru in the hash by name and place a hold on it. If the asru is
139 * not found, no entry is created and NULL is returned. This internal function
140 * is for callers who have the ah_lock held and is used by lookup_name below.
141 */
142 fmd_asru_t *
fmd_asru_hash_lookup(fmd_asru_hash_t * ahp,const char * name)143 fmd_asru_hash_lookup(fmd_asru_hash_t *ahp, const char *name)
144 {
145 fmd_asru_t *ap;
146 uint_t h;
147
148 ASSERT(RW_LOCK_HELD(&ahp->ah_lock));
149 h = fmd_asru_strhash(ahp, name);
150
151 for (ap = ahp->ah_hash[h]; ap != NULL; ap = ap->asru_next) {
152 if (fmd_asru_strcmp(ahp, ap->asru_name, name))
153 break;
154 }
155
156 if (ap != NULL)
157 (void) fmd_asru_hold(ap);
158 else
159 (void) fmd_set_errno(EFMD_ASRU_NOENT);
160
161 return (ap);
162 }
163
164 #define HC_ONLY_FALSE 0
165 #define HC_ONLY_TRUE 1
166
167 static int
fmd_asru_replacement_state(nvlist_t * event,int hc_only)168 fmd_asru_replacement_state(nvlist_t *event, int hc_only)
169 {
170 int ps = -1;
171 nvlist_t *asru, *fru, *rsrc;
172 char *s;
173
174 /*
175 * Check if there is evidence that this object is no longer present.
176 * In general fmd_fmri_present() should be supported on resources and/or
177 * frus, as those are the things that are physically present or not
178 * present - an asru can be spread over a number of frus some of which
179 * are present and some not, so fmd_fmri_present() is not generally
180 * meaningful. However retain a check for asru first for compatibility.
181 * If we have checked all three and we still get -1 then nothing knows
182 * whether it's present or not, so err on the safe side and treat it
183 * as still present.
184 *
185 * Note that if hc_only is set, then we only check status using fmris
186 * that are in hc-scheme.
187 */
188 if (fmd_asru_fake_not_present)
189 return (fmd_asru_fake_not_present);
190 if (nvlist_lookup_nvlist(event, FM_FAULT_ASRU, &asru) == 0 &&
191 (hc_only == HC_ONLY_FALSE || (nvlist_lookup_string(asru,
192 FM_FMRI_SCHEME, &s) == 0 && strcmp(s, FM_FMRI_SCHEME_HC) == 0)))
193 ps = fmd_fmri_replaced(asru);
194 if (ps == -1 || ps == FMD_OBJ_STATE_UNKNOWN) {
195 if (nvlist_lookup_nvlist(event, FM_FAULT_RESOURCE,
196 &rsrc) == 0 && (hc_only == HC_ONLY_FALSE ||
197 (nvlist_lookup_string(rsrc, FM_FMRI_SCHEME, &s) == 0 &&
198 strcmp(s, FM_FMRI_SCHEME_HC) == 0))) {
199 if (ps == -1) {
200 ps = fmd_fmri_replaced(rsrc);
201 } else {
202 /* see if we can improve on UNKNOWN */
203 int ps2 = fmd_fmri_replaced(rsrc);
204 if (ps2 == FMD_OBJ_STATE_STILL_PRESENT ||
205 ps2 == FMD_OBJ_STATE_REPLACED)
206 ps = ps2;
207 }
208 }
209 }
210 if (ps == -1 || ps == FMD_OBJ_STATE_UNKNOWN) {
211 if (nvlist_lookup_nvlist(event, FM_FAULT_FRU, &fru) == 0 &&
212 (hc_only == HC_ONLY_FALSE || (nvlist_lookup_string(fru,
213 FM_FMRI_SCHEME, &s) == 0 &&
214 strcmp(s, FM_FMRI_SCHEME_HC) == 0))) {
215 if (ps == -1) {
216 ps = fmd_fmri_replaced(fru);
217 } else {
218 /* see if we can improve on UNKNOWN */
219 int ps2 = fmd_fmri_replaced(fru);
220 if (ps2 == FMD_OBJ_STATE_STILL_PRESENT ||
221 ps2 == FMD_OBJ_STATE_REPLACED)
222 ps = ps2;
223 }
224 }
225 }
226 if (ps == -1)
227 ps = FMD_OBJ_STATE_UNKNOWN;
228 return (ps);
229 }
230
231 static void
fmd_asru_asru_hash_insert(fmd_asru_hash_t * ahp,fmd_asru_link_t * alp,char * name)232 fmd_asru_asru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
233 char *name)
234 {
235 uint_t h = fmd_asru_strhash(ahp, name);
236
237 ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
238 alp->al_asru_next = ahp->ah_asru_hash[h];
239 ahp->ah_asru_hash[h] = alp;
240 ahp->ah_al_count++;
241 }
242
243 static void
fmd_asru_case_hash_insert(fmd_asru_hash_t * ahp,fmd_asru_link_t * alp,char * name)244 fmd_asru_case_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
245 char *name)
246 {
247 uint_t h = fmd_asru_strhash(ahp, name);
248
249 ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
250 alp->al_case_next = ahp->ah_case_hash[h];
251 ahp->ah_case_hash[h] = alp;
252 }
253
254 static void
fmd_asru_fru_hash_insert(fmd_asru_hash_t * ahp,fmd_asru_link_t * alp,char * name)255 fmd_asru_fru_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp, char *name)
256 {
257 uint_t h = fmd_asru_strhash(ahp, name);
258
259 ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
260 alp->al_fru_next = ahp->ah_fru_hash[h];
261 ahp->ah_fru_hash[h] = alp;
262 }
263
264 static void
fmd_asru_label_hash_insert(fmd_asru_hash_t * ahp,fmd_asru_link_t * alp,char * name)265 fmd_asru_label_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
266 char *name)
267 {
268 uint_t h = fmd_asru_strhash(ahp, name);
269
270 ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
271 alp->al_label_next = ahp->ah_label_hash[h];
272 ahp->ah_label_hash[h] = alp;
273 }
274
275 static void
fmd_asru_rsrc_hash_insert(fmd_asru_hash_t * ahp,fmd_asru_link_t * alp,char * name)276 fmd_asru_rsrc_hash_insert(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp,
277 char *name)
278 {
279 uint_t h = fmd_asru_strhash(ahp, name);
280
281 ASSERT(RW_WRITE_HELD(&ahp->ah_lock));
282 alp->al_rsrc_next = ahp->ah_rsrc_hash[h];
283 ahp->ah_rsrc_hash[h] = alp;
284 }
285
286 static void
fmd_asru_al_destroy(fmd_asru_link_t * alp)287 fmd_asru_al_destroy(fmd_asru_link_t *alp)
288 {
289 ASSERT(alp->al_refs == 0);
290 ASSERT(MUTEX_HELD(&alp->al_asru->asru_lock));
291
292 if (alp->al_log != NULL)
293 fmd_log_rele(alp->al_log);
294
295 fmd_free(alp->al_uuid, alp->al_uuidlen + 1);
296 nvlist_free(alp->al_event);
297 fmd_strfree(alp->al_rsrc_name);
298 fmd_strfree(alp->al_case_uuid);
299 fmd_strfree(alp->al_fru_name);
300 fmd_strfree(alp->al_asru_name);
301 fmd_strfree(alp->al_label);
302 nvlist_free(alp->al_asru_fmri);
303 fmd_free(alp, sizeof (fmd_asru_link_t));
304 }
305
306 static fmd_asru_link_t *
fmd_asru_al_hold(fmd_asru_link_t * alp)307 fmd_asru_al_hold(fmd_asru_link_t *alp)
308 {
309 fmd_asru_t *ap = alp->al_asru;
310
311 (void) pthread_mutex_lock(&ap->asru_lock);
312 ap->asru_refs++;
313 alp->al_refs++;
314 ASSERT(alp->al_refs != 0);
315 (void) pthread_mutex_unlock(&ap->asru_lock);
316 return (alp);
317 }
318
319 static void fmd_asru_destroy(fmd_asru_t *ap);
320
321 /*ARGSUSED*/
322 static void
fmd_asru_al_hash_release(fmd_asru_hash_t * ahp,fmd_asru_link_t * alp)323 fmd_asru_al_hash_release(fmd_asru_hash_t *ahp, fmd_asru_link_t *alp)
324 {
325 fmd_asru_t *ap = alp->al_asru;
326
327 (void) pthread_mutex_lock(&ap->asru_lock);
328 ASSERT(alp->al_refs != 0);
329 if (--alp->al_refs == 0)
330 fmd_asru_al_destroy(alp);
331 ASSERT(ap->asru_refs != 0);
332 if (--ap->asru_refs == 0)
333 fmd_asru_destroy(ap);
334 else
335 (void) pthread_mutex_unlock(&ap->asru_lock);
336 }
337
338 static int
fmd_asru_get_namestr(nvlist_t * nvl,char ** name,ssize_t * namelen)339 fmd_asru_get_namestr(nvlist_t *nvl, char **name, ssize_t *namelen)
340 {
341 if ((*namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) == -1)
342 return (EFMD_ASRU_FMRI);
343 *name = fmd_alloc(*namelen + 1, FMD_SLEEP);
344 if (fmd_fmri_nvl2str(nvl, *name, *namelen + 1) == -1) {
345 if (*name != NULL)
346 fmd_free(*name, *namelen + 1);
347 return (EFMD_ASRU_FMRI);
348 }
349 return (0);
350 }
351
352 static fmd_asru_link_t *
fmd_asru_al_create(fmd_asru_hash_t * ahp,nvlist_t * nvl,fmd_case_t * cp,const char * al_uuid)353 fmd_asru_al_create(fmd_asru_hash_t *ahp, nvlist_t *nvl, fmd_case_t *cp,
354 const char *al_uuid)
355 {
356 nvlist_t *asru = NULL, *fru, *rsrc;
357 int got_rsrc = 0, got_asru = 0, got_fru = 0;
358 ssize_t fru_namelen, rsrc_namelen, asru_namelen;
359 char *asru_name, *rsrc_name, *fru_name, *name, *label;
360 fmd_asru_link_t *alp;
361 fmd_asru_t *ap;
362 boolean_t msg;
363 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
364
365 if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &asru) == 0 &&
366 fmd_asru_get_namestr(asru, &asru_name, &asru_namelen) == 0)
367 got_asru = 1;
368 if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &fru) == 0 &&
369 fmd_asru_get_namestr(fru, &fru_name, &fru_namelen) == 0)
370 got_fru = 1;
371 if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0 &&
372 fmd_asru_get_namestr(rsrc, &rsrc_name, &rsrc_namelen) == 0)
373 got_rsrc = 1;
374 if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION, &label) != 0)
375 label = "";
376
377 /*
378 * Grab the rwlock as a writer; Then create and insert the asru with
379 * ahp->ah_lock held and hash it in. We'll then drop the rwlock and
380 * proceed to initializing the asru.
381 */
382 (void) pthread_rwlock_wrlock(&ahp->ah_lock);
383
384 /*
385 * Create and initialise the per-fault "link" structure.
386 */
387 alp = fmd_zalloc(sizeof (fmd_asru_link_t), FMD_SLEEP);
388 if (got_asru)
389 (void) nvlist_xdup(asru, &alp->al_asru_fmri, &fmd.d_nva);
390 alp->al_uuid = fmd_strdup(al_uuid, FMD_SLEEP);
391 alp->al_uuidlen = strlen(alp->al_uuid);
392 alp->al_refs = 1;
393
394 /*
395 * If this is the first fault for this asru, then create the per-asru
396 * structure and link into the hash.
397 */
398 name = got_asru ? asru_name : "";
399 if ((ap = fmd_asru_hash_lookup(ahp, name)) == NULL) {
400 ap = fmd_asru_create(ahp, al_uuid, name, got_asru ? asru :
401 NULL);
402 fmd_asru_hash_insert(ahp, ap);
403 } else
404 nvlist_free(ap->asru_event);
405 (void) nvlist_xdup(nvl, &ap->asru_event, &fmd.d_nva);
406
407 /*
408 * Put the link structure on the list associated with the per-asru
409 * structure. Then put the link structure on the various hashes.
410 */
411 fmd_list_append(&ap->asru_list, (fmd_list_t *)alp);
412 alp->al_asru = ap;
413 alp->al_asru_name = got_asru ? asru_name : fmd_strdup("", FMD_SLEEP);
414 fmd_asru_asru_hash_insert(ahp, alp, alp->al_asru_name);
415 alp->al_fru_name = got_fru ? fru_name : fmd_strdup("", FMD_SLEEP);
416 fmd_asru_fru_hash_insert(ahp, alp, alp->al_fru_name);
417 alp->al_rsrc_name = got_rsrc ? rsrc_name : fmd_strdup("", FMD_SLEEP);
418 fmd_asru_rsrc_hash_insert(ahp, alp, alp->al_rsrc_name);
419 alp->al_label = fmd_strdup(label, FMD_SLEEP);
420 fmd_asru_label_hash_insert(ahp, alp, label);
421 alp->al_case_uuid = fmd_strdup(cip->ci_uuid, FMD_SLEEP);
422 fmd_asru_case_hash_insert(ahp, alp, cip->ci_uuid);
423 (void) pthread_mutex_lock(&ap->asru_lock);
424 (void) pthread_rwlock_unlock(&ahp->ah_lock);
425
426 ap->asru_case = alp->al_case = cp;
427 if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE, &msg) == 0 &&
428 msg == B_FALSE)
429 ap->asru_flags |= FMD_ASRU_INVISIBLE;
430 (void) nvlist_xdup(nvl, &alp->al_event, &fmd.d_nva);
431 ap->asru_flags |= FMD_ASRU_VALID;
432 (void) pthread_cond_broadcast(&ap->asru_cv);
433 (void) pthread_mutex_unlock(&ap->asru_lock);
434 return (alp);
435 }
436
/*
 * Log replay callback (installed by fmd_asru_hash_logopen()): rebuild the
 * resource cache entry, and the case it belongs to, from one record of a
 * saved ASRU log.
 *
 *	lp	the log being replayed; its name supplies the link uuid
 *	ep	the replayed record
 *	ahp	the asru hash being repopulated
 *
 * A record lacking either the faulty flag or the embedded fault event is
 * rejected: an error is reported, ah_error is set to EFMD_ASRU_EVENT and
 * nothing is created.  All other members are optional and default to
 * false/NULL.
 */
static void
fmd_asru_hash_recreate(fmd_log_t *lp, fmd_event_t *ep, fmd_asru_hash_t *ahp)
{
	nvlist_t *nvl = FMD_EVENT_NVL(ep);
	boolean_t faulty = FMD_B_FALSE, unusable = FMD_B_FALSE;
	int ps;
	boolean_t repaired = FMD_B_FALSE, replaced = FMD_B_FALSE;
	boolean_t acquitted = FMD_B_FALSE, resolved = FMD_B_FALSE;
	nvlist_t *flt, *flt_copy, *asru;
	char *case_uuid = NULL, *case_code = NULL;
	fmd_asru_t *ap;
	fmd_asru_link_t *alp;
	fmd_case_t *cp;
	int64_t *diag_time;
	nvlist_t *de_fmri, *de_fmri_dup;
	uint_t nelem;
	topo_hdl_t *thp;
	char *class;
	nvlist_t *rsrc;
	int err;
	boolean_t injected;

	/*
	 * Extract the most recent values of 'faulty' from the event log.
	 */
	if (nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_FAULTY,
	    &faulty) != 0) {
		fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: "
		    "invalid event log record\n", lp->log_name);
		ahp->ah_error = EFMD_ASRU_EVENT;
		return;
	}
	if (nvlist_lookup_nvlist(nvl, FM_RSRC_ASRU_EVENT, &flt) != 0) {
		fmd_error(EFMD_ASRU_EVENT, "failed to reload asru %s: "
		    "invalid event log record\n", lp->log_name);
		ahp->ah_error = EFMD_ASRU_EVENT;
		return;
	}

	/*
	 * The remaining members are optional; a failed lookup leaves the
	 * corresponding variable at its initial value.
	 */
	(void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_UUID, &case_uuid);
	(void) nvlist_lookup_string(nvl, FM_RSRC_ASRU_CODE, &case_code);
	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_UNUSABLE,
	    &unusable);
	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPAIRED,
	    &repaired);
	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_REPLACED,
	    &replaced);
	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_ACQUITTED,
	    &acquitted);
	(void) nvlist_lookup_boolean_value(nvl, FM_RSRC_ASRU_RESOLVED,
	    &resolved);

	/*
	 * Attempt to recreate the case in CLOSED, REPAIRED or RESOLVED state
	 * (depending on whether the faulty/resolved bits are set).
	 * If the case is already present, fmd_case_recreate() will return it.
	 * If not, we'll create a new orphaned case. Either way, we use the
	 * ASRU event to insert a suspect into the partially-restored case.
	 */
	fmd_module_lock(fmd.d_rmod);
	cp = fmd_case_recreate(fmd.d_rmod, NULL, faulty ? FMD_CASE_CLOSED :
	    resolved ? FMD_CASE_RESOLVED : FMD_CASE_REPAIRED, case_uuid,
	    case_code);
	/* this hold is dropped via al_case in fmd_asru_hash_destroy() */
	fmd_case_hold(cp);
	fmd_module_unlock(fmd.d_rmod);
	if (nvlist_lookup_boolean_value(nvl, FM_SUSPECT_INJECTED,
	    &injected) == 0 && injected)
		fmd_case_set_injected(cp);
	/*
	 * Prefer the recorded diagnosis time; fall back to the log file's
	 * creation-time stamp when the record does not carry one.
	 */
	if (nvlist_lookup_int64_array(nvl, FM_SUSPECT_DIAG_TIME, &diag_time,
	    &nelem) == 0 && nelem >= 2)
		fmd_case_settime(cp, diag_time[0], diag_time[1]);
	else
		fmd_case_settime(cp, lp->log_stat.st_ctime, 0);
	if (nvlist_lookup_nvlist(nvl, FM_SUSPECT_DE, &de_fmri) == 0) {
		(void) nvlist_xdup(de_fmri, &de_fmri_dup, &fmd.d_nva);
		fmd_case_set_de_fmri(cp, de_fmri_dup);
	}
	/* work on a private copy; 'flt' still points into the log record */
	(void) nvlist_xdup(flt, &flt_copy, &fmd.d_nva);

	/*
	 * For faults with a resource, re-evaluate the asru from the resource.
	 */
	thp = fmd_fmri_topo_hold(TOPO_VERSION);
	if (nvlist_lookup_string(flt_copy, FM_CLASS, &class) == 0 &&
	    strncmp(class, "fault", 5) == 0 &&
	    nvlist_lookup_nvlist(flt_copy, FM_FAULT_RESOURCE, &rsrc) == 0 &&
	    rsrc != NULL &&
	    (fmd_fmri_replaced(rsrc) != FMD_OBJ_STATE_REPLACED) &&
	    topo_fmri_asru(thp, rsrc, &asru, &err) == 0) {
		(void) nvlist_remove(flt_copy, FM_FAULT_ASRU, DATA_TYPE_NVLIST);
		(void) nvlist_add_nvlist(flt_copy, FM_FAULT_ASRU, asru);
		nvlist_free(asru);
	}
	fmd_fmri_topo_rele(thp);

	/*
	 * From here on 'flt' is a second private copy, freed further
	 * down.  flt_copy is handed to the case and is not freed here --
	 * presumably fmd_case_recreate_suspect() takes ownership of it;
	 * confirm.
	 */
	(void) nvlist_xdup(flt_copy, &flt, &fmd.d_nva);

	fmd_case_recreate_suspect(cp, flt_copy);

	/*
	 * Now create the resource cache entries.
	 */
	alp = fmd_asru_al_create(ahp, flt, cp, fmd_strbasename(lp->log_name));
	ap = alp->al_asru;

	/*
	 * Check to see if the resource is still present in the system.
	 */
	ps = fmd_asru_replacement_state(flt, HC_ONLY_FALSE);
	if (ps == FMD_OBJ_STATE_REPLACED) {
		replaced = FMD_B_TRUE;
	} else if (ps == FMD_OBJ_STATE_STILL_PRESENT ||
	    ps == FMD_OBJ_STATE_UNKNOWN) {
		ap->asru_flags |= FMD_ASRU_PRESENT;
		if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU,
		    &asru) == 0) {
			int us;

			/*
			 * Refresh 'unusable' from the live system.  Any
			 * service state not listed below, and a negative
			 * fmd_fmri_unusable() result, leave the value
			 * read from the log untouched.
			 */
			switch (fmd_fmri_service_state(asru)) {
			case FMD_SERVICE_STATE_UNUSABLE:
				unusable = FMD_B_TRUE;
				break;
			case FMD_SERVICE_STATE_OK:
			case FMD_SERVICE_STATE_ISOLATE_PENDING:
			case FMD_SERVICE_STATE_DEGRADED:
				unusable = FMD_B_FALSE;
				break;
			case FMD_SERVICE_STATE_UNKNOWN:
			case -1:
				/* not supported by scheme */
				us = fmd_fmri_unusable(asru);
				if (us > 0)
					unusable = FMD_B_TRUE;
				else if (us == 0)
					unusable = FMD_B_FALSE;
				break;
			}
		}
	}

	nvlist_free(flt);

	ap->asru_flags |= FMD_ASRU_RECREATED;
	if (faulty) {
		alp->al_flags |= FMD_ASRU_FAULTY;
		ap->asru_flags |= FMD_ASRU_FAULTY;
	}
	if (unusable) {
		alp->al_flags |= FMD_ASRU_UNUSABLE;
		ap->asru_flags |= FMD_ASRU_UNUSABLE;
	}
	/* record a single reason, in order of precedence */
	if (replaced)
		alp->al_reason = FMD_ASRU_REPLACED;
	else if (repaired)
		alp->al_reason = FMD_ASRU_REPAIRED;
	else if (acquitted)
		alp->al_reason = FMD_ASRU_ACQUITTED;
	else
		alp->al_reason = FMD_ASRU_REMOVED;

	TRACE((FMD_DBG_ASRU, "asru %s recreated as %p (%s)", alp->al_uuid,
	    (void *)ap, _fmd_asru_snames[ap->asru_flags & FMD_ASRU_STATE]));
}
599
600 static void
fmd_asru_hash_discard(fmd_asru_hash_t * ahp,const char * uuid,int err)601 fmd_asru_hash_discard(fmd_asru_hash_t *ahp, const char *uuid, int err)
602 {
603 char src[PATH_MAX], dst[PATH_MAX];
604
605 (void) snprintf(src, PATH_MAX, "%s/%s", ahp->ah_dirpath, uuid);
606 (void) snprintf(dst, PATH_MAX, "%s/%s-", ahp->ah_dirpath, uuid);
607
608 if (err != 0)
609 err = rename(src, dst);
610 else
611 err = unlink(src);
612
613 if (err != 0 && errno != ENOENT)
614 fmd_error(EFMD_ASRU_EVENT, "failed to rename log %s", src);
615 }
616
617 /*
618 * Open a saved log file and restore it into the ASRU hash. If we can't even
619 * open the log, rename the log file to <uuid>- to indicate it is corrupt. If
620 * fmd_log_replay() fails, we either delete the file (if it has reached the
621 * upper limit on cache age) or rename it for debugging if it was corrupted.
622 */
623 static void
fmd_asru_hash_logopen(fmd_asru_hash_t * ahp,const char * uuid)624 fmd_asru_hash_logopen(fmd_asru_hash_t *ahp, const char *uuid)
625 {
626 fmd_log_t *lp = fmd_log_tryopen(ahp->ah_dirpath, uuid, FMD_LOG_ASRU);
627 uint_t n;
628
629 if (lp == NULL) {
630 fmd_asru_hash_discard(ahp, uuid, errno);
631 return;
632 }
633
634 ahp->ah_error = 0;
635 n = ahp->ah_al_count;
636
637 fmd_log_replay(lp, (fmd_log_f *)fmd_asru_hash_recreate, ahp);
638 fmd_log_rele(lp);
639
640 if (ahp->ah_al_count == n)
641 fmd_asru_hash_discard(ahp, uuid, ahp->ah_error);
642 }
643
644 void
fmd_asru_hash_refresh(fmd_asru_hash_t * ahp)645 fmd_asru_hash_refresh(fmd_asru_hash_t *ahp)
646 {
647 struct dirent *dp;
648 DIR *dirp;
649 int zero;
650
651 if ((dirp = opendir(ahp->ah_dirpath)) == NULL) {
652 fmd_error(EFMD_ASRU_NODIR,
653 "failed to open asru cache directory %s", ahp->ah_dirpath);
654 return;
655 }
656
657 (void) fmd_conf_getprop(fmd.d_conf, "rsrc.zero", &zero);
658
659 (void) pthread_rwlock_wrlock(&ahp->ah_lock);
660
661 while ((dp = readdir(dirp)) != NULL) {
662 if (dp->d_name[0] == '.')
663 continue; /* skip "." and ".." */
664
665 if (zero)
666 fmd_asru_hash_discard(ahp, dp->d_name, 0);
667 else if (!fmd_strmatch(dp->d_name, "*-"))
668 fmd_asru_hash_logopen(ahp, dp->d_name);
669 }
670
671 (void) pthread_rwlock_unlock(&ahp->ah_lock);
672 (void) closedir(dirp);
673 }
674
675 /*
676 * If the resource is present and faulty but not unusable, replay the fault
677 * event that caused it be marked faulty. This will cause the agent
678 * subscribing to this fault class to again disable the resource.
679 */
680 /*ARGSUSED*/
681 static void
fmd_asru_hash_replay_asru(fmd_asru_t * ap,void * data)682 fmd_asru_hash_replay_asru(fmd_asru_t *ap, void *data)
683 {
684 fmd_event_t *e;
685 nvlist_t *nvl;
686 char *class;
687
688 if (ap->asru_event != NULL && (ap->asru_flags & (FMD_ASRU_STATE |
689 FMD_ASRU_PRESENT)) == (FMD_ASRU_FAULTY | FMD_ASRU_PRESENT)) {
690
691 fmd_dprintf(FMD_DBG_ASRU,
692 "replaying fault event for %s", ap->asru_name);
693
694 (void) nvlist_xdup(ap->asru_event, &nvl, &fmd.d_nva);
695 (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
696
697 (void) nvlist_add_string(nvl, FMD_EVN_UUID,
698 ((fmd_case_impl_t *)ap->asru_case)->ci_uuid);
699
700 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
701 fmd_dispq_dispatch(fmd.d_disp, e, class);
702 }
703 }
704
705 void
fmd_asru_hash_replay(fmd_asru_hash_t * ahp)706 fmd_asru_hash_replay(fmd_asru_hash_t *ahp)
707 {
708 fmd_asru_hash_apply(ahp, fmd_asru_hash_replay_asru, NULL);
709 }
710
711 /*
712 * Check if the resource is still present. If not, and if the rsrc.age time
713 * has expired, then do an implicit repair on the resource.
714 */
715 /*ARGSUSED*/
716 static void
fmd_asru_repair_if_aged(fmd_asru_link_t * alp,void * arg)717 fmd_asru_repair_if_aged(fmd_asru_link_t *alp, void *arg)
718 {
719 struct timeval tv;
720 fmd_log_t *lp;
721 hrtime_t hrt;
722 int ps;
723 int err;
724 fmd_asru_rep_arg_t fara;
725
726 if (!(alp->al_flags & FMD_ASRU_FAULTY))
727 return;
728
729 /*
730 * Checking for aged resources only happens on the diagnosing side
731 * not on a proxy.
732 */
733 if (alp->al_flags & FMD_ASRU_PROXY)
734 return;
735
736 ps = fmd_asru_replacement_state(alp->al_event, HC_ONLY_FALSE);
737 if (ps == FMD_OBJ_STATE_REPLACED) {
738 fara.fara_reason = FMD_ASRU_REPLACED;
739 fara.fara_bywhat = FARA_ALL;
740 fara.fara_rval = &err;
741 fmd_asru_repaired(alp, &fara);
742 } else if (ps == FMD_OBJ_STATE_NOT_PRESENT) {
743 fmd_time_gettimeofday(&tv);
744 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid,
745 FMD_LOG_ASRU);
746 if (lp == NULL)
747 return;
748 hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime);
749 fmd_log_rele(lp);
750 if (hrt * NANOSEC >= fmd.d_asrus->ah_lifetime) {
751 fara.fara_reason = FMD_ASRU_REMOVED;
752 fara.fara_bywhat = FARA_ALL;
753 fara.fara_rval = &err;
754 fmd_asru_repaired(alp, &fara);
755 }
756 }
757 }
758
759 /*ARGSUSED*/
760 void
fmd_asru_check_if_aged(fmd_asru_link_t * alp,void * arg)761 fmd_asru_check_if_aged(fmd_asru_link_t *alp, void *arg)
762 {
763 struct timeval tv;
764 fmd_log_t *lp;
765 hrtime_t hrt;
766
767 /*
768 * Case must be in resolved state for this to be called. So modified
769 * time on resource cache entry should be the time the resolve occurred.
770 * Return 0 if not yet hit rsrc.aged.
771 */
772 fmd_time_gettimeofday(&tv);
773 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, FMD_LOG_ASRU);
774 if (lp == NULL)
775 return;
776 hrt = (hrtime_t)(tv.tv_sec - lp->log_stat.st_mtime);
777 fmd_log_rele(lp);
778 if (hrt * NANOSEC < fmd.d_asrus->ah_lifetime)
779 *(int *)arg = 0;
780 }
781
782 /*ARGSUSED*/
783 void
fmd_asru_most_recent(fmd_asru_link_t * alp,void * arg)784 fmd_asru_most_recent(fmd_asru_link_t *alp, void *arg)
785 {
786 fmd_log_t *lp;
787 uint64_t hrt;
788
789 /*
790 * Find most recent modified time of a set of resource cache entries.
791 */
792 lp = fmd_log_open(alp->al_asru->asru_root, alp->al_uuid, FMD_LOG_ASRU);
793 if (lp == NULL)
794 return;
795 hrt = lp->log_stat.st_mtime;
796 fmd_log_rele(lp);
797 if (*(uint64_t *)arg < hrt)
798 *(uint64_t *)arg = hrt;
799 }
800
801 void
fmd_asru_clear_aged_rsrcs()802 fmd_asru_clear_aged_rsrcs()
803 {
804 int check_if_aged = 1;
805 fmd_asru_al_hash_apply(fmd.d_asrus, fmd_asru_repair_if_aged, NULL);
806 fmd_case_hash_apply(fmd.d_cases, fmd_case_discard_resolved,
807 &check_if_aged);
808 }
809
810 fmd_asru_hash_t *
fmd_asru_hash_create(const char * root,const char * dir)811 fmd_asru_hash_create(const char *root, const char *dir)
812 {
813 fmd_asru_hash_t *ahp;
814 char path[PATH_MAX];
815
816 ahp = fmd_alloc(sizeof (fmd_asru_hash_t), FMD_SLEEP);
817 (void) pthread_rwlock_init(&ahp->ah_lock, NULL);
818 ahp->ah_hashlen = fmd.d_str_buckets;
819 ahp->ah_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen, FMD_SLEEP);
820 ahp->ah_asru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
821 FMD_SLEEP);
822 ahp->ah_case_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
823 FMD_SLEEP);
824 ahp->ah_fru_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
825 FMD_SLEEP);
826 ahp->ah_label_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
827 FMD_SLEEP);
828 ahp->ah_rsrc_hash = fmd_zalloc(sizeof (void *) * ahp->ah_hashlen,
829 FMD_SLEEP);
830 (void) snprintf(path, sizeof (path), "%s/%s", root, dir);
831 ahp->ah_dirpath = fmd_strdup(path, FMD_SLEEP);
832 (void) fmd_conf_getprop(fmd.d_conf, "rsrc.age", &ahp->ah_lifetime);
833 (void) fmd_conf_getprop(fmd.d_conf, "fakenotpresent",
834 (uint32_t *)&fmd_asru_fake_not_present);
835 ahp->ah_al_count = 0;
836 ahp->ah_count = 0;
837 ahp->ah_error = 0;
838 ahp->ah_topo = fmd_topo_hold();
839
840 return (ahp);
841 }
842
843 void
fmd_asru_hash_destroy(fmd_asru_hash_t * ahp)844 fmd_asru_hash_destroy(fmd_asru_hash_t *ahp)
845 {
846 fmd_asru_link_t *alp, *np;
847 uint_t i;
848
849 for (i = 0; i < ahp->ah_hashlen; i++) {
850 for (alp = ahp->ah_case_hash[i]; alp != NULL; alp = np) {
851 np = alp->al_case_next;
852 alp->al_case_next = NULL;
853 fmd_case_rele(alp->al_case);
854 alp->al_case = NULL;
855 fmd_asru_al_hash_release(ahp, alp);
856 }
857 }
858
859 fmd_strfree(ahp->ah_dirpath);
860 fmd_free(ahp->ah_hash, sizeof (void *) * ahp->ah_hashlen);
861 fmd_free(ahp->ah_asru_hash, sizeof (void *) * ahp->ah_hashlen);
862 fmd_free(ahp->ah_case_hash, sizeof (void *) * ahp->ah_hashlen);
863 fmd_free(ahp->ah_fru_hash, sizeof (void *) * ahp->ah_hashlen);
864 fmd_free(ahp->ah_label_hash, sizeof (void *) * ahp->ah_hashlen);
865 fmd_free(ahp->ah_rsrc_hash, sizeof (void *) * ahp->ah_hashlen);
866 fmd_topo_rele(ahp->ah_topo);
867 fmd_free(ahp, sizeof (fmd_asru_hash_t));
868 }
869
870 /*
871 * Take a snapshot of the ASRU database by placing an additional hold on each
872 * member in an auxiliary array, and then call 'func' for each ASRU.
873 */
874 void
fmd_asru_hash_apply(fmd_asru_hash_t * ahp,void (* func)(fmd_asru_t *,void *),void * arg)875 fmd_asru_hash_apply(fmd_asru_hash_t *ahp,
876 void (*func)(fmd_asru_t *, void *), void *arg)
877 {
878 fmd_asru_t *ap, **aps, **app;
879 uint_t apc, i;
880
881 (void) pthread_rwlock_rdlock(&ahp->ah_lock);
882
883 aps = app = fmd_alloc(ahp->ah_count * sizeof (fmd_asru_t *), FMD_SLEEP);
884 apc = ahp->ah_count;
885
886 for (i = 0; i < ahp->ah_hashlen; i++) {
887 for (ap = ahp->ah_hash[i]; ap != NULL; ap = ap->asru_next)
888 *app++ = fmd_asru_hold(ap);
889 }
890
891 ASSERT(app == aps + apc);
892 (void) pthread_rwlock_unlock(&ahp->ah_lock);
893
894 for (i = 0; i < apc; i++) {
895 if (aps[i]->asru_fmri != NULL)
896 func(aps[i], arg);
897 fmd_asru_hash_release(ahp, aps[i]);
898 }
899
900 fmd_free(aps, apc * sizeof (fmd_asru_t *));
901 }
902
903 void
fmd_asru_al_hash_apply(fmd_asru_hash_t * ahp,void (* func)(fmd_asru_link_t *,void *),void * arg)904 fmd_asru_al_hash_apply(fmd_asru_hash_t *ahp,
905 void (*func)(fmd_asru_link_t *, void *), void *arg)
906 {
907 fmd_asru_link_t *alp, **alps, **alpp;
908 uint_t alpc, i;
909
910 (void) pthread_rwlock_rdlock(&ahp->ah_lock);
911
912 alps = alpp = fmd_alloc(ahp->ah_al_count * sizeof (fmd_asru_link_t *),
913 FMD_SLEEP);
914 alpc = ahp->ah_al_count;
915
916 for (i = 0; i < ahp->ah_hashlen; i++) {
917 for (alp = ahp->ah_case_hash[i]; alp != NULL;
918 alp = alp->al_case_next)
919 *alpp++ = fmd_asru_al_hold(alp);
920 }
921
922 ASSERT(alpp == alps + alpc);
923 (void) pthread_rwlock_unlock(&ahp->ah_lock);
924
925 for (i = 0; i < alpc; i++) {
926 func(alps[i], arg);
927 fmd_asru_al_hash_release(ahp, alps[i]);
928 }
929
930 fmd_free(alps, alpc * sizeof (fmd_asru_link_t *));
931 }
932
933 static void
fmd_asru_do_hash_apply(fmd_asru_hash_t * ahp,const char * name,void (* func)(fmd_asru_link_t *,void *),void * arg,fmd_asru_link_t ** hash,size_t match_offset,size_t next_offset)934 fmd_asru_do_hash_apply(fmd_asru_hash_t *ahp, const char *name,
935 void (*func)(fmd_asru_link_t *, void *), void *arg,
936 fmd_asru_link_t **hash, size_t match_offset, size_t next_offset)
937 {
938 fmd_asru_link_t *alp, **alps, **alpp;
939 uint_t alpc = 0, i;
940 uint_t h;
941
942 (void) pthread_rwlock_rdlock(&ahp->ah_lock);
943
944 h = fmd_asru_strhash(ahp, name);
945
946 for (alp = hash[h]; alp != NULL; alp =
947 /* LINTED pointer alignment */
948 FMD_ASRU_AL_HASH_NEXT(alp, next_offset))
949 if (fmd_asru_strcmp(ahp,
950 /* LINTED pointer alignment */
951 FMD_ASRU_AL_HASH_NAME(alp, match_offset), name))
952 alpc++;
953
954 alps = alpp = fmd_alloc(alpc * sizeof (fmd_asru_link_t *), FMD_SLEEP);
955
956 for (alp = hash[h]; alp != NULL; alp =
957 /* LINTED pointer alignment */
958 FMD_ASRU_AL_HASH_NEXT(alp, next_offset))
959 if (fmd_asru_strcmp(ahp,
960 /* LINTED pointer alignment */
961 FMD_ASRU_AL_HASH_NAME(alp, match_offset), name))
962 *alpp++ = fmd_asru_al_hold(alp);
963
964 ASSERT(alpp == alps + alpc);
965 (void) pthread_rwlock_unlock(&ahp->ah_lock);
966
967 for (i = 0; i < alpc; i++) {
968 func(alps[i], arg);
969 fmd_asru_al_hash_release(ahp, alps[i]);
970 }
971
972 fmd_free(alps, alpc * sizeof (fmd_asru_link_t *));
973 }
974
975 void
fmd_asru_hash_apply_by_asru(fmd_asru_hash_t * ahp,const char * name,void (* func)(fmd_asru_link_t *,void *),void * arg)976 fmd_asru_hash_apply_by_asru(fmd_asru_hash_t *ahp, const char *name,
977 void (*func)(fmd_asru_link_t *, void *), void *arg)
978 {
979 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_asru_hash,
980 offsetof(fmd_asru_link_t, al_asru_name),
981 offsetof(fmd_asru_link_t, al_asru_next));
982 }
983
984 void
fmd_asru_hash_apply_by_case(fmd_asru_hash_t * ahp,fmd_case_t * cp,void (* func)(fmd_asru_link_t *,void *),void * arg)985 fmd_asru_hash_apply_by_case(fmd_asru_hash_t *ahp, fmd_case_t *cp,
986 void (*func)(fmd_asru_link_t *, void *), void *arg)
987 {
988 fmd_asru_do_hash_apply(ahp, ((fmd_case_impl_t *)cp)->ci_uuid, func, arg,
989 ahp->ah_case_hash, offsetof(fmd_asru_link_t, al_case_uuid),
990 offsetof(fmd_asru_link_t, al_case_next));
991 }
992
993 void
fmd_asru_hash_apply_by_fru(fmd_asru_hash_t * ahp,const char * name,void (* func)(fmd_asru_link_t *,void *),void * arg)994 fmd_asru_hash_apply_by_fru(fmd_asru_hash_t *ahp, const char *name,
995 void (*func)(fmd_asru_link_t *, void *), void *arg)
996 {
997 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_fru_hash,
998 offsetof(fmd_asru_link_t, al_fru_name),
999 offsetof(fmd_asru_link_t, al_fru_next));
1000 }
1001
1002 void
fmd_asru_hash_apply_by_rsrc(fmd_asru_hash_t * ahp,const char * name,void (* func)(fmd_asru_link_t *,void *),void * arg)1003 fmd_asru_hash_apply_by_rsrc(fmd_asru_hash_t *ahp, const char *name,
1004 void (*func)(fmd_asru_link_t *, void *), void *arg)
1005 {
1006 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_rsrc_hash,
1007 offsetof(fmd_asru_link_t, al_rsrc_name),
1008 offsetof(fmd_asru_link_t, al_rsrc_next));
1009 }
1010
1011 void
fmd_asru_hash_apply_by_label(fmd_asru_hash_t * ahp,const char * name,void (* func)(fmd_asru_link_t *,void *),void * arg)1012 fmd_asru_hash_apply_by_label(fmd_asru_hash_t *ahp, const char *name,
1013 void (*func)(fmd_asru_link_t *, void *), void *arg)
1014 {
1015 fmd_asru_do_hash_apply(ahp, name, func, arg, ahp->ah_label_hash,
1016 offsetof(fmd_asru_link_t, al_label),
1017 offsetof(fmd_asru_link_t, al_label_next));
1018 }
1019
1020 /*
1021 * Lookup an asru in the hash by name and place a hold on it. If the asru is
1022 * not found, no entry is created and NULL is returned.
1023 */
1024 fmd_asru_t *
fmd_asru_hash_lookup_name(fmd_asru_hash_t * ahp,const char * name)1025 fmd_asru_hash_lookup_name(fmd_asru_hash_t *ahp, const char *name)
1026 {
1027 fmd_asru_t *ap;
1028
1029 (void) pthread_rwlock_rdlock(&ahp->ah_lock);
1030 ap = fmd_asru_hash_lookup(ahp, name);
1031 (void) pthread_rwlock_unlock(&ahp->ah_lock);
1032
1033 return (ap);
1034 }
1035
1036 /*
1037 * Create a resource cache entry using the fault event "nvl" for one of the
1038 * suspects from the case "cp".
1039 *
1040 * The fault event can have the following components : FM_FAULT_ASRU,
1041 * FM_FAULT_FRU, FM_FAULT_RESOURCE. These should be set by the Diagnosis Engine
1042 * when calling fmd_nvl_create_fault(). In the general case, these are all
1043 * optional and an entry will always be added into the cache even if one or all
1044 * of these fields is missing.
1045 *
1046 * However, for hardware faults the recommended practice is that the fault
1047 * event should always have the FM_FAULT_RESOURCE field present and that this
1048 * should be represented in hc-scheme.
1049 *
1050 * Currently the DE should also add the FM_FAULT_ASRU and FM_FAULT_FRU fields
1051 * where known, though at some future stage fmd might be able to fill these
1052 * in automatically from the topology.
1053 */
1054 fmd_asru_link_t *
fmd_asru_hash_create_entry(fmd_asru_hash_t * ahp,fmd_case_t * cp,nvlist_t * nvl)1055 fmd_asru_hash_create_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp, nvlist_t *nvl)
1056 {
1057 char *parsed_uuid;
1058 uuid_t uuid;
1059 int uuidlen;
1060 fmd_asru_link_t *alp;
1061
1062 /*
1063 * Generate a UUID for the ASRU. libuuid cleverly gives us no
1064 * interface for specifying or learning the buffer size. Sigh.
1065 * The spec says 36 bytes but we use a tunable just to be safe.
1066 */
1067 (void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &uuidlen);
1068 parsed_uuid = fmd_zalloc(uuidlen + 1, FMD_SLEEP);
1069 uuid_generate(uuid);
1070 uuid_unparse(uuid, parsed_uuid);
1071
1072 /*
1073 * Now create the resource cache entries.
1074 */
1075 fmd_case_hold_locked(cp);
1076 alp = fmd_asru_al_create(ahp, nvl, cp, parsed_uuid);
1077 TRACE((FMD_DBG_ASRU, "asru %s created as %p",
1078 alp->al_uuid, (void *)alp->al_asru));
1079
1080 fmd_free(parsed_uuid, uuidlen + 1);
1081 return (alp);
1082
1083 }
1084
1085 /*
1086 * Release the reference count on an asru obtained using fmd_asru_hash_lookup.
1087 * We take 'ahp' for symmetry and in case we need to use it in future work.
1088 */
1089 /*ARGSUSED*/
1090 void
fmd_asru_hash_release(fmd_asru_hash_t * ahp,fmd_asru_t * ap)1091 fmd_asru_hash_release(fmd_asru_hash_t *ahp, fmd_asru_t *ap)
1092 {
1093 (void) pthread_mutex_lock(&ap->asru_lock);
1094
1095 ASSERT(ap->asru_refs != 0);
1096 if (--ap->asru_refs == 0)
1097 fmd_asru_destroy(ap);
1098 else
1099 (void) pthread_mutex_unlock(&ap->asru_lock);
1100 }
1101
1102 static void
fmd_asru_do_delete_entry(fmd_asru_hash_t * ahp,fmd_case_t * cp,fmd_asru_link_t ** hash,size_t next_offset,char * name)1103 fmd_asru_do_delete_entry(fmd_asru_hash_t *ahp, fmd_case_t *cp,
1104 fmd_asru_link_t **hash, size_t next_offset, char *name)
1105 {
1106 uint_t h;
1107 fmd_asru_link_t *alp, **pp, *alpnext, **alpnextp;
1108
1109 (void) pthread_rwlock_wrlock(&ahp->ah_lock);
1110 h = fmd_asru_strhash(ahp, name);
1111 pp = &hash[h];
1112 for (alp = *pp; alp != NULL; alp = alpnext) {
1113 /* LINTED pointer alignment */
1114 alpnextp = FMD_ASRU_AL_HASH_NEXTP(alp, next_offset);
1115 alpnext = *alpnextp;
1116 if (alp->al_case == cp) {
1117 *pp = *alpnextp;
1118 *alpnextp = NULL;
1119 } else
1120 pp = alpnextp;
1121 }
1122 (void) pthread_rwlock_unlock(&ahp->ah_lock);
1123 }
1124
1125 static void
fmd_asru_do_hash_delete(fmd_asru_hash_t * ahp,fmd_case_susp_t * cis,fmd_case_t * cp,fmd_asru_link_t ** hash,size_t next_offset,char * nvname)1126 fmd_asru_do_hash_delete(fmd_asru_hash_t *ahp, fmd_case_susp_t *cis,
1127 fmd_case_t *cp, fmd_asru_link_t **hash, size_t next_offset, char *nvname)
1128 {
1129 nvlist_t *nvl;
1130 char *name = NULL;
1131 ssize_t namelen;
1132
1133 if (nvlist_lookup_nvlist(cis->cis_nvl, nvname, &nvl) == 0 &&
1134 (namelen = fmd_fmri_nvl2str(nvl, NULL, 0)) != -1 &&
1135 (name = fmd_alloc(namelen + 1, FMD_SLEEP)) != NULL) {
1136 if (fmd_fmri_nvl2str(nvl, name, namelen + 1) != -1)
1137 fmd_asru_do_delete_entry(ahp, cp, hash, next_offset,
1138 name);
1139 fmd_free(name, namelen + 1);
1140 } else
1141 fmd_asru_do_delete_entry(ahp, cp, hash, next_offset, "");
1142 }
1143
1144 void
fmd_asru_hash_delete_case(fmd_asru_hash_t * ahp,fmd_case_t * cp)1145 fmd_asru_hash_delete_case(fmd_asru_hash_t *ahp, fmd_case_t *cp)
1146 {
1147 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1148 fmd_case_susp_t *cis;
1149 fmd_asru_link_t *alp, **plp, *alpnext;
1150 fmd_asru_t *ap;
1151 char path[PATH_MAX];
1152 char *label;
1153 uint_t h;
1154
1155 /*
1156 * first delete hash entries for each suspect
1157 */
1158 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
1159 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_fru_hash,
1160 offsetof(fmd_asru_link_t, al_fru_next), FM_FAULT_FRU);
1161 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_rsrc_hash,
1162 offsetof(fmd_asru_link_t, al_rsrc_next), FM_FAULT_RESOURCE);
1163 if (nvlist_lookup_string(cis->cis_nvl, FM_FAULT_LOCATION,
1164 &label) != 0)
1165 label = "";
1166 fmd_asru_do_delete_entry(ahp, cp, ahp->ah_label_hash,
1167 offsetof(fmd_asru_link_t, al_label_next), label);
1168 fmd_asru_do_hash_delete(ahp, cis, cp, ahp->ah_asru_hash,
1169 offsetof(fmd_asru_link_t, al_asru_next), FM_FAULT_ASRU);
1170 }
1171
1172 /*
1173 * then delete associated case hash entries
1174 */
1175 (void) pthread_rwlock_wrlock(&ahp->ah_lock);
1176 h = fmd_asru_strhash(ahp, cip->ci_uuid);
1177 plp = &ahp->ah_case_hash[h];
1178 for (alp = *plp; alp != NULL; alp = alpnext) {
1179 alpnext = alp->al_case_next;
1180 if (alp->al_case == cp) {
1181 *plp = alp->al_case_next;
1182 alp->al_case_next = NULL;
1183 ASSERT(ahp->ah_al_count != 0);
1184 ahp->ah_al_count--;
1185
1186 /*
1187 * decrement case ref.
1188 */
1189 fmd_case_rele_locked(cp);
1190 alp->al_case = NULL;
1191
1192 /*
1193 * If we found a matching ASRU, unlink its log file and
1194 * then release the hash entry. Note that it may still
1195 * be referenced if another thread is manipulating it;
1196 * this is ok because once we unlink, the log file will
1197 * not be restored, and the log data will be freed when
1198 * all of the referencing threads release their
1199 * respective references.
1200 */
1201 (void) snprintf(path, sizeof (path), "%s/%s",
1202 ahp->ah_dirpath, alp->al_uuid);
1203 if (cip->ci_xprt == NULL && unlink(path) != 0)
1204 fmd_error(EFMD_ASRU_UNLINK,
1205 "failed to unlink asru %s", path);
1206
1207 /*
1208 * Now unlink from the global per-resource cache
1209 * and if this is the last link then remove that from
1210 * it's own hash too.
1211 */
1212 ap = alp->al_asru;
1213 (void) pthread_mutex_lock(&ap->asru_lock);
1214 fmd_list_delete(&ap->asru_list, alp);
1215 if (ap->asru_list.l_next == NULL) {
1216 uint_t h;
1217 fmd_asru_t *ap2, **pp;
1218 fmd_asru_t *apnext, **apnextp;
1219
1220 ASSERT(ahp->ah_count != 0);
1221 ahp->ah_count--;
1222 h = fmd_asru_strhash(ahp, ap->asru_name);
1223 pp = &ahp->ah_hash[h];
1224 for (ap2 = *pp; ap2 != NULL; ap2 = apnext) {
1225 apnextp = &ap2->asru_next;
1226 apnext = *apnextp;
1227 if (ap2 == ap) {
1228 *pp = *apnextp;
1229 *apnextp = NULL;
1230 } else
1231 pp = apnextp;
1232 }
1233 }
1234 (void) pthread_mutex_unlock(&ap->asru_lock);
1235 fmd_asru_al_hash_release(ahp, alp);
1236 } else
1237 plp = &alp->al_case_next;
1238 }
1239 (void) pthread_rwlock_unlock(&ahp->ah_lock);
1240 }
1241
1242 typedef struct {
1243 nvlist_t *farc_parent_fmri;
1244 uint8_t farc_reason;
1245 } fmd_asru_farc_t;
1246
1247 static void
fmd_asru_repair_containee(fmd_asru_link_t * alp,void * arg)1248 fmd_asru_repair_containee(fmd_asru_link_t *alp, void *arg)
1249 {
1250 fmd_asru_farc_t *farcp = (fmd_asru_farc_t *)arg;
1251
1252 if ((alp->al_asru->asru_flags & FMD_ASRU_INVISIBLE) &&
1253 alp->al_asru_fmri &&
1254 fmd_fmri_contains(farcp->farc_parent_fmri, alp->al_asru_fmri) > 0) {
1255 if (fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
1256 farcp->farc_reason)) {
1257 if (alp->al_flags & FMD_ASRU_PROXY)
1258 fmd_case_xprt_updated(alp->al_case);
1259 else
1260 fmd_case_update(alp->al_case);
1261 }
1262 }
1263 }
1264
1265 static void
fmd_asru_do_repair_containees(fmd_asru_link_t * alp,uint8_t reason)1266 fmd_asru_do_repair_containees(fmd_asru_link_t *alp, uint8_t reason)
1267 {
1268 int flags;
1269
1270 /*
1271 * Check if all entries associated with this asru are acquitted and
1272 * if so acquit containees. Don't try to repair containees on proxy
1273 * side unless we have local asru.
1274 */
1275 if (alp->al_asru_fmri != NULL && (!(alp->al_flags & FMD_ASRU_PROXY) ||
1276 (alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU))) {
1277 (void) pthread_mutex_lock(&alp->al_asru->asru_lock);
1278 flags = alp->al_asru->asru_flags;
1279 (void) pthread_mutex_unlock(&alp->al_asru->asru_lock);
1280 if (!(flags & (FMD_ASRU_FAULTY | FMD_ASRU_INVISIBLE))) {
1281 fmd_asru_farc_t farc;
1282
1283 farc.farc_parent_fmri = alp->al_asru_fmri;
1284 farc.farc_reason = reason;
1285 fmd_asru_al_hash_apply(fmd.d_asrus,
1286 fmd_asru_repair_containee, &farc);
1287 }
1288 }
1289 }
1290
1291 void
fmd_asru_repaired(fmd_asru_link_t * alp,void * arg)1292 fmd_asru_repaired(fmd_asru_link_t *alp, void *arg)
1293 {
1294 int cleared;
1295 fmd_asru_rep_arg_t *farap = (fmd_asru_rep_arg_t *)arg;
1296
1297 /*
1298 * don't allow remote repair over readonly transport
1299 */
1300 if (alp->al_flags & FMD_ASRU_PROXY_RDONLY)
1301 return;
1302
1303 /*
1304 * don't allow repair etc by asru on proxy unless asru is local
1305 */
1306 if (farap->fara_bywhat == FARA_BY_ASRU &&
1307 (alp->al_flags & FMD_ASRU_PROXY) &&
1308 !(alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU))
1309 return;
1310 /*
1311 * For acquit, need to check both name and uuid if specified
1312 */
1313 if (farap->fara_reason == FMD_ASRU_ACQUITTED &&
1314 farap->fara_rval != NULL && strcmp(farap->fara_uuid, "") != 0 &&
1315 strcmp(farap->fara_uuid, alp->al_case_uuid) != 0)
1316 return;
1317
1318 /*
1319 * For replaced, verify it has been replaced if we have serial number.
1320 * If not set *farap->fara_rval to FARA_ERR_RSRCNOTR.
1321 */
1322 if (farap->fara_reason == FMD_ASRU_REPLACED &&
1323 !(alp->al_flags & FMD_ASRU_PROXY_EXTERNAL) &&
1324 fmd_asru_replacement_state(alp->al_event,
1325 (alp->al_flags & FMD_ASRU_PROXY) ? HC_ONLY_TRUE : HC_ONLY_FALSE) ==
1326 FMD_OBJ_STATE_STILL_PRESENT) {
1327 if (farap->fara_rval)
1328 *farap->fara_rval = FARA_ERR_RSRCNOTR;
1329 return;
1330 }
1331
1332 cleared = fmd_asru_clrflags(alp, FMD_ASRU_FAULTY, farap->fara_reason);
1333 fmd_asru_do_repair_containees(alp, farap->fara_reason);
1334
1335 /*
1336 * if called from fmd_adm_*() and we really did clear the bit then
1337 * we need to do a case update to see if the associated case can be
1338 * repaired. No need to do this if called from fmd_case_*() (ie
1339 * when arg is NULL) as the case will be explicitly repaired anyway.
1340 */
1341 if (farap->fara_rval) {
1342 /*
1343 * *farap->fara_rval defaults to FARA_ERR_RSRCNOTF (not found).
1344 * If we find a valid cache entry which we repair then we
1345 * set it to FARA_OK. However we don't want to do this if
1346 * we have already set it to FARA_ERR_RSRCNOTR (not replaced)
1347 * in a previous iteration (see above). So only set it to
1348 * FARA_OK if the current value is still FARA_ERR_RSRCNOTF.
1349 */
1350 if (*farap->fara_rval == FARA_ERR_RSRCNOTF)
1351 *farap->fara_rval = FARA_OK;
1352 if (cleared) {
1353 if (alp->al_flags & FMD_ASRU_PROXY)
1354 fmd_case_xprt_updated(alp->al_case);
1355 else
1356 fmd_case_update(alp->al_case);
1357 }
1358 }
1359 }
1360
1361 /*
1362 * Discard the case associated with this alp if it is in resolved state.
1363 * Called on "fmadm flush".
1364 */
1365 /*ARGSUSED*/
1366 void
fmd_asru_flush(fmd_asru_link_t * alp,void * arg)1367 fmd_asru_flush(fmd_asru_link_t *alp, void *arg)
1368 {
1369 int check_if_aged = 0;
1370 int *rval = (int *)arg;
1371
1372 if (alp->al_case)
1373 fmd_case_discard_resolved(alp->al_case, &check_if_aged);
1374 *rval = 0;
1375 }
1376
1377 /*
1378 * This is only called for proxied faults. Set various flags so we can
1379 * find the nature of the transport from the resource cache code.
1380 */
1381 /*ARGSUSED*/
1382 void
fmd_asru_set_on_proxy(fmd_asru_link_t * alp,void * arg)1383 fmd_asru_set_on_proxy(fmd_asru_link_t *alp, void *arg)
1384 {
1385 fmd_asru_set_on_proxy_t *entryp = (fmd_asru_set_on_proxy_t *)arg;
1386
1387 if (*entryp->fasp_countp >= entryp->fasp_maxcount)
1388 return;
1389
1390 /*
1391 * Note that this is a proxy fault and save whetehr transport is
1392 * RDONLY or EXTERNAL.
1393 */
1394 alp->al_flags |= FMD_ASRU_PROXY;
1395 alp->al_asru->asru_flags |= FMD_ASRU_PROXY;
1396
1397 if (entryp->fasp_proxy_external) {
1398 alp->al_flags |= FMD_ASRU_PROXY_EXTERNAL;
1399 alp->al_asru->asru_flags |= FMD_ASRU_PROXY_EXTERNAL;
1400 }
1401
1402 if (entryp->fasp_proxy_rdonly)
1403 alp->al_flags |= FMD_ASRU_PROXY_RDONLY;
1404
1405 /*
1406 * Save whether asru is accessible in local domain
1407 */
1408 if (entryp->fasp_proxy_asru[*entryp->fasp_countp]) {
1409 alp->al_flags |= FMD_ASRU_PROXY_WITH_ASRU;
1410 alp->al_asru->asru_flags |= FMD_ASRU_PROXY_WITH_ASRU;
1411 }
1412 (*entryp->fasp_countp)++;
1413 }
1414
1415 /*ARGSUSED*/
1416 void
fmd_asru_update_containees(fmd_asru_link_t * alp,void * arg)1417 fmd_asru_update_containees(fmd_asru_link_t *alp, void *arg)
1418 {
1419 fmd_asru_do_repair_containees(alp, alp->al_reason);
1420 }
1421
1422 /*
1423 * This function is used for fault proxying. It updates the resource status in
1424 * the resource cache based on information that has come from the other side of
1425 * the transport. This can be called on either the proxy side or the
1426 * diagnosing side.
1427 */
1428 void
fmd_asru_update_status(fmd_asru_link_t * alp,void * arg)1429 fmd_asru_update_status(fmd_asru_link_t *alp, void *arg)
1430 {
1431 fmd_asru_update_status_t *entryp = (fmd_asru_update_status_t *)arg;
1432 uint8_t status;
1433
1434 if (*entryp->faus_countp >= entryp->faus_maxcount)
1435 return;
1436
1437 status = entryp->faus_ba[*entryp->faus_countp];
1438
1439 /*
1440 * For proxy, if there is no asru on the proxy side, but there is on
1441 * the diag side, then take the diag side asru status.
1442 * For diag, if there is an asru on the proxy side, then take the proxy
1443 * side asru status.
1444 */
1445 if (entryp->faus_is_proxy ?
1446 (entryp->faus_diag_asru[*entryp->faus_countp] &&
1447 !entryp->faus_proxy_asru[*entryp->faus_countp]) :
1448 entryp->faus_proxy_asru[*entryp->faus_countp]) {
1449 if (status & FM_SUSPECT_DEGRADED)
1450 alp->al_flags |= FMD_ASRU_DEGRADED;
1451 else
1452 alp->al_flags &= ~FMD_ASRU_DEGRADED;
1453 if (status & FM_SUSPECT_UNUSABLE)
1454 (void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE);
1455 else
1456 (void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE, 0);
1457 }
1458
1459 /*
1460 * Update the faulty status too.
1461 */
1462 if (!(status & FM_SUSPECT_FAULTY))
1463 (void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
1464 (status & FM_SUSPECT_REPAIRED) ? FMD_ASRU_REPAIRED :
1465 (status & FM_SUSPECT_REPLACED) ? FMD_ASRU_REPLACED :
1466 (status & FM_SUSPECT_ACQUITTED) ? FMD_ASRU_ACQUITTED :
1467 FMD_ASRU_REMOVED);
1468 else if (entryp->faus_is_proxy)
1469 (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
1470
1471 /*
1472 * for proxy only, update the present status too.
1473 */
1474 if (entryp->faus_is_proxy) {
1475 if (!(status & FM_SUSPECT_NOT_PRESENT)) {
1476 alp->al_flags |= FMD_ASRU_PRESENT;
1477 alp->al_asru->asru_flags |= FMD_ASRU_PRESENT;
1478 } else {
1479 alp->al_flags &= ~FMD_ASRU_PRESENT;
1480 alp->al_asru->asru_flags &= ~FMD_ASRU_PRESENT;
1481 }
1482 }
1483 (*entryp->faus_countp)++;
1484 }
1485
1486 /*
1487 * This function is called on the diagnosing side when fault proxying is
1488 * in use and the proxy has sent a uuclose. It updates the status of the
1489 * resource cache entries.
1490 */
1491 void
fmd_asru_close_status(fmd_asru_link_t * alp,void * arg)1492 fmd_asru_close_status(fmd_asru_link_t *alp, void *arg)
1493 {
1494 fmd_asru_close_status_t *entryp = (fmd_asru_close_status_t *)arg;
1495
1496 if (*entryp->facs_countp >= entryp->facs_maxcount)
1497 return;
1498 alp->al_flags &= ~FMD_ASRU_DEGRADED;
1499 (void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE);
1500 (*entryp->facs_countp)++;
1501 }
1502
1503 static void
fmd_asru_logevent(fmd_asru_link_t * alp)1504 fmd_asru_logevent(fmd_asru_link_t *alp)
1505 {
1506 fmd_asru_t *ap = alp->al_asru;
1507 boolean_t faulty = (alp->al_flags & FMD_ASRU_FAULTY) != 0;
1508 boolean_t unusable = (alp->al_flags & FMD_ASRU_UNUSABLE) != 0;
1509 boolean_t message = (ap->asru_flags & FMD_ASRU_INVISIBLE) == 0;
1510 boolean_t repaired = (alp->al_reason == FMD_ASRU_REPAIRED);
1511 boolean_t replaced = (alp->al_reason == FMD_ASRU_REPLACED);
1512 boolean_t acquitted = (alp->al_reason == FMD_ASRU_ACQUITTED);
1513
1514 fmd_case_impl_t *cip;
1515 fmd_event_t *e;
1516 fmd_log_t *lp;
1517 nvlist_t *nvl;
1518 char *class;
1519
1520 ASSERT(MUTEX_HELD(&ap->asru_lock));
1521 cip = (fmd_case_impl_t *)alp->al_case;
1522 ASSERT(cip != NULL);
1523
1524 /*
1525 * Don't log to disk on proxy side
1526 */
1527 if (cip->ci_xprt != NULL)
1528 return;
1529
1530 if ((lp = alp->al_log) == NULL)
1531 lp = fmd_log_open(ap->asru_root, alp->al_uuid, FMD_LOG_ASRU);
1532
1533 if (lp == NULL)
1534 return; /* can't log events if we can't open the log */
1535
1536 nvl = fmd_protocol_rsrc_asru(_fmd_asru_events[faulty | (unusable << 1)],
1537 alp->al_asru_fmri, cip->ci_uuid, cip->ci_code, faulty, unusable,
1538 message, alp->al_event, &cip->ci_tv, repaired, replaced, acquitted,
1539 cip->ci_state == FMD_CASE_RESOLVED, cip->ci_diag_de == NULL ?
1540 cip->ci_mod->mod_fmri : cip->ci_diag_de, cip->ci_injected == 1);
1541
1542 (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1543 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
1544
1545 fmd_event_hold(e);
1546 fmd_log_append(lp, e, NULL);
1547 fmd_event_rele(e);
1548
1549 /*
1550 * For now, we close the log file after every update to conserve file
1551 * descriptors and daemon overhead. If this becomes a performance
1552 * issue this code can change to keep a fixed-size LRU cache of logs.
1553 */
1554 fmd_log_rele(lp);
1555 alp->al_log = NULL;
1556 }
1557
1558 int
fmd_asru_setflags(fmd_asru_link_t * alp,uint_t sflag)1559 fmd_asru_setflags(fmd_asru_link_t *alp, uint_t sflag)
1560 {
1561 fmd_asru_t *ap = alp->al_asru;
1562 uint_t nstate, ostate;
1563
1564 ASSERT(!(sflag & ~FMD_ASRU_STATE));
1565 ASSERT(sflag != FMD_ASRU_STATE);
1566
1567 (void) pthread_mutex_lock(&ap->asru_lock);
1568
1569 ostate = alp->al_flags & FMD_ASRU_STATE;
1570 alp->al_flags |= sflag;
1571 nstate = alp->al_flags & FMD_ASRU_STATE;
1572
1573 if (nstate == ostate) {
1574 (void) pthread_mutex_unlock(&ap->asru_lock);
1575 return (0);
1576 }
1577
1578 ap->asru_flags |= sflag;
1579 TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid,
1580 _fmd_asru_snames[ostate], _fmd_asru_snames[nstate]));
1581
1582 fmd_asru_logevent(alp);
1583
1584 (void) pthread_cond_broadcast(&ap->asru_cv);
1585 (void) pthread_mutex_unlock(&ap->asru_lock);
1586 return (1);
1587 }
1588
1589 int
fmd_asru_clrflags(fmd_asru_link_t * alp,uint_t sflag,uint8_t reason)1590 fmd_asru_clrflags(fmd_asru_link_t *alp, uint_t sflag, uint8_t reason)
1591 {
1592 fmd_asru_t *ap = alp->al_asru;
1593 fmd_asru_link_t *nalp;
1594 uint_t nstate, ostate, flags = 0;
1595
1596 ASSERT(!(sflag & ~FMD_ASRU_STATE));
1597 ASSERT(sflag != FMD_ASRU_STATE);
1598
1599 (void) pthread_mutex_lock(&ap->asru_lock);
1600
1601 ostate = alp->al_flags & FMD_ASRU_STATE;
1602 alp->al_flags &= ~sflag;
1603 nstate = alp->al_flags & FMD_ASRU_STATE;
1604
1605 if (nstate == ostate) {
1606 if (reason > alp->al_reason &&
1607 ((fmd_case_impl_t *)alp->al_case)->ci_state <
1608 FMD_CASE_REPAIRED) {
1609 alp->al_reason = reason;
1610 fmd_asru_logevent(alp);
1611 (void) pthread_cond_broadcast(&ap->asru_cv);
1612 }
1613 (void) pthread_mutex_unlock(&ap->asru_lock);
1614 return (0);
1615 }
1616 if (reason > alp->al_reason)
1617 alp->al_reason = reason;
1618
1619 if (sflag == FMD_ASRU_UNUSABLE)
1620 ap->asru_flags &= ~sflag;
1621 else if (sflag == FMD_ASRU_FAULTY) {
1622 /*
1623 * only clear the faulty bit if all links are clear
1624 */
1625 for (nalp = fmd_list_next(&ap->asru_list); nalp != NULL;
1626 nalp = fmd_list_next(nalp))
1627 flags |= nalp->al_flags;
1628 if (!(flags & FMD_ASRU_FAULTY))
1629 ap->asru_flags &= ~sflag;
1630 }
1631
1632 TRACE((FMD_DBG_ASRU, "asru %s %s->%s", alp->al_uuid,
1633 _fmd_asru_snames[ostate], _fmd_asru_snames[nstate]));
1634
1635 fmd_asru_logevent(alp);
1636
1637 (void) pthread_cond_broadcast(&ap->asru_cv);
1638 (void) pthread_mutex_unlock(&ap->asru_lock);
1639
1640 return (1);
1641 }
1642
1643 /*ARGSUSED*/
1644 void
fmd_asru_log_resolved(fmd_asru_link_t * alp,void * unused)1645 fmd_asru_log_resolved(fmd_asru_link_t *alp, void *unused)
1646 {
1647 fmd_asru_t *ap = alp->al_asru;
1648
1649 (void) pthread_mutex_lock(&ap->asru_lock);
1650 fmd_asru_logevent(alp);
1651 (void) pthread_cond_broadcast(&ap->asru_cv);
1652 (void) pthread_mutex_unlock(&ap->asru_lock);
1653 }
1654
1655 /*
1656 * Report the current known state of the link entry (ie this particular fault
1657 * affecting this particular ASRU).
1658 */
1659 int
fmd_asru_al_getstate(fmd_asru_link_t * alp)1660 fmd_asru_al_getstate(fmd_asru_link_t *alp)
1661 {
1662 int us, st = (alp->al_flags & (FMD_ASRU_FAULTY | FMD_ASRU_UNUSABLE));
1663 nvlist_t *asru;
1664 int ps = FMD_OBJ_STATE_UNKNOWN;
1665
1666 /*
1667 * For fault proxying with an EXTERNAL transport, believe the presence
1668 * state as sent by the diagnosing side. Otherwise find the presence
1669 * state here. Note that if fault proxying with an INTERNAL transport
1670 * we can only trust the presence state where we are using hc-scheme
1671 * fmris which should be consistant across domains in the same system -
1672 * other schemes can refer to different devices in different domains.
1673 */
1674 if (!(alp->al_flags & FMD_ASRU_PROXY_EXTERNAL)) {
1675 ps = fmd_asru_replacement_state(alp->al_event, (alp->al_flags &
1676 FMD_ASRU_PROXY)? HC_ONLY_TRUE : HC_ONLY_FALSE);
1677 if (ps == FMD_OBJ_STATE_NOT_PRESENT)
1678 return (st | FMD_ASRU_UNUSABLE);
1679 if (ps == FMD_OBJ_STATE_REPLACED) {
1680 if (alp->al_reason < FMD_ASRU_REPLACED)
1681 alp->al_reason = FMD_ASRU_REPLACED;
1682 return (st | FMD_ASRU_UNUSABLE);
1683 }
1684 }
1685 if (ps == FMD_OBJ_STATE_UNKNOWN && (alp->al_flags & FMD_ASRU_PROXY))
1686 st |= (alp->al_flags & (FMD_ASRU_DEGRADED | FMD_ASRU_PRESENT));
1687 else
1688 st |= (alp->al_flags & (FMD_ASRU_DEGRADED)) | FMD_ASRU_PRESENT;
1689
1690 /*
1691 * For fault proxying, unless we have a local ASRU, then believe the
1692 * service state sent by the diagnosing side. Otherwise find the service
1693 * state here. Try fmd_fmri_service_state() first, but if that's not
1694 * supported by the scheme then fall back to fmd_fmri_unusable().
1695 */
1696 if ((!(alp->al_flags & FMD_ASRU_PROXY) ||
1697 (alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU)) &&
1698 nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) == 0) {
1699 us = fmd_fmri_service_state(asru);
1700 if (us == -1 || us == FMD_SERVICE_STATE_UNKNOWN) {
1701 /* not supported by scheme - try fmd_fmri_unusable */
1702 us = fmd_fmri_unusable(asru);
1703 if (us > 0)
1704 st |= FMD_ASRU_UNUSABLE;
1705 else if (us == 0)
1706 st &= ~FMD_ASRU_UNUSABLE;
1707 } else {
1708 if (us == FMD_SERVICE_STATE_UNUSABLE) {
1709 st &= ~FMD_ASRU_DEGRADED;
1710 st |= FMD_ASRU_UNUSABLE;
1711 } else if (us == FMD_SERVICE_STATE_OK) {
1712 st &= ~(FMD_ASRU_DEGRADED | FMD_ASRU_UNUSABLE);
1713 } else if (us == FMD_SERVICE_STATE_ISOLATE_PENDING) {
1714 st &= ~(FMD_ASRU_DEGRADED | FMD_ASRU_UNUSABLE);
1715 } else if (us == FMD_SERVICE_STATE_DEGRADED) {
1716 st &= ~FMD_ASRU_UNUSABLE;
1717 st |= FMD_ASRU_DEGRADED;
1718 }
1719 }
1720 }
1721 return (st);
1722 }
1723
1724 /*
1725 * Report the current known state of the ASRU by refreshing its unusable status
1726 * based upon the routines provided by the scheme module. If the unusable bit
1727 * is different, we do *not* generate a state change here because that change
1728 * may be unrelated to fmd activities and therefore we have no case or event.
1729 * The absence of the transition is harmless as this function is only provided
1730 * for RPC observability and fmd's clients are only concerned with ASRU_FAULTY.
1731 */
1732 int
fmd_asru_getstate(fmd_asru_t * ap)1733 fmd_asru_getstate(fmd_asru_t *ap)
1734 {
1735 int us, st, p = -1;
1736 char *s;
1737
1738 /* do not report non-fmd non-present resources */
1739 if (!(ap->asru_flags & FMD_ASRU_INTERNAL)) {
1740 /*
1741 * As with fmd_asru_al_getstate(), we can only trust the
1742 * local presence state on a proxy if the transport is
1743 * internal and the scheme is hc. Otherwise we believe the
1744 * state as sent by the diagnosing side.
1745 */
1746 if (!(ap->asru_flags & FMD_ASRU_PROXY) ||
1747 (!(ap->asru_flags & FMD_ASRU_PROXY_EXTERNAL) &&
1748 (nvlist_lookup_string(ap->asru_fmri, FM_FMRI_SCHEME,
1749 &s) == 0 && strcmp(s, FM_FMRI_SCHEME_HC) == 0))) {
1750 if (fmd_asru_fake_not_present >=
1751 FMD_OBJ_STATE_REPLACED)
1752 return (0);
1753 p = fmd_fmri_present(ap->asru_fmri);
1754 }
1755 if (p == 0 || (p < 0 && !(ap->asru_flags & FMD_ASRU_PROXY) ||
1756 !(ap->asru_flags & FMD_ASRU_PRESENT)))
1757 return (0);
1758 }
1759
1760 /*
1761 * As with fmd_asru_al_getstate(), we can only trust the local unusable
1762 * state on a proxy if there is a local ASRU.
1763 */
1764 st = ap->asru_flags & (FMD_ASRU_FAULTY | FMD_ASRU_UNUSABLE);
1765 if (!(ap->asru_flags & FMD_ASRU_PROXY) ||
1766 (ap->asru_flags & FMD_ASRU_PROXY_WITH_ASRU)) {
1767 us = fmd_fmri_unusable(ap->asru_fmri);
1768 if (us > 0)
1769 st |= FMD_ASRU_UNUSABLE;
1770 else if (us == 0)
1771 st &= ~FMD_ASRU_UNUSABLE;
1772 }
1773 return (st);
1774 }
1775