xref: /titanic_41/usr/src/cmd/fm/modules/common/eversholt/fme.c (revision 90685d2c52744c6540828f16cdd2db815d467e37)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * fme.c -- fault management exercise module
27  *
28  * this module provides the simulated fault management exercise.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <strings.h>
37 #include <ctype.h>
38 #include <alloca.h>
39 #include <libnvpair.h>
40 #include <sys/fm/protocol.h>
41 #include <fm/fmd_api.h>
42 #include "alloc.h"
43 #include "out.h"
44 #include "stats.h"
45 #include "stable.h"
46 #include "literals.h"
47 #include "lut.h"
48 #include "tree.h"
49 #include "ptree.h"
50 #include "itree.h"
51 #include "ipath.h"
52 #include "fme.h"
53 #include "evnv.h"
54 #include "eval.h"
55 #include "config.h"
56 #include "platform.h"
57 
58 /* imported from eft.c... */
59 extern char *Autoclose;
60 extern int Dupclose;
61 extern hrtime_t Hesitate;
62 extern nv_alloc_t Eft_nv_hdl;
63 extern int Max_fme;
64 extern fmd_hdl_t *Hdl;
65 
/*
 * istat_save()/serd_save() write out istat and SERD engine state; the
 * *_need_save flags record that there is something new to write.
 */
void istat_save(void);
void serd_save(void);
static int Istat_need_save;
static int Serd_need_save;

/*
 * The FME currently being constructed.  It is kept in a global so
 * that it can still be freed if the module aborts part way through.
 */
static struct fme *Nfmep;

/* reason recorded when a case turns out to be undiagnosable */
static const char *Undiag_reason;

/* id that will be given to the next FME */
static int Nextid;

/* number of FMEs currently open */
static int Open_fme_count;
79 
80 /* list of fault management exercises underway */
81 static struct fme {
82 	struct fme *next;		/* next exercise */
83 	unsigned long long ull;		/* time when fme was created */
84 	int id;				/* FME id */
85 	struct cfgdata *cfgdata;	/* full configuration data */
86 	struct lut *eventtree;		/* propagation tree for this FME */
87 	/*
88 	 * The initial error report that created this FME is kept in
89 	 * two forms.  e0 points to the instance tree node and is used
90 	 * by fme_eval() as the starting point for the inference
91 	 * algorithm.  e0r is the event handle FMD passed to us when
92 	 * the ereport first arrived and is used when setting timers,
93 	 * which are always relative to the time of this initial
94 	 * report.
95 	 */
96 	struct event *e0;
97 	fmd_event_t *e0r;
98 
99 	id_t    timer;			/* for setting an fmd time-out */
100 
101 	struct event *ecurrent;		/* ereport under consideration */
102 	struct event *suspects;		/* current suspect list */
103 	struct event *psuspects;	/* previous suspect list */
104 	int nsuspects;			/* count of suspects */
105 	int nonfault;			/* zero if all suspects T_FAULT */
106 	int posted_suspects;		/* true if we've posted a diagnosis */
107 	int uniqobs;			/* number of unique events observed */
108 	int peek;			/* just peeking, don't track suspects */
109 	int overflow;			/* true if overflow FME */
110 	enum fme_state {
111 		FME_NOTHING = 5000,	/* not evaluated yet */
112 		FME_WAIT,		/* need to wait for more info */
113 		FME_CREDIBLE,		/* suspect list is credible */
114 		FME_DISPROVED,		/* no valid suspects found */
115 		FME_DEFERRED		/* don't know yet (k-count not met) */
116 	} state;
117 
118 	unsigned long long pull;	/* time passed since created */
119 	unsigned long long wull;	/* wait until this time for re-eval */
120 	struct event *observations;	/* observation list */
121 	struct lut *globals;		/* values of global variables */
122 	/* fmd interfacing */
123 	fmd_hdl_t *hdl;			/* handle for talking with fmd */
124 	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
125 	/* stats */
126 	struct stats *Rcount;
127 	struct stats *Hcallcount;
128 	struct stats *Rcallcount;
129 	struct stats *Ccallcount;
130 	struct stats *Ecallcount;
131 	struct stats *Tcallcount;
132 	struct stats *Marrowcount;
133 	struct stats *diags;
134 } *FMElist, *EFMElist, *ClosedFMEs;
135 
136 static struct case_list {
137 	fmd_case_t *fmcase;
138 	struct case_list *next;
139 } *Undiagablecaselist;
140 
141 static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
142 static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
143 	unsigned long long at_latest_by, unsigned long long *pdelay);
144 static struct node *eventprop_lookup(struct event *ep, const char *propname);
145 static struct node *pathstring2epnamenp(char *path);
146 static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
147 	fmd_case_t *fmcase);
148 static void restore_suspects(struct fme *fmep);
149 static void save_suspects(struct fme *fmep);
150 static void destroy_fme(struct fme *f);
151 static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
152     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
153 static void istat_counter_reset_cb(struct istat_entry *entp,
154     struct stats *statp, const struct ipath *ipp);
155 static void serd_reset_cb(struct serd_entry *entp, void *unused,
156     const struct ipath *ipp);
157 static void destroy_fme_bufs(struct fme *fp);
158 
159 static struct fme *
160 alloc_fme(void)
161 {
162 	struct fme *fmep;
163 
164 	fmep = MALLOC(sizeof (*fmep));
165 	bzero(fmep, sizeof (*fmep));
166 	return (fmep);
167 }
168 
169 /*
170  * fme_ready -- called when all initialization of the FME (except for
171  *	stats) has completed successfully.  Adds the fme to global lists
172  *	and establishes its stats.
173  */
174 static struct fme *
175 fme_ready(struct fme *fmep)
176 {
177 	char nbuf[100];
178 
179 	Nfmep = NULL;	/* don't need to free this on module abort now */
180 
181 	if (EFMElist) {
182 		EFMElist->next = fmep;
183 		EFMElist = fmep;
184 	} else
185 		FMElist = EFMElist = fmep;
186 
187 	(void) sprintf(nbuf, "fme%d.Rcount", fmep->id);
188 	fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
189 	(void) sprintf(nbuf, "fme%d.Hcall", fmep->id);
190 	fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1);
191 	(void) sprintf(nbuf, "fme%d.Rcall", fmep->id);
192 	fmep->Rcallcount = stats_new_counter(nbuf,
193 	    "calls to requirements_test()", 1);
194 	(void) sprintf(nbuf, "fme%d.Ccall", fmep->id);
195 	fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1);
196 	(void) sprintf(nbuf, "fme%d.Ecall", fmep->id);
197 	fmep->Ecallcount =
198 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
199 	(void) sprintf(nbuf, "fme%d.Tcall", fmep->id);
200 	fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
201 	(void) sprintf(nbuf, "fme%d.Marrow", fmep->id);
202 	fmep->Marrowcount = stats_new_counter(nbuf,
203 	    "arrows marked by mark_arrows()", 1);
204 	(void) sprintf(nbuf, "fme%d.diags", fmep->id);
205 	fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
206 
207 	out(O_ALTFP|O_VERB2, "newfme: config snapshot contains...");
208 	config_print(O_ALTFP|O_VERB2, fmep->cfgdata->cooked);
209 
210 	return (fmep);
211 }
212 
213 extern void ipath_dummy_lut(struct arrow *);
214 extern struct lut *itree_create_dummy(const char *, const struct ipath *);
215 
216 /* ARGSUSED */
217 static void
218 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
219 {
220 	struct bubble *bp;
221 	struct arrowlist *ap;
222 
223 	for (bp = itree_next_bubble(ep, NULL); bp;
224 	    bp = itree_next_bubble(ep, bp)) {
225 		if (bp->t != B_FROM)
226 			continue;
227 		for (ap = itree_next_arrow(bp, NULL); ap;
228 		    ap = itree_next_arrow(bp, ap)) {
229 			ap->arrowp->pnode->u.arrow.needed = 1;
230 			ipath_dummy_lut(ap->arrowp);
231 		}
232 	}
233 }
234 
235 /* ARGSUSED */
236 static void
237 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
238 {
239 	struct bubble *bp;
240 	struct arrowlist *ap;
241 
242 	for (bp = itree_next_bubble(ep, NULL); bp;
243 	    bp = itree_next_bubble(ep, bp)) {
244 		if (bp->t != B_FROM)
245 			continue;
246 		for (ap = itree_next_arrow(bp, NULL); ap;
247 		    ap = itree_next_arrow(bp, ap))
248 			ap->arrowp->pnode->u.arrow.needed = 0;
249 	}
250 }
251 
252 static void globals_destructor(void *left, void *right, void *arg);
253 static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep);
254 
255 static void
256 prune_propagations(const char *e0class, const struct ipath *e0ipp)
257 {
258 	char nbuf[100];
259 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
260 	extern struct lut *Usednames;
261 
262 	Nfmep = alloc_fme();
263 	Nfmep->id = Nextid;
264 	Nfmep->state = FME_NOTHING;
265 	Nfmep->eventtree = itree_create_dummy(e0class, e0ipp);
266 	if ((Nfmep->e0 =
267 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
268 		out(O_ALTFP, "prune_propagations: e0 not in instance tree");
269 		itree_free(Nfmep->eventtree);
270 		FREE(Nfmep);
271 		Nfmep = NULL;
272 		return;
273 	}
274 	Nfmep->ecurrent = Nfmep->observations = Nfmep->e0;
275 	Nfmep->e0->count++;
276 
277 	(void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id);
278 	Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
279 	(void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id);
280 	Nfmep->Hcallcount =
281 	    stats_new_counter(nbuf, "calls to hypothesise()", 1);
282 	(void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id);
283 	Nfmep->Rcallcount = stats_new_counter(nbuf,
284 	    "calls to requirements_test()", 1);
285 	(void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id);
286 	Nfmep->Ccallcount =
287 	    stats_new_counter(nbuf, "calls to causes_test()", 1);
288 	(void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id);
289 	Nfmep->Ecallcount =
290 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
291 	(void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id);
292 	Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
293 	(void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id);
294 	Nfmep->Marrowcount = stats_new_counter(nbuf,
295 	    "arrows marked by mark_arrows()", 1);
296 	(void) sprintf(nbuf, "fme%d.diags", Nfmep->id);
297 	Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
298 
299 	Nfmep->peek = 1;
300 	lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep);
301 	lut_free(Usednames, NULL, NULL);
302 	Usednames = NULL;
303 	lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep);
304 	(void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay);
305 	itree_prune(Nfmep->eventtree);
306 	lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep);
307 
308 	stats_delete(Nfmep->Rcount);
309 	stats_delete(Nfmep->Hcallcount);
310 	stats_delete(Nfmep->Rcallcount);
311 	stats_delete(Nfmep->Ccallcount);
312 	stats_delete(Nfmep->Ecallcount);
313 	stats_delete(Nfmep->Tcallcount);
314 	stats_delete(Nfmep->Marrowcount);
315 	stats_delete(Nfmep->diags);
316 	itree_free(Nfmep->eventtree);
317 	lut_free(Nfmep->globals, globals_destructor, NULL);
318 	FREE(Nfmep);
319 }
320 
321 static struct fme *
322 newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl,
323 	fmd_case_t *fmcase)
324 {
325 	struct cfgdata *cfgdata;
326 	int init_size;
327 	extern int alloc_total();
328 
329 	init_size = alloc_total();
330 	out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size);
331 	if ((cfgdata = config_snapshot()) == NULL) {
332 		out(O_ALTFP, "newfme: NULL configuration");
333 		Undiag_reason = UD_NOCONF;
334 		return (NULL);
335 	}
336 	platform_save_config(hdl, fmcase);
337 	out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes",
338 	    alloc_total() - init_size);
339 
340 	Nfmep = alloc_fme();
341 
342 	Nfmep->id = Nextid++;
343 	Nfmep->cfgdata = cfgdata;
344 	Nfmep->posted_suspects = 0;
345 	Nfmep->uniqobs = 0;
346 	Nfmep->state = FME_NOTHING;
347 	Nfmep->pull = 0ULL;
348 	Nfmep->overflow = 0;
349 
350 	Nfmep->fmcase = fmcase;
351 	Nfmep->hdl = hdl;
352 
353 	if ((Nfmep->eventtree = itree_create(cfgdata->cooked)) == NULL) {
354 		out(O_ALTFP, "newfme: NULL instance tree");
355 		Undiag_reason = UD_INSTFAIL;
356 		config_free(cfgdata);
357 		destroy_fme_bufs(Nfmep);
358 		FREE(Nfmep);
359 		Nfmep = NULL;
360 		return (NULL);
361 	}
362 
363 	itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);
364 
365 	if ((Nfmep->e0 =
366 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
367 		out(O_ALTFP, "newfme: e0 not in instance tree");
368 		Undiag_reason = UD_BADEVENTI;
369 		itree_free(Nfmep->eventtree);
370 		config_free(cfgdata);
371 		destroy_fme_bufs(Nfmep);
372 		FREE(Nfmep);
373 		Nfmep = NULL;
374 		return (NULL);
375 	}
376 
377 	return (fme_ready(Nfmep));
378 }
379 
380 void
381 fme_fini(void)
382 {
383 	struct fme *sfp, *fp;
384 	struct case_list *ucasep, *nextcasep;
385 
386 	ucasep = Undiagablecaselist;
387 	while (ucasep != NULL) {
388 		nextcasep = ucasep->next;
389 		FREE(ucasep);
390 		ucasep = nextcasep;
391 	}
392 	Undiagablecaselist = NULL;
393 
394 	/* clean up closed fmes */
395 	fp = ClosedFMEs;
396 	while (fp != NULL) {
397 		sfp = fp->next;
398 		destroy_fme(fp);
399 		fp = sfp;
400 	}
401 	ClosedFMEs = NULL;
402 
403 	fp = FMElist;
404 	while (fp != NULL) {
405 		sfp = fp->next;
406 		destroy_fme(fp);
407 		fp = sfp;
408 	}
409 	FMElist = EFMElist = NULL;
410 
411 	/* if we were in the middle of creating an fme, free it now */
412 	if (Nfmep) {
413 		destroy_fme(Nfmep);
414 		Nfmep = NULL;
415 	}
416 }
417 
418 /*
419  * Allocated space for a buffer name.  20 bytes allows for
420  * a ridiculous 9,999,999 unique observations.
421  */
422 #define	OBBUFNMSZ 20
423 
424 /*
425  *  serialize_observation
426  *
427  *  Create a recoverable version of the current observation
428  *  (f->ecurrent).  We keep a serialized version of each unique
429  *  observation in order that we may resume correctly the fme in the
430  *  correct state if eft or fmd crashes and we're restarted.
431  */
432 static void
433 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
434 {
435 	size_t pkdlen;
436 	char tmpbuf[OBBUFNMSZ];
437 	char *pkd = NULL;
438 	char *estr;
439 
440 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
441 	estr = ipath2str(cls, ipp);
442 	fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
443 	fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
444 	    strlen(estr) + 1);
445 	FREE(estr);
446 
447 	if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
448 		(void) snprintf(tmpbuf,
449 		    OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
450 		if (nvlist_xpack(fp->ecurrent->nvp,
451 		    &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
452 			out(O_DIE|O_SYS, "pack of observed nvl failed");
453 		fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
454 		fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
455 		FREE(pkd);
456 	}
457 
458 	fp->uniqobs++;
459 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
460 	    sizeof (fp->uniqobs));
461 }
462 
463 /*
464  *  init_fme_bufs -- We keep several bits of state about an fme for
465  *	use if eft or fmd crashes and we're restarted.
466  */
467 static void
468 init_fme_bufs(struct fme *fp)
469 {
470 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
471 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
472 	    sizeof (fp->pull));
473 
474 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
475 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
476 	    sizeof (fp->id));
477 
478 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
479 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
480 	    sizeof (fp->uniqobs));
481 
482 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
483 	    sizeof (fp->posted_suspects));
484 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
485 	    (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
486 }
487 
488 static void
489 destroy_fme_bufs(struct fme *fp)
490 {
491 	char tmpbuf[OBBUFNMSZ];
492 	int o;
493 
494 	platform_restore_config(fp->hdl, fp->fmcase);
495 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
496 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
497 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
498 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
499 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
500 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);
501 
502 	for (o = 0; o < fp->uniqobs; o++) {
503 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
504 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
505 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
506 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
507 	}
508 }
509 
510 /*
511  * reconstitute_observations -- convert a case's serialized observations
512  *	back into struct events.  Returns zero if all observations are
513  *	successfully reconstituted.
514  */
515 static int
516 reconstitute_observations(struct fme *fmep)
517 {
518 	struct event *ep;
519 	struct node *epnamenp = NULL;
520 	size_t pkdlen;
521 	char *pkd = NULL;
522 	char *tmpbuf = alloca(OBBUFNMSZ);
523 	char *sepptr;
524 	char *estr;
525 	int ocnt;
526 	int elen;
527 
528 	for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
529 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
530 		elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
531 		if (elen == 0) {
532 			out(O_ALTFP,
533 			    "reconstitute_observation: no %s buffer found.",
534 			    tmpbuf);
535 			Undiag_reason = UD_MISSINGOBS;
536 			break;
537 		}
538 
539 		estr = MALLOC(elen);
540 		fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
541 		sepptr = strchr(estr, '@');
542 		if (sepptr == NULL) {
543 			out(O_ALTFP,
544 			    "reconstitute_observation: %s: "
545 			    "missing @ separator in %s.",
546 			    tmpbuf, estr);
547 			Undiag_reason = UD_MISSINGPATH;
548 			FREE(estr);
549 			break;
550 		}
551 
552 		*sepptr = '\0';
553 		if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
554 			out(O_ALTFP,
555 			    "reconstitute_observation: %s: "
556 			    "trouble converting path string \"%s\" "
557 			    "to internal representation.",
558 			    tmpbuf, sepptr + 1);
559 			Undiag_reason = UD_MISSINGPATH;
560 			FREE(estr);
561 			break;
562 		}
563 
564 		/* construct the event */
565 		ep = itree_lookup(fmep->eventtree,
566 		    stable(estr), ipath(epnamenp));
567 		if (ep == NULL) {
568 			out(O_ALTFP,
569 			    "reconstitute_observation: %s: "
570 			    "lookup of  \"%s\" in itree failed.",
571 			    tmpbuf, ipath2str(estr, ipath(epnamenp)));
572 			Undiag_reason = UD_BADOBS;
573 			tree_free(epnamenp);
574 			FREE(estr);
575 			break;
576 		}
577 		tree_free(epnamenp);
578 
579 		/*
580 		 * We may or may not have a saved nvlist for the observation
581 		 */
582 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
583 		pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
584 		if (pkdlen != 0) {
585 			pkd = MALLOC(pkdlen);
586 			fmd_buf_read(fmep->hdl,
587 			    fmep->fmcase, tmpbuf, pkd, pkdlen);
588 			ASSERT(ep->nvp == NULL);
589 			if (nvlist_xunpack(pkd,
590 			    pkdlen, &ep->nvp, &Eft_nv_hdl) != 0)
591 				out(O_DIE|O_SYS, "pack of observed nvl failed");
592 			FREE(pkd);
593 		}
594 
595 		if (ocnt == 0)
596 			fmep->e0 = ep;
597 
598 		FREE(estr);
599 		fmep->ecurrent = ep;
600 		ep->count++;
601 
602 		/* link it into list of observations seen */
603 		ep->observations = fmep->observations;
604 		fmep->observations = ep;
605 	}
606 
607 	if (ocnt == fmep->uniqobs) {
608 		(void) fme_ready(fmep);
609 		return (0);
610 	}
611 
612 	return (1);
613 }
614 
615 /*
616  * restart_fme -- called during eft initialization.  Reconstitutes
617  *	an in-progress fme.
618  */
619 void
620 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress)
621 {
622 	nvlist_t *defect;
623 	struct case_list *bad;
624 	struct fme *fmep;
625 	struct cfgdata *cfgdata = NULL;
626 	size_t rawsz;
627 	struct event *ep;
628 	char *tmpbuf = alloca(OBBUFNMSZ);
629 	char *sepptr;
630 	char *estr;
631 	int elen;
632 	struct node *epnamenp = NULL;
633 	int init_size;
634 	extern int alloc_total();
635 
636 	/*
637 	 * ignore solved or closed cases
638 	 */
639 	if (fmd_case_solved(hdl, inprogress) ||
640 	    fmd_case_closed(hdl, inprogress))
641 		return;
642 
643 	fmep = alloc_fme();
644 	fmep->fmcase = inprogress;
645 	fmep->hdl = hdl;
646 
647 	if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) {
648 		out(O_ALTFP, "restart_fme: no saved posted status");
649 		Undiag_reason = UD_MISSINGINFO;
650 		goto badcase;
651 	} else {
652 		fmd_buf_read(hdl, inprogress, WOBUF_POSTD,
653 		    (void *)&fmep->posted_suspects,
654 		    sizeof (fmep->posted_suspects));
655 	}
656 
657 	if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) {
658 		out(O_ALTFP, "restart_fme: no saved id");
659 		Undiag_reason = UD_MISSINGINFO;
660 		goto badcase;
661 	} else {
662 		fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id,
663 		    sizeof (fmep->id));
664 	}
665 	if (Nextid <= fmep->id)
666 		Nextid = fmep->id + 1;
667 
668 	out(O_ALTFP, "Replay FME %d", fmep->id);
669 
670 	if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) {
671 		out(O_ALTFP, "restart_fme: No config data");
672 		Undiag_reason = UD_MISSINGINFO;
673 		goto badcase;
674 	}
675 	fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz,
676 	    sizeof (size_t));
677 
678 	if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) {
679 		out(O_ALTFP, "restart_fme: No event zero");
680 		Undiag_reason = UD_MISSINGZERO;
681 		goto badcase;
682 	}
683 
684 	if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
685 		out(O_ALTFP, "restart_fme: no saved wait time");
686 		Undiag_reason = UD_MISSINGINFO;
687 		goto badcase;
688 	} else {
689 		fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
690 		    sizeof (fmep->pull));
691 	}
692 
693 	if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
694 		out(O_ALTFP, "restart_fme: no count of observations");
695 		Undiag_reason = UD_MISSINGINFO;
696 		goto badcase;
697 	} else {
698 		fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
699 		    (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
700 	}
701 
702 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed0");
703 	elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
704 	if (elen == 0) {
705 		out(O_ALTFP, "reconstitute_observation: no %s buffer found.",
706 		    tmpbuf);
707 		Undiag_reason = UD_MISSINGOBS;
708 		goto badcase;
709 	}
710 	estr = MALLOC(elen);
711 	fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
712 	sepptr = strchr(estr, '@');
713 	if (sepptr == NULL) {
714 		out(O_ALTFP, "reconstitute_observation: %s: "
715 		    "missing @ separator in %s.",
716 		    tmpbuf, estr);
717 		Undiag_reason = UD_MISSINGPATH;
718 		FREE(estr);
719 		goto badcase;
720 	}
721 	*sepptr = '\0';
722 	if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
723 		out(O_ALTFP, "reconstitute_observation: %s: "
724 		    "trouble converting path string \"%s\" "
725 		    "to internal representation.", tmpbuf, sepptr + 1);
726 		Undiag_reason = UD_MISSINGPATH;
727 		FREE(estr);
728 		goto badcase;
729 	}
730 	prune_propagations(stable(estr), ipath(epnamenp));
731 	tree_free(epnamenp);
732 	FREE(estr);
733 
734 	init_size = alloc_total();
735 	out(O_ALTFP|O_STAMP, "start config_restore using %d bytes", init_size);
736 	cfgdata = MALLOC(sizeof (struct cfgdata));
737 	cfgdata->cooked = NULL;
738 	cfgdata->devcache = NULL;
739 	cfgdata->cpucache = NULL;
740 	cfgdata->cooked_refcnt = 0;
741 	cfgdata->raw_refcnt = 1;
742 
743 	if (rawsz > 0) {
744 		if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) {
745 			out(O_ALTFP, "restart_fme: Config data size mismatch");
746 			Undiag_reason = UD_CFGMISMATCH;
747 			goto badcase;
748 		}
749 		cfgdata->begin = MALLOC(rawsz);
750 		cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz;
751 		fmd_buf_read(hdl,
752 		    inprogress, WOBUF_CFG, cfgdata->begin, rawsz);
753 	} else {
754 		cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL;
755 	}
756 	fmep->cfgdata = cfgdata;
757 
758 	config_cook(cfgdata);
759 	if (cfgdata->begin)
760 		FREE(cfgdata->begin);
761 	cfgdata->begin = NULL;
762 	cfgdata->end = NULL;
763 	cfgdata->nextfree = NULL;
764 	out(O_ALTFP|O_STAMP, "config_restore added %d bytes",
765 	    alloc_total() - init_size);
766 
767 	if ((fmep->eventtree = itree_create(cfgdata->cooked)) == NULL) {
768 		/* case not properly saved or irretrievable */
769 		out(O_ALTFP, "restart_fme: NULL instance tree");
770 		Undiag_reason = UD_INSTFAIL;
771 		goto badcase;
772 	}
773 
774 	itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree);
775 
776 	if (reconstitute_observations(fmep) != 0)
777 		goto badcase;
778 
779 	out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id);
780 	for (ep = fmep->observations; ep; ep = ep->observations) {
781 		out(O_ALTFP|O_NONL, " ");
782 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
783 	}
784 	out(O_ALTFP, NULL);
785 
786 	Open_fme_count++;
787 
788 	/* give the diagnosis algorithm a shot at the new FME state */
789 	fme_eval(fmep, fmep->e0r);
790 	return;
791 
792 badcase:
793 	if (fmep->eventtree != NULL)
794 		itree_free(fmep->eventtree);
795 	config_free(cfgdata);
796 	destroy_fme_bufs(fmep);
797 	FREE(fmep);
798 
799 	/*
800 	 * Since we're unable to restart the case, add it to the undiagable
801 	 * list and solve and close it as appropriate.
802 	 */
803 	bad = MALLOC(sizeof (struct case_list));
804 	bad->next = NULL;
805 
806 	if (Undiagablecaselist != NULL)
807 		bad->next = Undiagablecaselist;
808 	Undiagablecaselist = bad;
809 	bad->fmcase = inprogress;
810 
811 	out(O_ALTFP|O_NONL, "[case %s (unable to restart), ",
812 	    fmd_case_uuid(hdl, bad->fmcase));
813 
814 	if (fmd_case_solved(hdl, bad->fmcase)) {
815 		out(O_ALTFP|O_NONL, "already solved, ");
816 	} else {
817 		out(O_ALTFP|O_NONL, "solving, ");
818 		defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
819 		    NULL, NULL, NULL);
820 		if (Undiag_reason != NULL)
821 			(void) nvlist_add_string(defect,
822 			    UNDIAG_REASON, Undiag_reason);
823 		fmd_case_add_suspect(hdl, bad->fmcase, defect);
824 		fmd_case_solve(hdl, bad->fmcase);
825 	}
826 
827 	if (fmd_case_closed(hdl, bad->fmcase)) {
828 		out(O_ALTFP, "already closed ]");
829 	} else {
830 		out(O_ALTFP, "closing ]");
831 		fmd_case_close(hdl, bad->fmcase);
832 	}
833 }
834 
835 /*ARGSUSED*/
836 static void
837 globals_destructor(void *left, void *right, void *arg)
838 {
839 	struct evalue *evp = (struct evalue *)right;
840 	if (evp->t == NODEPTR)
841 		tree_free((struct node *)(uintptr_t)evp->v);
842 	evp->v = NULL;
843 	FREE(evp);
844 }
845 
846 void
847 destroy_fme(struct fme *f)
848 {
849 	stats_delete(f->Rcount);
850 	stats_delete(f->Hcallcount);
851 	stats_delete(f->Rcallcount);
852 	stats_delete(f->Ccallcount);
853 	stats_delete(f->Ecallcount);
854 	stats_delete(f->Tcallcount);
855 	stats_delete(f->Marrowcount);
856 	stats_delete(f->diags);
857 
858 	if (f->eventtree != NULL)
859 		itree_free(f->eventtree);
860 	if (f->cfgdata != NULL)
861 		config_free(f->cfgdata);
862 	lut_free(f->globals, globals_destructor, NULL);
863 	FREE(f);
864 }
865 
866 static const char *
867 fme_state2str(enum fme_state s)
868 {
869 	switch (s) {
870 	case FME_NOTHING:	return ("NOTHING");
871 	case FME_WAIT:		return ("WAIT");
872 	case FME_CREDIBLE:	return ("CREDIBLE");
873 	case FME_DISPROVED:	return ("DISPROVED");
874 	case FME_DEFERRED:	return ("DEFERRED");
875 	default:		return ("UNKNOWN");
876 	}
877 }
878 
879 static int
880 is_problem(enum nametype t)
881 {
882 	return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
883 }
884 
885 static int
886 is_fault(enum nametype t)
887 {
888 	return (t == N_FAULT);
889 }
890 
891 static int
892 is_defect(enum nametype t)
893 {
894 	return (t == N_DEFECT);
895 }
896 
897 static int
898 is_upset(enum nametype t)
899 {
900 	return (t == N_UPSET);
901 }
902 
903 static void
904 fme_print(int flags, struct fme *fmep)
905 {
906 	struct event *ep;
907 
908 	out(flags, "Fault Management Exercise %d", fmep->id);
909 	out(flags, "\t       State: %s", fme_state2str(fmep->state));
910 	out(flags|O_NONL, "\t  Start time: ");
911 	ptree_timeval(flags|O_NONL, &fmep->ull);
912 	out(flags, NULL);
913 	if (fmep->wull) {
914 		out(flags|O_NONL, "\t   Wait time: ");
915 		ptree_timeval(flags|O_NONL, &fmep->wull);
916 		out(flags, NULL);
917 	}
918 	out(flags|O_NONL, "\t          E0: ");
919 	if (fmep->e0)
920 		itree_pevent_brief(flags|O_NONL, fmep->e0);
921 	else
922 		out(flags|O_NONL, "NULL");
923 	out(flags, NULL);
924 	out(flags|O_NONL, "\tObservations:");
925 	for (ep = fmep->observations; ep; ep = ep->observations) {
926 		out(flags|O_NONL, " ");
927 		itree_pevent_brief(flags|O_NONL, ep);
928 	}
929 	out(flags, NULL);
930 	out(flags|O_NONL, "\tSuspect list:");
931 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
932 		out(flags|O_NONL, " ");
933 		itree_pevent_brief(flags|O_NONL, ep);
934 	}
935 	out(flags, NULL);
936 	if (fmep->eventtree != NULL) {
937 		out(flags|O_VERB2, "\t        Tree:");
938 		itree_ptree(flags|O_VERB2, fmep->eventtree);
939 	}
940 }
941 
942 static struct node *
943 pathstring2epnamenp(char *path)
944 {
945 	char *sep = "/";
946 	struct node *ret;
947 	char *ptr;
948 
949 	if ((ptr = strtok(path, sep)) == NULL)
950 		out(O_DIE, "pathstring2epnamenp: invalid empty class");
951 
952 	ret = tree_iname(stable(ptr), NULL, 0);
953 
954 	while ((ptr = strtok(NULL, sep)) != NULL)
955 		ret = tree_name_append(ret,
956 		    tree_iname(stable(ptr), NULL, 0));
957 
958 	return (ret);
959 }
960 
961 /*
962  * for a given upset sp, increment the corresponding SERD engine.  if the
963  * SERD engine trips, return the ename and ipp of the resulting ereport.
964  * returns true if engine tripped and *enamep and *ippp were filled in.
965  */
966 static int
967 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep,
968     fmd_case_t *fmcase, struct event *sp, const char **enamep,
969     const struct ipath **ippp)
970 {
971 	struct node *serdinst;
972 	char *serdname;
973 	struct node *nid;
974 	struct serd_entry *newentp;
975 
976 	ASSERT(sp->t == N_UPSET);
977 	ASSERT(ffep != NULL);
978 
979 	/*
980 	 * obtain instanced SERD engine from the upset sp.  from this
981 	 * derive serdname, the string used to identify the SERD engine.
982 	 */
983 	serdinst = eventprop_lookup(sp, L_engine);
984 
985 	if (serdinst == NULL)
986 		return (NULL);
987 
988 	serdname = ipath2str(serdinst->u.stmt.np->u.event.ename->u.name.s,
989 	    ipath(serdinst->u.stmt.np->u.event.epname));
990 
991 	/* handle serd engine "id" property, if there is one */
992 	if ((nid =
993 	    lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) {
994 		struct evalue *gval;
995 		char suffixbuf[200];
996 		char *suffix;
997 		char *nserdname;
998 		size_t nname;
999 
1000 		out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname);
1001 		ptree_name_iter(O_ALTFP|O_NONL, nid);
1002 
1003 		ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t));
1004 
1005 		if ((gval = lut_lookup(fmep->globals,
1006 		    (void *)nid->u.globid.s, NULL)) == NULL) {
1007 			out(O_ALTFP, " undefined");
1008 		} else if (gval->t == UINT64) {
1009 			out(O_ALTFP, " %llu", gval->v);
1010 			(void) sprintf(suffixbuf, "%llu", gval->v);
1011 			suffix = suffixbuf;
1012 		} else {
1013 			out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v);
1014 			suffix = (char *)(uintptr_t)gval->v;
1015 		}
1016 
1017 		nname = strlen(serdname) + strlen(suffix) + 2;
1018 		nserdname = MALLOC(nname);
1019 		(void) snprintf(nserdname, nname, "%s:%s", serdname, suffix);
1020 		FREE(serdname);
1021 		serdname = nserdname;
1022 	}
1023 
1024 	if (!fmd_serd_exists(hdl, serdname)) {
1025 		struct node *nN, *nT;
1026 
1027 		/* no SERD engine yet, so create it */
1028 		nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N, NULL);
1029 		nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T, NULL);
1030 
1031 		ASSERT(nN->t == T_NUM);
1032 		ASSERT(nT->t == T_TIMEVAL);
1033 
1034 		fmd_serd_create(hdl, serdname, (uint_t)nN->u.ull,
1035 		    (hrtime_t)nT->u.ull);
1036 	}
1037 
1038 	newentp = MALLOC(sizeof (*newentp));
1039 	newentp->ename = serdinst->u.stmt.np->u.event.ename->u.name.s;
1040 	newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname);
1041 	newentp->hdl = hdl;
1042 	if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) {
1043 		SerdEngines = lut_add(SerdEngines, (void *)newentp,
1044 		    (void *)NULL, (lut_cmp)serd_cmp);
1045 		Serd_need_save = 1;
1046 		serd_save();
1047 	} else {
1048 		FREE(newentp);
1049 	}
1050 
1051 
1052 	/*
1053 	 * increment SERD engine.  if engine fires, reset serd
1054 	 * engine and return trip_strcode
1055 	 */
1056 	if (fmd_serd_record(hdl, serdname, ffep)) {
1057 		struct node *tripinst = lut_lookup(serdinst->u.stmt.lutp,
1058 		    (void *)L_trip, NULL);
1059 
1060 		ASSERT(tripinst != NULL);
1061 
1062 		*enamep = tripinst->u.event.ename->u.name.s;
1063 		*ippp = ipath(tripinst->u.event.epname);
1064 
1065 		fmd_case_add_serd(hdl, fmcase, serdname);
1066 		fmd_serd_reset(hdl, serdname);
1067 		out(O_ALTFP|O_NONL, "[engine fired: %s, sending: ", serdname);
1068 		ipath_print(O_ALTFP|O_NONL, *enamep, *ippp);
1069 		out(O_ALTFP, "]");
1070 
1071 		FREE(serdname);
1072 		return (1);
1073 	}
1074 
1075 	FREE(serdname);
1076 	return (0);
1077 }
1078 
1079 /*
1080  * search a suspect list for upsets.  feed each upset to serd_eval() and
1081  * build up tripped[], an array of ereports produced by the firing of
1082  * any SERD engines.  then feed each ereport back into
1083  * fme_receive_report().
1084  *
1085  * returns ntrip, the number of these ereports produced.
1086  */
1087 static int
1088 upsets_eval(struct fme *fmep, fmd_event_t *ffep)
1089 {
1090 	/* we build an array of tripped ereports that we send ourselves */
1091 	struct {
1092 		const char *ename;
1093 		const struct ipath *ipp;
1094 	} *tripped;
1095 	struct event *sp;
1096 	int ntrip, nupset, i;
1097 
1098 	/*
1099 	 * count the number of upsets to determine the upper limit on
1100 	 * expected trip ereport strings.  remember that one upset can
1101 	 * lead to at most one ereport.
1102 	 */
1103 	nupset = 0;
1104 	for (sp = fmep->suspects; sp; sp = sp->suspects) {
1105 		if (sp->t == N_UPSET)
1106 			nupset++;
1107 	}
1108 
1109 	if (nupset == 0)
1110 		return (0);
1111 
1112 	/*
1113 	 * get to this point if we have upsets and expect some trip
1114 	 * ereports
1115 	 */
1116 	tripped = alloca(sizeof (*tripped) * nupset);
1117 	bzero((void *)tripped, sizeof (*tripped) * nupset);
1118 
1119 	ntrip = 0;
1120 	for (sp = fmep->suspects; sp; sp = sp->suspects)
1121 		if (sp->t == N_UPSET &&
1122 		    serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp,
1123 		    &tripped[ntrip].ename, &tripped[ntrip].ipp))
1124 			ntrip++;
1125 
1126 	for (i = 0; i < ntrip; i++)
1127 		fme_receive_report(fmep->hdl, ffep,
1128 		    tripped[i].ename, tripped[i].ipp, NULL);
1129 
1130 	return (ntrip);
1131 }
1132 
1133 /*
1134  * fme_receive_external_report -- call when an external ereport comes in
1135  *
1136  * this routine just converts the relevant information from the ereport
1137  * into a format used internally and passes it on to fme_receive_report().
1138  */
1139 void
1140 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1141     const char *eventstring)
1142 {
1143 	struct node *epnamenp = platform_getpath(nvl);
1144 	const struct ipath *ipp;
1145 
1146 	/*
1147 	 * XFILE: If we ended up without a path, it's an X-file.
1148 	 * For now, use our undiagnosable interface.
1149 	 */
1150 	if (epnamenp == NULL) {
1151 		fmd_case_t *fmcase;
1152 
1153 		out(O_ALTFP, "XFILE: Unable to get path from ereport");
1154 		Undiag_reason = UD_NOPATH;
1155 		fmcase = fmd_case_open(hdl, NULL);
1156 		publish_undiagnosable(hdl, ffep, fmcase);
1157 		return;
1158 	}
1159 
1160 	ipp = ipath(epnamenp);
1161 	tree_free(epnamenp);
1162 	fme_receive_report(hdl, ffep, stable(eventstring), ipp, nvl);
1163 }
1164 
1165 /*ARGSUSED*/
1166 void
1167 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1168     const char *eventstring)
1169 {
1170 	char *uuid;
1171 	nvlist_t **nva;
1172 	uint_t nvc;
1173 	const struct ipath *ipp;
1174 
1175 	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
1176 	    nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
1177 	    &nva, &nvc) != 0) {
1178 		out(O_ALTFP, "No uuid or fault list for list.repaired event");
1179 		return;
1180 	}
1181 
1182 	out(O_ALTFP, "Processing list.repaired from case %s", uuid);
1183 
1184 	while (nvc-- != 0) {
1185 		/*
1186 		 * Reset any istat or serd engine associated with this path.
1187 		 */
1188 		char *path;
1189 
1190 		if ((ipp = platform_fault2ipath(*nva++)) == NULL)
1191 			continue;
1192 
1193 		path = ipath2str(NULL, ipp);
1194 		out(O_ALTFP, "fme_receive_repair_list: resetting state for %s",
1195 		    path);
1196 		FREE(path);
1197 
1198 		lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp);
1199 		istat_save();
1200 
1201 		lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp);
1202 		serd_save();
1203 	}
1204 }
1205 
1206 static int mark_arrows(struct fme *fmep, struct event *ep, int mark,
1207     unsigned long long at_latest_by, unsigned long long *pdelay, int keep);
1208 
1209 /* ARGSUSED */
1210 static void
1211 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
1212 {
1213 	struct bubble *bp;
1214 	struct arrowlist *ap;
1215 
1216 	ep->cached_state = 0;
1217 	ep->keep_in_tree = 0;
1218 	for (bp = itree_next_bubble(ep, NULL); bp;
1219 	    bp = itree_next_bubble(ep, bp)) {
1220 		if (bp->t != B_FROM)
1221 			continue;
1222 		bp->mark = 0;
1223 		for (ap = itree_next_arrow(bp, NULL); ap;
1224 		    ap = itree_next_arrow(bp, ap))
1225 			ap->arrowp->mark = 0;
1226 	}
1227 }
1228 
1229 static void
1230 fme_reload_cfgdata(struct fme *fmep)
1231 {
1232 	size_t rawsz;
1233 
1234 	fmep->cfgdata = MALLOC(sizeof (struct cfgdata));
1235 	fmep->cfgdata->cooked = NULL;
1236 	fmep->cfgdata->devcache = NULL;
1237 	fmep->cfgdata->cpucache = NULL;
1238 	fmep->cfgdata->cooked_refcnt = 0;
1239 	fmep->cfgdata->raw_refcnt = 1;
1240 	fmd_buf_read(fmep->hdl, fmep->fmcase, WOBUF_CFGLEN,
1241 	    (void *)&rawsz, sizeof (size_t));
1242 	if (rawsz > 0) {
1243 		fmep->cfgdata->begin = MALLOC(rawsz);
1244 		fmep->cfgdata->end = fmep->cfgdata->nextfree =
1245 		    fmep->cfgdata->begin + rawsz;
1246 		fmd_buf_read(fmep->hdl, fmep->fmcase, WOBUF_CFG,
1247 		    fmep->cfgdata->begin, rawsz);
1248 		config_cook(fmep->cfgdata);
1249 		FREE(fmep->cfgdata->begin);
1250 	}
1251 	fmep->cfgdata->begin = NULL;
1252 	fmep->cfgdata->end = NULL;
1253 	fmep->cfgdata->nextfree = NULL;
1254 }
1255 
1256 static void
1257 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
1258     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
1259 {
1260 	struct event *ep;
1261 	struct fme *fmep = NULL;
1262 	struct fme *ofmep = NULL;
1263 	struct fme *cfmep, *svfmep;
1264 	int matched = 0;
1265 	nvlist_t *defect;
1266 	fmd_case_t *fmcase;
1267 
1268 	out(O_ALTFP|O_NONL, "fme_receive_report: ");
1269 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1270 	out(O_ALTFP|O_STAMP, NULL);
1271 
1272 	/* decide which FME it goes to */
1273 	for (fmep = FMElist; fmep; fmep = fmep->next) {
1274 		int prev_verbose;
1275 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1276 		enum fme_state state;
1277 		nvlist_t *pre_peek_nvp = NULL;
1278 
1279 		if (fmep->overflow) {
1280 			if (!(fmd_case_closed(fmep->hdl, fmep->fmcase)))
1281 				ofmep = fmep;
1282 
1283 			continue;
1284 		}
1285 
1286 		/*
1287 		 * ignore solved or closed cases
1288 		 */
1289 		if (fmep->posted_suspects ||
1290 		    fmd_case_solved(fmep->hdl, fmep->fmcase) ||
1291 		    fmd_case_closed(fmep->hdl, fmep->fmcase))
1292 			continue;
1293 
1294 		/* look up event in event tree for this FME */
1295 		if ((ep = itree_lookup(fmep->eventtree,
1296 		    eventstring, ipp)) == NULL)
1297 			continue;
1298 
1299 		/* note observation */
1300 		fmep->ecurrent = ep;
1301 		if (ep->count++ == 0) {
1302 			/* link it into list of observations seen */
1303 			ep->observations = fmep->observations;
1304 			fmep->observations = ep;
1305 			ep->nvp = evnv_dupnvl(nvl);
1306 		} else {
1307 			/* use new payload values for peek */
1308 			pre_peek_nvp = ep->nvp;
1309 			ep->nvp = evnv_dupnvl(nvl);
1310 		}
1311 
1312 		/* tell hypothesise() not to mess with suspect list */
1313 		fmep->peek = 1;
1314 
1315 		/* don't want this to be verbose (unless Debug is set) */
1316 		prev_verbose = Verbose;
1317 		if (Debug == 0)
1318 			Verbose = 0;
1319 
1320 		fme_reload_cfgdata(fmep);
1321 
1322 		lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
1323 		state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
1324 
1325 		fmep->peek = 0;
1326 
1327 		/* put verbose flag back */
1328 		Verbose = prev_verbose;
1329 
1330 		if (state != FME_DISPROVED) {
1331 			/* found an FME that explains the ereport */
1332 			matched++;
1333 			out(O_ALTFP|O_NONL, "[");
1334 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1335 			out(O_ALTFP, " explained by FME%d]", fmep->id);
1336 
1337 			if (pre_peek_nvp)
1338 				nvlist_free(pre_peek_nvp);
1339 
1340 			if (ep->count == 1)
1341 				serialize_observation(fmep, eventstring, ipp);
1342 
1343 			if (ffep)
1344 				fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1345 
1346 			stats_counter_bump(fmep->Rcount);
1347 
1348 			/* re-eval FME */
1349 			fme_eval(fmep, ffep);
1350 		} else {
1351 
1352 			/* not a match, undo noting of observation */
1353 			config_free(fmep->cfgdata);
1354 			fmep->cfgdata = NULL;
1355 			fmep->ecurrent = NULL;
1356 			if (--ep->count == 0) {
1357 				/* unlink it from observations */
1358 				fmep->observations = ep->observations;
1359 				ep->observations = NULL;
1360 				nvlist_free(ep->nvp);
1361 				ep->nvp = NULL;
1362 			} else {
1363 				nvlist_free(ep->nvp);
1364 				ep->nvp = pre_peek_nvp;
1365 			}
1366 		}
1367 	}
1368 
1369 	if (matched)
1370 		return;	/* explained by at least one existing FME */
1371 
1372 	/* clean up closed fmes */
1373 	cfmep = ClosedFMEs;
1374 	while (cfmep != NULL) {
1375 		svfmep = cfmep->next;
1376 		destroy_fme(cfmep);
1377 		cfmep = svfmep;
1378 	}
1379 	ClosedFMEs = NULL;
1380 	prune_propagations(eventstring, ipp);
1381 
1382 	if (ofmep) {
1383 		out(O_ALTFP|O_NONL, "[");
1384 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1385 		out(O_ALTFP, " ADDING TO OVERFLOW FME]");
1386 		if (ffep)
1387 			fmd_case_add_ereport(hdl, ofmep->fmcase, ffep);
1388 
1389 		return;
1390 
1391 	} else if (Max_fme && (Open_fme_count >= Max_fme)) {
1392 		out(O_ALTFP|O_NONL, "[");
1393 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1394 		out(O_ALTFP, " MAX OPEN FME REACHED]");
1395 
1396 		fmcase = fmd_case_open(hdl, NULL);
1397 
1398 		/* Create overflow fme */
1399 		if ((fmep = newfme(eventstring, ipp, hdl, fmcase)) == NULL) {
1400 			out(O_ALTFP|O_NONL, "[");
1401 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1402 			out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]");
1403 			publish_undiagnosable(hdl, ffep, fmcase);
1404 			return;
1405 		}
1406 
1407 		Open_fme_count++;
1408 
1409 		init_fme_bufs(fmep);
1410 		fmep->overflow = B_TRUE;
1411 
1412 		if (ffep)
1413 			fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1414 
1415 		defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
1416 		    NULL, NULL, NULL);
1417 		(void) nvlist_add_string(defect, UNDIAG_REASON, UD_MAXFME);
1418 		fmd_case_add_suspect(hdl, fmep->fmcase, defect);
1419 		fmd_case_solve(hdl, fmep->fmcase);
1420 		return;
1421 	}
1422 
1423 	/* open a case */
1424 	fmcase = fmd_case_open(hdl, NULL);
1425 
1426 	/* start a new FME */
1427 	if ((fmep = newfme(eventstring, ipp, hdl, fmcase)) == NULL) {
1428 		out(O_ALTFP|O_NONL, "[");
1429 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1430 		out(O_ALTFP, " CANNOT DIAGNOSE]");
1431 		publish_undiagnosable(hdl, ffep, fmcase);
1432 		return;
1433 	}
1434 
1435 	Open_fme_count++;
1436 
1437 	init_fme_bufs(fmep);
1438 
1439 	out(O_ALTFP|O_NONL, "[");
1440 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1441 	out(O_ALTFP, " created FME%d, case %s]", fmep->id,
1442 	    fmd_case_uuid(hdl, fmep->fmcase));
1443 
1444 	ep = fmep->e0;
1445 	ASSERT(ep != NULL);
1446 
1447 	/* note observation */
1448 	fmep->ecurrent = ep;
1449 	if (ep->count++ == 0) {
1450 		/* link it into list of observations seen */
1451 		ep->observations = fmep->observations;
1452 		fmep->observations = ep;
1453 		ep->nvp = evnv_dupnvl(nvl);
1454 		serialize_observation(fmep, eventstring, ipp);
1455 	} else {
1456 		/* new payload overrides any previous */
1457 		nvlist_free(ep->nvp);
1458 		ep->nvp = evnv_dupnvl(nvl);
1459 	}
1460 
1461 	stats_counter_bump(fmep->Rcount);
1462 
1463 	if (ffep) {
1464 		fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1465 		fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
1466 		fmep->e0r = ffep;
1467 	}
1468 
1469 	/* give the diagnosis algorithm a shot at the new FME state */
1470 	fme_eval(fmep, ffep);
1471 }
1472 
1473 void
1474 fme_status(int flags)
1475 {
1476 	struct fme *fmep;
1477 
1478 	if (FMElist == NULL) {
1479 		out(flags, "No fault management exercises underway.");
1480 		return;
1481 	}
1482 
1483 	for (fmep = FMElist; fmep; fmep = fmep->next)
1484 		fme_print(flags, fmep);
1485 }
1486 
1487 /*
1488  * "indent" routines used mostly for nicely formatted debug output, but also
1489  * for sanity checking for infinite recursion bugs.
1490  */
1491 
1492 #define	MAX_INDENT 1024
1493 static const char *indent_s[MAX_INDENT];
1494 static int current_indent;
1495 
1496 static void
1497 indent_push(const char *s)
1498 {
1499 	if (current_indent < MAX_INDENT)
1500 		indent_s[current_indent++] = s;
1501 	else
1502 		out(O_DIE, "unexpected recursion depth (%d)", current_indent);
1503 }
1504 
1505 static void
1506 indent_set(const char *s)
1507 {
1508 	current_indent = 0;
1509 	indent_push(s);
1510 }
1511 
1512 static void
1513 indent_pop(void)
1514 {
1515 	if (current_indent > 0)
1516 		current_indent--;
1517 	else
1518 		out(O_DIE, "recursion underflow");
1519 }
1520 
1521 static void
1522 indent(void)
1523 {
1524 	int i;
1525 	if (!Verbose)
1526 		return;
1527 	for (i = 0; i < current_indent; i++)
1528 		out(O_ALTFP|O_VERB|O_NONL, indent_s[i]);
1529 }
1530 
1531 #define	SLNEW		1
1532 #define	SLCHANGED	2
1533 #define	SLWAIT		3
1534 #define	SLDISPROVED	4
1535 
1536 static void
1537 print_suspects(int circumstance, struct fme *fmep)
1538 {
1539 	struct event *ep;
1540 
1541 	out(O_ALTFP|O_NONL, "[");
1542 	if (circumstance == SLCHANGED) {
1543 		out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, "
1544 		    "suspect list:", fmep->id, fme_state2str(fmep->state));
1545 	} else if (circumstance == SLWAIT) {
1546 		out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id,
1547 		    fmep->timer);
1548 		ptree_timeval(O_ALTFP|O_NONL, &fmep->wull);
1549 	} else if (circumstance == SLDISPROVED) {
1550 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id);
1551 	} else {
1552 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id);
1553 	}
1554 
1555 	if (circumstance == SLWAIT || circumstance == SLDISPROVED) {
1556 		out(O_ALTFP, "]");
1557 		return;
1558 	}
1559 
1560 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
1561 		out(O_ALTFP|O_NONL, " ");
1562 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
1563 	}
1564 	out(O_ALTFP, "]");
1565 }
1566 
1567 static struct node *
1568 eventprop_lookup(struct event *ep, const char *propname)
1569 {
1570 	return (lut_lookup(ep->props, (void *)propname, NULL));
1571 }
1572 
1573 #define	MAXDIGITIDX	23
1574 static char numbuf[MAXDIGITIDX + 1];
1575 
1576 static int
1577 node2uint(struct node *n, uint_t *valp)
1578 {
1579 	struct evalue value;
1580 	struct lut *globals = NULL;
1581 
1582 	if (n == NULL)
1583 		return (1);
1584 
1585 	/*
1586 	 * check value.v since we are being asked to convert an unsigned
1587 	 * long long int to an unsigned int
1588 	 */
1589 	if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) ||
1590 	    value.t != UINT64 || value.v > (1ULL << 32))
1591 		return (1);
1592 
1593 	*valp = (uint_t)value.v;
1594 
1595 	return (0);
1596 }
1597 
1598 static nvlist_t *
1599 node2fmri(struct node *n)
1600 {
1601 	nvlist_t **pa, *f, *p;
1602 	struct node *nc;
1603 	uint_t depth = 0;
1604 	char *numstr, *nullbyte;
1605 	char *failure;
1606 	int err, i;
1607 
1608 	/* XXX do we need to be able to handle a non-T_NAME node? */
1609 	if (n == NULL || n->t != T_NAME)
1610 		return (NULL);
1611 
1612 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
1613 		if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM)
1614 			break;
1615 		depth++;
1616 	}
1617 
1618 	if (nc != NULL) {
1619 		/* We bailed early, something went wrong */
1620 		return (NULL);
1621 	}
1622 
1623 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
1624 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
1625 	pa = alloca(depth * sizeof (nvlist_t *));
1626 	for (i = 0; i < depth; i++)
1627 		pa[i] = NULL;
1628 
1629 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
1630 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
1631 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
1632 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
1633 	if (err != 0) {
1634 		failure = "basic construction of FMRI failed";
1635 		goto boom;
1636 	}
1637 
1638 	numbuf[MAXDIGITIDX] = '\0';
1639 	nullbyte = &numbuf[MAXDIGITIDX];
1640 	i = 0;
1641 
1642 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
1643 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
1644 		if (err != 0) {
1645 			failure = "alloc of an hc-pair failed";
1646 			goto boom;
1647 		}
1648 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s);
1649 		numstr = ulltostr(nc->u.name.child->u.ull, nullbyte);
1650 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
1651 		if (err != 0) {
1652 			failure = "construction of an hc-pair failed";
1653 			goto boom;
1654 		}
1655 		pa[i++] = p;
1656 	}
1657 
1658 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
1659 	if (err == 0) {
1660 		for (i = 0; i < depth; i++)
1661 			if (pa[i] != NULL)
1662 				nvlist_free(pa[i]);
1663 		return (f);
1664 	}
1665 	failure = "addition of hc-pair array to FMRI failed";
1666 
1667 boom:
1668 	for (i = 0; i < depth; i++)
1669 		if (pa[i] != NULL)
1670 			nvlist_free(pa[i]);
1671 	nvlist_free(f);
1672 	out(O_DIE, "%s", failure);
1673 	/*NOTREACHED*/
1674 	return (NULL);
1675 }
1676 
/*
 * avg -- return sum / cnt rounded to the nearest integer (halves round
 * up).  returns 0 when cnt is 0 instead of dividing by zero.
 */
static uint_t
avg(uint_t sum, uint_t cnt)
{
	unsigned long long s;

	if (cnt == 0)
		return (0);

	/* widen before scaling so sum * 10 cannot wrap in 32 bits */
	s = (unsigned long long)sum * 10;

	return ((uint_t)((s / cnt / 10) + (((s / cnt % 10) >= 5) ? 1 : 0)));
}
1684 
/*
 * percentof -- return part as a percentage of whole, rounded to the
 * nearest integer (halves round up).  returns 0 when whole is 0 instead
 * of dividing by zero.  the result is narrowed to uint8_t.
 */
static uint8_t
percentof(uint_t part, uint_t whole)
{
	unsigned long long p;

	if (whole == 0)
		return (0);

	/* widen before scaling so part * 1000 cannot wrap in 32 bits */
	p = (unsigned long long)part * 1000;

	return ((uint8_t)((p / whole / 10) +
	    (((p / whole % 10) >= 5) ? 1 : 0)));
}
1692 
/*
 * struct rsl -- one entry of the resource list built for a suspect list.
 * get_resources() fills in all four members; rslfree() releases the
 * nvlists.
 */
struct rsl {
	struct event *suspect;	/* the suspect; rsluniq() NULLs it on dups */
	nvlist_t *asru;		/* ASRU FMRI, may be NULL */
	nvlist_t *fru;		/* FRU FMRI, may be NULL */
	nvlist_t *rsrc;		/* resource FMRI; may be same list as asru */
};
1699 
1700 /*
1701  *  rslfree -- free internal members of struct rsl not expected to be
1702  *	freed elsewhere.
1703  */
1704 static void
1705 rslfree(struct rsl *freeme)
1706 {
1707 	if (freeme->asru != NULL)
1708 		nvlist_free(freeme->asru);
1709 	if (freeme->fru != NULL)
1710 		nvlist_free(freeme->fru);
1711 	if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru)
1712 		nvlist_free(freeme->rsrc);
1713 }
1714 
1715 /*
1716  *  rslcmp -- compare two rsl structures.  Use the following
1717  *	comparisons to establish cardinality:
1718  *
1719  *	1. Name of the suspect's class. (simple strcmp)
1720  *	2. Name of the suspect's ASRU. (trickier, since nvlist)
1721  *
1722  */
1723 static int
1724 rslcmp(const void *a, const void *b)
1725 {
1726 	struct rsl *r1 = (struct rsl *)a;
1727 	struct rsl *r2 = (struct rsl *)b;
1728 	int rv;
1729 
1730 	rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s,
1731 	    r2->suspect->enode->u.event.ename->u.name.s);
1732 	if (rv != 0)
1733 		return (rv);
1734 
1735 	if (r1->asru == NULL && r2->asru == NULL)
1736 		return (0);
1737 	if (r1->asru == NULL)
1738 		return (-1);
1739 	if (r2->asru == NULL)
1740 		return (1);
1741 	return (evnv_cmpnvl(r1->asru, r2->asru, 0));
1742 }
1743 
1744 /*
1745  *  rsluniq -- given an array of rsl structures, seek out and "remove"
1746  *	any duplicates.  Dups are "remove"d by NULLing the suspect pointer
1747  *	of the array element.  Removal also means updating the number of
1748  *	problems and the number of problems which are not faults.  User
1749  *	provides the first and last element pointers.
1750  */
1751 static void
1752 rsluniq(struct rsl *first, struct rsl *last, int *nprobs, int *nnonf)
1753 {
1754 	struct rsl *cr;
1755 
1756 	if (*nprobs == 1)
1757 		return;
1758 
1759 	/*
1760 	 *  At this point, we only expect duplicate defects.
1761 	 *  Eversholt's diagnosis algorithm prevents duplicate
1762 	 *  suspects, but we rewrite defects in the platform code after
1763 	 *  the diagnosis is made, and that can introduce new
1764 	 *  duplicates.
1765 	 */
1766 	while (first <= last) {
1767 		if (first->suspect == NULL || !is_defect(first->suspect->t)) {
1768 			first++;
1769 			continue;
1770 		}
1771 		cr = first + 1;
1772 		while (cr <= last) {
1773 			if (is_defect(first->suspect->t)) {
1774 				if (rslcmp(first, cr) == 0) {
1775 					cr->suspect = NULL;
1776 					rslfree(cr);
1777 					(*nprobs)--;
1778 					(*nnonf)--;
1779 				}
1780 			}
1781 			/*
1782 			 * assume all defects are in order after our
1783 			 * sort and short circuit here with "else break" ?
1784 			 */
1785 			cr++;
1786 		}
1787 		first++;
1788 	}
1789 }
1790 
1791 /*
1792  * get_resources -- for a given suspect, determine what ASRU, FRU and
1793  *     RSRC nvlists should be advertised in the final suspect list.
1794  */
1795 void
1796 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot)
1797 {
1798 	struct node *asrudef, *frudef;
1799 	nvlist_t *asru, *fru;
1800 	nvlist_t *rsrc = NULL;
1801 	char *pathstr;
1802 
1803 	/*
1804 	 * First find any ASRU and/or FRU defined in the
1805 	 * initial fault tree.
1806 	 */
1807 	asrudef = eventprop_lookup(sp, L_ASRU);
1808 	frudef = eventprop_lookup(sp, L_FRU);
1809 
1810 	/*
1811 	 * Create FMRIs based on those definitions
1812 	 */
1813 	asru = node2fmri(asrudef);
1814 	fru = node2fmri(frudef);
1815 	pathstr = ipath2str(NULL, sp->ipp);
1816 
1817 	/*
1818 	 * Allow for platform translations of the FMRIs
1819 	 */
1820 	platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc,
1821 	    pathstr);
1822 
1823 	FREE(pathstr);
1824 	rsrcs->suspect = sp;
1825 	rsrcs->asru = asru;
1826 	rsrcs->fru = fru;
1827 	rsrcs->rsrc = rsrc;
1828 }
1829 
1830 /*
1831  * trim_suspects -- prior to publishing, we may need to remove some
1832  *    suspects from the list.  If we're auto-closing upsets, we don't
1833  *    want any of those in the published list.  If the ASRUs for multiple
1834  *    defects resolve to the same ASRU (driver) we only want to publish
1835  *    that as a single suspect.
1836  */
1837 static void
1838 trim_suspects(struct fme *fmep, boolean_t no_upsets, struct rsl **begin,
1839     struct rsl **end)
1840 {
1841 	struct event *ep;
1842 	struct rsl *rp;
1843 	int rpcnt;
1844 
1845 	/*
1846 	 * First save the suspects in the psuspects, then copy back
1847 	 * only the ones we wish to retain.  This resets nsuspects to
1848 	 * zero.
1849 	 */
1850 	rpcnt = fmep->nsuspects;
1851 	save_suspects(fmep);
1852 
1853 	/*
1854 	 * allocate an array of resource pointers for the suspects.
1855 	 * We may end up using less than the full allocation, but this
1856 	 * is a very short-lived array.  publish_suspects() will free
1857 	 * this array when it's done using it.
1858 	 */
1859 	rp = *begin = MALLOC(rpcnt * sizeof (struct rsl));
1860 	bzero(rp, rpcnt * sizeof (struct rsl));
1861 
1862 	/* first pass, remove any unwanted upsets and populate our array */
1863 	for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
1864 		if (no_upsets && is_upset(ep->t))
1865 			continue;
1866 		get_resources(ep, rp, fmep->cfgdata->cooked);
1867 		rp++;
1868 		fmep->nsuspects++;
1869 		if (!is_fault(ep->t))
1870 			fmep->nonfault++;
1871 	}
1872 
1873 	/* if all we had was unwanted upsets, we're done */
1874 	if (fmep->nsuspects == 0)
1875 		return;
1876 
1877 	*end = rp - 1;
1878 
1879 	/* sort the array */
1880 	qsort(*begin, fmep->nsuspects, sizeof (struct rsl), rslcmp);
1881 	rsluniq(*begin, *end, &fmep->nsuspects, &fmep->nonfault);
1882 }
1883 
1884 /*
1885  * addpayloadprop -- add a payload prop to a problem
1886  */
1887 static void
1888 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault)
1889 {
1890 	ASSERT(fault != NULL);
1891 	ASSERT(lhs != NULL);
1892 	ASSERT(rhs != NULL);
1893 
1894 	if (rhs->t == UINT64) {
1895 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v);
1896 
1897 		if (nvlist_add_uint64(fault, lhs, rhs->v) != 0)
1898 			out(O_DIE,
1899 			    "cannot add payloadprop \"%s\" to fault", lhs);
1900 	} else {
1901 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"",
1902 		    lhs, (char *)(uintptr_t)rhs->v);
1903 
1904 		if (nvlist_add_string(fault, lhs, (char *)(uintptr_t)rhs->v) !=
1905 		    0)
1906 			out(O_DIE,
1907 			    "cannot add payloadprop \"%s\" to fault", lhs);
1908 	}
1909 }
1910 
1911 static char *Istatbuf;
1912 static char *Istatbufptr;
1913 static int Istatsz;
1914 
1915 /*
1916  * istataddsize -- calculate size of istat and add it to Istatsz
1917  */
1918 /*ARGSUSED2*/
1919 static void
1920 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg)
1921 {
1922 	int val;
1923 
1924 	ASSERT(lhs != NULL);
1925 	ASSERT(rhs != NULL);
1926 
1927 	if ((val = stats_counter_value(rhs)) == 0)
1928 		return;	/* skip zero-valued stats */
1929 
1930 	/* count up the size of the stat name */
1931 	Istatsz += ipath2strlen(lhs->ename, lhs->ipath);
1932 	Istatsz++;	/* for the trailing NULL byte */
1933 
1934 	/* count up the size of the stat value */
1935 	Istatsz += snprintf(NULL, 0, "%d", val);
1936 	Istatsz++;	/* for the trailing NULL byte */
1937 }
1938 
1939 /*
1940  * istat2str -- serialize an istat, writing result to *Istatbufptr
1941  */
1942 /*ARGSUSED2*/
1943 static void
1944 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg)
1945 {
1946 	char *str;
1947 	int len;
1948 	int val;
1949 
1950 	ASSERT(lhs != NULL);
1951 	ASSERT(rhs != NULL);
1952 
1953 	if ((val = stats_counter_value(rhs)) == 0)
1954 		return;	/* skip zero-valued stats */
1955 
1956 	/* serialize the stat name */
1957 	str = ipath2str(lhs->ename, lhs->ipath);
1958 	len = strlen(str);
1959 
1960 	ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]);
1961 	(void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr);
1962 	Istatbufptr += len;
1963 	FREE(str);
1964 	*Istatbufptr++ = '\0';
1965 
1966 	/* serialize the stat value */
1967 	Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr,
1968 	    "%d", val);
1969 	*Istatbufptr++ = '\0';
1970 
1971 	ASSERT(Istatbufptr <= &Istatbuf[Istatsz]);
1972 }
1973 
1974 void
1975 istat_save()
1976 {
1977 	if (Istat_need_save == 0)
1978 		return;
1979 
1980 	/* figure out how big the serialzed info is */
1981 	Istatsz = 0;
1982 	lut_walk(Istats, (lut_cb)istataddsize, NULL);
1983 
1984 	if (Istatsz == 0) {
1985 		/* no stats to save */
1986 		fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
1987 		return;
1988 	}
1989 
1990 	/* create the serialized buffer */
1991 	Istatbufptr = Istatbuf = MALLOC(Istatsz);
1992 	lut_walk(Istats, (lut_cb)istat2str, NULL);
1993 
1994 	/* clear out current saved stats */
1995 	fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
1996 
1997 	/* write out the new version */
1998 	fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz);
1999 	FREE(Istatbuf);
2000 
2001 	Istat_need_save = 0;
2002 }
2003 
2004 int
2005 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2)
2006 {
2007 	if (ent1->ename != ent2->ename)
2008 		return (ent2->ename - ent1->ename);
2009 	if (ent1->ipath != ent2->ipath)
2010 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2011 
2012 	return (0);
2013 }
2014 
2015 /*
2016  * istat-verify -- verify the component associated with a stat still exists
2017  *
2018  * if the component no longer exists, this routine resets the stat and
2019  * returns 0.  if the component still exists, it returns 1.
2020  */
2021 static int
2022 istat_verify(struct node *snp, struct istat_entry *entp)
2023 {
2024 	struct stats *statp;
2025 	nvlist_t *fmri;
2026 
2027 	fmri = node2fmri(snp->u.event.epname);
2028 	if (platform_path_exists(fmri)) {
2029 		nvlist_free(fmri);
2030 		return (1);
2031 	}
2032 	nvlist_free(fmri);
2033 
2034 	/* component no longer in system.  zero out the associated stats */
2035 	if ((statp = (struct stats *)
2036 	    lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
2037 	    stats_counter_value(statp) == 0)
2038 		return (0);	/* stat is already reset */
2039 
2040 	Istat_need_save = 1;
2041 	stats_counter_reset(statp);
2042 	return (0);
2043 }
2044 
2045 static void
2046 istat_bump(struct node *snp, int n)
2047 {
2048 	struct stats *statp;
2049 	struct istat_entry ent;
2050 
2051 	ASSERT(snp != NULL);
2052 	ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
2053 	ASSERT(snp->u.event.epname != NULL);
2054 
2055 	/* class name should be hoisted into a single stable entry */
2056 	ASSERT(snp->u.event.ename->u.name.next == NULL);
2057 	ent.ename = snp->u.event.ename->u.name.s;
2058 	ent.ipath = ipath(snp->u.event.epname);
2059 
2060 	if (!istat_verify(snp, &ent)) {
2061 		/* component no longer exists in system, nothing to do */
2062 		return;
2063 	}
2064 
2065 	if ((statp = (struct stats *)
2066 	    lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
2067 		/* need to create the counter */
2068 		int cnt = 0;
2069 		struct node *np;
2070 		char *sname;
2071 		char *snamep;
2072 		struct istat_entry *newentp;
2073 
2074 		/* count up the size of the stat name */
2075 		np = snp->u.event.ename;
2076 		while (np != NULL) {
2077 			cnt += strlen(np->u.name.s);
2078 			cnt++;	/* for the '.' or '@' */
2079 			np = np->u.name.next;
2080 		}
2081 		np = snp->u.event.epname;
2082 		while (np != NULL) {
2083 			cnt += snprintf(NULL, 0, "%s%llu",
2084 			    np->u.name.s, np->u.name.child->u.ull);
2085 			cnt++;	/* for the '/' or trailing NULL byte */
2086 			np = np->u.name.next;
2087 		}
2088 
2089 		/* build the stat name */
2090 		snamep = sname = alloca(cnt);
2091 		np = snp->u.event.ename;
2092 		while (np != NULL) {
2093 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2094 			    "%s", np->u.name.s);
2095 			np = np->u.name.next;
2096 			if (np)
2097 				*snamep++ = '.';
2098 		}
2099 		*snamep++ = '@';
2100 		np = snp->u.event.epname;
2101 		while (np != NULL) {
2102 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2103 			    "%s%llu", np->u.name.s, np->u.name.child->u.ull);
2104 			np = np->u.name.next;
2105 			if (np)
2106 				*snamep++ = '/';
2107 		}
2108 		*snamep++ = '\0';
2109 
2110 		/* create the new stat & add it to our list */
2111 		newentp = MALLOC(sizeof (*newentp));
2112 		*newentp = ent;
2113 		statp = stats_new_counter(NULL, sname, 0);
2114 		Istats = lut_add(Istats, (void *)newentp, (void *)statp,
2115 		    (lut_cmp)istat_cmp);
2116 	}
2117 
2118 	/* if n is non-zero, set that value instead of bumping */
2119 	if (n) {
2120 		stats_counter_reset(statp);
2121 		stats_counter_add(statp, n);
2122 	} else
2123 		stats_counter_bump(statp);
2124 	Istat_need_save = 1;
2125 
2126 	ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
2127 	out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
2128 	    stats_counter_value(statp));
2129 }
2130 
/*
 * istat_destructor -- per-entry cleanup handed to lut_free() for Istats
 *
 * left is the istat_entry key and right is the stats counter stored
 * with it; the key is freed and the counter deleted.  arg is unused.
 */
/*ARGSUSED*/
static void
istat_destructor(void *left, void *right, void *arg)
{
	struct stats *counter = (struct stats *)right;
	struct istat_entry *key = (struct istat_entry *)left;

	FREE(key);
	stats_delete(counter);
}
2140 
2141 /*
2142  * Callback used in a walk of the Istats to reset matching stat counters.
2143  */
2144 static void
2145 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
2146     const struct ipath *ipp)
2147 {
2148 	char *path;
2149 
2150 	if (entp->ipath == ipp) {
2151 		path = ipath2str(entp->ename, ipp);
2152 		out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
2153 		FREE(path);
2154 		stats_counter_reset(statp);
2155 		Istat_need_save = 1;
2156 	}
2157 }
2158 
2159 void
2160 istat_fini(void)
2161 {
2162 	lut_free(Istats, istat_destructor, NULL);
2163 }
2164 
/*
 * Scratch state used while serializing the serd engine table:
 *	Serdsz		total number of bytes the serialized form needs
 *	Serdbuf		start of the buffer being filled
 *	Serdbufptr	next free byte within Serdbuf
 */
static int Serdsz;
static char *Serdbuf;
static char *Serdbufptr;
2168 
2169 /*
2170  * serdaddsize -- calculate size of serd and add it to Serdsz
2171  */
2172 /*ARGSUSED*/
2173 static void
2174 serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2175 {
2176 	ASSERT(lhs != NULL);
2177 
2178 	/* count up the size of the stat name */
2179 	Serdsz += ipath2strlen(lhs->ename, lhs->ipath);
2180 	Serdsz++;	/* for the trailing NULL byte */
2181 }
2182 
2183 /*
2184  * serd2str -- serialize a serd engine, writing result to *Serdbufptr
2185  */
2186 /*ARGSUSED*/
2187 static void
2188 serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2189 {
2190 	char *str;
2191 	int len;
2192 
2193 	ASSERT(lhs != NULL);
2194 
2195 	/* serialize the serd engine name */
2196 	str = ipath2str(lhs->ename, lhs->ipath);
2197 	len = strlen(str);
2198 
2199 	ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]);
2200 	(void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr);
2201 	Serdbufptr += len;
2202 	FREE(str);
2203 	*Serdbufptr++ = '\0';
2204 	ASSERT(Serdbufptr <= &Serdbuf[Serdsz]);
2205 }
2206 
2207 void
2208 serd_save()
2209 {
2210 	if (Serd_need_save == 0)
2211 		return;
2212 
2213 	/* figure out how big the serialzed info is */
2214 	Serdsz = 0;
2215 	lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL);
2216 
2217 	if (Serdsz == 0) {
2218 		/* no serd engines to save */
2219 		fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2220 		return;
2221 	}
2222 
2223 	/* create the serialized buffer */
2224 	Serdbufptr = Serdbuf = MALLOC(Serdsz);
2225 	lut_walk(SerdEngines, (lut_cb)serd2str, NULL);
2226 
2227 	/* clear out current saved stats */
2228 	fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2229 
2230 	/* write out the new version */
2231 	fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz);
2232 	FREE(Serdbuf);
2233 	Serd_need_save = 0;
2234 }
2235 
2236 int
2237 serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2)
2238 {
2239 	if (ent1->ename != ent2->ename)
2240 		return (ent2->ename - ent1->ename);
2241 	if (ent1->ipath != ent2->ipath)
2242 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2243 
2244 	return (0);
2245 }
2246 
2247 void
2248 fme_serd_load(fmd_hdl_t *hdl)
2249 {
2250 	int sz;
2251 	char *sbuf;
2252 	char *sepptr;
2253 	char *ptr;
2254 	struct serd_entry *newentp;
2255 	struct node *epname;
2256 	nvlist_t *fmri;
2257 	char *namestring;
2258 
2259 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0)
2260 		return;
2261 	sbuf = alloca(sz);
2262 	fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz);
2263 	ptr = sbuf;
2264 	while (ptr < &sbuf[sz]) {
2265 		sepptr = strchr(ptr, '@');
2266 		*sepptr = '\0';
2267 		namestring = ptr;
2268 		sepptr++;
2269 		ptr = sepptr;
2270 		ptr += strlen(ptr);
2271 		ptr++;	/* move past the '\0' separating paths */
2272 		epname = pathstring2epnamenp(sepptr);
2273 		fmri = node2fmri(epname);
2274 		if (platform_path_exists(fmri)) {
2275 			newentp = MALLOC(sizeof (*newentp));
2276 			newentp->hdl = hdl;
2277 			newentp->ipath = ipath(epname);
2278 			newentp->ename = stable(namestring);
2279 			SerdEngines = lut_add(SerdEngines, (void *)newentp,
2280 			    (void *)NULL, (lut_cmp)serd_cmp);
2281 		} else
2282 			Serd_need_save = 1;
2283 		nvlist_free(fmri);
2284 	}
2285 	/* save it back again in case some of the paths no longer exist */
2286 	serd_save();
2287 }
2288 
/*
 * serd_destructor -- per-entry cleanup handed to lut_free() for
 * SerdEngines; only the serd_entry key (left) is freed, right and arg
 * are ignored.
 */
/*ARGSUSED*/
static void
serd_destructor(void *left, void *right, void *arg)
{
	struct serd_entry *key = (struct serd_entry *)left;

	FREE(key);
}
2296 
2297 /*
2298  * Callback used in a walk of the SerdEngines to reset matching serd engines.
2299  */
2300 /*ARGSUSED*/
2301 static void
2302 serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp)
2303 {
2304 	char *path;
2305 
2306 	if (entp->ipath == ipp) {
2307 		path = ipath2str(entp->ename, ipp);
2308 		out(O_ALTFP, "serd_reset_cb: resetting %s", path);
2309 		fmd_serd_reset(entp->hdl, path);
2310 		FREE(path);
2311 		Serd_need_save = 1;
2312 	}
2313 }
2314 
2315 void
2316 serd_fini(void)
2317 {
2318 	lut_free(SerdEngines, serd_destructor, NULL);
2319 }
2320 
2321 static void
2322 publish_suspects(struct fme *fmep)
2323 {
2324 	struct rsl *srl = NULL;
2325 	struct rsl *erl;
2326 	struct rsl *rp;
2327 	nvlist_t *fault;
2328 	uint8_t cert;
2329 	uint_t *frs;
2330 	uint_t fravg, frsum, fr;
2331 	uint_t messval;
2332 	struct node *snp;
2333 	int frcnt, fridx;
2334 	boolean_t no_upsets = B_FALSE;
2335 	boolean_t allfaulty = B_TRUE;
2336 
2337 	stats_counter_bump(fmep->diags);
2338 
2339 	/*
2340 	 * If we're auto-closing upsets, we don't want to include them
2341 	 * in any produced suspect lists or certainty accounting.
2342 	 */
2343 	if (Autoclose != NULL)
2344 		if (strcmp(Autoclose, "true") == 0 ||
2345 		    strcmp(Autoclose, "all") == 0 ||
2346 		    strcmp(Autoclose, "upsets") == 0)
2347 			no_upsets = B_TRUE;
2348 
2349 	trim_suspects(fmep, no_upsets, &srl, &erl);
2350 
2351 	/*
2352 	 * If the resulting suspect list has no members, we're
2353 	 * done.  Returning here will simply close the case.
2354 	 */
2355 	if (fmep->nsuspects == 0) {
2356 		out(O_ALTFP,
2357 		    "[FME%d, case %s (all suspects are upsets)]",
2358 		    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
2359 		FREE(srl);
2360 		restore_suspects(fmep);
2361 		return;
2362 	}
2363 
2364 	/*
2365 	 * If the suspect list is all faults, then for a given fault,
2366 	 * say X of N, X's certainty is computed via:
2367 	 *
2368 	 * fitrate(X) / (fitrate(1) + ... + fitrate(N)) * 100
2369 	 *
2370 	 * If none of the suspects are faults, and there are N suspects,
2371 	 * the certainty of a given suspect is 100/N.
2372 	 *
2373 	 * If there are are a mixture of faults and other problems in
2374 	 * the suspect list, we take an average of the faults'
2375 	 * FITrates and treat this average as the FITrate for any
2376 	 * non-faults.  The fitrate of any given suspect is then
2377 	 * computed per the first formula above.
2378 	 */
2379 	if (fmep->nonfault == fmep->nsuspects) {
2380 		/* NO faults in the suspect list */
2381 		cert = percentof(1, fmep->nsuspects);
2382 	} else {
2383 		/* sum the fitrates */
2384 		frs = alloca(fmep->nsuspects * sizeof (uint_t));
2385 		fridx = frcnt = frsum = 0;
2386 
2387 		for (rp = srl; rp <= erl; rp++) {
2388 			struct node *n;
2389 
2390 			if (rp->suspect == NULL)
2391 				continue;
2392 			if (!is_fault(rp->suspect->t)) {
2393 				frs[fridx++] = 0;
2394 				continue;
2395 			}
2396 			n = eventprop_lookup(rp->suspect, L_FITrate);
2397 			if (node2uint(n, &fr) != 0) {
2398 				out(O_DEBUG|O_NONL, "event ");
2399 				ipath_print(O_DEBUG|O_NONL,
2400 				    rp->suspect->enode->u.event.ename->u.name.s,
2401 				    rp->suspect->ipp);
2402 				out(O_DEBUG, " has no FITrate (using 1)");
2403 				fr = 1;
2404 			} else if (fr == 0) {
2405 				out(O_DEBUG|O_NONL, "event ");
2406 				ipath_print(O_DEBUG|O_NONL,
2407 				    rp->suspect->enode->u.event.ename->u.name.s,
2408 				    rp->suspect->ipp);
2409 				out(O_DEBUG, " has zero FITrate (using 1)");
2410 				fr = 1;
2411 			}
2412 
2413 			frs[fridx++] = fr;
2414 			frsum += fr;
2415 			frcnt++;
2416 		}
2417 		fravg = avg(frsum, frcnt);
2418 		for (fridx = 0; fridx < fmep->nsuspects; fridx++)
2419 			if (frs[fridx] == 0) {
2420 				frs[fridx] = fravg;
2421 				frsum += fravg;
2422 			}
2423 	}
2424 
2425 	/* Add them in reverse order of our sort, as fmd reverses order */
2426 	for (rp = erl; rp >= srl; rp--) {
2427 		if (rp->suspect == NULL)
2428 			continue;
2429 		if (!is_fault(rp->suspect->t))
2430 			allfaulty = B_FALSE;
2431 		if (fmep->nonfault != fmep->nsuspects)
2432 			cert = percentof(frs[--fridx], frsum);
2433 		fault = fmd_nvl_create_fault(fmep->hdl,
2434 		    rp->suspect->enode->u.event.ename->u.name.s,
2435 		    cert,
2436 		    rp->asru,
2437 		    rp->fru,
2438 		    rp->rsrc);
2439 		if (fault == NULL)
2440 			out(O_DIE, "fault creation failed");
2441 		/* if "message" property exists, add it to the fault */
2442 		if (node2uint(eventprop_lookup(rp->suspect, L_message),
2443 		    &messval) == 0) {
2444 
2445 			out(O_ALTFP,
2446 			    "[FME%d, %s adds message=%d to suspect list]",
2447 			    fmep->id,
2448 			    rp->suspect->enode->u.event.ename->u.name.s,
2449 			    messval);
2450 			if (nvlist_add_boolean_value(fault,
2451 			    FM_SUSPECT_MESSAGE,
2452 			    (messval) ? B_TRUE : B_FALSE) != 0) {
2453 				out(O_DIE, "cannot add no-message to fault");
2454 			}
2455 		}
2456 		/* add any payload properties */
2457 		lut_walk(rp->suspect->payloadprops,
2458 		    (lut_cb)addpayloadprop, (void *)fault);
2459 		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);
2460 		rslfree(rp);
2461 
2462 		/*
2463 		 * If "action" property exists, evaluate it;  this must be done
2464 		 * before the dupclose check below since some actions may
2465 		 * modify the asru to be used in fmd_nvl_fmri_faulty.  This
2466 		 * needs to be restructured if any new actions are introduced
2467 		 * that have effects that we do not want to be visible if
2468 		 * we decide not to publish in the dupclose check below.
2469 		 */
2470 		if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
2471 			struct evalue evalue;
2472 
2473 			out(O_ALTFP|O_NONL,
2474 			    "[FME%d, %s action ", fmep->id,
2475 			    rp->suspect->enode->u.event.ename->u.name.s);
2476 			ptree_name_iter(O_ALTFP|O_NONL, snp);
2477 			out(O_ALTFP, "]");
2478 			Action_nvl = fault;
2479 			(void) eval_expr(snp, NULL, NULL, NULL, NULL,
2480 			    NULL, 0, &evalue);
2481 		}
2482 
2483 		/*
2484 		 * if "dupclose" tunable is set, check if the asru is
2485 		 * already marked as "faulty".
2486 		 */
2487 		if (Dupclose && allfaulty) {
2488 			nvlist_t *asru;
2489 
2490 			out(O_ALTFP|O_VERB, "FMD%d dupclose check ", fmep->id);
2491 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
2492 			out(O_ALTFP|O_VERB|O_NONL, " ");
2493 			if (nvlist_lookup_nvlist(fault,
2494 			    FM_FAULT_ASRU, &asru) != 0) {
2495 				out(O_ALTFP|O_VERB, "NULL asru");
2496 				allfaulty = B_FALSE;
2497 			} else if (fmd_nvl_fmri_faulty(fmep->hdl, asru)) {
2498 				out(O_ALTFP|O_VERB, "faulty");
2499 			} else {
2500 				out(O_ALTFP|O_VERB, "not faulty");
2501 				allfaulty = B_FALSE;
2502 			}
2503 		}
2504 
2505 	}
2506 
2507 	/*
2508 	 * Close the case if all asrus are already known to be faulty and if
2509 	 * Dupclose is enabled.  Otherwise we are going to publish so take
2510 	 * any pre-publication actions.
2511 	 */
2512 	if (Dupclose && allfaulty) {
2513 		out(O_ALTFP, "[dupclose FME%d, case %s]", fmep->id,
2514 		    fmd_case_uuid(fmep->hdl, fmep->fmcase));
2515 		fmd_case_close(fmep->hdl, fmep->fmcase);
2516 	} else {
2517 		for (rp = erl; rp >= srl; rp--) {
2518 			struct event *suspect = rp->suspect;
2519 
2520 			if (suspect == NULL)
2521 				continue;
2522 
2523 			/* if "count" exists, increment the appropriate stat */
2524 			if ((snp = eventprop_lookup(suspect,
2525 			    L_count)) != NULL) {
2526 				out(O_ALTFP|O_NONL,
2527 				    "[FME%d, %s count ", fmep->id,
2528 				    suspect->enode->u.event.ename->u.name.s);
2529 				ptree_name_iter(O_ALTFP|O_NONL, snp);
2530 				out(O_ALTFP, "]");
2531 				istat_bump(snp, 0);
2532 
2533 			}
2534 		}
2535 		istat_save();	/* write out any istat changes */
2536 
2537 		out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
2538 		    fmd_case_uuid(fmep->hdl, fmep->fmcase));
2539 		fmd_case_solve(fmep->hdl, fmep->fmcase);
2540 	}
2541 
2542 	/*
2543 	 * revert to the original suspect list
2544 	 */
2545 	FREE(srl);
2546 	restore_suspects(fmep);
2547 }
2548 
2549 static void
2550 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase)
2551 {
2552 	struct case_list *newcase;
2553 	nvlist_t *defect;
2554 
2555 	out(O_ALTFP,
2556 	    "[undiagnosable ereport received, "
2557 	    "creating and closing a new case (%s)]",
2558 	    Undiag_reason ? Undiag_reason : "reason not provided");
2559 
2560 	newcase = MALLOC(sizeof (struct case_list));
2561 	newcase->next = NULL;
2562 	newcase->fmcase = fmcase;
2563 	if (Undiagablecaselist != NULL)
2564 		newcase->next = Undiagablecaselist;
2565 	Undiagablecaselist = newcase;
2566 
2567 	if (ffep != NULL)
2568 		fmd_case_add_ereport(hdl, newcase->fmcase, ffep);
2569 
2570 	defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
2571 	    NULL, NULL, NULL);
2572 	if (Undiag_reason != NULL)
2573 		(void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason);
2574 	fmd_case_add_suspect(hdl, newcase->fmcase, defect);
2575 
2576 	fmd_case_solve(hdl, newcase->fmcase);
2577 	fmd_case_close(hdl, newcase->fmcase);
2578 }
2579 
2580 static void
2581 fme_undiagnosable(struct fme *f)
2582 {
2583 	nvlist_t *defect;
2584 
2585 	out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
2586 	    f->id, fmd_case_uuid(f->hdl, f->fmcase),
2587 	    Undiag_reason ? Undiag_reason : "undiagnosable");
2588 
2589 	defect = fmd_nvl_create_fault(f->hdl, UNDIAGNOSABLE_DEFECT, 100,
2590 	    NULL, NULL, NULL);
2591 	if (Undiag_reason != NULL)
2592 		(void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason);
2593 	fmd_case_add_suspect(f->hdl, f->fmcase, defect);
2594 	fmd_case_solve(f->hdl, f->fmcase);
2595 	fmd_case_close(f->hdl, f->fmcase);
2596 }
2597 
2598 /*
2599  * fme_close_case
2600  *
2601  *	Find the requested case amongst our fmes and close it.  Free up
2602  *	the related fme.
2603  */
2604 void
2605 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase)
2606 {
2607 	struct case_list *ucasep, *prevcasep = NULL;
2608 	struct fme *prev = NULL;
2609 	struct fme *fmep;
2610 
2611 	for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) {
2612 		if (fmcase != ucasep->fmcase) {
2613 			prevcasep = ucasep;
2614 			continue;
2615 		}
2616 
2617 		if (prevcasep == NULL)
2618 			Undiagablecaselist = Undiagablecaselist->next;
2619 		else
2620 			prevcasep->next = ucasep->next;
2621 
2622 		FREE(ucasep);
2623 		return;
2624 	}
2625 
2626 	for (fmep = FMElist; fmep; fmep = fmep->next) {
2627 		if (fmep->hdl == hdl && fmep->fmcase == fmcase)
2628 			break;
2629 		prev = fmep;
2630 	}
2631 
2632 	if (fmep == NULL) {
2633 		out(O_WARN, "Eft asked to close unrecognized case [%s].",
2634 		    fmd_case_uuid(hdl, fmcase));
2635 		return;
2636 	}
2637 
2638 	if (EFMElist == fmep)
2639 		EFMElist = prev;
2640 
2641 	if (prev == NULL)
2642 		FMElist = FMElist->next;
2643 	else
2644 		prev->next = fmep->next;
2645 
2646 	fmep->next = NULL;
2647 
2648 	/* Get rid of any timer this fme has set */
2649 	if (fmep->wull != 0)
2650 		fmd_timer_remove(fmep->hdl, fmep->timer);
2651 
2652 	if (ClosedFMEs == NULL) {
2653 		ClosedFMEs = fmep;
2654 	} else {
2655 		fmep->next = ClosedFMEs;
2656 		ClosedFMEs = fmep;
2657 	}
2658 
2659 	Open_fme_count--;
2660 
2661 	/* See if we can close the overflow FME */
2662 	if (Open_fme_count <= Max_fme) {
2663 		for (fmep = FMElist; fmep; fmep = fmep->next) {
2664 			if (fmep->overflow && !(fmd_case_closed(fmep->hdl,
2665 			    fmep->fmcase)))
2666 				break;
2667 		}
2668 
2669 		if (fmep != NULL)
2670 			fmd_case_close(fmep->hdl, fmep->fmcase);
2671 	}
2672 }
2673 
2674 /*
2675  * fme_set_timer()
2676  *	If the time we need to wait for the given FME is less than the
2677  *	current timer, kick that old timer out and establish a new one.
2678  */
2679 static int
2680 fme_set_timer(struct fme *fmep, unsigned long long wull)
2681 {
2682 	out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait ");
2683 	ptree_timeval(O_ALTFP|O_VERB, &wull);
2684 
2685 	if (wull <= fmep->pull) {
2686 		out(O_ALTFP|O_VERB|O_NONL, "already have waited at least ");
2687 		ptree_timeval(O_ALTFP|O_VERB, &fmep->pull);
2688 		out(O_ALTFP|O_VERB, NULL);
2689 		/* we've waited at least wull already, don't need timer */
2690 		return (0);
2691 	}
2692 
2693 	out(O_ALTFP|O_VERB|O_NONL, " currently ");
2694 	if (fmep->wull != 0) {
2695 		out(O_ALTFP|O_VERB|O_NONL, "waiting ");
2696 		ptree_timeval(O_ALTFP|O_VERB, &fmep->wull);
2697 		out(O_ALTFP|O_VERB, NULL);
2698 	} else {
2699 		out(O_ALTFP|O_VERB|O_NONL, "not waiting");
2700 		out(O_ALTFP|O_VERB, NULL);
2701 	}
2702 
2703 	if (fmep->wull != 0)
2704 		if (wull >= fmep->wull)
2705 			/* New timer would fire later than established timer */
2706 			return (0);
2707 
2708 	if (fmep->wull != 0) {
2709 		fmd_timer_remove(fmep->hdl, fmep->timer);
2710 	}
2711 
2712 	fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep,
2713 	    fmep->e0r, wull);
2714 	out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer);
2715 	fmep->wull = wull;
2716 	return (1);
2717 }
2718 
2719 void
2720 fme_timer_fired(struct fme *fmep, id_t tid)
2721 {
2722 	struct fme *ffmep = NULL;
2723 
2724 	for (ffmep = FMElist; ffmep; ffmep = ffmep->next)
2725 		if (ffmep == fmep)
2726 			break;
2727 
2728 	if (ffmep == NULL) {
2729 		out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.",
2730 		    (void *)fmep);
2731 		return;
2732 	}
2733 
2734 	out(O_ALTFP|O_VERB, "Timer fired %lx", tid);
2735 	fmep->pull = fmep->wull;
2736 	fmep->wull = 0;
2737 	fmd_buf_write(fmep->hdl, fmep->fmcase,
2738 	    WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull));
2739 
2740 	fme_reload_cfgdata(fmep);
2741 
2742 	fme_eval(fmep, fmep->e0r);
2743 }
2744 
2745 /*
2746  * Preserve the fme's suspect list in its psuspects list, NULLing the
2747  * suspects list in the meantime.
2748  */
2749 static void
2750 save_suspects(struct fme *fmep)
2751 {
2752 	struct event *ep;
2753 	struct event *nextep;
2754 
2755 	/* zero out the previous suspect list */
2756 	for (ep = fmep->psuspects; ep; ep = nextep) {
2757 		nextep = ep->psuspects;
2758 		ep->psuspects = NULL;
2759 	}
2760 	fmep->psuspects = NULL;
2761 
2762 	/* zero out the suspect list, copying it to previous suspect list */
2763 	fmep->psuspects = fmep->suspects;
2764 	for (ep = fmep->suspects; ep; ep = nextep) {
2765 		nextep = ep->suspects;
2766 		ep->psuspects = ep->suspects;
2767 		ep->suspects = NULL;
2768 		ep->is_suspect = 0;
2769 	}
2770 	fmep->suspects = NULL;
2771 	fmep->nsuspects = 0;
2772 	fmep->nonfault = 0;
2773 }
2774 
2775 /*
2776  * Retrieve the fme's suspect list from its psuspects list.
2777  */
2778 static void
2779 restore_suspects(struct fme *fmep)
2780 {
2781 	struct event *ep;
2782 	struct event *nextep;
2783 
2784 	fmep->nsuspects = fmep->nonfault = 0;
2785 	fmep->suspects = fmep->psuspects;
2786 	for (ep = fmep->psuspects; ep; ep = nextep) {
2787 		fmep->nsuspects++;
2788 		if (!is_fault(ep->t))
2789 			fmep->nonfault++;
2790 		nextep = ep->psuspects;
2791 		ep->suspects = ep->psuspects;
2792 	}
2793 }
2794 
2795 /*
2796  * this is what we use to call the Emrys prototype code instead of main()
2797  */
2798 static void
2799 fme_eval(struct fme *fmep, fmd_event_t *ffep)
2800 {
2801 	struct event *ep;
2802 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
2803 
2804 	save_suspects(fmep);
2805 
2806 	out(O_ALTFP, "Evaluate FME %d", fmep->id);
2807 	indent_set("  ");
2808 
2809 	lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
2810 	fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
2811 
2812 	out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
2813 	    fme_state2str(fmep->state));
2814 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
2815 		out(O_ALTFP|O_NONL, " ");
2816 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
2817 	}
2818 	out(O_ALTFP, NULL);
2819 
2820 	switch (fmep->state) {
2821 	case FME_CREDIBLE:
2822 		print_suspects(SLNEW, fmep);
2823 		(void) upsets_eval(fmep, ffep);
2824 
2825 		/*
2826 		 * we may have already posted suspects in upsets_eval() which
2827 		 * can recurse into fme_eval() again. If so then just return.
2828 		 */
2829 		if (fmep->posted_suspects)
2830 			return;
2831 
2832 		publish_suspects(fmep);
2833 		fmep->posted_suspects = 1;
2834 		fmd_buf_write(fmep->hdl, fmep->fmcase,
2835 		    WOBUF_POSTD,
2836 		    (void *)&fmep->posted_suspects,
2837 		    sizeof (fmep->posted_suspects));
2838 
2839 		/*
2840 		 * Now the suspects have been posted, we can clear up
2841 		 * the instance tree as we won't be looking at it again.
2842 		 * Also cancel the timer as the case is now solved.
2843 		 */
2844 		if (fmep->wull != 0) {
2845 			fmd_timer_remove(fmep->hdl, fmep->timer);
2846 			fmep->wull = 0;
2847 		}
2848 		break;
2849 
2850 	case FME_WAIT:
2851 		ASSERT(my_delay > fmep->ull);
2852 		(void) fme_set_timer(fmep, my_delay);
2853 		print_suspects(SLWAIT, fmep);
2854 		itree_prune(fmep->eventtree);
2855 		config_free(fmep->cfgdata);
2856 		fmep->cfgdata = NULL;
2857 		return;
2858 
2859 	case FME_DISPROVED:
2860 		print_suspects(SLDISPROVED, fmep);
2861 		Undiag_reason = UD_UNSOLVD;
2862 		fme_undiagnosable(fmep);
2863 		break;
2864 	}
2865 
2866 	if (fmep->posted_suspects == 1 && Autoclose != NULL) {
2867 		int doclose = 0;
2868 
2869 		if (strcmp(Autoclose, "true") == 0 ||
2870 		    strcmp(Autoclose, "all") == 0)
2871 			doclose = 1;
2872 
2873 		if (strcmp(Autoclose, "upsets") == 0) {
2874 			doclose = 1;
2875 			for (ep = fmep->suspects; ep; ep = ep->suspects) {
2876 				if (ep->t != N_UPSET) {
2877 					doclose = 0;
2878 					break;
2879 				}
2880 			}
2881 		}
2882 
2883 		if (doclose) {
2884 			out(O_ALTFP, "[closing FME%d, case %s (autoclose)]",
2885 			    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
2886 			fmd_case_close(fmep->hdl, fmep->fmcase);
2887 		}
2888 	}
2889 	itree_free(fmep->eventtree);
2890 	fmep->eventtree = NULL;
2891 	config_free(fmep->cfgdata);
2892 	fmep->cfgdata = NULL;
2893 	destroy_fme_bufs(fmep);
2894 }
2895 
/*
 * Forward declarations for the mutually dependent inference routines
 * defined further down in this file.
 */
static void indent(void);
static int triggered(struct fme *fmep, struct event *ep, int mark);
static enum fme_state effects_test(struct fme *fmep,
    struct event *fault_event,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state requirements_test(struct fme *fmep,
    struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state causes_test(struct fme *fmep,
    struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
2906 static int
2907 checkconstraints(struct fme *fmep, struct arrow *arrowp)
2908 {
2909 	struct constraintlist *ctp;
2910 	struct evalue value;
2911 	char *sep = "";
2912 
2913 	if (arrowp->forever_false) {
2914 		indent();
2915 		out(O_ALTFP|O_VERB|O_NONL, "  Forever false constraint: ");
2916 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
2917 			out(O_ALTFP|O_VERB|O_NONL, sep);
2918 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
2919 			sep = ", ";
2920 		}
2921 		out(O_ALTFP|O_VERB, NULL);
2922 		return (0);
2923 	}
2924 	if (arrowp->forever_true) {
2925 		indent();
2926 		out(O_ALTFP|O_VERB|O_NONL, "  Forever true constraint: ");
2927 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
2928 			out(O_ALTFP|O_VERB|O_NONL, sep);
2929 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
2930 			sep = ", ";
2931 		}
2932 		out(O_ALTFP|O_VERB, NULL);
2933 		return (1);
2934 	}
2935 
2936 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
2937 		if (eval_expr(ctp->cnode, NULL, NULL,
2938 		    &fmep->globals, fmep->cfgdata->cooked,
2939 		    arrowp, 0, &value)) {
2940 			/* evaluation successful */
2941 			if (value.t == UNDEFINED || value.v == 0) {
2942 				/* known false */
2943 				arrowp->forever_false = 1;
2944 				indent();
2945 				out(O_ALTFP|O_VERB|O_NONL,
2946 				    "  False constraint: ");
2947 				ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
2948 				out(O_ALTFP|O_VERB, NULL);
2949 				return (0);
2950 			}
2951 		} else {
2952 			/* evaluation unsuccessful -- unknown value */
2953 			indent();
2954 			out(O_ALTFP|O_VERB|O_NONL,
2955 			    "  Deferred constraint: ");
2956 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
2957 			out(O_ALTFP|O_VERB, NULL);
2958 			return (1);
2959 		}
2960 	}
2961 	/* known true */
2962 	arrowp->forever_true = 1;
2963 	indent();
2964 	out(O_ALTFP|O_VERB|O_NONL, "  True constraint: ");
2965 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
2966 		out(O_ALTFP|O_VERB|O_NONL, sep);
2967 		ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
2968 		sep = ", ";
2969 	}
2970 	out(O_ALTFP|O_VERB, NULL);
2971 	return (1);
2972 }
2973 
2974 static int
2975 triggered(struct fme *fmep, struct event *ep, int mark)
2976 {
2977 	struct bubble *bp;
2978 	struct arrowlist *ap;
2979 	int count = 0;
2980 
2981 	stats_counter_bump(fmep->Tcallcount);
2982 	for (bp = itree_next_bubble(ep, NULL); bp;
2983 	    bp = itree_next_bubble(ep, bp)) {
2984 		if (bp->t != B_TO)
2985 			continue;
2986 		for (ap = itree_next_arrow(bp, NULL); ap;
2987 		    ap = itree_next_arrow(bp, ap)) {
2988 			/* check count of marks against K in the bubble */
2989 			if ((ap->arrowp->mark & mark) &&
2990 			    ++count >= bp->nork)
2991 				return (1);
2992 		}
2993 	}
2994 	return (0);
2995 }
2996 
2997 static int
2998 mark_arrows(struct fme *fmep, struct event *ep, int mark,
2999     unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
3000 {
3001 	struct bubble *bp;
3002 	struct arrowlist *ap;
3003 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3004 	unsigned long long my_delay;
3005 	enum fme_state result;
3006 	int retval = 0;
3007 
3008 	for (bp = itree_next_bubble(ep, NULL); bp;
3009 	    bp = itree_next_bubble(ep, bp)) {
3010 		if (bp->t != B_FROM)
3011 			continue;
3012 		stats_counter_bump(fmep->Marrowcount);
3013 		for (ap = itree_next_arrow(bp, NULL); ap;
3014 		    ap = itree_next_arrow(bp, ap)) {
3015 			struct event *ep2 = ap->arrowp->head->myevent;
3016 			/*
3017 			 * if we're clearing marks, we can avoid doing
3018 			 * all that work evaluating constraints.
3019 			 */
3020 			if (mark == 0) {
3021 				if (ap->arrowp->arrow_marked == 0)
3022 					continue;
3023 				ap->arrowp->arrow_marked = 0;
3024 				ap->arrowp->mark &= ~EFFECTS_COUNTER;
3025 				if (keep && (ep2->cached_state &
3026 				    (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
3027 					ep2->keep_in_tree = 1;
3028 				ep2->cached_state &=
3029 				    ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
3030 				(void) mark_arrows(fmep, ep2, mark, 0, NULL,
3031 				    keep);
3032 				continue;
3033 			}
3034 			ap->arrowp->arrow_marked = 1;
3035 			if (ep2->cached_state & REQMNTS_DISPROVED) {
3036 				indent();
3037 				out(O_ALTFP|O_VERB|O_NONL,
3038 				    "  ALREADY DISPROVED ");
3039 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3040 				out(O_ALTFP|O_VERB, NULL);
3041 				continue;
3042 			}
3043 			if (ep2->cached_state & WAIT_EFFECT) {
3044 				indent();
3045 				out(O_ALTFP|O_VERB|O_NONL,
3046 				    "  ALREADY EFFECTS WAIT ");
3047 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3048 				out(O_ALTFP|O_VERB, NULL);
3049 				continue;
3050 			}
3051 			if (ep2->cached_state & CREDIBLE_EFFECT) {
3052 				indent();
3053 				out(O_ALTFP|O_VERB|O_NONL,
3054 				    "  ALREADY EFFECTS CREDIBLE ");
3055 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3056 				out(O_ALTFP|O_VERB, NULL);
3057 				continue;
3058 			}
3059 			if ((ep2->cached_state & PARENT_WAIT) &&
3060 			    (mark & PARENT_WAIT)) {
3061 				indent();
3062 				out(O_ALTFP|O_VERB|O_NONL,
3063 				    "  ALREADY PARENT EFFECTS WAIT ");
3064 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3065 				out(O_ALTFP|O_VERB, NULL);
3066 				continue;
3067 			}
3068 			platform_set_payloadnvp(ep2->nvp);
3069 			if (checkconstraints(fmep, ap->arrowp) == 0) {
3070 				platform_set_payloadnvp(NULL);
3071 				indent();
3072 				out(O_ALTFP|O_VERB|O_NONL,
3073 				    "  CONSTRAINTS FAIL ");
3074 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3075 				out(O_ALTFP|O_VERB, NULL);
3076 				continue;
3077 			}
3078 			platform_set_payloadnvp(NULL);
3079 			ap->arrowp->mark |= EFFECTS_COUNTER;
3080 			if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
3081 				indent();
3082 				out(O_ALTFP|O_VERB|O_NONL,
3083 				    "  K-COUNT NOT YET MET ");
3084 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3085 				out(O_ALTFP|O_VERB, NULL);
3086 				continue;
3087 			}
3088 			ep2->cached_state &= ~PARENT_WAIT;
3089 			/*
3090 			 * if we've reached an ereport and no propagation time
3091 			 * is specified, use the Hesitate value
3092 			 */
3093 			if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
3094 			    ap->arrowp->maxdelay == 0ULL) {
3095 				out(O_ALTFP|O_VERB|O_NONL, "  default wait ");
3096 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3097 				out(O_ALTFP|O_VERB, NULL);
3098 				result = requirements_test(fmep, ep2, Hesitate,
3099 				    &my_delay);
3100 			} else {
3101 				result = requirements_test(fmep, ep2,
3102 				    at_latest_by + ap->arrowp->maxdelay,
3103 				    &my_delay);
3104 			}
3105 			if (result == FME_WAIT) {
3106 				retval = WAIT_EFFECT;
3107 				if (overall_delay > my_delay)
3108 					overall_delay = my_delay;
3109 				ep2->cached_state |= WAIT_EFFECT;
3110 				indent();
3111 				out(O_ALTFP|O_VERB|O_NONL, "  EFFECTS WAIT ");
3112 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3113 				out(O_ALTFP|O_VERB, NULL);
3114 				indent_push("  E");
3115 				if (mark_arrows(fmep, ep2, PARENT_WAIT,
3116 				    at_latest_by, &my_delay, 0) ==
3117 				    WAIT_EFFECT) {
3118 					retval = WAIT_EFFECT;
3119 					if (overall_delay > my_delay)
3120 						overall_delay = my_delay;
3121 				}
3122 				indent_pop();
3123 			} else if (result == FME_DISPROVED) {
3124 				indent();
3125 				out(O_ALTFP|O_VERB|O_NONL,
3126 				    "  EFFECTS DISPROVED ");
3127 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3128 				out(O_ALTFP|O_VERB, NULL);
3129 			} else {
3130 				ep2->cached_state |= mark;
3131 				indent();
3132 				if (mark == CREDIBLE_EFFECT)
3133 					out(O_ALTFP|O_VERB|O_NONL,
3134 					    "  EFFECTS CREDIBLE ");
3135 				else
3136 					out(O_ALTFP|O_VERB|O_NONL,
3137 					    "  PARENT EFFECTS WAIT ");
3138 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3139 				out(O_ALTFP|O_VERB, NULL);
3140 				indent_push("  E");
3141 				if (mark_arrows(fmep, ep2, mark, at_latest_by,
3142 				    &my_delay, 0) == WAIT_EFFECT) {
3143 					retval = WAIT_EFFECT;
3144 					if (overall_delay > my_delay)
3145 						overall_delay = my_delay;
3146 				}
3147 				indent_pop();
3148 			}
3149 		}
3150 	}
3151 	if (retval == WAIT_EFFECT)
3152 		*pdelay = overall_delay;
3153 	return (retval);
3154 }
3155 
3156 static enum fme_state
3157 effects_test(struct fme *fmep, struct event *fault_event,
3158     unsigned long long at_latest_by, unsigned long long *pdelay)
3159 {
3160 	struct event *error_event;
3161 	enum fme_state return_value = FME_CREDIBLE;
3162 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3163 	unsigned long long my_delay;
3164 
3165 	stats_counter_bump(fmep->Ecallcount);
3166 	indent_push("  E");
3167 	indent();
3168 	out(O_ALTFP|O_VERB|O_NONL, "->");
3169 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3170 	out(O_ALTFP|O_VERB, NULL);
3171 
3172 	if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
3173 	    &my_delay, 0) == WAIT_EFFECT) {
3174 		return_value = FME_WAIT;
3175 		if (overall_delay > my_delay)
3176 			overall_delay = my_delay;
3177 	}
3178 	for (error_event = fmep->observations;
3179 	    error_event; error_event = error_event->observations) {
3180 		indent();
3181 		out(O_ALTFP|O_VERB|O_NONL, " ");
3182 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
3183 		if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
3184 			if (error_event->cached_state &
3185 			    (PARENT_WAIT|WAIT_EFFECT)) {
3186 				out(O_ALTFP|O_VERB, " NOT YET triggered");
3187 				continue;
3188 			}
3189 			return_value = FME_DISPROVED;
3190 			out(O_ALTFP|O_VERB, " NOT triggered");
3191 			break;
3192 		} else {
3193 			out(O_ALTFP|O_VERB, " triggered");
3194 		}
3195 	}
3196 	if (return_value == FME_DISPROVED) {
3197 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
3198 	} else {
3199 		fault_event->keep_in_tree = 1;
3200 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
3201 	}
3202 
3203 	indent();
3204 	out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
3205 	    fme_state2str(return_value));
3206 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3207 	out(O_ALTFP|O_VERB, NULL);
3208 	indent_pop();
3209 	if (return_value == FME_WAIT)
3210 		*pdelay = overall_delay;
3211 	return (return_value);
3212 }
3213 
3214 static enum fme_state
3215 requirements_test(struct fme *fmep, struct event *ep,
3216     unsigned long long at_latest_by, unsigned long long *pdelay)
3217 {
3218 	int waiting_events;
3219 	int credible_events;
3220 	int deferred_events;
3221 	enum fme_state return_value = FME_CREDIBLE;
3222 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3223 	unsigned long long arrow_delay;
3224 	unsigned long long my_delay;
3225 	struct event *ep2;
3226 	struct bubble *bp;
3227 	struct arrowlist *ap;
3228 
3229 	if (ep->cached_state & REQMNTS_CREDIBLE) {
3230 		indent();
3231 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY CREDIBLE ");
3232 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3233 		out(O_ALTFP|O_VERB, NULL);
3234 		return (FME_CREDIBLE);
3235 	}
3236 	if (ep->cached_state & REQMNTS_DISPROVED) {
3237 		indent();
3238 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY DISPROVED ");
3239 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3240 		out(O_ALTFP|O_VERB, NULL);
3241 		return (FME_DISPROVED);
3242 	}
3243 	if (ep->cached_state & REQMNTS_WAIT) {
3244 		indent();
3245 		*pdelay = ep->cached_delay;
3246 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY WAIT ");
3247 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3248 		out(O_ALTFP|O_VERB|O_NONL, ", wait for: ");
3249 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3250 		out(O_ALTFP|O_VERB, NULL);
3251 		return (FME_WAIT);
3252 	}
3253 	stats_counter_bump(fmep->Rcallcount);
3254 	indent_push("  R");
3255 	indent();
3256 	out(O_ALTFP|O_VERB|O_NONL, "->");
3257 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3258 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
3259 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3260 	out(O_ALTFP|O_VERB, NULL);
3261 
3262 	if (ep->t == N_EREPORT) {
3263 		if (ep->count == 0) {
3264 			if (fmep->pull >= at_latest_by) {
3265 				return_value = FME_DISPROVED;
3266 			} else {
3267 				ep->cached_delay = *pdelay = at_latest_by;
3268 				return_value = FME_WAIT;
3269 			}
3270 		}
3271 
3272 		indent();
3273 		switch (return_value) {
3274 		case FME_CREDIBLE:
3275 			ep->cached_state |= REQMNTS_CREDIBLE;
3276 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE ");
3277 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3278 			break;
3279 		case FME_DISPROVED:
3280 			ep->cached_state |= REQMNTS_DISPROVED;
3281 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
3282 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3283 			break;
3284 		case FME_WAIT:
3285 			ep->cached_state |= REQMNTS_WAIT;
3286 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT ");
3287 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3288 			out(O_ALTFP|O_VERB|O_NONL, " to ");
3289 			ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3290 			break;
3291 		default:
3292 			out(O_DIE, "requirements_test: unexpected fme_state");
3293 			break;
3294 		}
3295 		out(O_ALTFP|O_VERB, NULL);
3296 		indent_pop();
3297 
3298 		return (return_value);
3299 	}
3300 
3301 	/* this event is not a report, descend the tree */
3302 	for (bp = itree_next_bubble(ep, NULL); bp;
3303 	    bp = itree_next_bubble(ep, bp)) {
3304 		int n;
3305 
3306 		if (bp->t != B_FROM)
3307 			continue;
3308 
3309 		n = bp->nork;
3310 
3311 		credible_events = 0;
3312 		waiting_events = 0;
3313 		deferred_events = 0;
3314 		arrow_delay = TIMEVAL_EVENTUALLY;
3315 		/*
3316 		 * n is -1 for 'A' so adjust it.
3317 		 * XXX just count up the arrows for now.
3318 		 */
3319 		if (n < 0) {
3320 			n = 0;
3321 			for (ap = itree_next_arrow(bp, NULL); ap;
3322 			    ap = itree_next_arrow(bp, ap))
3323 				n++;
3324 			indent();
3325 			out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n);
3326 		} else {
3327 			indent();
3328 			out(O_ALTFP|O_VERB, " Bubble N=%d", n);
3329 		}
3330 
3331 		if (n == 0)
3332 			continue;
3333 		if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) {
3334 			for (ap = itree_next_arrow(bp, NULL); ap;
3335 			    ap = itree_next_arrow(bp, ap)) {
3336 				ep2 = ap->arrowp->head->myevent;
3337 				platform_set_payloadnvp(ep2->nvp);
3338 				if (checkconstraints(fmep, ap->arrowp) == 0) {
3339 					/*
3340 					 * if any arrow is invalidated by the
3341 					 * constraints, then we should elide the
3342 					 * whole bubble to be consistant with
3343 					 * the tree creation time behaviour
3344 					 */
3345 					bp->mark |= BUBBLE_ELIDED;
3346 					platform_set_payloadnvp(NULL);
3347 					break;
3348 				}
3349 				platform_set_payloadnvp(NULL);
3350 			}
3351 		}
3352 		if (bp->mark & BUBBLE_ELIDED)
3353 			continue;
3354 		bp->mark |= BUBBLE_OK;
3355 		for (ap = itree_next_arrow(bp, NULL); ap;
3356 		    ap = itree_next_arrow(bp, ap)) {
3357 			ep2 = ap->arrowp->head->myevent;
3358 			if (n <= credible_events)
3359 				break;
3360 
3361 			ap->arrowp->mark |= REQMNTS_COUNTER;
3362 			if (triggered(fmep, ep2, REQMNTS_COUNTER))
3363 				/* XXX adding max timevals! */
3364 				switch (requirements_test(fmep, ep2,
3365 				    at_latest_by + ap->arrowp->maxdelay,
3366 				    &my_delay)) {
3367 				case FME_DEFERRED:
3368 					deferred_events++;
3369 					break;
3370 				case FME_CREDIBLE:
3371 					credible_events++;
3372 					break;
3373 				case FME_DISPROVED:
3374 					break;
3375 				case FME_WAIT:
3376 					if (my_delay < arrow_delay)
3377 						arrow_delay = my_delay;
3378 					waiting_events++;
3379 					break;
3380 				default:
3381 					out(O_DIE,
3382 					"Bug in requirements_test.");
3383 				}
3384 			else
3385 				deferred_events++;
3386 		}
3387 		indent();
3388 		out(O_ALTFP|O_VERB, " Credible: %d Waiting %d",
3389 		    credible_events + deferred_events, waiting_events);
3390 		if (credible_events + deferred_events + waiting_events < n) {
3391 			/* Can never meet requirements */
3392 			ep->cached_state |= REQMNTS_DISPROVED;
3393 			indent();
3394 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
3395 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3396 			out(O_ALTFP|O_VERB, NULL);
3397 			indent_pop();
3398 			return (FME_DISPROVED);
3399 		}
3400 		if (credible_events + deferred_events < n) {
3401 			/* will have to wait */
3402 			/* wait time is shortest known */
3403 			if (arrow_delay < overall_delay)
3404 				overall_delay = arrow_delay;
3405 			return_value = FME_WAIT;
3406 		} else if (credible_events < n) {
3407 			if (return_value != FME_WAIT)
3408 				return_value = FME_DEFERRED;
3409 		}
3410 	}
3411 
3412 	/*
3413 	 * don't mark as FME_DEFERRED. If this event isn't reached by another
3414 	 * path, then this will be considered FME_CREDIBLE. But if it is
3415 	 * reached by a different path so the K-count is met, then might
3416 	 * get overridden by FME_WAIT or FME_DISPROVED.
3417 	 */
3418 	if (return_value == FME_WAIT) {
3419 		ep->cached_state |= REQMNTS_WAIT;
3420 		ep->cached_delay = *pdelay = overall_delay;
3421 	} else if (return_value == FME_CREDIBLE) {
3422 		ep->cached_state |= REQMNTS_CREDIBLE;
3423 	}
3424 	indent();
3425 	out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ",
3426 	    fme_state2str(return_value));
3427 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3428 	out(O_ALTFP|O_VERB, NULL);
3429 	indent_pop();
3430 	return (return_value);
3431 }
3432 
3433 static enum fme_state
3434 causes_test(struct fme *fmep, struct event *ep,
3435     unsigned long long at_latest_by, unsigned long long *pdelay)
3436 {
3437 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3438 	unsigned long long my_delay;
3439 	int credible_results = 0;
3440 	int waiting_results = 0;
3441 	enum fme_state fstate;
3442 	struct event *tail_event;
3443 	struct bubble *bp;
3444 	struct arrowlist *ap;
3445 	int k = 1;
3446 
3447 	stats_counter_bump(fmep->Ccallcount);
3448 	indent_push("  C");
3449 	indent();
3450 	out(O_ALTFP|O_VERB|O_NONL, "->");
3451 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3452 	out(O_ALTFP|O_VERB, NULL);
3453 
3454 	for (bp = itree_next_bubble(ep, NULL); bp;
3455 	    bp = itree_next_bubble(ep, bp)) {
3456 		if (bp->t != B_TO)
3457 			continue;
3458 		k = bp->nork;	/* remember the K value */
3459 		for (ap = itree_next_arrow(bp, NULL); ap;
3460 		    ap = itree_next_arrow(bp, ap)) {
3461 			int do_not_follow = 0;
3462 
3463 			/*
3464 			 * if we get to the same event multiple times
3465 			 * only worry about the first one.
3466 			 */
3467 			if (ap->arrowp->tail->myevent->cached_state &
3468 			    CAUSES_TESTED) {
3469 				indent();
3470 				out(O_ALTFP|O_VERB|O_NONL,
3471 				    "  causes test already run for ");
3472 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
3473 				    ap->arrowp->tail->myevent);
3474 				out(O_ALTFP|O_VERB, NULL);
3475 				continue;
3476 			}
3477 
3478 			/*
3479 			 * see if false constraint prevents us
3480 			 * from traversing this arrow
3481 			 */
3482 			platform_set_payloadnvp(ep->nvp);
3483 			if (checkconstraints(fmep, ap->arrowp) == 0)
3484 				do_not_follow = 1;
3485 			platform_set_payloadnvp(NULL);
3486 			if (do_not_follow) {
3487 				indent();
3488 				out(O_ALTFP|O_VERB|O_NONL,
3489 				    "  False arrow from ");
3490 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
3491 				    ap->arrowp->tail->myevent);
3492 				out(O_ALTFP|O_VERB, NULL);
3493 				continue;
3494 			}
3495 
3496 			ap->arrowp->tail->myevent->cached_state |=
3497 			    CAUSES_TESTED;
3498 			tail_event = ap->arrowp->tail->myevent;
3499 			fstate = hypothesise(fmep, tail_event, at_latest_by,
3500 			    &my_delay);
3501 
3502 			switch (fstate) {
3503 			case FME_WAIT:
3504 				if (my_delay < overall_delay)
3505 					overall_delay = my_delay;
3506 				waiting_results++;
3507 				break;
3508 			case FME_CREDIBLE:
3509 				credible_results++;
3510 				break;
3511 			case FME_DISPROVED:
3512 				break;
3513 			default:
3514 				out(O_DIE, "Bug in causes_test");
3515 			}
3516 		}
3517 	}
3518 	/* compare against K */
3519 	if (credible_results + waiting_results < k) {
3520 		indent();
3521 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED ");
3522 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3523 		out(O_ALTFP|O_VERB, NULL);
3524 		indent_pop();
3525 		return (FME_DISPROVED);
3526 	}
3527 	if (waiting_results != 0) {
3528 		*pdelay = overall_delay;
3529 		indent();
3530 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT ");
3531 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3532 		out(O_ALTFP|O_VERB|O_NONL, " to ");
3533 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3534 		out(O_ALTFP|O_VERB, NULL);
3535 		indent_pop();
3536 		return (FME_WAIT);
3537 	}
3538 	indent();
3539 	out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE ");
3540 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3541 	out(O_ALTFP|O_VERB, NULL);
3542 	indent_pop();
3543 	return (FME_CREDIBLE);
3544 }
3545 
3546 static enum fme_state
3547 hypothesise(struct fme *fmep, struct event *ep,
3548 	unsigned long long at_latest_by, unsigned long long *pdelay)
3549 {
3550 	enum fme_state rtr, otr;
3551 	unsigned long long my_delay;
3552 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3553 
3554 	stats_counter_bump(fmep->Hcallcount);
3555 	indent_push("  H");
3556 	indent();
3557 	out(O_ALTFP|O_VERB|O_NONL, "->");
3558 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3559 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
3560 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3561 	out(O_ALTFP|O_VERB, NULL);
3562 
3563 	rtr = requirements_test(fmep, ep, at_latest_by, &my_delay);
3564 	if ((rtr == FME_WAIT) && (my_delay < overall_delay))
3565 		overall_delay = my_delay;
3566 	if (rtr != FME_DISPROVED) {
3567 		if (is_problem(ep->t)) {
3568 			otr = effects_test(fmep, ep, at_latest_by, &my_delay);
3569 			if (otr != FME_DISPROVED) {
3570 				if (fmep->peek == 0 && ep->is_suspect == 0) {
3571 					ep->suspects = fmep->suspects;
3572 					ep->is_suspect = 1;
3573 					fmep->suspects = ep;
3574 					fmep->nsuspects++;
3575 					if (!is_fault(ep->t))
3576 						fmep->nonfault++;
3577 				}
3578 			}
3579 		} else
3580 			otr = causes_test(fmep, ep, at_latest_by, &my_delay);
3581 		if ((otr == FME_WAIT) && (my_delay < overall_delay))
3582 			overall_delay = my_delay;
3583 		if ((otr != FME_DISPROVED) &&
3584 		    ((rtr == FME_WAIT) || (otr == FME_WAIT)))
3585 			*pdelay = overall_delay;
3586 	}
3587 	if (rtr == FME_DISPROVED) {
3588 		indent();
3589 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
3590 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3591 		out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
3592 		indent_pop();
3593 		return (FME_DISPROVED);
3594 	}
3595 	if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
3596 		indent();
3597 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
3598 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3599 		out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
3600 		indent_pop();
3601 		return (FME_DISPROVED);
3602 	}
3603 	if (otr == FME_DISPROVED) {
3604 		indent();
3605 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
3606 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3607 		out(O_ALTFP|O_VERB, " (causes are not credible)");
3608 		indent_pop();
3609 		return (FME_DISPROVED);
3610 	}
3611 	if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
3612 		indent();
3613 		out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
3614 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3615 		out(O_ALTFP|O_VERB|O_NONL, " to ");
3616 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
3617 		out(O_ALTFP|O_VERB, NULL);
3618 		indent_pop();
3619 		return (FME_WAIT);
3620 	}
3621 	indent();
3622 	out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
3623 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3624 	out(O_ALTFP|O_VERB, NULL);
3625 	indent_pop();
3626 	return (FME_CREDIBLE);
3627 }
3628 
3629 /*
3630  * fme_istat_load -- reconstitute any persistent istats
3631  */
3632 void
3633 fme_istat_load(fmd_hdl_t *hdl)
3634 {
3635 	int sz;
3636 	char *sbuf;
3637 	char *ptr;
3638 
3639 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) {
3640 		out(O_ALTFP, "fme_istat_load: No stats");
3641 		return;
3642 	}
3643 
3644 	sbuf = alloca(sz);
3645 
3646 	fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz);
3647 
3648 	/*
3649 	 * pick apart the serialized stats
3650 	 *
3651 	 * format is:
3652 	 *	<class-name>, '@', <path>, '\0', <value>, '\0'
3653 	 * for example:
3654 	 *	"stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
3655 	 *
3656 	 * since this is parsing our own serialized data, any parsing issues
3657 	 * are fatal, so we check for them all with ASSERT() below.
3658 	 */
3659 	ptr = sbuf;
3660 	while (ptr < &sbuf[sz]) {
3661 		char *sepptr;
3662 		struct node *np;
3663 		int val;
3664 
3665 		sepptr = strchr(ptr, '@');
3666 		ASSERT(sepptr != NULL);
3667 		*sepptr = '\0';
3668 
3669 		/* construct the event */
3670 		np = newnode(T_EVENT, NULL, 0);
3671 		np->u.event.ename = newnode(T_NAME, NULL, 0);
3672 		np->u.event.ename->u.name.t = N_STAT;
3673 		np->u.event.ename->u.name.s = stable(ptr);
3674 		np->u.event.ename->u.name.it = IT_ENAME;
3675 		np->u.event.ename->u.name.last = np->u.event.ename;
3676 
3677 		ptr = sepptr + 1;
3678 		ASSERT(ptr < &sbuf[sz]);
3679 		ptr += strlen(ptr);
3680 		ptr++;	/* move past the '\0' separating path from value */
3681 		ASSERT(ptr < &sbuf[sz]);
3682 		ASSERT(isdigit(*ptr));
3683 		val = atoi(ptr);
3684 		ASSERT(val > 0);
3685 		ptr += strlen(ptr);
3686 		ptr++;	/* move past the final '\0' for this entry */
3687 
3688 		np->u.event.epname = pathstring2epnamenp(sepptr + 1);
3689 		ASSERT(np->u.event.epname != NULL);
3690 
3691 		istat_bump(np, val);
3692 		tree_free(np);
3693 	}
3694 
3695 	istat_save();
3696 }
3697