xref: /titanic_52/usr/src/cmd/fm/modules/common/eversholt/fme.c (revision 968633ad8faee931821fd6b656eb0d96d4b186c0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * fme.c -- fault management exercise module
27  *
28  * this module provides the simulated fault management exercise.
29  */
30 
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <strings.h>
35 #include <ctype.h>
36 #include <alloca.h>
37 #include <libnvpair.h>
38 #include <sys/fm/protocol.h>
39 #include <fm/fmd_api.h>
40 #include "alloc.h"
41 #include "out.h"
42 #include "stats.h"
43 #include "stable.h"
44 #include "literals.h"
45 #include "lut.h"
46 #include "tree.h"
47 #include "ptree.h"
48 #include "itree.h"
49 #include "ipath.h"
50 #include "fme.h"
51 #include "evnv.h"
52 #include "eval.h"
53 #include "config.h"
54 #include "platform.h"
55 #include "esclex.h"
56 
57 /* imported from eft.c... */
58 extern hrtime_t Hesitate;
59 extern char *Serd_Override;
60 extern nv_alloc_t Eft_nv_hdl;
61 extern int Max_fme;
62 extern fmd_hdl_t *Hdl;
63 
/*
 * Flags declared alongside the istat_save() and serd_save() prototypes.
 * NOTE(review): presumably they record that istat/SERD state needs to
 * be written out by those routines; nothing in this part of the file
 * sets or tests them -- confirm against the save functions.
 */
64 static int Istat_need_save;
65 static int Serd_need_save;
66 void istat_save(void);
67 void serd_save(void);
68 
69 /* fme under construction is global so we can free it on module abort */
70 static struct fme *Nfmep;
71 
/*
 * UD_VAL_* code describing why the most recent failure could not be
 * diagnosed.  Error paths such as those in newfme() and fme_restart()
 * set it; fme_restart() resets it to UD_VAL_UNKNOWN after it has been
 * turned into a defect.
 */
72 static int Undiag_reason = UD_VAL_UNKNOWN;
73 
/*
 * Id given to the next FME.  newfme() post-increments it and
 * fme_restart() moves it past any id restored from a saved case.
 */
74 static int Nextid = 0;
75 
76 static int Open_fme_count = 0;	/* Count of open FMEs */
77 
/*
 * struct fme -- state of one fault management exercise.
 *
 * Instances are obtained zero-filled from alloc_fme() and torn down by
 * destroy_fme().  The declaration also defines the three list pointers
 * that follow the closing brace.
 */
78 /* list of fault management exercises underway */
79 static struct fme {
80 	struct fme *next;		/* next exercise */
81 	unsigned long long ull;		/* time when fme was created */
82 	int id;				/* FME id */
83 	struct config *config;		/* cooked configuration data */
84 	struct lut *eventtree;		/* propagation tree for this FME */
85 	/*
86 	 * The initial error report that created this FME is kept in
87 	 * two forms.  e0 points to the instance tree node and is used
88 	 * by fme_eval() as the starting point for the inference
89 	 * algorithm.  e0r is the event handle FMD passed to us when
90 	 * the ereport first arrived and is used when setting timers,
91 	 * which are always relative to the time of this initial
92 	 * report.
93 	 */
94 	struct event *e0;
95 	fmd_event_t *e0r;
96 
97 	id_t    timer;			/* for setting an fmd time-out */
98 
99 	struct event *ecurrent;		/* ereport under consideration */
100 	struct event *suspects;		/* current suspect list */
101 	struct event *psuspects;	/* previous suspect list */
102 	int nsuspects;			/* count of suspects */
103 	int nonfault;			/* zero if all suspects T_FAULT */
104 	int posted_suspects;		/* true if we've posted a diagnosis */
105 	int uniqobs;			/* number of unique events observed */
106 	int peek;			/* just peeking, don't track suspects */
107 	int overflow;			/* true if overflow FME */
108 	enum fme_state {
109 		FME_NOTHING = 5000,	/* not evaluated yet */
110 		FME_WAIT,		/* need to wait for more info */
111 		FME_CREDIBLE,		/* suspect list is credible */
112 		FME_DISPROVED,		/* no valid suspects found */
113 		FME_DEFERRED		/* don't know yet (k-count not met) */
114 	} state;
115 
116 	unsigned long long pull;	/* time passed since created */
117 	unsigned long long wull;	/* wait until this time for re-eval */
118 	struct event *observations;	/* observation list */
119 	struct lut *globals;		/* values of global variables */
120 	/* fmd interfacing */
121 	fmd_hdl_t *hdl;			/* handle for talking with fmd */
122 	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
	/*
	 * Per-FME counters.  fme_ready() creates them under the names
	 * "fme<id>.<suffix>" and destroy_fme() deletes them.
	 */
123 	/* stats */
124 	struct stats *Rcount;		/* ereports received */
125 	struct stats *Hcallcount;	/* calls to hypothesise() */
126 	struct stats *Rcallcount;	/* calls to requirements_test() */
127 	struct stats *Ccallcount;	/* calls to causes_test() */
128 	struct stats *Ecallcount;	/* calls to effects_test() */
129 	struct stats *Tcallcount;	/* calls to triggered() */
130 	struct stats *Marrowcount;	/* arrows marked by mark_arrows() */
131 	struct stats *diags;		/* suspect lists diagnosed */
	/*
	 * FMElist is the head and EFMElist the tail of the list that
	 * fme_ready() appends to.  ClosedFMEs is a separate list that
	 * fme_fini() also walks and destroys.
	 */
132 } *FMElist, *EFMElist, *ClosedFMEs;
133 
/*
 * Singly linked list of fmd cases that could not be restarted.
 * fme_restart() pushes new entries onto the head of Undiagablecaselist
 * and fme_fini() frees the list nodes.
 */
134 static struct case_list {
135 	fmd_case_t *fmcase;		/* the case that could not be restarted */
136 	struct case_list *next;		/* next entry, or NULL at the tail */
137 } *Undiagablecaselist;
138 
139 static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
140 static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
141 	unsigned long long at_latest_by, unsigned long long *pdelay);
142 static struct node *eventprop_lookup(struct event *ep, const char *propname);
143 static struct node *pathstring2epnamenp(char *path);
144 static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
145 	fmd_case_t *fmcase);
146 static const char *undiag_2reason_str(int ud);
147 static const char *undiag_2defect_str(int ud);
148 static void restore_suspects(struct fme *fmep);
149 static void save_suspects(struct fme *fmep);
150 static void destroy_fme(struct fme *f);
151 static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
152     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
153 static void istat_counter_reset_cb(struct istat_entry *entp,
154     struct stats *statp, const struct ipath *ipp);
155 static void istat_counter_topo_chg_cb(struct istat_entry *entp,
156     struct stats *statp, void *unused);
157 static void serd_reset_cb(struct serd_entry *entp, void *unused,
158     const struct ipath *ipp);
159 static void serd_topo_chg_cb(struct serd_entry *entp, void *unused,
160     void *unused2);
161 static void destroy_fme_bufs(struct fme *fp);
162 
163 static struct fme *
164 alloc_fme(void)
165 {
166 	struct fme *fmep;
167 
168 	fmep = MALLOC(sizeof (*fmep));
169 	bzero(fmep, sizeof (*fmep));
170 	return (fmep);
171 }
172 
173 /*
174  * fme_ready -- called when all initialization of the FME (except for
175  *	stats) has completed successfully.  Adds the fme to global lists
176  *	and establishes its stats.
177  */
178 static struct fme *
179 fme_ready(struct fme *fmep)
180 {
181 	char nbuf[100];
182 
183 	Nfmep = NULL;	/* don't need to free this on module abort now */
184 
185 	if (EFMElist) {
186 		EFMElist->next = fmep;
187 		EFMElist = fmep;
188 	} else
189 		FMElist = EFMElist = fmep;
190 
191 	(void) sprintf(nbuf, "fme%d.Rcount", fmep->id);
192 	fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
193 	(void) sprintf(nbuf, "fme%d.Hcall", fmep->id);
194 	fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1);
195 	(void) sprintf(nbuf, "fme%d.Rcall", fmep->id);
196 	fmep->Rcallcount = stats_new_counter(nbuf,
197 	    "calls to requirements_test()", 1);
198 	(void) sprintf(nbuf, "fme%d.Ccall", fmep->id);
199 	fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1);
200 	(void) sprintf(nbuf, "fme%d.Ecall", fmep->id);
201 	fmep->Ecallcount =
202 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
203 	(void) sprintf(nbuf, "fme%d.Tcall", fmep->id);
204 	fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
205 	(void) sprintf(nbuf, "fme%d.Marrow", fmep->id);
206 	fmep->Marrowcount = stats_new_counter(nbuf,
207 	    "arrows marked by mark_arrows()", 1);
208 	(void) sprintf(nbuf, "fme%d.diags", fmep->id);
209 	fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
210 
211 	out(O_ALTFP|O_VERB2, "newfme: config snapshot contains...");
212 	config_print(O_ALTFP|O_VERB2, fmep->config);
213 
214 	return (fmep);
215 }
216 
217 extern void ipath_dummy_lut(struct arrow *);
218 extern struct lut *itree_create_dummy(const char *, const struct ipath *);
219 
220 /* ARGSUSED */
221 static void
222 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
223 {
224 	struct bubble *bp;
225 	struct arrowlist *ap;
226 
227 	for (bp = itree_next_bubble(ep, NULL); bp;
228 	    bp = itree_next_bubble(ep, bp)) {
229 		if (bp->t != B_FROM)
230 			continue;
231 		for (ap = itree_next_arrow(bp, NULL); ap;
232 		    ap = itree_next_arrow(bp, ap)) {
233 			ap->arrowp->pnode->u.arrow.needed = 1;
234 			ipath_dummy_lut(ap->arrowp);
235 		}
236 	}
237 }
238 
239 /* ARGSUSED */
240 static void
241 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
242 {
243 	struct bubble *bp;
244 	struct arrowlist *ap;
245 
246 	for (bp = itree_next_bubble(ep, NULL); bp;
247 	    bp = itree_next_bubble(ep, bp)) {
248 		if (bp->t != B_FROM)
249 			continue;
250 		for (ap = itree_next_arrow(bp, NULL); ap;
251 		    ap = itree_next_arrow(bp, ap))
252 			ap->arrowp->pnode->u.arrow.needed = 0;
253 	}
254 }
255 
256 static void globals_destructor(void *left, void *right, void *arg);
257 static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep);
258 
259 static void
260 prune_propagations(const char *e0class, const struct ipath *e0ipp)
261 {
262 	char nbuf[100];
263 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
264 	extern struct lut *Usednames;
265 
266 	Nfmep = alloc_fme();
267 	Nfmep->id = Nextid;
268 	Nfmep->state = FME_NOTHING;
269 	Nfmep->eventtree = itree_create_dummy(e0class, e0ipp);
270 	if ((Nfmep->e0 =
271 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
272 		out(O_ALTFP, "prune_propagations: e0 not in instance tree");
273 		itree_free(Nfmep->eventtree);
274 		FREE(Nfmep);
275 		Nfmep = NULL;
276 		return;
277 	}
278 	Nfmep->ecurrent = Nfmep->observations = Nfmep->e0;
279 	Nfmep->e0->count++;
280 
281 	(void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id);
282 	Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
283 	(void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id);
284 	Nfmep->Hcallcount =
285 	    stats_new_counter(nbuf, "calls to hypothesise()", 1);
286 	(void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id);
287 	Nfmep->Rcallcount = stats_new_counter(nbuf,
288 	    "calls to requirements_test()", 1);
289 	(void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id);
290 	Nfmep->Ccallcount =
291 	    stats_new_counter(nbuf, "calls to causes_test()", 1);
292 	(void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id);
293 	Nfmep->Ecallcount =
294 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
295 	(void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id);
296 	Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
297 	(void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id);
298 	Nfmep->Marrowcount = stats_new_counter(nbuf,
299 	    "arrows marked by mark_arrows()", 1);
300 	(void) sprintf(nbuf, "fme%d.diags", Nfmep->id);
301 	Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
302 
303 	Nfmep->peek = 1;
304 	lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep);
305 	lut_free(Usednames, NULL, NULL);
306 	Usednames = NULL;
307 	lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep);
308 	(void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay);
309 	itree_prune(Nfmep->eventtree);
310 	lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep);
311 
312 	stats_delete(Nfmep->Rcount);
313 	stats_delete(Nfmep->Hcallcount);
314 	stats_delete(Nfmep->Rcallcount);
315 	stats_delete(Nfmep->Ccallcount);
316 	stats_delete(Nfmep->Ecallcount);
317 	stats_delete(Nfmep->Tcallcount);
318 	stats_delete(Nfmep->Marrowcount);
319 	stats_delete(Nfmep->diags);
320 	itree_free(Nfmep->eventtree);
321 	lut_free(Nfmep->globals, globals_destructor, NULL);
322 	FREE(Nfmep);
323 }
324 
325 static struct fme *
326 newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl,
327 	fmd_case_t *fmcase)
328 {
329 	struct cfgdata *cfgdata;
330 	int init_size;
331 	extern int alloc_total();
332 
333 	init_size = alloc_total();
334 	out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size);
335 	cfgdata = config_snapshot();
336 	platform_save_config(hdl, fmcase);
337 	out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes",
338 	    alloc_total() - init_size);
339 
340 	Nfmep = alloc_fme();
341 
342 	Nfmep->id = Nextid++;
343 	Nfmep->config = cfgdata->cooked;
344 	config_free(cfgdata);
345 	Nfmep->posted_suspects = 0;
346 	Nfmep->uniqobs = 0;
347 	Nfmep->state = FME_NOTHING;
348 	Nfmep->pull = 0ULL;
349 	Nfmep->overflow = 0;
350 
351 	Nfmep->fmcase = fmcase;
352 	Nfmep->hdl = hdl;
353 
354 	if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) {
355 		out(O_ALTFP, "newfme: NULL instance tree");
356 		Undiag_reason = UD_VAL_INSTFAIL;
357 		structconfig_free(Nfmep->config);
358 		destroy_fme_bufs(Nfmep);
359 		FREE(Nfmep);
360 		Nfmep = NULL;
361 		return (NULL);
362 	}
363 
364 	itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);
365 
366 	if ((Nfmep->e0 =
367 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
368 		out(O_ALTFP, "newfme: e0 not in instance tree");
369 		Undiag_reason = UD_VAL_BADEVENTI;
370 		itree_free(Nfmep->eventtree);
371 		structconfig_free(Nfmep->config);
372 		destroy_fme_bufs(Nfmep);
373 		FREE(Nfmep);
374 		Nfmep = NULL;
375 		return (NULL);
376 	}
377 
378 	return (fme_ready(Nfmep));
379 }
380 
381 void
382 fme_fini(void)
383 {
384 	struct fme *sfp, *fp;
385 	struct case_list *ucasep, *nextcasep;
386 
387 	ucasep = Undiagablecaselist;
388 	while (ucasep != NULL) {
389 		nextcasep = ucasep->next;
390 		FREE(ucasep);
391 		ucasep = nextcasep;
392 	}
393 	Undiagablecaselist = NULL;
394 
395 	/* clean up closed fmes */
396 	fp = ClosedFMEs;
397 	while (fp != NULL) {
398 		sfp = fp->next;
399 		destroy_fme(fp);
400 		fp = sfp;
401 	}
402 	ClosedFMEs = NULL;
403 
404 	fp = FMElist;
405 	while (fp != NULL) {
406 		sfp = fp->next;
407 		destroy_fme(fp);
408 		fp = sfp;
409 	}
410 	FMElist = EFMElist = NULL;
411 
412 	/* if we were in the middle of creating an fme, free it now */
413 	if (Nfmep) {
414 		destroy_fme(Nfmep);
415 		Nfmep = NULL;
416 	}
417 }
418 
419 /*
420  * Allocated space for a buffer name.  20 bytes allows for
421  * a ridiculous 9,999,999 unique observations.
 *
 * The longest name built in this file is "observed%d.nvp": 8 bytes
 * for "observed", 4 for ".nvp" and 1 for the terminating NUL leave
 * 7 bytes for the decimal observation index.
422  */
423 #define	OBBUFNMSZ 20
424 
425 /*
426  *  serialize_observation
427  *
428  *  Create a recoverable version of the current observation
429  *  (f->ecurrent).  We keep a serialized version of each unique
430  *  observation in order that we may resume correctly the fme in the
431  *  correct state if eft or fmd crashes and we're restarted.
432  */
433 static void
434 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
435 {
436 	size_t pkdlen;
437 	char tmpbuf[OBBUFNMSZ];
438 	char *pkd = NULL;
439 	char *estr;
440 
441 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
442 	estr = ipath2str(cls, ipp);
443 	fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
444 	fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
445 	    strlen(estr) + 1);
446 	FREE(estr);
447 
448 	if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
449 		(void) snprintf(tmpbuf,
450 		    OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
451 		if (nvlist_xpack(fp->ecurrent->nvp,
452 		    &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
453 			out(O_DIE|O_SYS, "pack of observed nvl failed");
454 		fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
455 		fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
456 		FREE(pkd);
457 	}
458 
459 	fp->uniqobs++;
460 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
461 	    sizeof (fp->uniqobs));
462 }
463 
464 /*
465  *  init_fme_bufs -- We keep several bits of state about an fme for
466  *	use if eft or fmd crashes and we're restarted.
467  */
468 static void
469 init_fme_bufs(struct fme *fp)
470 {
471 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
472 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
473 	    sizeof (fp->pull));
474 
475 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
476 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
477 	    sizeof (fp->id));
478 
479 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
480 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
481 	    sizeof (fp->uniqobs));
482 
483 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
484 	    sizeof (fp->posted_suspects));
485 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
486 	    (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
487 }
488 
489 static void
490 destroy_fme_bufs(struct fme *fp)
491 {
492 	char tmpbuf[OBBUFNMSZ];
493 	int o;
494 
495 	platform_restore_config(fp->hdl, fp->fmcase);
496 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
497 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
498 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
499 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
500 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
501 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);
502 
503 	for (o = 0; o < fp->uniqobs; o++) {
504 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
505 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
506 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
507 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
508 	}
509 }
510 
511 /*
512  * reconstitute_observations -- convert a case's serialized observations
513  *	back into struct events.  Returns zero if all observations are
514  *	successfully reconstituted.
515  */
516 static int
517 reconstitute_observations(struct fme *fmep)
518 {
519 	struct event *ep;
520 	struct node *epnamenp = NULL;
521 	size_t pkdlen;
522 	char *pkd = NULL;
523 	char *tmpbuf = alloca(OBBUFNMSZ);
524 	char *sepptr;
525 	char *estr;
526 	int ocnt;
527 	int elen;
528 
529 	for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
530 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
531 		elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
532 		if (elen == 0) {
533 			out(O_ALTFP,
534 			    "reconstitute_observation: no %s buffer found.",
535 			    tmpbuf);
536 			Undiag_reason = UD_VAL_MISSINGOBS;
537 			break;
538 		}
539 
540 		estr = MALLOC(elen);
541 		fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
542 		sepptr = strchr(estr, '@');
543 		if (sepptr == NULL) {
544 			out(O_ALTFP,
545 			    "reconstitute_observation: %s: "
546 			    "missing @ separator in %s.",
547 			    tmpbuf, estr);
548 			Undiag_reason = UD_VAL_MISSINGPATH;
549 			FREE(estr);
550 			break;
551 		}
552 
553 		*sepptr = '\0';
554 		if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
555 			out(O_ALTFP,
556 			    "reconstitute_observation: %s: "
557 			    "trouble converting path string \"%s\" "
558 			    "to internal representation.",
559 			    tmpbuf, sepptr + 1);
560 			Undiag_reason = UD_VAL_MISSINGPATH;
561 			FREE(estr);
562 			break;
563 		}
564 
565 		/* construct the event */
566 		ep = itree_lookup(fmep->eventtree,
567 		    stable(estr), ipath(epnamenp));
568 		if (ep == NULL) {
569 			out(O_ALTFP,
570 			    "reconstitute_observation: %s: "
571 			    "lookup of  \"%s\" in itree failed.",
572 			    tmpbuf, ipath2str(estr, ipath(epnamenp)));
573 			Undiag_reason = UD_VAL_BADOBS;
574 			tree_free(epnamenp);
575 			FREE(estr);
576 			break;
577 		}
578 		tree_free(epnamenp);
579 
580 		/*
581 		 * We may or may not have a saved nvlist for the observation
582 		 */
583 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
584 		pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
585 		if (pkdlen != 0) {
586 			pkd = MALLOC(pkdlen);
587 			fmd_buf_read(fmep->hdl,
588 			    fmep->fmcase, tmpbuf, pkd, pkdlen);
589 			ASSERT(ep->nvp == NULL);
590 			if (nvlist_xunpack(pkd,
591 			    pkdlen, &ep->nvp, &Eft_nv_hdl) != 0)
592 				out(O_DIE|O_SYS, "pack of observed nvl failed");
593 			FREE(pkd);
594 		}
595 
596 		if (ocnt == 0)
597 			fmep->e0 = ep;
598 
599 		FREE(estr);
600 		fmep->ecurrent = ep;
601 		ep->count++;
602 
603 		/* link it into list of observations seen */
604 		ep->observations = fmep->observations;
605 		fmep->observations = ep;
606 	}
607 
608 	if (ocnt == fmep->uniqobs) {
609 		(void) fme_ready(fmep);
610 		return (0);
611 	}
612 
613 	return (1);
614 }
615 
616 /*
617  * restart_fme -- called during eft initialization.  Reconstitutes
618  *	an in-progress fme.
619  */
620 void
621 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress)
622 {
623 	nvlist_t *defect;
624 	struct case_list *bad;
625 	struct fme *fmep;
626 	struct cfgdata *cfgdata;
627 	size_t rawsz;
628 	struct event *ep;
629 	char *tmpbuf = alloca(OBBUFNMSZ);
630 	char *sepptr;
631 	char *estr;
632 	int elen;
633 	struct node *epnamenp = NULL;
634 	int init_size;
635 	extern int alloc_total();
636 
637 	/*
638 	 * ignore solved or closed cases
639 	 */
640 	if (fmd_case_solved(hdl, inprogress) ||
641 	    fmd_case_closed(hdl, inprogress))
642 		return;
643 
644 	fmep = alloc_fme();
645 	fmep->fmcase = inprogress;
646 	fmep->hdl = hdl;
647 
648 	if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) {
649 		out(O_ALTFP, "restart_fme: no saved posted status");
650 		Undiag_reason = UD_VAL_MISSINGINFO;
651 		goto badcase;
652 	} else {
653 		fmd_buf_read(hdl, inprogress, WOBUF_POSTD,
654 		    (void *)&fmep->posted_suspects,
655 		    sizeof (fmep->posted_suspects));
656 	}
657 
658 	if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) {
659 		out(O_ALTFP, "restart_fme: no saved id");
660 		Undiag_reason = UD_VAL_MISSINGINFO;
661 		goto badcase;
662 	} else {
663 		fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id,
664 		    sizeof (fmep->id));
665 	}
666 	if (Nextid <= fmep->id)
667 		Nextid = fmep->id + 1;
668 
669 	out(O_ALTFP, "Replay FME %d", fmep->id);
670 
671 	if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) {
672 		out(O_ALTFP, "restart_fme: No config data");
673 		Undiag_reason = UD_VAL_MISSINGINFO;
674 		goto badcase;
675 	}
676 	fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz,
677 	    sizeof (size_t));
678 
679 	if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) {
680 		out(O_ALTFP, "restart_fme: No event zero");
681 		Undiag_reason = UD_VAL_MISSINGZERO;
682 		goto badcase;
683 	}
684 
685 	if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
686 		out(O_ALTFP, "restart_fme: no saved wait time");
687 		Undiag_reason = UD_VAL_MISSINGINFO;
688 		goto badcase;
689 	} else {
690 		fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
691 		    sizeof (fmep->pull));
692 	}
693 
694 	if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
695 		out(O_ALTFP, "restart_fme: no count of observations");
696 		Undiag_reason = UD_VAL_MISSINGINFO;
697 		goto badcase;
698 	} else {
699 		fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
700 		    (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
701 	}
702 
703 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed0");
704 	elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
705 	if (elen == 0) {
706 		out(O_ALTFP, "reconstitute_observation: no %s buffer found.",
707 		    tmpbuf);
708 		Undiag_reason = UD_VAL_MISSINGOBS;
709 		goto badcase;
710 	}
711 	estr = MALLOC(elen);
712 	fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
713 	sepptr = strchr(estr, '@');
714 	if (sepptr == NULL) {
715 		out(O_ALTFP, "reconstitute_observation: %s: "
716 		    "missing @ separator in %s.",
717 		    tmpbuf, estr);
718 		Undiag_reason = UD_VAL_MISSINGPATH;
719 		FREE(estr);
720 		goto badcase;
721 	}
722 	*sepptr = '\0';
723 	if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
724 		out(O_ALTFP, "reconstitute_observation: %s: "
725 		    "trouble converting path string \"%s\" "
726 		    "to internal representation.", tmpbuf, sepptr + 1);
727 		Undiag_reason = UD_VAL_MISSINGPATH;
728 		FREE(estr);
729 		goto badcase;
730 	}
731 	prune_propagations(stable(estr), ipath(epnamenp));
732 	tree_free(epnamenp);
733 	FREE(estr);
734 
735 	init_size = alloc_total();
736 	out(O_ALTFP|O_STAMP, "start config_restore using %d bytes", init_size);
737 	cfgdata = MALLOC(sizeof (struct cfgdata));
738 	cfgdata->cooked = NULL;
739 	cfgdata->devcache = NULL;
740 	cfgdata->devidcache = NULL;
741 	cfgdata->cpucache = NULL;
742 	cfgdata->raw_refcnt = 1;
743 
744 	if (rawsz > 0) {
745 		if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) {
746 			out(O_ALTFP, "restart_fme: Config data size mismatch");
747 			Undiag_reason = UD_VAL_CFGMISMATCH;
748 			goto badcase;
749 		}
750 		cfgdata->begin = MALLOC(rawsz);
751 		cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz;
752 		fmd_buf_read(hdl,
753 		    inprogress, WOBUF_CFG, cfgdata->begin, rawsz);
754 	} else {
755 		cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL;
756 	}
757 
758 	config_cook(cfgdata);
759 	fmep->config = cfgdata->cooked;
760 	config_free(cfgdata);
761 	out(O_ALTFP|O_STAMP, "config_restore added %d bytes",
762 	    alloc_total() - init_size);
763 
764 	if ((fmep->eventtree = itree_create(fmep->config)) == NULL) {
765 		/* case not properly saved or irretrievable */
766 		out(O_ALTFP, "restart_fme: NULL instance tree");
767 		Undiag_reason = UD_VAL_INSTFAIL;
768 		goto badcase;
769 	}
770 
771 	itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree);
772 
773 	if (reconstitute_observations(fmep) != 0)
774 		goto badcase;
775 
776 	out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id);
777 	for (ep = fmep->observations; ep; ep = ep->observations) {
778 		out(O_ALTFP|O_NONL, " ");
779 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
780 	}
781 	out(O_ALTFP, NULL);
782 
783 	Open_fme_count++;
784 
785 	/* give the diagnosis algorithm a shot at the new FME state */
786 	fme_eval(fmep, fmep->e0r);
787 	return;
788 
789 badcase:
790 	if (fmep->eventtree != NULL)
791 		itree_free(fmep->eventtree);
792 	if (fmep->config)
793 		structconfig_free(fmep->config);
794 	destroy_fme_bufs(fmep);
795 	FREE(fmep);
796 
797 	/*
798 	 * Since we're unable to restart the case, add it to the undiagable
799 	 * list and solve and close it as appropriate.
800 	 */
801 	bad = MALLOC(sizeof (struct case_list));
802 	bad->next = NULL;
803 
804 	if (Undiagablecaselist != NULL)
805 		bad->next = Undiagablecaselist;
806 	Undiagablecaselist = bad;
807 	bad->fmcase = inprogress;
808 
809 	out(O_ALTFP|O_NONL, "[case %s (unable to restart), ",
810 	    fmd_case_uuid(hdl, bad->fmcase));
811 
812 	if (fmd_case_solved(hdl, bad->fmcase)) {
813 		out(O_ALTFP|O_NONL, "already solved, ");
814 	} else {
815 		out(O_ALTFP|O_NONL, "solving, ");
816 		defect = fmd_nvl_create_fault(hdl,
817 		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
818 		(void) nvlist_add_string(defect, UNDIAG_REASON,
819 		    undiag_2reason_str(Undiag_reason));
820 		fmd_case_add_suspect(hdl, bad->fmcase, defect);
821 		fmd_case_solve(hdl, bad->fmcase);
822 		Undiag_reason = UD_VAL_UNKNOWN;
823 	}
824 
825 	if (fmd_case_closed(hdl, bad->fmcase)) {
826 		out(O_ALTFP, "already closed ]");
827 	} else {
828 		out(O_ALTFP, "closing ]");
829 		fmd_case_close(hdl, bad->fmcase);
830 	}
831 }
832 
833 /*ARGSUSED*/
834 static void
835 globals_destructor(void *left, void *right, void *arg)
836 {
837 	struct evalue *evp = (struct evalue *)right;
838 	if (evp->t == NODEPTR)
839 		tree_free((struct node *)(uintptr_t)evp->v);
840 	evp->v = (uintptr_t)NULL;
841 	FREE(evp);
842 }
843 
844 void
845 destroy_fme(struct fme *f)
846 {
847 	stats_delete(f->Rcount);
848 	stats_delete(f->Hcallcount);
849 	stats_delete(f->Rcallcount);
850 	stats_delete(f->Ccallcount);
851 	stats_delete(f->Ecallcount);
852 	stats_delete(f->Tcallcount);
853 	stats_delete(f->Marrowcount);
854 	stats_delete(f->diags);
855 
856 	if (f->eventtree != NULL)
857 		itree_free(f->eventtree);
858 	if (f->config)
859 		structconfig_free(f->config);
860 	lut_free(f->globals, globals_destructor, NULL);
861 	FREE(f);
862 }
863 
864 static const char *
865 fme_state2str(enum fme_state s)
866 {
867 	switch (s) {
868 	case FME_NOTHING:	return ("NOTHING");
869 	case FME_WAIT:		return ("WAIT");
870 	case FME_CREDIBLE:	return ("CREDIBLE");
871 	case FME_DISPROVED:	return ("DISPROVED");
872 	case FME_DEFERRED:	return ("DEFERRED");
873 	default:		return ("UNKNOWN");
874 	}
875 }
876 
877 static int
878 is_problem(enum nametype t)
879 {
880 	return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
881 }
882 
883 static int
884 is_fault(enum nametype t)
885 {
886 	return (t == N_FAULT);
887 }
888 
889 static int
890 is_defect(enum nametype t)
891 {
892 	return (t == N_DEFECT);
893 }
894 
895 static int
896 is_upset(enum nametype t)
897 {
898 	return (t == N_UPSET);
899 }
900 
901 static void
902 fme_print(int flags, struct fme *fmep)
903 {
904 	struct event *ep;
905 
906 	out(flags, "Fault Management Exercise %d", fmep->id);
907 	out(flags, "\t       State: %s", fme_state2str(fmep->state));
908 	out(flags|O_NONL, "\t  Start time: ");
909 	ptree_timeval(flags|O_NONL, &fmep->ull);
910 	out(flags, NULL);
911 	if (fmep->wull) {
912 		out(flags|O_NONL, "\t   Wait time: ");
913 		ptree_timeval(flags|O_NONL, &fmep->wull);
914 		out(flags, NULL);
915 	}
916 	out(flags|O_NONL, "\t          E0: ");
917 	if (fmep->e0)
918 		itree_pevent_brief(flags|O_NONL, fmep->e0);
919 	else
920 		out(flags|O_NONL, "NULL");
921 	out(flags, NULL);
922 	out(flags|O_NONL, "\tObservations:");
923 	for (ep = fmep->observations; ep; ep = ep->observations) {
924 		out(flags|O_NONL, " ");
925 		itree_pevent_brief(flags|O_NONL, ep);
926 	}
927 	out(flags, NULL);
928 	out(flags|O_NONL, "\tSuspect list:");
929 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
930 		out(flags|O_NONL, " ");
931 		itree_pevent_brief(flags|O_NONL, ep);
932 	}
933 	out(flags, NULL);
934 	if (fmep->eventtree != NULL) {
935 		out(flags|O_VERB2, "\t        Tree:");
936 		itree_ptree(flags|O_VERB2, fmep->eventtree);
937 	}
938 }
939 
940 static struct node *
941 pathstring2epnamenp(char *path)
942 {
943 	char *sep = "/";
944 	struct node *ret;
945 	char *ptr;
946 
947 	if ((ptr = strtok(path, sep)) == NULL)
948 		out(O_DIE, "pathstring2epnamenp: invalid empty class");
949 
950 	ret = tree_iname(stable(ptr), NULL, 0);
951 
952 	while ((ptr = strtok(NULL, sep)) != NULL)
953 		ret = tree_name_append(ret,
954 		    tree_iname(stable(ptr), NULL, 0));
955 
956 	return (ret);
957 }
958 
959 /*
960  * for a given upset sp, increment the corresponding SERD engine.  if the
961  * SERD engine trips, return the ename and ipp of the resulting ereport.
962  * returns true if engine tripped and *enamep and *ippp were filled in.
963  */
964 static int
965 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep,
966     fmd_case_t *fmcase, struct event *sp, const char **enamep,
967     const struct ipath **ippp)
968 {
969 	struct node *serdinst;
970 	char *serdname;
971 	char *serdresource;
972 	struct node *nid;
973 	struct serd_entry *newentp;
974 	int i, serdn = -1, serdincrement = 1, len = 0;
975 	char *serdsuffix = NULL, *serdt = NULL, *ptr;
976 	struct evalue *ep;
977 
978 	ASSERT(sp->t == N_UPSET);
979 	ASSERT(ffep != NULL);
980 
981 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
982 	    (void *)"n", (lut_cmp)strcmp)) != NULL) {
983 		ASSERT(ep->t == UINT64);
984 		serdn = (int)ep->v;
985 	}
986 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
987 	    (void *)"t", (lut_cmp)strcmp)) != NULL) {
988 		ASSERT(ep->t == STRING);
989 		serdt = (char *)(uintptr_t)ep->v;
990 	}
991 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
992 	    (void *)"suffix", (lut_cmp)strcmp)) != NULL) {
993 		ASSERT(ep->t == STRING);
994 		serdsuffix = (char *)(uintptr_t)ep->v;
995 	}
996 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
997 	    (void *)"increment", (lut_cmp)strcmp)) != NULL) {
998 		ASSERT(ep->t == UINT64);
999 		serdincrement = (int)ep->v;
1000 	}
1001 
1002 	/*
1003 	 * obtain instanced SERD engine from the upset sp.  from this
1004 	 * derive serdname, the string used to identify the SERD engine.
1005 	 */
1006 	serdinst = eventprop_lookup(sp, L_engine);
1007 
1008 	if (serdinst == NULL)
1009 		return (-1);
1010 
1011 	serdname = ipath2str(serdinst->u.stmt.np->u.event.ename->u.name.s,
1012 	    NULL);
1013 	serdresource = ipath2str(NULL,
1014 	    ipath(serdinst->u.stmt.np->u.event.epname));
1015 
1016 	len = strlen(serdname) + strlen(serdresource) + 2;
1017 	if (serdsuffix != NULL)
1018 		len += strlen(serdsuffix);
1019 
1020 	ptr = MALLOC(len);
1021 	if (serdsuffix != NULL) {
1022 		(void) snprintf(ptr, len, "%s%s@%s", serdname, serdsuffix,
1023 		    serdresource);
1024 	} else {
1025 		(void) snprintf(ptr, len, "%s@%s", serdname, serdresource);
1026 	}
1027 	FREE(serdname);
1028 	FREE(serdresource);
1029 	serdname = ptr;
1030 
1031 	/* handle serd engine "id" property, if there is one */
1032 	if ((nid =
1033 	    lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) {
1034 		struct evalue *gval;
1035 		char suffixbuf[200];
1036 		char *suffix;
1037 		char *nserdname;
1038 		size_t nname;
1039 
1040 		out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname);
1041 		ptree_name_iter(O_ALTFP|O_NONL, nid);
1042 
1043 		ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t));
1044 
1045 		if ((gval = lut_lookup(fmep->globals,
1046 		    (void *)nid->u.globid.s, NULL)) == NULL) {
1047 			out(O_ALTFP, " undefined");
1048 		} else if (gval->t == UINT64) {
1049 			out(O_ALTFP, " %llu", gval->v);
1050 			(void) sprintf(suffixbuf, "%llu", gval->v);
1051 			suffix = suffixbuf;
1052 		} else {
1053 			out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v);
1054 			suffix = (char *)(uintptr_t)gval->v;
1055 		}
1056 
1057 		nname = strlen(serdname) + strlen(suffix) + 2;
1058 		nserdname = MALLOC(nname);
1059 		(void) snprintf(nserdname, nname, "%s:%s", serdname, suffix);
1060 		FREE(serdname);
1061 		serdname = nserdname;
1062 	}
1063 
1064 	/*
1065 	 * if the engine is empty, and we have an override for n/t then
1066 	 * destroy and recreate it.
1067 	 */
1068 	if ((serdn != -1 || serdt != NULL) && fmd_serd_exists(hdl, serdname) &&
1069 	    fmd_serd_empty(hdl, serdname))
1070 		fmd_serd_destroy(hdl, serdname);
1071 
1072 	if (!fmd_serd_exists(hdl, serdname)) {
1073 		struct node *nN, *nT;
1074 		const char *s;
1075 		struct node *nodep;
1076 		struct config *cp;
1077 		char *path;
1078 		uint_t nval;
1079 		hrtime_t tval;
1080 		const char *name;
1081 		char *tptr;
1082 		char *serd_name;
1083 		int i;
1084 		int tmplen;
1085 		char *ptr;
1086 		int got_n_override = 0, got_t_override = 0;
1087 
1088 		/* no SERD engine yet, so create it */
1089 		nodep = serdinst->u.stmt.np->u.event.epname;
1090 		tmplen = strlen(serdinst->u.stmt.np->u.event.ename->u.name.s)
1091 		    + 2;
1092 		if (serdsuffix != NULL)
1093 			tmplen += strlen(serdsuffix);
1094 		tptr = MALLOC(tmplen);
1095 		if (serdsuffix != NULL) {
1096 			(void) snprintf(tptr, len, "%s%s",
1097 			    serdinst->u.stmt.np->u.event.ename->u.name.s,
1098 			    serdsuffix);
1099 		} else {
1100 			(void) snprintf(tptr, len, "%s",
1101 			    serdinst->u.stmt.np->u.event.ename->u.name.s);
1102 		}
1103 		name = (const char *)tptr;
1104 		path = ipath2str(NULL, ipath(nodep));
1105 		cp = config_lookup(fmep->config, path, 0);
1106 		FREE((void *)path);
1107 
1108 		/*
1109 		 * We allow serd paramaters to be overridden, either from
1110 		 * eft.conf file values (if Serd_Override is set) or from
1111 		 * driver properties (for "serd.io.device" engines).
1112 		 */
1113 		if (Serd_Override != NULL) {
1114 			char *save_ptr, *ptr1, *ptr2, *ptr3;
1115 			ptr3 = save_ptr = STRDUP(Serd_Override);
1116 			while (*ptr3 != '\0') {
1117 				ptr1 = strchr(ptr3, ',');
1118 				*ptr1 = '\0';
1119 				if (strcmp(ptr3, name) == 0) {
1120 					ptr2 =  strchr(ptr1 + 1, ',');
1121 					*ptr2 = '\0';
1122 					nval = atoi(ptr1 + 1);
1123 					out(O_ALTFP, "serd override %s_n %d",
1124 					    name, nval);
1125 					ptr3 =  strchr(ptr2 + 1, ' ');
1126 					if (ptr3)
1127 						*ptr3 = '\0';
1128 					ptr = STRDUP(ptr2 + 1);
1129 					out(O_ALTFP, "serd override %s_t %s",
1130 					    name, ptr);
1131 					got_n_override = 1;
1132 					got_t_override = 1;
1133 					break;
1134 				} else {
1135 					ptr2 =  strchr(ptr1 + 1, ',');
1136 					ptr3 =  strchr(ptr2 + 1, ' ');
1137 					if (ptr3 == NULL)
1138 						break;
1139 				}
1140 				ptr3++;
1141 			}
1142 			FREE(save_ptr);
1143 		}
1144 
1145 		if (cp && got_n_override == 0) {
1146 			/*
1147 			 * convert serd engine name into property name
1148 			 */
1149 			serd_name = MALLOC(strlen(name) + 3);
1150 			for (i = 0; i < strlen(name); i++) {
1151 				if (name[i] == '.')
1152 					serd_name[i] = '_';
1153 				else
1154 					serd_name[i] = name[i];
1155 			}
1156 			serd_name[i++] = '_';
1157 			serd_name[i++] = 'n';
1158 			serd_name[i] = '\0';
1159 			if (s = config_getprop(cp, serd_name)) {
1160 				nval = atoi(s);
1161 				out(O_ALTFP, "serd override %s_n %s", name, s);
1162 				got_n_override = 1;
1163 			}
1164 			serd_name[i - 1] = 't';
1165 			if (s = config_getprop(cp, serd_name)) {
1166 				ptr = STRDUP(s);
1167 				out(O_ALTFP, "serd override %s_t %s", name, s);
1168 				got_t_override = 1;
1169 			}
1170 			FREE(serd_name);
1171 		}
1172 
1173 		if (serdn != -1 && got_n_override == 0) {
1174 			nval = serdn;
1175 			out(O_ALTFP, "serd override %s_n %d", name, serdn);
1176 			got_n_override = 1;
1177 		}
1178 		if (serdt != NULL && got_t_override == 0) {
1179 			ptr = STRDUP(serdt);
1180 			out(O_ALTFP, "serd override %s_t %s", name, serdt);
1181 			got_t_override = 1;
1182 		}
1183 
1184 		if (!got_n_override) {
1185 			nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N,
1186 			    NULL);
1187 			ASSERT(nN->t == T_NUM);
1188 			nval = (uint_t)nN->u.ull;
1189 		}
1190 		if (!got_t_override) {
1191 			nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T,
1192 			    NULL);
1193 			ASSERT(nT->t == T_TIMEVAL);
1194 			tval = (hrtime_t)nT->u.ull;
1195 		} else {
1196 			const unsigned long long *ullp;
1197 			const char *suffix;
1198 			int len;
1199 
1200 			len = strspn(ptr, "0123456789");
1201 			suffix = stable(&ptr[len]);
1202 			ullp = (unsigned long long *)lut_lookup(Timesuffixlut,
1203 			    (void *)suffix, NULL);
1204 			ptr[len] = '\0';
1205 			tval = strtoull(ptr, NULL, 0) * (ullp ? *ullp : 1ll);
1206 			FREE(ptr);
1207 		}
1208 		fmd_serd_create(hdl, serdname, nval, tval);
1209 		FREE(tptr);
1210 	}
1211 
1212 	newentp = MALLOC(sizeof (*newentp));
1213 	newentp->ename = stable(serdinst->u.stmt.np->u.event.ename->u.name.s);
1214 	newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname);
1215 	newentp->hdl = hdl;
1216 	if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) {
1217 		SerdEngines = lut_add(SerdEngines, (void *)newentp,
1218 		    (void *)newentp, (lut_cmp)serd_cmp);
1219 		Serd_need_save = 1;
1220 		serd_save();
1221 	} else {
1222 		FREE(newentp);
1223 	}
1224 
1225 
1226 	/*
1227 	 * increment SERD engine.  if engine fires, reset serd
1228 	 * engine and return trip_strcode if required.
1229 	 */
1230 	for (i = 0; i < serdincrement; i++) {
1231 		if (fmd_serd_record(hdl, serdname, ffep)) {
1232 			fmd_case_add_serd(hdl, fmcase, serdname);
1233 			fmd_serd_reset(hdl, serdname);
1234 
1235 			if (ippp) {
1236 				struct node *tripinst =
1237 				    lut_lookup(serdinst->u.stmt.lutp,
1238 				    (void *)L_trip, NULL);
1239 				ASSERT(tripinst != NULL);
1240 				*enamep = tripinst->u.event.ename->u.name.s;
1241 				*ippp = ipath(tripinst->u.event.epname);
1242 				out(O_ALTFP|O_NONL,
1243 				    "[engine fired: %s, sending: ", serdname);
1244 				ipath_print(O_ALTFP|O_NONL, *enamep, *ippp);
1245 				out(O_ALTFP, "]");
1246 			} else {
1247 				out(O_ALTFP, "[engine fired: %s, no trip]",
1248 				    serdname);
1249 			}
1250 			FREE(serdname);
1251 			return (1);
1252 		}
1253 	}
1254 
1255 	FREE(serdname);
1256 	return (0);
1257 }
1258 
1259 /*
1260  * search a suspect list for upsets.  feed each upset to serd_eval() and
1261  * build up tripped[], an array of ereports produced by the firing of
1262  * any SERD engines.  then feed each ereport back into
1263  * fme_receive_report().
1264  *
1265  * returns ntrip, the number of these ereports produced.
1266  */
1267 static int
1268 upsets_eval(struct fme *fmep, fmd_event_t *ffep)
1269 {
1270 	/* we build an array of tripped ereports that we send ourselves */
1271 	struct {
1272 		const char *ename;
1273 		const struct ipath *ipp;
1274 	} *tripped;
1275 	struct event *sp;
1276 	int ntrip, nupset, i;
1277 
1278 	/*
1279 	 * count the number of upsets to determine the upper limit on
1280 	 * expected trip ereport strings.  remember that one upset can
1281 	 * lead to at most one ereport.
1282 	 */
1283 	nupset = 0;
1284 	for (sp = fmep->suspects; sp; sp = sp->suspects) {
1285 		if (sp->t == N_UPSET)
1286 			nupset++;
1287 	}
1288 
1289 	if (nupset == 0)
1290 		return (0);
1291 
1292 	/*
1293 	 * get to this point if we have upsets and expect some trip
1294 	 * ereports
1295 	 */
1296 	tripped = alloca(sizeof (*tripped) * nupset);
1297 	bzero((void *)tripped, sizeof (*tripped) * nupset);
1298 
1299 	ntrip = 0;
1300 	for (sp = fmep->suspects; sp; sp = sp->suspects)
1301 		if (sp->t == N_UPSET &&
1302 		    serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp,
1303 		    &tripped[ntrip].ename, &tripped[ntrip].ipp) == 1)
1304 			ntrip++;
1305 
1306 	for (i = 0; i < ntrip; i++) {
1307 		struct event *ep, *nep;
1308 		struct fme *nfmep;
1309 		fmd_case_t *fmcase;
1310 		const struct ipath *ipp;
1311 		const char *eventstring;
1312 		int prev_verbose;
1313 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1314 		enum fme_state state;
1315 
1316 		/*
1317 		 * First try and evaluate a case with the trip ereport plus
1318 		 * all the other ereports that cause the trip. If that fails
1319 		 * to evaluate then try again with just this ereport on its own.
1320 		 */
1321 		out(O_ALTFP|O_NONL, "fme_receive_report_serd: ");
1322 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1323 		out(O_ALTFP|O_STAMP, NULL);
1324 		ep = fmep->e0;
1325 		eventstring = ep->enode->u.event.ename->u.name.s;
1326 		ipp = ep->ipp;
1327 		prune_propagations(eventstring, ipp);
1328 
1329 		/*
1330 		 * create a duplicate fme and case
1331 		 */
1332 		fmcase = fmd_case_open(fmep->hdl, NULL);
1333 		out(O_ALTFP|O_NONL, "duplicate fme for event [");
1334 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1335 		out(O_ALTFP, " ]");
1336 		if ((nfmep = newfme(eventstring, ipp, fmep->hdl,
1337 		    fmcase)) == NULL) {
1338 			out(O_ALTFP|O_NONL, "[");
1339 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1340 			out(O_ALTFP, " CANNOT DIAGNOSE]");
1341 			publish_undiagnosable(fmep->hdl, ffep, fmcase);
1342 			continue;
1343 		}
1344 		Open_fme_count++;
1345 		nfmep->pull = fmep->pull;
1346 		init_fme_bufs(nfmep);
1347 		out(O_ALTFP|O_NONL, "[");
1348 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1349 		out(O_ALTFP, " created FME%d, case %s]", nfmep->id,
1350 		    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
1351 		if (ffep) {
1352 			fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep);
1353 			fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep);
1354 			nfmep->e0r = ffep;
1355 		}
1356 
1357 		/*
1358 		 * add the original ereports
1359 		 */
1360 		for (ep = fmep->observations; ep; ep = ep->observations) {
1361 			eventstring = ep->enode->u.event.ename->u.name.s;
1362 			ipp = ep->ipp;
1363 			out(O_ALTFP|O_NONL, "adding event [");
1364 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1365 			out(O_ALTFP, " ]");
1366 			nep = itree_lookup(nfmep->eventtree, eventstring, ipp);
1367 			if (nep->count++ == 0) {
1368 				nep->observations = nfmep->observations;
1369 				nfmep->observations = nep;
1370 				serialize_observation(nfmep, eventstring, ipp);
1371 				nep->nvp = evnv_dupnvl(ep->nvp);
1372 			}
1373 			if (ep->ffep && ep->ffep != ffep)
1374 				fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase,
1375 				    ep->ffep);
1376 			stats_counter_bump(nfmep->Rcount);
1377 		}
1378 
1379 		/*
1380 		 * add the serd trigger ereport
1381 		 */
1382 		if ((ep = itree_lookup(nfmep->eventtree, tripped[i].ename,
1383 		    tripped[i].ipp)) == NULL) {
1384 			/*
1385 			 * The trigger ereport is not in the instance tree. It
1386 			 * was presumably removed by prune_propagations() as
1387 			 * this combination of events is not present in the
1388 			 * rules.
1389 			 */
1390 			out(O_ALTFP, "upsets_eval: e0 not in instance tree");
1391 			Undiag_reason = UD_VAL_BADEVENTI;
1392 			goto retry_lone_ereport;
1393 		}
1394 		out(O_ALTFP|O_NONL, "adding event [");
1395 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1396 		out(O_ALTFP, " ]");
1397 		nfmep->ecurrent = ep;
1398 		ep->nvp = NULL;
1399 		ep->count = 1;
1400 		ep->observations = nfmep->observations;
1401 		nfmep->observations = ep;
1402 
1403 		/*
1404 		 * just peek first.
1405 		 */
1406 		nfmep->peek = 1;
1407 		prev_verbose = Verbose;
1408 		if (Debug == 0)
1409 			Verbose = 0;
1410 		lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep);
1411 		state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay);
1412 		nfmep->peek = 0;
1413 		Verbose = prev_verbose;
1414 		if (state == FME_DISPROVED) {
1415 			out(O_ALTFP, "upsets_eval: hypothesis disproved");
1416 			Undiag_reason = UD_VAL_UNSOLVD;
1417 retry_lone_ereport:
1418 			/*
1419 			 * However the trigger ereport on its own might be
1420 			 * diagnosable, so check for that. Undo the new fme
1421 			 * and case we just created and call fme_receive_report.
1422 			 */
1423 			out(O_ALTFP|O_NONL, "[");
1424 			ipath_print(O_ALTFP|O_NONL, tripped[i].ename,
1425 			    tripped[i].ipp);
1426 			out(O_ALTFP, " retrying with just trigger ereport]");
1427 			itree_free(nfmep->eventtree);
1428 			nfmep->eventtree = NULL;
1429 			structconfig_free(nfmep->config);
1430 			nfmep->config = NULL;
1431 			destroy_fme_bufs(nfmep);
1432 			fmd_case_close(nfmep->hdl, nfmep->fmcase);
1433 			fme_receive_report(fmep->hdl, ffep,
1434 			    tripped[i].ename, tripped[i].ipp, NULL);
1435 			continue;
1436 		}
1437 
1438 		/*
1439 		 * and evaluate
1440 		 */
1441 		serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp);
1442 		fme_eval(nfmep, ffep);
1443 	}
1444 
1445 	return (ntrip);
1446 }
1447 
1448 /*
1449  * fme_receive_external_report -- call when an external ereport comes in
1450  *
1451  * this routine just converts the relevant information from the ereport
1452  * into a format used internally and passes it on to fme_receive_report().
1453  */
1454 void
1455 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1456     const char *class)
1457 {
1458 	struct node		*epnamenp;
1459 	fmd_case_t		*fmcase;
1460 	const struct ipath	*ipp;
1461 
1462 	class = stable(class);
1463 
1464 	/* Get the component path from the ereport */
1465 	epnamenp = platform_getpath(nvl);
1466 
1467 	/* See if we ended up without a path. */
1468 	if (epnamenp == NULL) {
1469 		/* See if class permits silent discard on unknown component. */
1470 		if (lut_lookup(Ereportenames_discard, (void *)class, NULL)) {
1471 			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
1472 			    "to component path, but silent discard allowed.",
1473 			    class);
1474 		} else {
1475 			/*
1476 			 * XFILE: Failure to find a component is bad unless
1477 			 * 'discard_if_config_unknown=1' was specified in the
1478 			 * ereport definition. Indicate undiagnosable.
1479 			 */
1480 			out(O_ALTFP, "XFILE: Unable to map \"%s\" ereport "
1481 			    "to component path.", class);
1482 			Undiag_reason = UD_VAL_NOPATH;
1483 			fmcase = fmd_case_open(hdl, NULL);
1484 			publish_undiagnosable(hdl, ffep, fmcase);
1485 		}
1486 		return;
1487 	}
1488 
1489 	ipp = ipath(epnamenp);
1490 	tree_free(epnamenp);
1491 	fme_receive_report(hdl, ffep, class, ipp, nvl);
1492 }
1493 
1494 /*ARGSUSED*/
1495 void
1496 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1497     const char *eventstring)
1498 {
1499 	char *uuid;
1500 	nvlist_t **nva;
1501 	uint_t nvc;
1502 	const struct ipath *ipp;
1503 
1504 	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
1505 	    nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
1506 	    &nva, &nvc) != 0) {
1507 		out(O_ALTFP, "No uuid or fault list for list.repaired event");
1508 		return;
1509 	}
1510 
1511 	out(O_ALTFP, "Processing list.repaired from case %s", uuid);
1512 
1513 	while (nvc-- != 0) {
1514 		/*
1515 		 * Reset any istat or serd engine associated with this path.
1516 		 */
1517 		char *path;
1518 
1519 		if ((ipp = platform_fault2ipath(*nva++)) == NULL)
1520 			continue;
1521 
1522 		path = ipath2str(NULL, ipp);
1523 		out(O_ALTFP, "fme_receive_repair_list: resetting state for %s",
1524 		    path);
1525 		FREE(path);
1526 
1527 		lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp);
1528 		istat_save();
1529 
1530 		lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp);
1531 		serd_save();
1532 	}
1533 }
1534 
1535 /*ARGSUSED*/
1536 void
1537 fme_receive_topology_change(void)
1538 {
1539 	lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL);
1540 	istat_save();
1541 
1542 	lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL);
1543 	serd_save();
1544 }
1545 
1546 static int mark_arrows(struct fme *fmep, struct event *ep, int mark,
1547     unsigned long long at_latest_by, unsigned long long *pdelay, int keep);
1548 
1549 /* ARGSUSED */
1550 static void
1551 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
1552 {
1553 	struct bubble *bp;
1554 	struct arrowlist *ap;
1555 
1556 	ep->cached_state = 0;
1557 	ep->keep_in_tree = 0;
1558 	for (bp = itree_next_bubble(ep, NULL); bp;
1559 	    bp = itree_next_bubble(ep, bp)) {
1560 		if (bp->t != B_FROM)
1561 			continue;
1562 		bp->mark = 0;
1563 		for (ap = itree_next_arrow(bp, NULL); ap;
1564 		    ap = itree_next_arrow(bp, ap))
1565 			ap->arrowp->mark = 0;
1566 	}
1567 }
1568 
1569 static void
1570 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
1571     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
1572 {
1573 	struct event *ep;
1574 	struct fme *fmep = NULL;
1575 	struct fme *ofmep = NULL;
1576 	struct fme *cfmep, *svfmep;
1577 	int matched = 0;
1578 	nvlist_t *defect;
1579 	fmd_case_t *fmcase;
1580 
1581 	out(O_ALTFP|O_NONL, "fme_receive_report: ");
1582 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1583 	out(O_ALTFP|O_STAMP, NULL);
1584 
1585 	/* decide which FME it goes to */
1586 	for (fmep = FMElist; fmep; fmep = fmep->next) {
1587 		int prev_verbose;
1588 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1589 		enum fme_state state;
1590 		nvlist_t *pre_peek_nvp = NULL;
1591 
1592 		if (fmep->overflow) {
1593 			if (!(fmd_case_closed(fmep->hdl, fmep->fmcase)))
1594 				ofmep = fmep;
1595 
1596 			continue;
1597 		}
1598 
1599 		/*
1600 		 * ignore solved or closed cases
1601 		 */
1602 		if (fmep->posted_suspects ||
1603 		    fmd_case_solved(fmep->hdl, fmep->fmcase) ||
1604 		    fmd_case_closed(fmep->hdl, fmep->fmcase))
1605 			continue;
1606 
1607 		/* look up event in event tree for this FME */
1608 		if ((ep = itree_lookup(fmep->eventtree,
1609 		    eventstring, ipp)) == NULL)
1610 			continue;
1611 
1612 		/* note observation */
1613 		fmep->ecurrent = ep;
1614 		if (ep->count++ == 0) {
1615 			/* link it into list of observations seen */
1616 			ep->observations = fmep->observations;
1617 			fmep->observations = ep;
1618 			ep->nvp = evnv_dupnvl(nvl);
1619 		} else {
1620 			/* use new payload values for peek */
1621 			pre_peek_nvp = ep->nvp;
1622 			ep->nvp = evnv_dupnvl(nvl);
1623 		}
1624 
1625 		/* tell hypothesise() not to mess with suspect list */
1626 		fmep->peek = 1;
1627 
1628 		/* don't want this to be verbose (unless Debug is set) */
1629 		prev_verbose = Verbose;
1630 		if (Debug == 0)
1631 			Verbose = 0;
1632 
1633 		lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
1634 		state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
1635 
1636 		fmep->peek = 0;
1637 
1638 		/* put verbose flag back */
1639 		Verbose = prev_verbose;
1640 
1641 		if (state != FME_DISPROVED) {
1642 			/* found an FME that explains the ereport */
1643 			matched++;
1644 			out(O_ALTFP|O_NONL, "[");
1645 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1646 			out(O_ALTFP, " explained by FME%d]", fmep->id);
1647 
1648 			if (pre_peek_nvp)
1649 				nvlist_free(pre_peek_nvp);
1650 
1651 			if (ep->count == 1)
1652 				serialize_observation(fmep, eventstring, ipp);
1653 
1654 			if (ffep) {
1655 				fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1656 				ep->ffep = ffep;
1657 			}
1658 
1659 			stats_counter_bump(fmep->Rcount);
1660 
1661 			/* re-eval FME */
1662 			fme_eval(fmep, ffep);
1663 		} else {
1664 
1665 			/* not a match, undo noting of observation */
1666 			fmep->ecurrent = NULL;
1667 			if (--ep->count == 0) {
1668 				/* unlink it from observations */
1669 				fmep->observations = ep->observations;
1670 				ep->observations = NULL;
1671 				nvlist_free(ep->nvp);
1672 				ep->nvp = NULL;
1673 			} else {
1674 				nvlist_free(ep->nvp);
1675 				ep->nvp = pre_peek_nvp;
1676 			}
1677 		}
1678 	}
1679 
1680 	if (matched)
1681 		return;	/* explained by at least one existing FME */
1682 
1683 	/* clean up closed fmes */
1684 	cfmep = ClosedFMEs;
1685 	while (cfmep != NULL) {
1686 		svfmep = cfmep->next;
1687 		destroy_fme(cfmep);
1688 		cfmep = svfmep;
1689 	}
1690 	ClosedFMEs = NULL;
1691 	prune_propagations(eventstring, ipp);
1692 
1693 	if (ofmep) {
1694 		out(O_ALTFP|O_NONL, "[");
1695 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1696 		out(O_ALTFP, " ADDING TO OVERFLOW FME]");
1697 		if (ffep)
1698 			fmd_case_add_ereport(hdl, ofmep->fmcase, ffep);
1699 
1700 		return;
1701 
1702 	} else if (Max_fme && (Open_fme_count >= Max_fme)) {
1703 		out(O_ALTFP|O_NONL, "[");
1704 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1705 		out(O_ALTFP, " MAX OPEN FME REACHED]");
1706 
1707 		fmcase = fmd_case_open(hdl, NULL);
1708 
1709 		/* Create overflow fme */
1710 		if ((fmep = newfme(eventstring, ipp, hdl, fmcase)) == NULL) {
1711 			out(O_ALTFP|O_NONL, "[");
1712 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1713 			out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]");
1714 			publish_undiagnosable(hdl, ffep, fmcase);
1715 			return;
1716 		}
1717 
1718 		Open_fme_count++;
1719 
1720 		init_fme_bufs(fmep);
1721 		fmep->overflow = B_TRUE;
1722 
1723 		if (ffep)
1724 			fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1725 
1726 		Undiag_reason = UD_VAL_MAXFME;
1727 		defect = fmd_nvl_create_fault(hdl,
1728 		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
1729 		(void) nvlist_add_string(defect, UNDIAG_REASON,
1730 		    undiag_2reason_str(Undiag_reason));
1731 		fmd_case_add_suspect(hdl, fmep->fmcase, defect);
1732 		fmd_case_solve(hdl, fmep->fmcase);
1733 		Undiag_reason = UD_VAL_UNKNOWN;
1734 		return;
1735 	}
1736 
1737 	/* open a case */
1738 	fmcase = fmd_case_open(hdl, NULL);
1739 
1740 	/* start a new FME */
1741 	if ((fmep = newfme(eventstring, ipp, hdl, fmcase)) == NULL) {
1742 		out(O_ALTFP|O_NONL, "[");
1743 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1744 		out(O_ALTFP, " CANNOT DIAGNOSE]");
1745 		publish_undiagnosable(hdl, ffep, fmcase);
1746 		return;
1747 	}
1748 
1749 	Open_fme_count++;
1750 
1751 	init_fme_bufs(fmep);
1752 
1753 	out(O_ALTFP|O_NONL, "[");
1754 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1755 	out(O_ALTFP, " created FME%d, case %s]", fmep->id,
1756 	    fmd_case_uuid(hdl, fmep->fmcase));
1757 
1758 	ep = fmep->e0;
1759 	ASSERT(ep != NULL);
1760 
1761 	/* note observation */
1762 	fmep->ecurrent = ep;
1763 	if (ep->count++ == 0) {
1764 		/* link it into list of observations seen */
1765 		ep->observations = fmep->observations;
1766 		fmep->observations = ep;
1767 		ep->nvp = evnv_dupnvl(nvl);
1768 		serialize_observation(fmep, eventstring, ipp);
1769 	} else {
1770 		/* new payload overrides any previous */
1771 		nvlist_free(ep->nvp);
1772 		ep->nvp = evnv_dupnvl(nvl);
1773 	}
1774 
1775 	stats_counter_bump(fmep->Rcount);
1776 
1777 	if (ffep) {
1778 		fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1779 		fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
1780 		fmep->e0r = ffep;
1781 		ep->ffep = ffep;
1782 	}
1783 
1784 	/* give the diagnosis algorithm a shot at the new FME state */
1785 	fme_eval(fmep, ffep);
1786 }
1787 
1788 void
1789 fme_status(int flags)
1790 {
1791 	struct fme *fmep;
1792 
1793 	if (FMElist == NULL) {
1794 		out(flags, "No fault management exercises underway.");
1795 		return;
1796 	}
1797 
1798 	for (fmep = FMElist; fmep; fmep = fmep->next)
1799 		fme_print(flags, fmep);
1800 }
1801 
1802 /*
1803  * "indent" routines used mostly for nicely formatted debug output, but also
1804  * for sanity checking for infinite recursion bugs.
1805  */
1806 
1807 #define	MAX_INDENT 1024
1808 static const char *indent_s[MAX_INDENT];
1809 static int current_indent;
1810 
1811 static void
1812 indent_push(const char *s)
1813 {
1814 	if (current_indent < MAX_INDENT)
1815 		indent_s[current_indent++] = s;
1816 	else
1817 		out(O_DIE, "unexpected recursion depth (%d)", current_indent);
1818 }
1819 
1820 static void
1821 indent_set(const char *s)
1822 {
1823 	current_indent = 0;
1824 	indent_push(s);
1825 }
1826 
1827 static void
1828 indent_pop(void)
1829 {
1830 	if (current_indent > 0)
1831 		current_indent--;
1832 	else
1833 		out(O_DIE, "recursion underflow");
1834 }
1835 
1836 static void
1837 indent(void)
1838 {
1839 	int i;
1840 	if (!Verbose)
1841 		return;
1842 	for (i = 0; i < current_indent; i++)
1843 		out(O_ALTFP|O_VERB|O_NONL, indent_s[i]);
1844 }
1845 
1846 #define	SLNEW		1
1847 #define	SLCHANGED	2
1848 #define	SLWAIT		3
1849 #define	SLDISPROVED	4
1850 
1851 static void
1852 print_suspects(int circumstance, struct fme *fmep)
1853 {
1854 	struct event *ep;
1855 
1856 	out(O_ALTFP|O_NONL, "[");
1857 	if (circumstance == SLCHANGED) {
1858 		out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, "
1859 		    "suspect list:", fmep->id, fme_state2str(fmep->state));
1860 	} else if (circumstance == SLWAIT) {
1861 		out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id,
1862 		    fmep->timer);
1863 		ptree_timeval(O_ALTFP|O_NONL, &fmep->wull);
1864 	} else if (circumstance == SLDISPROVED) {
1865 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id);
1866 	} else {
1867 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id);
1868 	}
1869 
1870 	if (circumstance == SLWAIT || circumstance == SLDISPROVED) {
1871 		out(O_ALTFP, "]");
1872 		return;
1873 	}
1874 
1875 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
1876 		out(O_ALTFP|O_NONL, " ");
1877 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
1878 	}
1879 	out(O_ALTFP, "]");
1880 }
1881 
1882 static struct node *
1883 eventprop_lookup(struct event *ep, const char *propname)
1884 {
1885 	return (lut_lookup(ep->props, (void *)propname, NULL));
1886 }
1887 
1888 #define	MAXDIGITIDX	23
1889 static char numbuf[MAXDIGITIDX + 1];
1890 
1891 static int
1892 node2uint(struct node *n, uint_t *valp)
1893 {
1894 	struct evalue value;
1895 	struct lut *globals = NULL;
1896 
1897 	if (n == NULL)
1898 		return (1);
1899 
1900 	/*
1901 	 * check value.v since we are being asked to convert an unsigned
1902 	 * long long int to an unsigned int
1903 	 */
1904 	if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) ||
1905 	    value.t != UINT64 || value.v > (1ULL << 32))
1906 		return (1);
1907 
1908 	*valp = (uint_t)value.v;
1909 
1910 	return (0);
1911 }
1912 
1913 static nvlist_t *
1914 node2fmri(struct node *n)
1915 {
1916 	nvlist_t **pa, *f, *p;
1917 	struct node *nc;
1918 	uint_t depth = 0;
1919 	char *numstr, *nullbyte;
1920 	char *failure;
1921 	int err, i;
1922 
1923 	/* XXX do we need to be able to handle a non-T_NAME node? */
1924 	if (n == NULL || n->t != T_NAME)
1925 		return (NULL);
1926 
1927 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
1928 		if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM)
1929 			break;
1930 		depth++;
1931 	}
1932 
1933 	if (nc != NULL) {
1934 		/* We bailed early, something went wrong */
1935 		return (NULL);
1936 	}
1937 
1938 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
1939 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
1940 	pa = alloca(depth * sizeof (nvlist_t *));
1941 	for (i = 0; i < depth; i++)
1942 		pa[i] = NULL;
1943 
1944 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
1945 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
1946 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
1947 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
1948 	if (err != 0) {
1949 		failure = "basic construction of FMRI failed";
1950 		goto boom;
1951 	}
1952 
1953 	numbuf[MAXDIGITIDX] = '\0';
1954 	nullbyte = &numbuf[MAXDIGITIDX];
1955 	i = 0;
1956 
1957 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
1958 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
1959 		if (err != 0) {
1960 			failure = "alloc of an hc-pair failed";
1961 			goto boom;
1962 		}
1963 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s);
1964 		numstr = ulltostr(nc->u.name.child->u.ull, nullbyte);
1965 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
1966 		if (err != 0) {
1967 			failure = "construction of an hc-pair failed";
1968 			goto boom;
1969 		}
1970 		pa[i++] = p;
1971 	}
1972 
1973 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
1974 	if (err == 0) {
1975 		for (i = 0; i < depth; i++)
1976 			if (pa[i] != NULL)
1977 				nvlist_free(pa[i]);
1978 		return (f);
1979 	}
1980 	failure = "addition of hc-pair array to FMRI failed";
1981 
1982 boom:
1983 	for (i = 0; i < depth; i++)
1984 		if (pa[i] != NULL)
1985 			nvlist_free(pa[i]);
1986 	nvlist_free(f);
1987 	out(O_DIE, "%s", failure);
1988 	/*NOTREACHED*/
1989 	return (NULL);
1990 }
1991 
1992 /* an ipath cache entry is an array of these, with s==NULL at the end */
1993 struct ipath {
1994 	const char *s;	/* component name (in stable) */
1995 	int i;		/* instance number */
1996 };
1997 
1998 static nvlist_t *
1999 ipath2fmri(struct ipath *ipath)
2000 {
2001 	nvlist_t **pa, *f, *p;
2002 	uint_t depth = 0;
2003 	char *numstr, *nullbyte;
2004 	char *failure;
2005 	int err, i;
2006 	struct ipath *ipp;
2007 
2008 	for (ipp = ipath; ipp->s != NULL; ipp++)
2009 		depth++;
2010 
2011 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
2012 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
2013 	pa = alloca(depth * sizeof (nvlist_t *));
2014 	for (i = 0; i < depth; i++)
2015 		pa[i] = NULL;
2016 
2017 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
2018 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
2019 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
2020 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
2021 	if (err != 0) {
2022 		failure = "basic construction of FMRI failed";
2023 		goto boom;
2024 	}
2025 
2026 	numbuf[MAXDIGITIDX] = '\0';
2027 	nullbyte = &numbuf[MAXDIGITIDX];
2028 	i = 0;
2029 
2030 	for (ipp = ipath; ipp->s != NULL; ipp++) {
2031 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2032 		if (err != 0) {
2033 			failure = "alloc of an hc-pair failed";
2034 			goto boom;
2035 		}
2036 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s);
2037 		numstr = ulltostr(ipp->i, nullbyte);
2038 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2039 		if (err != 0) {
2040 			failure = "construction of an hc-pair failed";
2041 			goto boom;
2042 		}
2043 		pa[i++] = p;
2044 	}
2045 
2046 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2047 	if (err == 0) {
2048 		for (i = 0; i < depth; i++)
2049 			if (pa[i] != NULL)
2050 				nvlist_free(pa[i]);
2051 		return (f);
2052 	}
2053 	failure = "addition of hc-pair array to FMRI failed";
2054 
2055 boom:
2056 	for (i = 0; i < depth; i++)
2057 		if (pa[i] != NULL)
2058 			nvlist_free(pa[i]);
2059 	nvlist_free(f);
2060 	out(O_DIE, "%s", failure);
2061 	/*NOTREACHED*/
2062 	return (NULL);
2063 }
2064 
/*
 * avg -- return sum / cnt rounded to the nearest integer (a fractional
 *	part of one half or more rounds up).
 *
 * The sum is widened to unsigned long long before it is scaled by ten, so
 * the scaling cannot wrap in 32-bit arithmetic.  A cnt of zero yields 0
 * rather than a division by zero.
 */
static uint_t
avg(uint_t sum, uint_t cnt)
{
	unsigned long long s;

	if (cnt == 0)
		return (0);

	s = (unsigned long long)sum * 10;

	return ((s / cnt / 10) + (((s / cnt % 10) >= 5) ? 1 : 0));
}
2072 
/*
 * percentof -- return part as a percentage of whole, rounded to the
 *	nearest integer (a fractional part of one half or more rounds up).
 *
 * part is widened to unsigned long long before it is scaled by 1000, so
 * the scaling cannot wrap in 32-bit arithmetic.  A whole of zero yields 0
 * rather than a division by zero.  The result is returned as a uint8_t.
 */
static uint8_t
percentof(uint_t part, uint_t whole)
{
	unsigned long long p;

	if (whole == 0)
		return (0);

	p = (unsigned long long)part * 1000;

	return ((p / whole / 10) + (((p / whole % 10) >= 5) ? 1 : 0));
}
2080 
2081 struct rsl {
2082 	struct event *suspect;
2083 	nvlist_t *asru;
2084 	nvlist_t *fru;
2085 	nvlist_t *rsrc;
2086 };
2087 
2088 static void publish_suspects(struct fme *fmep, struct rsl *srl);
2089 
2090 /*
2091  *  rslfree -- free internal members of struct rsl not expected to be
2092  *	freed elsewhere.
2093  */
2094 static void
2095 rslfree(struct rsl *freeme)
2096 {
2097 	if (freeme->asru != NULL)
2098 		nvlist_free(freeme->asru);
2099 	if (freeme->fru != NULL)
2100 		nvlist_free(freeme->fru);
2101 	if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru)
2102 		nvlist_free(freeme->rsrc);
2103 }
2104 
2105 /*
2106  *  rslcmp -- compare two rsl structures.  Use the following
2107  *	comparisons to establish cardinality:
2108  *
2109  *	1. Name of the suspect's class. (simple strcmp)
2110  *	2. Name of the suspect's ASRU. (trickier, since nvlist)
2111  *
2112  */
2113 static int
2114 rslcmp(const void *a, const void *b)
2115 {
2116 	struct rsl *r1 = (struct rsl *)a;
2117 	struct rsl *r2 = (struct rsl *)b;
2118 	int rv;
2119 
2120 	rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s,
2121 	    r2->suspect->enode->u.event.ename->u.name.s);
2122 	if (rv != 0)
2123 		return (rv);
2124 
2125 	if (r1->rsrc == NULL && r2->rsrc == NULL)
2126 		return (0);
2127 	if (r1->rsrc == NULL)
2128 		return (-1);
2129 	if (r2->rsrc == NULL)
2130 		return (1);
2131 	return (evnv_cmpnvl(r1->rsrc, r2->rsrc, 0));
2132 }
2133 
2134 /*
2135  *  rsluniq -- given an array of rsl structures, seek out and "remove"
2136  *	any duplicates.  Dups are "remove"d by NULLing the suspect pointer
2137  *	of the array element.  Removal also means updating the number of
2138  *	problems and the number of problems which are not faults.  User
2139  *	provides the first and last element pointers.
2140  */
2141 static void
2142 rsluniq(struct rsl *first, struct rsl *last, int *nprobs, int *nnonf)
2143 {
2144 	struct rsl *cr;
2145 
2146 	if (*nprobs == 1)
2147 		return;
2148 
2149 	/*
2150 	 *  At this point, we only expect duplicate defects.
2151 	 *  Eversholt's diagnosis algorithm prevents duplicate
2152 	 *  suspects, but we rewrite defects in the platform code after
2153 	 *  the diagnosis is made, and that can introduce new
2154 	 *  duplicates.
2155 	 */
2156 	while (first <= last) {
2157 		if (first->suspect == NULL || !is_defect(first->suspect->t)) {
2158 			first++;
2159 			continue;
2160 		}
2161 		cr = first + 1;
2162 		while (cr <= last) {
2163 			if (is_defect(first->suspect->t)) {
2164 				if (rslcmp(first, cr) == 0) {
2165 					cr->suspect = NULL;
2166 					rslfree(cr);
2167 					(*nprobs)--;
2168 					(*nnonf)--;
2169 				}
2170 			}
2171 			/*
2172 			 * assume all defects are in order after our
2173 			 * sort and short circuit here with "else break" ?
2174 			 */
2175 			cr++;
2176 		}
2177 		first++;
2178 	}
2179 }
2180 
2181 /*
2182  * get_resources -- for a given suspect, determine what ASRU, FRU and
2183  *     RSRC nvlists should be advertised in the final suspect list.
2184  */
2185 void
2186 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot)
2187 {
2188 	struct node *asrudef, *frudef;
2189 	nvlist_t *asru, *fru;
2190 	nvlist_t *rsrc = NULL;
2191 	char *pathstr;
2192 
2193 	/*
2194 	 * First find any ASRU and/or FRU defined in the
2195 	 * initial fault tree.
2196 	 */
2197 	asrudef = eventprop_lookup(sp, L_ASRU);
2198 	frudef = eventprop_lookup(sp, L_FRU);
2199 
2200 	/*
2201 	 * Create FMRIs based on those definitions
2202 	 */
2203 	asru = node2fmri(asrudef);
2204 	fru = node2fmri(frudef);
2205 	pathstr = ipath2str(NULL, sp->ipp);
2206 
2207 	/*
2208 	 *  Allow for platform translations of the FMRIs
2209 	 */
2210 	platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc,
2211 	    pathstr);
2212 
2213 	FREE(pathstr);
2214 	rsrcs->suspect = sp;
2215 	rsrcs->asru = asru;
2216 	rsrcs->fru = fru;
2217 	rsrcs->rsrc = rsrc;
2218 }
2219 
2220 /*
2221  * trim_suspects -- prior to publishing, we may need to remove some
2222  *    suspects from the list.  If we're auto-closing upsets, we don't
2223  *    want any of those in the published list.  If the ASRUs for multiple
2224  *    defects resolve to the same ASRU (driver) we only want to publish
2225  *    that as a single suspect.
2226  */
2227 static int
2228 trim_suspects(struct fme *fmep, struct rsl *begin, struct rsl *begin2,
2229     fmd_event_t *ffep, int *mess_zero_nonfaultp)
2230 {
2231 	struct event *ep;
2232 	struct rsl *rp = begin;
2233 	struct rsl *rp2 = begin2;
2234 	int mess_zero_count = 0;
2235 	int serd_rval;
2236 	uint_t messval;
2237 
2238 	/* remove any unwanted upsets and populate our array */
2239 	for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
2240 		if (is_upset(ep->t))
2241 			continue;
2242 		serd_rval = serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, ep,
2243 		    NULL, NULL);
2244 		if (serd_rval == 0)
2245 			continue;
2246 		if (node2uint(eventprop_lookup(ep, L_message),
2247 		    &messval) == 0 && messval == 0) {
2248 			get_resources(ep, rp2, fmep->config);
2249 			rp2++;
2250 			mess_zero_count++;
2251 			if (!is_fault(ep->t))
2252 				(*mess_zero_nonfaultp)++;
2253 		} else {
2254 			get_resources(ep, rp, fmep->config);
2255 			rp++;
2256 			fmep->nsuspects++;
2257 			if (!is_fault(ep->t))
2258 				fmep->nonfault++;
2259 		}
2260 	}
2261 	return (mess_zero_count);
2262 }
2263 
2264 /*
2265  * addpayloadprop -- add a payload prop to a problem
2266  */
2267 static void
2268 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault)
2269 {
2270 	nvlist_t *rsrc, *hcs;
2271 
2272 	ASSERT(fault != NULL);
2273 	ASSERT(lhs != NULL);
2274 	ASSERT(rhs != NULL);
2275 
2276 	if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, &rsrc) != 0)
2277 		out(O_DIE, "cannot add payloadprop \"%s\" to fault", lhs);
2278 
2279 	if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0) {
2280 		out(O_ALTFP|O_VERB2, "addpayloadprop: create hc_specific");
2281 		if (nvlist_xalloc(&hcs, NV_UNIQUE_NAME, &Eft_nv_hdl) != 0)
2282 			out(O_DIE,
2283 			    "cannot add payloadprop \"%s\" to fault", lhs);
2284 		if (nvlist_add_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, hcs) != 0)
2285 			out(O_DIE,
2286 			    "cannot add payloadprop \"%s\" to fault", lhs);
2287 		nvlist_free(hcs);
2288 		if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0)
2289 			out(O_DIE,
2290 			    "cannot add payloadprop \"%s\" to fault", lhs);
2291 	} else
2292 		out(O_ALTFP|O_VERB2, "addpayloadprop: reuse hc_specific");
2293 
2294 	if (rhs->t == UINT64) {
2295 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v);
2296 
2297 		if (nvlist_add_uint64(hcs, lhs, rhs->v) != 0)
2298 			out(O_DIE,
2299 			    "cannot add payloadprop \"%s\" to fault", lhs);
2300 	} else {
2301 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"",
2302 		    lhs, (char *)(uintptr_t)rhs->v);
2303 
2304 		if (nvlist_add_string(hcs, lhs, (char *)(uintptr_t)rhs->v) != 0)
2305 			out(O_DIE,
2306 			    "cannot add payloadprop \"%s\" to fault", lhs);
2307 	}
2308 }
2309 
2310 static char *Istatbuf;
2311 static char *Istatbufptr;
2312 static int Istatsz;
2313 
2314 /*
2315  * istataddsize -- calculate size of istat and add it to Istatsz
2316  */
2317 /*ARGSUSED2*/
2318 static void
2319 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2320 {
2321 	int val;
2322 
2323 	ASSERT(lhs != NULL);
2324 	ASSERT(rhs != NULL);
2325 
2326 	if ((val = stats_counter_value(rhs)) == 0)
2327 		return;	/* skip zero-valued stats */
2328 
2329 	/* count up the size of the stat name */
2330 	Istatsz += ipath2strlen(lhs->ename, lhs->ipath);
2331 	Istatsz++;	/* for the trailing NULL byte */
2332 
2333 	/* count up the size of the stat value */
2334 	Istatsz += snprintf(NULL, 0, "%d", val);
2335 	Istatsz++;	/* for the trailing NULL byte */
2336 }
2337 
2338 /*
2339  * istat2str -- serialize an istat, writing result to *Istatbufptr
2340  */
2341 /*ARGSUSED2*/
2342 static void
2343 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2344 {
2345 	char *str;
2346 	int len;
2347 	int val;
2348 
2349 	ASSERT(lhs != NULL);
2350 	ASSERT(rhs != NULL);
2351 
2352 	if ((val = stats_counter_value(rhs)) == 0)
2353 		return;	/* skip zero-valued stats */
2354 
2355 	/* serialize the stat name */
2356 	str = ipath2str(lhs->ename, lhs->ipath);
2357 	len = strlen(str);
2358 
2359 	ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]);
2360 	(void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr);
2361 	Istatbufptr += len;
2362 	FREE(str);
2363 	*Istatbufptr++ = '\0';
2364 
2365 	/* serialize the stat value */
2366 	Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr,
2367 	    "%d", val);
2368 	*Istatbufptr++ = '\0';
2369 
2370 	ASSERT(Istatbufptr <= &Istatbuf[Istatsz]);
2371 }
2372 
2373 void
2374 istat_save()
2375 {
2376 	if (Istat_need_save == 0)
2377 		return;
2378 
2379 	/* figure out how big the serialzed info is */
2380 	Istatsz = 0;
2381 	lut_walk(Istats, (lut_cb)istataddsize, NULL);
2382 
2383 	if (Istatsz == 0) {
2384 		/* no stats to save */
2385 		fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2386 		return;
2387 	}
2388 
2389 	/* create the serialized buffer */
2390 	Istatbufptr = Istatbuf = MALLOC(Istatsz);
2391 	lut_walk(Istats, (lut_cb)istat2str, NULL);
2392 
2393 	/* clear out current saved stats */
2394 	fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2395 
2396 	/* write out the new version */
2397 	fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz);
2398 	FREE(Istatbuf);
2399 
2400 	Istat_need_save = 0;
2401 }
2402 
2403 int
2404 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2)
2405 {
2406 	if (ent1->ename != ent2->ename)
2407 		return (ent2->ename - ent1->ename);
2408 	if (ent1->ipath != ent2->ipath)
2409 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2410 
2411 	return (0);
2412 }
2413 
2414 /*
2415  * istat-verify -- verify the component associated with a stat still exists
2416  *
2417  * if the component no longer exists, this routine resets the stat and
2418  * returns 0.  if the component still exists, it returns 1.
2419  */
2420 static int
2421 istat_verify(struct node *snp, struct istat_entry *entp)
2422 {
2423 	struct stats *statp;
2424 	nvlist_t *fmri;
2425 
2426 	fmri = node2fmri(snp->u.event.epname);
2427 	if (platform_path_exists(fmri)) {
2428 		nvlist_free(fmri);
2429 		return (1);
2430 	}
2431 	nvlist_free(fmri);
2432 
2433 	/* component no longer in system.  zero out the associated stats */
2434 	if ((statp = (struct stats *)
2435 	    lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
2436 	    stats_counter_value(statp) == 0)
2437 		return (0);	/* stat is already reset */
2438 
2439 	Istat_need_save = 1;
2440 	stats_counter_reset(statp);
2441 	return (0);
2442 }
2443 
2444 static void
2445 istat_bump(struct node *snp, int n)
2446 {
2447 	struct stats *statp;
2448 	struct istat_entry ent;
2449 
2450 	ASSERT(snp != NULL);
2451 	ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
2452 	ASSERT(snp->u.event.epname != NULL);
2453 
2454 	/* class name should be hoisted into a single stable entry */
2455 	ASSERT(snp->u.event.ename->u.name.next == NULL);
2456 	ent.ename = snp->u.event.ename->u.name.s;
2457 	ent.ipath = ipath(snp->u.event.epname);
2458 
2459 	if (!istat_verify(snp, &ent)) {
2460 		/* component no longer exists in system, nothing to do */
2461 		return;
2462 	}
2463 
2464 	if ((statp = (struct stats *)
2465 	    lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
2466 		/* need to create the counter */
2467 		int cnt = 0;
2468 		struct node *np;
2469 		char *sname;
2470 		char *snamep;
2471 		struct istat_entry *newentp;
2472 
2473 		/* count up the size of the stat name */
2474 		np = snp->u.event.ename;
2475 		while (np != NULL) {
2476 			cnt += strlen(np->u.name.s);
2477 			cnt++;	/* for the '.' or '@' */
2478 			np = np->u.name.next;
2479 		}
2480 		np = snp->u.event.epname;
2481 		while (np != NULL) {
2482 			cnt += snprintf(NULL, 0, "%s%llu",
2483 			    np->u.name.s, np->u.name.child->u.ull);
2484 			cnt++;	/* for the '/' or trailing NULL byte */
2485 			np = np->u.name.next;
2486 		}
2487 
2488 		/* build the stat name */
2489 		snamep = sname = alloca(cnt);
2490 		np = snp->u.event.ename;
2491 		while (np != NULL) {
2492 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2493 			    "%s", np->u.name.s);
2494 			np = np->u.name.next;
2495 			if (np)
2496 				*snamep++ = '.';
2497 		}
2498 		*snamep++ = '@';
2499 		np = snp->u.event.epname;
2500 		while (np != NULL) {
2501 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2502 			    "%s%llu", np->u.name.s, np->u.name.child->u.ull);
2503 			np = np->u.name.next;
2504 			if (np)
2505 				*snamep++ = '/';
2506 		}
2507 		*snamep++ = '\0';
2508 
2509 		/* create the new stat & add it to our list */
2510 		newentp = MALLOC(sizeof (*newentp));
2511 		*newentp = ent;
2512 		statp = stats_new_counter(NULL, sname, 0);
2513 		Istats = lut_add(Istats, (void *)newentp, (void *)statp,
2514 		    (lut_cmp)istat_cmp);
2515 	}
2516 
2517 	/* if n is non-zero, set that value instead of bumping */
2518 	if (n) {
2519 		stats_counter_reset(statp);
2520 		stats_counter_add(statp, n);
2521 	} else
2522 		stats_counter_bump(statp);
2523 	Istat_need_save = 1;
2524 
2525 	ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
2526 	out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
2527 	    stats_counter_value(statp));
2528 }
2529 
/*
 * istat_destructor -- lut_free() callback for Istats: frees the
 *	istat_entry key (left) and deletes the stats value (right).
 */
/*ARGSUSED*/
static void
istat_destructor(void *left, void *right, void *arg)
{
	FREE((struct istat_entry *)left);
	stats_delete((struct stats *)right);
}
2539 
2540 /*
2541  * Callback used in a walk of the Istats to reset matching stat counters.
2542  */
2543 static void
2544 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
2545     const struct ipath *ipp)
2546 {
2547 	char *path;
2548 
2549 	if (entp->ipath == ipp) {
2550 		path = ipath2str(entp->ename, ipp);
2551 		out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
2552 		FREE(path);
2553 		stats_counter_reset(statp);
2554 		Istat_need_save = 1;
2555 	}
2556 }
2557 
2558 /*ARGSUSED*/
2559 static void
2560 istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp,
2561     void *unused)
2562 {
2563 	char *path;
2564 	nvlist_t *fmri;
2565 
2566 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2567 	if (!platform_path_exists(fmri)) {
2568 		path = ipath2str(entp->ename, entp->ipath);
2569 		out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path);
2570 		FREE(path);
2571 		stats_counter_reset(statp);
2572 		Istat_need_save = 1;
2573 	}
2574 	nvlist_free(fmri);
2575 }
2576 
2577 void
2578 istat_fini(void)
2579 {
2580 	lut_free(Istats, istat_destructor, NULL);
2581 }
2582 
2583 static char *Serdbuf;
2584 static char *Serdbufptr;
2585 static int Serdsz;
2586 
2587 /*
2588  * serdaddsize -- calculate size of serd and add it to Serdsz
2589  */
2590 /*ARGSUSED*/
2591 static void
2592 serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2593 {
2594 	ASSERT(lhs != NULL);
2595 
2596 	/* count up the size of the stat name */
2597 	Serdsz += ipath2strlen(lhs->ename, lhs->ipath);
2598 	Serdsz++;	/* for the trailing NULL byte */
2599 }
2600 
2601 /*
2602  * serd2str -- serialize a serd engine, writing result to *Serdbufptr
2603  */
2604 /*ARGSUSED*/
2605 static void
2606 serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2607 {
2608 	char *str;
2609 	int len;
2610 
2611 	ASSERT(lhs != NULL);
2612 
2613 	/* serialize the serd engine name */
2614 	str = ipath2str(lhs->ename, lhs->ipath);
2615 	len = strlen(str);
2616 
2617 	ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]);
2618 	(void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr);
2619 	Serdbufptr += len;
2620 	FREE(str);
2621 	*Serdbufptr++ = '\0';
2622 	ASSERT(Serdbufptr <= &Serdbuf[Serdsz]);
2623 }
2624 
2625 void
2626 serd_save()
2627 {
2628 	if (Serd_need_save == 0)
2629 		return;
2630 
2631 	/* figure out how big the serialzed info is */
2632 	Serdsz = 0;
2633 	lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL);
2634 
2635 	if (Serdsz == 0) {
2636 		/* no serd engines to save */
2637 		fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2638 		return;
2639 	}
2640 
2641 	/* create the serialized buffer */
2642 	Serdbufptr = Serdbuf = MALLOC(Serdsz);
2643 	lut_walk(SerdEngines, (lut_cb)serd2str, NULL);
2644 
2645 	/* clear out current saved stats */
2646 	fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2647 
2648 	/* write out the new version */
2649 	fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz);
2650 	FREE(Serdbuf);
2651 	Serd_need_save = 0;
2652 }
2653 
2654 int
2655 serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2)
2656 {
2657 	if (ent1->ename != ent2->ename)
2658 		return (ent2->ename - ent1->ename);
2659 	if (ent1->ipath != ent2->ipath)
2660 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2661 
2662 	return (0);
2663 }
2664 
2665 void
2666 fme_serd_load(fmd_hdl_t *hdl)
2667 {
2668 	int sz;
2669 	char *sbuf;
2670 	char *sepptr;
2671 	char *ptr;
2672 	struct serd_entry *newentp;
2673 	struct node *epname;
2674 	nvlist_t *fmri;
2675 	char *namestring;
2676 
2677 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0)
2678 		return;
2679 	sbuf = alloca(sz);
2680 	fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz);
2681 	ptr = sbuf;
2682 	while (ptr < &sbuf[sz]) {
2683 		sepptr = strchr(ptr, '@');
2684 		*sepptr = '\0';
2685 		namestring = ptr;
2686 		sepptr++;
2687 		ptr = sepptr;
2688 		ptr += strlen(ptr);
2689 		ptr++;	/* move past the '\0' separating paths */
2690 		epname = pathstring2epnamenp(sepptr);
2691 		fmri = node2fmri(epname);
2692 		if (platform_path_exists(fmri)) {
2693 			newentp = MALLOC(sizeof (*newentp));
2694 			newentp->hdl = hdl;
2695 			newentp->ipath = ipath(epname);
2696 			newentp->ename = stable(namestring);
2697 			SerdEngines = lut_add(SerdEngines, (void *)newentp,
2698 			    (void *)newentp, (lut_cmp)serd_cmp);
2699 		} else
2700 			Serd_need_save = 1;
2701 		tree_free(epname);
2702 		nvlist_free(fmri);
2703 	}
2704 	/* save it back again in case some of the paths no longer exist */
2705 	serd_save();
2706 }
2707 
/*
 * serd_destructor -- lut_free() callback for SerdEngines: frees the
 *	serd_entry passed as the key (left).
 */
/*ARGSUSED*/
static void
serd_destructor(void *left, void *right, void *arg)
{
	FREE((struct serd_entry *)left);
}
2715 
2716 /*
2717  * Callback used in a walk of the SerdEngines to reset matching serd engines.
2718  */
2719 /*ARGSUSED*/
2720 static void
2721 serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp)
2722 {
2723 	char *path;
2724 
2725 	if (entp->ipath == ipp) {
2726 		path = ipath2str(entp->ename, ipp);
2727 		out(O_ALTFP, "serd_reset_cb: resetting %s", path);
2728 		fmd_serd_reset(entp->hdl, path);
2729 		FREE(path);
2730 		Serd_need_save = 1;
2731 	}
2732 }
2733 
2734 /*ARGSUSED*/
2735 static void
2736 serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2)
2737 {
2738 	char *path;
2739 	nvlist_t *fmri;
2740 
2741 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2742 	if (!platform_path_exists(fmri)) {
2743 		path = ipath2str(entp->ename, entp->ipath);
2744 		out(O_ALTFP, "serd_topo_chg_cb: not present %s", path);
2745 		fmd_serd_reset(entp->hdl, path);
2746 		FREE(path);
2747 		Serd_need_save = 1;
2748 	}
2749 	nvlist_free(fmri);
2750 }
2751 
2752 void
2753 serd_fini(void)
2754 {
2755 	lut_free(SerdEngines, serd_destructor, NULL);
2756 }
2757 
2758 static void
2759 publish_suspects(struct fme *fmep, struct rsl *srl)
2760 {
2761 	struct rsl *rp;
2762 	nvlist_t *fault;
2763 	uint8_t cert;
2764 	uint_t *frs;
2765 	uint_t fravg, frsum, fr;
2766 	uint_t messval;
2767 	uint_t retireval;
2768 	uint_t responseval;
2769 	struct node *snp;
2770 	int frcnt, fridx;
2771 	boolean_t allfaulty = B_TRUE;
2772 	struct rsl *erl = srl + fmep->nsuspects - 1;
2773 
2774 	/*
2775 	 * sort the array
2776 	 */
2777 	qsort(srl, fmep->nsuspects, sizeof (struct rsl), rslcmp);
2778 	rsluniq(srl, erl, &fmep->nsuspects, &fmep->nonfault);
2779 
2780 	/*
2781 	 * If the suspect list is all faults, then for a given fault,
2782 	 * say X of N, X's certainty is computed via:
2783 	 *
2784 	 * fitrate(X) / (fitrate(1) + ... + fitrate(N)) * 100
2785 	 *
2786 	 * If none of the suspects are faults, and there are N suspects,
2787 	 * the certainty of a given suspect is 100/N.
2788 	 *
2789 	 * If there are are a mixture of faults and other problems in
2790 	 * the suspect list, we take an average of the faults'
2791 	 * FITrates and treat this average as the FITrate for any
2792 	 * non-faults.  The fitrate of any given suspect is then
2793 	 * computed per the first formula above.
2794 	 */
2795 	if (fmep->nonfault == fmep->nsuspects) {
2796 		/* NO faults in the suspect list */
2797 		cert = percentof(1, fmep->nsuspects);
2798 	} else {
2799 		/* sum the fitrates */
2800 		frs = alloca(fmep->nsuspects * sizeof (uint_t));
2801 		fridx = frcnt = frsum = 0;
2802 
2803 		for (rp = srl; rp <= erl; rp++) {
2804 			struct node *n;
2805 
2806 			if (rp->suspect == NULL)
2807 				continue;
2808 			if (!is_fault(rp->suspect->t)) {
2809 				frs[fridx++] = 0;
2810 				continue;
2811 			}
2812 			n = eventprop_lookup(rp->suspect, L_FITrate);
2813 			if (node2uint(n, &fr) != 0) {
2814 				out(O_DEBUG|O_NONL, "event ");
2815 				ipath_print(O_DEBUG|O_NONL,
2816 				    rp->suspect->enode->u.event.ename->u.name.s,
2817 				    rp->suspect->ipp);
2818 				out(O_DEBUG, " has no FITrate (using 1)");
2819 				fr = 1;
2820 			} else if (fr == 0) {
2821 				out(O_DEBUG|O_NONL, "event ");
2822 				ipath_print(O_DEBUG|O_NONL,
2823 				    rp->suspect->enode->u.event.ename->u.name.s,
2824 				    rp->suspect->ipp);
2825 				out(O_DEBUG, " has zero FITrate (using 1)");
2826 				fr = 1;
2827 			}
2828 
2829 			frs[fridx++] = fr;
2830 			frsum += fr;
2831 			frcnt++;
2832 		}
2833 		fravg = avg(frsum, frcnt);
2834 		for (fridx = 0; fridx < fmep->nsuspects; fridx++)
2835 			if (frs[fridx] == 0) {
2836 				frs[fridx] = fravg;
2837 				frsum += fravg;
2838 			}
2839 	}
2840 
2841 	/* Add them in reverse order of our sort, as fmd reverses order */
2842 	for (rp = erl; rp >= srl; rp--) {
2843 		if (rp->suspect == NULL)
2844 			continue;
2845 		if (!is_fault(rp->suspect->t))
2846 			allfaulty = B_FALSE;
2847 		if (fmep->nonfault != fmep->nsuspects)
2848 			cert = percentof(frs[--fridx], frsum);
2849 		fault = fmd_nvl_create_fault(fmep->hdl,
2850 		    rp->suspect->enode->u.event.ename->u.name.s,
2851 		    cert,
2852 		    rp->asru,
2853 		    rp->fru,
2854 		    rp->rsrc);
2855 		if (fault == NULL)
2856 			out(O_DIE, "fault creation failed");
2857 		/* if "message" property exists, add it to the fault */
2858 		if (node2uint(eventprop_lookup(rp->suspect, L_message),
2859 		    &messval) == 0) {
2860 
2861 			out(O_ALTFP,
2862 			    "[FME%d, %s adds message=%d to suspect list]",
2863 			    fmep->id,
2864 			    rp->suspect->enode->u.event.ename->u.name.s,
2865 			    messval);
2866 			if (nvlist_add_boolean_value(fault,
2867 			    FM_SUSPECT_MESSAGE,
2868 			    (messval) ? B_TRUE : B_FALSE) != 0) {
2869 				out(O_DIE, "cannot add no-message to fault");
2870 			}
2871 		}
2872 
2873 		/* if "retire" property exists, add it to the fault */
2874 		if (node2uint(eventprop_lookup(rp->suspect, L_retire),
2875 		    &retireval) == 0) {
2876 
2877 			out(O_ALTFP,
2878 			    "[FME%d, %s adds retire=%d to suspect list]",
2879 			    fmep->id,
2880 			    rp->suspect->enode->u.event.ename->u.name.s,
2881 			    retireval);
2882 			if (nvlist_add_boolean_value(fault,
2883 			    FM_SUSPECT_RETIRE,
2884 			    (retireval) ? B_TRUE : B_FALSE) != 0) {
2885 				out(O_DIE, "cannot add no-retire to fault");
2886 			}
2887 		}
2888 
2889 		/* if "response" property exists, add it to the fault */
2890 		if (node2uint(eventprop_lookup(rp->suspect, L_response),
2891 		    &responseval) == 0) {
2892 
2893 			out(O_ALTFP,
2894 			    "[FME%d, %s adds response=%d to suspect list]",
2895 			    fmep->id,
2896 			    rp->suspect->enode->u.event.ename->u.name.s,
2897 			    responseval);
2898 			if (nvlist_add_boolean_value(fault,
2899 			    FM_SUSPECT_RESPONSE,
2900 			    (responseval) ? B_TRUE : B_FALSE) != 0) {
2901 				out(O_DIE, "cannot add no-response to fault");
2902 			}
2903 		}
2904 
2905 		/* add any payload properties */
2906 		lut_walk(rp->suspect->payloadprops,
2907 		    (lut_cb)addpayloadprop, (void *)fault);
2908 		rslfree(rp);
2909 
2910 		/*
2911 		 * If "action" property exists, evaluate it;  this must be done
2912 		 * before the allfaulty check below since some actions may
2913 		 * modify the asru to be used in fmd_nvl_fmri_has_fault.  This
2914 		 * needs to be restructured if any new actions are introduced
2915 		 * that have effects that we do not want to be visible if
2916 		 * we decide not to publish in the dupclose check below.
2917 		 */
2918 		if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
2919 			struct evalue evalue;
2920 
2921 			out(O_ALTFP|O_NONL,
2922 			    "[FME%d, %s action ", fmep->id,
2923 			    rp->suspect->enode->u.event.ename->u.name.s);
2924 			ptree_name_iter(O_ALTFP|O_NONL, snp);
2925 			out(O_ALTFP, "]");
2926 			Action_nvl = fault;
2927 			(void) eval_expr(snp, NULL, NULL, NULL, NULL,
2928 			    NULL, 0, &evalue);
2929 		}
2930 
2931 		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);
2932 
2933 		/*
2934 		 * check if the asru is already marked as "faulty".
2935 		 */
2936 		if (allfaulty) {
2937 			nvlist_t *asru;
2938 
2939 			out(O_ALTFP|O_VERB, "FME%d dup check ", fmep->id);
2940 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
2941 			out(O_ALTFP|O_VERB|O_NONL, " ");
2942 			if (nvlist_lookup_nvlist(fault,
2943 			    FM_FAULT_ASRU, &asru) != 0) {
2944 				out(O_ALTFP|O_VERB, "NULL asru");
2945 				allfaulty = B_FALSE;
2946 			} else if (fmd_nvl_fmri_has_fault(fmep->hdl, asru,
2947 			    FMD_HAS_FAULT_ASRU, NULL)) {
2948 				out(O_ALTFP|O_VERB, "faulty");
2949 			} else {
2950 				out(O_ALTFP|O_VERB, "not faulty");
2951 				allfaulty = B_FALSE;
2952 			}
2953 		}
2954 
2955 	}
2956 
2957 	if (!allfaulty) {
2958 		/*
2959 		 * don't update the count stat if all asrus are already
2960 		 * present and unrepaired in the asru cache
2961 		 */
2962 		for (rp = erl; rp >= srl; rp--) {
2963 			struct event *suspect = rp->suspect;
2964 
2965 			if (suspect == NULL)
2966 				continue;
2967 
2968 			/* if "count" exists, increment the appropriate stat */
2969 			if ((snp = eventprop_lookup(suspect,
2970 			    L_count)) != NULL) {
2971 				out(O_ALTFP|O_NONL,
2972 				    "[FME%d, %s count ", fmep->id,
2973 				    suspect->enode->u.event.ename->u.name.s);
2974 				ptree_name_iter(O_ALTFP|O_NONL, snp);
2975 				out(O_ALTFP, "]");
2976 				istat_bump(snp, 0);
2977 
2978 			}
2979 		}
2980 		istat_save();	/* write out any istat changes */
2981 	}
2982 }
2983 
2984 static const char *
2985 undiag_2defect_str(int ud)
2986 {
2987 	switch (ud) {
2988 	case UD_VAL_MISSINGINFO:
2989 	case UD_VAL_MISSINGOBS:
2990 	case UD_VAL_MISSINGPATH:
2991 	case UD_VAL_MISSINGZERO:
2992 	case UD_VAL_BADOBS:
2993 	case UD_VAL_CFGMISMATCH:
2994 		return (UNDIAG_DEFECT_CHKPT);
2995 		break;
2996 
2997 	case UD_VAL_BADEVENTI:
2998 	case UD_VAL_INSTFAIL:
2999 	case UD_VAL_NOPATH:
3000 	case UD_VAL_UNSOLVD:
3001 		return (UNDIAG_DEFECT_FME);
3002 		break;
3003 
3004 	case UD_VAL_MAXFME:
3005 		return (UNDIAG_DEFECT_LIMIT);
3006 		break;
3007 
3008 	case UD_VAL_UNKNOWN:
3009 	default:
3010 		return (UNDIAG_DEFECT_UNKNOWN);
3011 		break;
3012 	}
3013 }
3014 
3015 const char *
3016 undiag_2reason_str(int ud)
3017 {
3018 	switch (ud) {
3019 	case UD_VAL_BADEVENTI:
3020 		return (UD_STR_BADEVENTI);
3021 	case UD_VAL_BADOBS:
3022 		return (UD_STR_BADOBS);
3023 	case UD_VAL_CFGMISMATCH:
3024 		return (UD_STR_CFGMISMATCH);
3025 	case UD_VAL_INSTFAIL:
3026 		return (UD_STR_INSTFAIL);
3027 	case UD_VAL_MAXFME:
3028 		return (UD_STR_MAXFME);
3029 	case UD_VAL_MISSINGINFO:
3030 		return (UD_STR_MISSINGINFO);
3031 	case UD_VAL_MISSINGOBS:
3032 		return (UD_STR_MISSINGOBS);
3033 	case UD_VAL_MISSINGPATH:
3034 		return (UD_STR_MISSINGPATH);
3035 	case UD_VAL_MISSINGZERO:
3036 		return (UD_STR_MISSINGZERO);
3037 	case UD_VAL_NOPATH:
3038 		return (UD_STR_NOPATH);
3039 	case UD_VAL_UNSOLVD:
3040 		return (UD_STR_UNSOLVD);
3041 	case UD_VAL_UNKNOWN:
3042 	default:
3043 		return (UD_STR_UNKNOWN);
3044 	}
3045 }
3046 
3047 static void
3048 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase)
3049 {
3050 	struct case_list *newcase;
3051 	nvlist_t *defect;
3052 
3053 	out(O_ALTFP,
3054 	    "[undiagnosable ereport received, "
3055 	    "creating and closing a new case (%s)]",
3056 	    undiag_2reason_str(Undiag_reason));
3057 
3058 	newcase = MALLOC(sizeof (struct case_list));
3059 	newcase->next = NULL;
3060 	newcase->fmcase = fmcase;
3061 	if (Undiagablecaselist != NULL)
3062 		newcase->next = Undiagablecaselist;
3063 	Undiagablecaselist = newcase;
3064 
3065 	if (ffep != NULL)
3066 		fmd_case_add_ereport(hdl, newcase->fmcase, ffep);
3067 
3068 	defect = fmd_nvl_create_fault(hdl,
3069 	    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
3070 	(void) nvlist_add_string(defect, UNDIAG_REASON,
3071 	    undiag_2reason_str(Undiag_reason));
3072 	fmd_case_add_suspect(hdl, newcase->fmcase, defect);
3073 
3074 	fmd_case_solve(hdl, newcase->fmcase);
3075 	fmd_case_close(hdl, newcase->fmcase);
3076 	Undiag_reason = UD_VAL_UNKNOWN;
3077 }
3078 
3079 static void
3080 fme_undiagnosable(struct fme *f)
3081 {
3082 	nvlist_t *defect;
3083 
3084 	out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
3085 	    f->id, fmd_case_uuid(f->hdl, f->fmcase),
3086 	    undiag_2reason_str(Undiag_reason));
3087 
3088 	defect = fmd_nvl_create_fault(f->hdl,
3089 	    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
3090 	(void) nvlist_add_string(defect, UNDIAG_REASON,
3091 	    undiag_2reason_str(Undiag_reason));
3092 	fmd_case_add_suspect(f->hdl, f->fmcase, defect);
3093 	fmd_case_solve(f->hdl, f->fmcase);
3094 	fmd_case_close(f->hdl, f->fmcase);
3095 	Undiag_reason = UD_VAL_UNKNOWN;
3096 }
3097 
3098 /*
3099  * fme_close_case
3100  *
3101  *	Find the requested case amongst our fmes and close it.  Free up
3102  *	the related fme.
3103  */
3104 void
3105 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase)
3106 {
3107 	struct case_list *ucasep, *prevcasep = NULL;
3108 	struct fme *prev = NULL;
3109 	struct fme *fmep;
3110 
3111 	for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) {
3112 		if (fmcase != ucasep->fmcase) {
3113 			prevcasep = ucasep;
3114 			continue;
3115 		}
3116 
3117 		if (prevcasep == NULL)
3118 			Undiagablecaselist = Undiagablecaselist->next;
3119 		else
3120 			prevcasep->next = ucasep->next;
3121 
3122 		FREE(ucasep);
3123 		return;
3124 	}
3125 
3126 	for (fmep = FMElist; fmep; fmep = fmep->next) {
3127 		if (fmep->hdl == hdl && fmep->fmcase == fmcase)
3128 			break;
3129 		prev = fmep;
3130 	}
3131 
3132 	if (fmep == NULL) {
3133 		out(O_WARN, "Eft asked to close unrecognized case [%s].",
3134 		    fmd_case_uuid(hdl, fmcase));
3135 		return;
3136 	}
3137 
3138 	if (EFMElist == fmep)
3139 		EFMElist = prev;
3140 
3141 	if (prev == NULL)
3142 		FMElist = FMElist->next;
3143 	else
3144 		prev->next = fmep->next;
3145 
3146 	fmep->next = NULL;
3147 
3148 	/* Get rid of any timer this fme has set */
3149 	if (fmep->wull != 0)
3150 		fmd_timer_remove(fmep->hdl, fmep->timer);
3151 
3152 	if (ClosedFMEs == NULL) {
3153 		ClosedFMEs = fmep;
3154 	} else {
3155 		fmep->next = ClosedFMEs;
3156 		ClosedFMEs = fmep;
3157 	}
3158 
3159 	Open_fme_count--;
3160 
3161 	/* See if we can close the overflow FME */
3162 	if (Open_fme_count <= Max_fme) {
3163 		for (fmep = FMElist; fmep; fmep = fmep->next) {
3164 			if (fmep->overflow && !(fmd_case_closed(fmep->hdl,
3165 			    fmep->fmcase)))
3166 				break;
3167 		}
3168 
3169 		if (fmep != NULL)
3170 			fmd_case_close(fmep->hdl, fmep->fmcase);
3171 	}
3172 }
3173 
3174 /*
3175  * fme_set_timer()
3176  *	If the time we need to wait for the given FME is less than the
3177  *	current timer, kick that old timer out and establish a new one.
3178  */
3179 static int
3180 fme_set_timer(struct fme *fmep, unsigned long long wull)
3181 {
3182 	out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait ");
3183 	ptree_timeval(O_ALTFP|O_VERB, &wull);
3184 
3185 	if (wull <= fmep->pull) {
3186 		out(O_ALTFP|O_VERB|O_NONL, "already have waited at least ");
3187 		ptree_timeval(O_ALTFP|O_VERB, &fmep->pull);
3188 		out(O_ALTFP|O_VERB, NULL);
3189 		/* we've waited at least wull already, don't need timer */
3190 		return (0);
3191 	}
3192 
3193 	out(O_ALTFP|O_VERB|O_NONL, " currently ");
3194 	if (fmep->wull != 0) {
3195 		out(O_ALTFP|O_VERB|O_NONL, "waiting ");
3196 		ptree_timeval(O_ALTFP|O_VERB, &fmep->wull);
3197 		out(O_ALTFP|O_VERB, NULL);
3198 	} else {
3199 		out(O_ALTFP|O_VERB|O_NONL, "not waiting");
3200 		out(O_ALTFP|O_VERB, NULL);
3201 	}
3202 
3203 	if (fmep->wull != 0)
3204 		if (wull >= fmep->wull)
3205 			/* New timer would fire later than established timer */
3206 			return (0);
3207 
3208 	if (fmep->wull != 0) {
3209 		fmd_timer_remove(fmep->hdl, fmep->timer);
3210 	}
3211 
3212 	fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep,
3213 	    fmep->e0r, wull);
3214 	out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer);
3215 	fmep->wull = wull;
3216 	return (1);
3217 }
3218 
3219 void
3220 fme_timer_fired(struct fme *fmep, id_t tid)
3221 {
3222 	struct fme *ffmep = NULL;
3223 
3224 	for (ffmep = FMElist; ffmep; ffmep = ffmep->next)
3225 		if (ffmep == fmep)
3226 			break;
3227 
3228 	if (ffmep == NULL) {
3229 		out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.",
3230 		    (void *)fmep);
3231 		return;
3232 	}
3233 
3234 	out(O_ALTFP|O_VERB, "Timer fired %lx", tid);
3235 	fmep->pull = fmep->wull;
3236 	fmep->wull = 0;
3237 	fmd_buf_write(fmep->hdl, fmep->fmcase,
3238 	    WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull));
3239 
3240 	fme_eval(fmep, fmep->e0r);
3241 }
3242 
3243 /*
3244  * Preserve the fme's suspect list in its psuspects list, NULLing the
3245  * suspects list in the meantime.
3246  */
3247 static void
3248 save_suspects(struct fme *fmep)
3249 {
3250 	struct event *ep;
3251 	struct event *nextep;
3252 
3253 	/* zero out the previous suspect list */
3254 	for (ep = fmep->psuspects; ep; ep = nextep) {
3255 		nextep = ep->psuspects;
3256 		ep->psuspects = NULL;
3257 	}
3258 	fmep->psuspects = NULL;
3259 
3260 	/* zero out the suspect list, copying it to previous suspect list */
3261 	fmep->psuspects = fmep->suspects;
3262 	for (ep = fmep->suspects; ep; ep = nextep) {
3263 		nextep = ep->suspects;
3264 		ep->psuspects = ep->suspects;
3265 		ep->suspects = NULL;
3266 		ep->is_suspect = 0;
3267 	}
3268 	fmep->suspects = NULL;
3269 	fmep->nsuspects = 0;
3270 	fmep->nonfault = 0;
3271 }
3272 
3273 /*
3274  * Retrieve the fme's suspect list from its psuspects list.
3275  */
3276 static void
3277 restore_suspects(struct fme *fmep)
3278 {
3279 	struct event *ep;
3280 	struct event *nextep;
3281 
3282 	fmep->nsuspects = fmep->nonfault = 0;
3283 	fmep->suspects = fmep->psuspects;
3284 	for (ep = fmep->psuspects; ep; ep = nextep) {
3285 		fmep->nsuspects++;
3286 		if (!is_fault(ep->t))
3287 			fmep->nonfault++;
3288 		nextep = ep->psuspects;
3289 		ep->suspects = ep->psuspects;
3290 	}
3291 }
3292 
3293 /*
3294  * this is what we use to call the Emrys prototype code instead of main()
3295  */
3296 static void
3297 fme_eval(struct fme *fmep, fmd_event_t *ffep)
3298 {
3299 	struct event *ep;
3300 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
3301 	struct rsl *srl = NULL;
3302 	struct rsl *srl2 = NULL;
3303 	int mess_zero_count;
3304 	int mess_zero_nonfault = 0;
3305 	int rpcnt;
3306 
3307 	save_suspects(fmep);
3308 
3309 	out(O_ALTFP, "Evaluate FME %d", fmep->id);
3310 	indent_set("  ");
3311 
3312 	lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
3313 	fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
3314 
3315 	out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
3316 	    fme_state2str(fmep->state));
3317 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
3318 		out(O_ALTFP|O_NONL, " ");
3319 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
3320 	}
3321 	out(O_ALTFP, NULL);
3322 
3323 	switch (fmep->state) {
3324 	case FME_CREDIBLE:
3325 		print_suspects(SLNEW, fmep);
3326 		(void) upsets_eval(fmep, ffep);
3327 
3328 		/*
3329 		 * we may have already posted suspects in upsets_eval() which
3330 		 * can recurse into fme_eval() again. If so then just return.
3331 		 */
3332 		if (fmep->posted_suspects)
3333 			return;
3334 
3335 		stats_counter_bump(fmep->diags);
3336 		rpcnt = fmep->nsuspects;
3337 		save_suspects(fmep);
3338 
3339 		/*
3340 		 * create two lists, one for "message=1" faults and one for
3341 		 * "message=0" faults. If we have a mixture we will generate
3342 		 * two separate suspect lists.
3343 		 */
3344 		srl = MALLOC(rpcnt * sizeof (struct rsl));
3345 		bzero(srl, rpcnt * sizeof (struct rsl));
3346 		srl2 = MALLOC(rpcnt * sizeof (struct rsl));
3347 		bzero(srl2, rpcnt * sizeof (struct rsl));
3348 		mess_zero_count = trim_suspects(fmep, srl, srl2, ffep,
3349 		    &mess_zero_nonfault);
3350 
3351 		/*
3352 		 * If the resulting suspect list has no members, we're
3353 		 * done so simply close the case. Otherwise sort and publish.
3354 		 */
3355 		if (fmep->nsuspects == 0 && mess_zero_count == 0) {
3356 			out(O_ALTFP,
3357 			    "[FME%d, case %s (all suspects are upsets)]",
3358 			    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
3359 			fmd_case_close(fmep->hdl, fmep->fmcase);
3360 		} else if (fmep->nsuspects != 0 && mess_zero_count == 0) {
3361 			publish_suspects(fmep, srl);
3362 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3363 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3364 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3365 		} else if (fmep->nsuspects == 0 && mess_zero_count != 0) {
3366 			fmep->nsuspects = mess_zero_count;
3367 			fmep->nonfault = mess_zero_nonfault;
3368 			publish_suspects(fmep, srl2);
3369 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3370 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3371 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3372 		} else {
3373 			struct event *obsp;
3374 			struct fme *nfmep;
3375 
3376 			publish_suspects(fmep, srl);
3377 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3378 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3379 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3380 
3381 			/*
3382 			 * Got both message=0 and message=1 so create a
3383 			 * duplicate case. Also need a temporary duplicate fme
3384 			 * structure for use by publish_suspects().
3385 			 */
3386 			nfmep = alloc_fme();
3387 			nfmep->id =  Nextid++;
3388 			nfmep->hdl = fmep->hdl;
3389 			nfmep->nsuspects = mess_zero_count;
3390 			nfmep->nonfault = mess_zero_nonfault;
3391 			nfmep->fmcase = fmd_case_open(fmep->hdl, NULL);
3392 			out(O_ALTFP|O_STAMP,
3393 			    "[creating parallel FME%d, case %s]", nfmep->id,
3394 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3395 			Open_fme_count++;
3396 			if (ffep) {
3397 				fmd_case_setprincipal(nfmep->hdl,
3398 				    nfmep->fmcase, ffep);
3399 				fmd_case_add_ereport(nfmep->hdl,
3400 				    nfmep->fmcase, ffep);
3401 			}
3402 			for (obsp = fmep->observations; obsp;
3403 			    obsp = obsp->observations)
3404 				if (obsp->ffep && obsp->ffep != ffep)
3405 					fmd_case_add_ereport(nfmep->hdl,
3406 					    nfmep->fmcase, obsp->ffep);
3407 
3408 			publish_suspects(nfmep, srl2);
3409 			out(O_ALTFP, "[solving FME%d, case %s]", nfmep->id,
3410 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3411 			fmd_case_solve(nfmep->hdl, nfmep->fmcase);
3412 			FREE(nfmep);
3413 		}
3414 		FREE(srl);
3415 		FREE(srl2);
3416 		restore_suspects(fmep);
3417 
3418 		fmep->posted_suspects = 1;
3419 		fmd_buf_write(fmep->hdl, fmep->fmcase,
3420 		    WOBUF_POSTD,
3421 		    (void *)&fmep->posted_suspects,
3422 		    sizeof (fmep->posted_suspects));
3423 
3424 		/*
3425 		 * Now the suspects have been posted, we can clear up
3426 		 * the instance tree as we won't be looking at it again.
3427 		 * Also cancel the timer as the case is now solved.
3428 		 */
3429 		if (fmep->wull != 0) {
3430 			fmd_timer_remove(fmep->hdl, fmep->timer);
3431 			fmep->wull = 0;
3432 		}
3433 		break;
3434 
3435 	case FME_WAIT:
3436 		ASSERT(my_delay > fmep->ull);
3437 		(void) fme_set_timer(fmep, my_delay);
3438 		print_suspects(SLWAIT, fmep);
3439 		itree_prune(fmep->eventtree);
3440 		return;
3441 
3442 	case FME_DISPROVED:
3443 		print_suspects(SLDISPROVED, fmep);
3444 		Undiag_reason = UD_VAL_UNSOLVD;
3445 		fme_undiagnosable(fmep);
3446 		break;
3447 	}
3448 
3449 	itree_free(fmep->eventtree);
3450 	fmep->eventtree = NULL;
3451 	structconfig_free(fmep->config);
3452 	fmep->config = NULL;
3453 	destroy_fme_bufs(fmep);
3454 }
3455 
/*
 * Forward declarations needed by the inference routines that follow.
 */
static void indent(void);
static int triggered(struct fme *fmep, struct event *ep, int mark);
static enum fme_state effects_test(struct fme *fmep,
    struct event *fault_event,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state requirements_test(struct fme *fmep,
    struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state causes_test(struct fme *fmep,
    struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
3465 
3466 static int
3467 checkconstraints(struct fme *fmep, struct arrow *arrowp)
3468 {
3469 	struct constraintlist *ctp;
3470 	struct evalue value;
3471 	char *sep = "";
3472 
3473 	if (arrowp->forever_false) {
3474 		indent();
3475 		out(O_ALTFP|O_VERB|O_NONL, "  Forever false constraint: ");
3476 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3477 			out(O_ALTFP|O_VERB|O_NONL, sep);
3478 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3479 			sep = ", ";
3480 		}
3481 		out(O_ALTFP|O_VERB, NULL);
3482 		return (0);
3483 	}
3484 	if (arrowp->forever_true) {
3485 		indent();
3486 		out(O_ALTFP|O_VERB|O_NONL, "  Forever true constraint: ");
3487 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3488 			out(O_ALTFP|O_VERB|O_NONL, sep);
3489 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3490 			sep = ", ";
3491 		}
3492 		out(O_ALTFP|O_VERB, NULL);
3493 		return (1);
3494 	}
3495 
3496 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3497 		if (eval_expr(ctp->cnode, NULL, NULL,
3498 		    &fmep->globals, fmep->config,
3499 		    arrowp, 0, &value)) {
3500 			/* evaluation successful */
3501 			if (value.t == UNDEFINED || value.v == 0) {
3502 				/* known false */
3503 				arrowp->forever_false = 1;
3504 				indent();
3505 				out(O_ALTFP|O_VERB|O_NONL,
3506 				    "  False constraint: ");
3507 				ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3508 				out(O_ALTFP|O_VERB, NULL);
3509 				return (0);
3510 			}
3511 		} else {
3512 			/* evaluation unsuccessful -- unknown value */
3513 			indent();
3514 			out(O_ALTFP|O_VERB|O_NONL,
3515 			    "  Deferred constraint: ");
3516 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3517 			out(O_ALTFP|O_VERB, NULL);
3518 			return (1);
3519 		}
3520 	}
3521 	/* known true */
3522 	arrowp->forever_true = 1;
3523 	indent();
3524 	out(O_ALTFP|O_VERB|O_NONL, "  True constraint: ");
3525 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3526 		out(O_ALTFP|O_VERB|O_NONL, sep);
3527 		ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3528 		sep = ", ";
3529 	}
3530 	out(O_ALTFP|O_VERB, NULL);
3531 	return (1);
3532 }
3533 
3534 static int
3535 triggered(struct fme *fmep, struct event *ep, int mark)
3536 {
3537 	struct bubble *bp;
3538 	struct arrowlist *ap;
3539 	int count = 0;
3540 
3541 	stats_counter_bump(fmep->Tcallcount);
3542 	for (bp = itree_next_bubble(ep, NULL); bp;
3543 	    bp = itree_next_bubble(ep, bp)) {
3544 		if (bp->t != B_TO)
3545 			continue;
3546 		for (ap = itree_next_arrow(bp, NULL); ap;
3547 		    ap = itree_next_arrow(bp, ap)) {
3548 			/* check count of marks against K in the bubble */
3549 			if ((ap->arrowp->mark & mark) &&
3550 			    ++count >= bp->nork)
3551 				return (1);
3552 		}
3553 	}
3554 	return (0);
3555 }
3556 
3557 static int
3558 mark_arrows(struct fme *fmep, struct event *ep, int mark,
3559     unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
3560 {
3561 	struct bubble *bp;
3562 	struct arrowlist *ap;
3563 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3564 	unsigned long long my_delay;
3565 	enum fme_state result;
3566 	int retval = 0;
3567 
3568 	for (bp = itree_next_bubble(ep, NULL); bp;
3569 	    bp = itree_next_bubble(ep, bp)) {
3570 		if (bp->t != B_FROM)
3571 			continue;
3572 		stats_counter_bump(fmep->Marrowcount);
3573 		for (ap = itree_next_arrow(bp, NULL); ap;
3574 		    ap = itree_next_arrow(bp, ap)) {
3575 			struct event *ep2 = ap->arrowp->head->myevent;
3576 			/*
3577 			 * if we're clearing marks, we can avoid doing
3578 			 * all that work evaluating constraints.
3579 			 */
3580 			if (mark == 0) {
3581 				if (ap->arrowp->arrow_marked == 0)
3582 					continue;
3583 				ap->arrowp->arrow_marked = 0;
3584 				ap->arrowp->mark &= ~EFFECTS_COUNTER;
3585 				if (keep && (ep2->cached_state &
3586 				    (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
3587 					ep2->keep_in_tree = 1;
3588 				ep2->cached_state &=
3589 				    ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
3590 				(void) mark_arrows(fmep, ep2, mark, 0, NULL,
3591 				    keep);
3592 				continue;
3593 			}
3594 			ap->arrowp->arrow_marked = 1;
3595 			if (ep2->cached_state & REQMNTS_DISPROVED) {
3596 				indent();
3597 				out(O_ALTFP|O_VERB|O_NONL,
3598 				    "  ALREADY DISPROVED ");
3599 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3600 				out(O_ALTFP|O_VERB, NULL);
3601 				continue;
3602 			}
3603 			if (ep2->cached_state & WAIT_EFFECT) {
3604 				indent();
3605 				out(O_ALTFP|O_VERB|O_NONL,
3606 				    "  ALREADY EFFECTS WAIT ");
3607 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3608 				out(O_ALTFP|O_VERB, NULL);
3609 				continue;
3610 			}
3611 			if (ep2->cached_state & CREDIBLE_EFFECT) {
3612 				indent();
3613 				out(O_ALTFP|O_VERB|O_NONL,
3614 				    "  ALREADY EFFECTS CREDIBLE ");
3615 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3616 				out(O_ALTFP|O_VERB, NULL);
3617 				continue;
3618 			}
3619 			if ((ep2->cached_state & PARENT_WAIT) &&
3620 			    (mark & PARENT_WAIT)) {
3621 				indent();
3622 				out(O_ALTFP|O_VERB|O_NONL,
3623 				    "  ALREADY PARENT EFFECTS WAIT ");
3624 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3625 				out(O_ALTFP|O_VERB, NULL);
3626 				continue;
3627 			}
3628 			platform_set_payloadnvp(ep2->nvp);
3629 			if (checkconstraints(fmep, ap->arrowp) == 0) {
3630 				platform_set_payloadnvp(NULL);
3631 				indent();
3632 				out(O_ALTFP|O_VERB|O_NONL,
3633 				    "  CONSTRAINTS FAIL ");
3634 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3635 				out(O_ALTFP|O_VERB, NULL);
3636 				continue;
3637 			}
3638 			platform_set_payloadnvp(NULL);
3639 			ap->arrowp->mark |= EFFECTS_COUNTER;
3640 			if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
3641 				indent();
3642 				out(O_ALTFP|O_VERB|O_NONL,
3643 				    "  K-COUNT NOT YET MET ");
3644 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3645 				out(O_ALTFP|O_VERB, NULL);
3646 				continue;
3647 			}
3648 			ep2->cached_state &= ~PARENT_WAIT;
3649 			/*
3650 			 * if we've reached an ereport and no propagation time
3651 			 * is specified, use the Hesitate value
3652 			 */
3653 			if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
3654 			    ap->arrowp->maxdelay == 0ULL) {
3655 				out(O_ALTFP|O_VERB|O_NONL, "  default wait ");
3656 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3657 				out(O_ALTFP|O_VERB, NULL);
3658 				result = requirements_test(fmep, ep2, Hesitate,
3659 				    &my_delay);
3660 			} else {
3661 				result = requirements_test(fmep, ep2,
3662 				    at_latest_by + ap->arrowp->maxdelay,
3663 				    &my_delay);
3664 			}
3665 			if (result == FME_WAIT) {
3666 				retval = WAIT_EFFECT;
3667 				if (overall_delay > my_delay)
3668 					overall_delay = my_delay;
3669 				ep2->cached_state |= WAIT_EFFECT;
3670 				indent();
3671 				out(O_ALTFP|O_VERB|O_NONL, "  EFFECTS WAIT ");
3672 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3673 				out(O_ALTFP|O_VERB, NULL);
3674 				indent_push("  E");
3675 				if (mark_arrows(fmep, ep2, PARENT_WAIT,
3676 				    at_latest_by, &my_delay, 0) ==
3677 				    WAIT_EFFECT) {
3678 					retval = WAIT_EFFECT;
3679 					if (overall_delay > my_delay)
3680 						overall_delay = my_delay;
3681 				}
3682 				indent_pop();
3683 			} else if (result == FME_DISPROVED) {
3684 				indent();
3685 				out(O_ALTFP|O_VERB|O_NONL,
3686 				    "  EFFECTS DISPROVED ");
3687 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3688 				out(O_ALTFP|O_VERB, NULL);
3689 			} else {
3690 				ep2->cached_state |= mark;
3691 				indent();
3692 				if (mark == CREDIBLE_EFFECT)
3693 					out(O_ALTFP|O_VERB|O_NONL,
3694 					    "  EFFECTS CREDIBLE ");
3695 				else
3696 					out(O_ALTFP|O_VERB|O_NONL,
3697 					    "  PARENT EFFECTS WAIT ");
3698 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3699 				out(O_ALTFP|O_VERB, NULL);
3700 				indent_push("  E");
3701 				if (mark_arrows(fmep, ep2, mark, at_latest_by,
3702 				    &my_delay, 0) == WAIT_EFFECT) {
3703 					retval = WAIT_EFFECT;
3704 					if (overall_delay > my_delay)
3705 						overall_delay = my_delay;
3706 				}
3707 				indent_pop();
3708 			}
3709 		}
3710 	}
3711 	if (retval == WAIT_EFFECT)
3712 		*pdelay = overall_delay;
3713 	return (retval);
3714 }
3715 
3716 static enum fme_state
3717 effects_test(struct fme *fmep, struct event *fault_event,
3718     unsigned long long at_latest_by, unsigned long long *pdelay)
3719 {
3720 	struct event *error_event;
3721 	enum fme_state return_value = FME_CREDIBLE;
3722 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3723 	unsigned long long my_delay;
3724 
3725 	stats_counter_bump(fmep->Ecallcount);
3726 	indent_push("  E");
3727 	indent();
3728 	out(O_ALTFP|O_VERB|O_NONL, "->");
3729 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3730 	out(O_ALTFP|O_VERB, NULL);
3731 
3732 	if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
3733 	    &my_delay, 0) == WAIT_EFFECT) {
3734 		return_value = FME_WAIT;
3735 		if (overall_delay > my_delay)
3736 			overall_delay = my_delay;
3737 	}
3738 	for (error_event = fmep->observations;
3739 	    error_event; error_event = error_event->observations) {
3740 		indent();
3741 		out(O_ALTFP|O_VERB|O_NONL, " ");
3742 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
3743 		if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
3744 			if (error_event->cached_state &
3745 			    (PARENT_WAIT|WAIT_EFFECT)) {
3746 				out(O_ALTFP|O_VERB, " NOT YET triggered");
3747 				continue;
3748 			}
3749 			return_value = FME_DISPROVED;
3750 			out(O_ALTFP|O_VERB, " NOT triggered");
3751 			break;
3752 		} else {
3753 			out(O_ALTFP|O_VERB, " triggered");
3754 		}
3755 	}
3756 	if (return_value == FME_DISPROVED) {
3757 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
3758 	} else {
3759 		fault_event->keep_in_tree = 1;
3760 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
3761 	}
3762 
3763 	indent();
3764 	out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
3765 	    fme_state2str(return_value));
3766 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3767 	out(O_ALTFP|O_VERB, NULL);
3768 	indent_pop();
3769 	if (return_value == FME_WAIT)
3770 		*pdelay = overall_delay;
3771 	return (return_value);
3772 }
3773 
3774 static enum fme_state
3775 requirements_test(struct fme *fmep, struct event *ep,
3776     unsigned long long at_latest_by, unsigned long long *pdelay)
3777 {
3778 	int waiting_events;
3779 	int credible_events;
3780 	int deferred_events;
3781 	enum fme_state return_value = FME_CREDIBLE;
3782 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3783 	unsigned long long arrow_delay;
3784 	unsigned long long my_delay;
3785 	struct event *ep2;
3786 	struct bubble *bp;
3787 	struct arrowlist *ap;
3788 
3789 	if (ep->cached_state & REQMNTS_CREDIBLE) {
3790 		indent();
3791 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY CREDIBLE ");
3792 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3793 		out(O_ALTFP|O_VERB, NULL);
3794 		return (FME_CREDIBLE);
3795 	}
3796 	if (ep->cached_state & REQMNTS_DISPROVED) {
3797 		indent();
3798 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY DISPROVED ");
3799 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3800 		out(O_ALTFP|O_VERB, NULL);
3801 		return (FME_DISPROVED);
3802 	}
3803 	if (ep->cached_state & REQMNTS_WAIT) {
3804 		indent();
3805 		*pdelay = ep->cached_delay;
3806 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY WAIT ");
3807 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3808 		out(O_ALTFP|O_VERB|O_NONL, ", wait for: ");
3809 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3810 		out(O_ALTFP|O_VERB, NULL);
3811 		return (FME_WAIT);
3812 	}
3813 	stats_counter_bump(fmep->Rcallcount);
3814 	indent_push("  R");
3815 	indent();
3816 	out(O_ALTFP|O_VERB|O_NONL, "->");
3817 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3818 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
3819 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3820 	out(O_ALTFP|O_VERB, NULL);
3821 
3822 	if (ep->t == N_EREPORT) {
3823 		if (ep->count == 0) {
3824 			if (fmep->pull >= at_latest_by) {
3825 				return_value = FME_DISPROVED;
3826 			} else {
3827 				ep->cached_delay = *pdelay = at_latest_by;
3828 				return_value = FME_WAIT;
3829 			}
3830 		}
3831 
3832 		indent();
3833 		switch (return_value) {
3834 		case FME_CREDIBLE:
3835 			ep->cached_state |= REQMNTS_CREDIBLE;
3836 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE ");
3837 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3838 			break;
3839 		case FME_DISPROVED:
3840 			ep->cached_state |= REQMNTS_DISPROVED;
3841 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
3842 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3843 			break;
3844 		case FME_WAIT:
3845 			ep->cached_state |= REQMNTS_WAIT;
3846 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT ");
3847 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3848 			out(O_ALTFP|O_VERB|O_NONL, " to ");
3849 			ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3850 			break;
3851 		default:
3852 			out(O_DIE, "requirements_test: unexpected fme_state");
3853 			break;
3854 		}
3855 		out(O_ALTFP|O_VERB, NULL);
3856 		indent_pop();
3857 
3858 		return (return_value);
3859 	}
3860 
3861 	/* this event is not a report, descend the tree */
3862 	for (bp = itree_next_bubble(ep, NULL); bp;
3863 	    bp = itree_next_bubble(ep, bp)) {
3864 		int n;
3865 
3866 		if (bp->t != B_FROM)
3867 			continue;
3868 
3869 		n = bp->nork;
3870 
3871 		credible_events = 0;
3872 		waiting_events = 0;
3873 		deferred_events = 0;
3874 		arrow_delay = TIMEVAL_EVENTUALLY;
3875 		/*
3876 		 * n is -1 for 'A' so adjust it.
3877 		 * XXX just count up the arrows for now.
3878 		 */
3879 		if (n < 0) {
3880 			n = 0;
3881 			for (ap = itree_next_arrow(bp, NULL); ap;
3882 			    ap = itree_next_arrow(bp, ap))
3883 				n++;
3884 			indent();
3885 			out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n);
3886 		} else {
3887 			indent();
3888 			out(O_ALTFP|O_VERB, " Bubble N=%d", n);
3889 		}
3890 
3891 		if (n == 0)
3892 			continue;
3893 		if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) {
3894 			for (ap = itree_next_arrow(bp, NULL); ap;
3895 			    ap = itree_next_arrow(bp, ap)) {
3896 				ep2 = ap->arrowp->head->myevent;
3897 				platform_set_payloadnvp(ep2->nvp);
3898 				(void) checkconstraints(fmep, ap->arrowp);
3899 				if (ap->arrowp->forever_true) {
3900 					/*
3901 					 * if all arrows are invalidated by the
3902 					 * constraints, then we should elide the
3903 					 * whole bubble to be consistant with
3904 					 * the tree creation time behaviour
3905 					 */
3906 					bp->mark |= BUBBLE_OK;
3907 					platform_set_payloadnvp(NULL);
3908 					break;
3909 				}
3910 				platform_set_payloadnvp(NULL);
3911 			}
3912 		}
3913 		for (ap = itree_next_arrow(bp, NULL); ap;
3914 		    ap = itree_next_arrow(bp, ap)) {
3915 			ep2 = ap->arrowp->head->myevent;
3916 			if (n <= credible_events)
3917 				break;
3918 
3919 			ap->arrowp->mark |= REQMNTS_COUNTER;
3920 			if (triggered(fmep, ep2, REQMNTS_COUNTER))
3921 				/* XXX adding max timevals! */
3922 				switch (requirements_test(fmep, ep2,
3923 				    at_latest_by + ap->arrowp->maxdelay,
3924 				    &my_delay)) {
3925 				case FME_DEFERRED:
3926 					deferred_events++;
3927 					break;
3928 				case FME_CREDIBLE:
3929 					credible_events++;
3930 					break;
3931 				case FME_DISPROVED:
3932 					break;
3933 				case FME_WAIT:
3934 					if (my_delay < arrow_delay)
3935 						arrow_delay = my_delay;
3936 					waiting_events++;
3937 					break;
3938 				default:
3939 					out(O_DIE,
3940 					"Bug in requirements_test.");
3941 				}
3942 			else
3943 				deferred_events++;
3944 		}
3945 		if (!(bp->mark & BUBBLE_OK) && waiting_events == 0) {
3946 			bp->mark |= BUBBLE_ELIDED;
3947 			continue;
3948 		}
3949 		indent();
3950 		out(O_ALTFP|O_VERB, " Credible: %d Waiting %d",
3951 		    credible_events + deferred_events, waiting_events);
3952 		if (credible_events + deferred_events + waiting_events < n) {
3953 			/* Can never meet requirements */
3954 			ep->cached_state |= REQMNTS_DISPROVED;
3955 			indent();
3956 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
3957 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3958 			out(O_ALTFP|O_VERB, NULL);
3959 			indent_pop();
3960 			return (FME_DISPROVED);
3961 		}
3962 		if (credible_events + deferred_events < n) {
3963 			/* will have to wait */
3964 			/* wait time is shortest known */
3965 			if (arrow_delay < overall_delay)
3966 				overall_delay = arrow_delay;
3967 			return_value = FME_WAIT;
3968 		} else if (credible_events < n) {
3969 			if (return_value != FME_WAIT)
3970 				return_value = FME_DEFERRED;
3971 		}
3972 	}
3973 
3974 	/*
3975 	 * don't mark as FME_DEFERRED. If this event isn't reached by another
3976 	 * path, then this will be considered FME_CREDIBLE. But if it is
3977 	 * reached by a different path so the K-count is met, then might
3978 	 * get overridden by FME_WAIT or FME_DISPROVED.
3979 	 */
3980 	if (return_value == FME_WAIT) {
3981 		ep->cached_state |= REQMNTS_WAIT;
3982 		ep->cached_delay = *pdelay = overall_delay;
3983 	} else if (return_value == FME_CREDIBLE) {
3984 		ep->cached_state |= REQMNTS_CREDIBLE;
3985 	}
3986 	indent();
3987 	out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ",
3988 	    fme_state2str(return_value));
3989 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3990 	out(O_ALTFP|O_VERB, NULL);
3991 	indent_pop();
3992 	return (return_value);
3993 }
3994 
3995 static enum fme_state
3996 causes_test(struct fme *fmep, struct event *ep,
3997     unsigned long long at_latest_by, unsigned long long *pdelay)
3998 {
3999 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4000 	unsigned long long my_delay;
4001 	int credible_results = 0;
4002 	int waiting_results = 0;
4003 	enum fme_state fstate;
4004 	struct event *tail_event;
4005 	struct bubble *bp;
4006 	struct arrowlist *ap;
4007 	int k = 1;
4008 
4009 	stats_counter_bump(fmep->Ccallcount);
4010 	indent_push("  C");
4011 	indent();
4012 	out(O_ALTFP|O_VERB|O_NONL, "->");
4013 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4014 	out(O_ALTFP|O_VERB, NULL);
4015 
4016 	for (bp = itree_next_bubble(ep, NULL); bp;
4017 	    bp = itree_next_bubble(ep, bp)) {
4018 		if (bp->t != B_TO)
4019 			continue;
4020 		k = bp->nork;	/* remember the K value */
4021 		for (ap = itree_next_arrow(bp, NULL); ap;
4022 		    ap = itree_next_arrow(bp, ap)) {
4023 			int do_not_follow = 0;
4024 
4025 			/*
4026 			 * if we get to the same event multiple times
4027 			 * only worry about the first one.
4028 			 */
4029 			if (ap->arrowp->tail->myevent->cached_state &
4030 			    CAUSES_TESTED) {
4031 				indent();
4032 				out(O_ALTFP|O_VERB|O_NONL,
4033 				    "  causes test already run for ");
4034 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4035 				    ap->arrowp->tail->myevent);
4036 				out(O_ALTFP|O_VERB, NULL);
4037 				continue;
4038 			}
4039 
4040 			/*
4041 			 * see if false constraint prevents us
4042 			 * from traversing this arrow
4043 			 */
4044 			platform_set_payloadnvp(ep->nvp);
4045 			if (checkconstraints(fmep, ap->arrowp) == 0)
4046 				do_not_follow = 1;
4047 			platform_set_payloadnvp(NULL);
4048 			if (do_not_follow) {
4049 				indent();
4050 				out(O_ALTFP|O_VERB|O_NONL,
4051 				    "  False arrow from ");
4052 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4053 				    ap->arrowp->tail->myevent);
4054 				out(O_ALTFP|O_VERB, NULL);
4055 				continue;
4056 			}
4057 
4058 			ap->arrowp->tail->myevent->cached_state |=
4059 			    CAUSES_TESTED;
4060 			tail_event = ap->arrowp->tail->myevent;
4061 			fstate = hypothesise(fmep, tail_event, at_latest_by,
4062 			    &my_delay);
4063 
4064 			switch (fstate) {
4065 			case FME_WAIT:
4066 				if (my_delay < overall_delay)
4067 					overall_delay = my_delay;
4068 				waiting_results++;
4069 				break;
4070 			case FME_CREDIBLE:
4071 				credible_results++;
4072 				break;
4073 			case FME_DISPROVED:
4074 				break;
4075 			default:
4076 				out(O_DIE, "Bug in causes_test");
4077 			}
4078 		}
4079 	}
4080 	/* compare against K */
4081 	if (credible_results + waiting_results < k) {
4082 		indent();
4083 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED ");
4084 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4085 		out(O_ALTFP|O_VERB, NULL);
4086 		indent_pop();
4087 		return (FME_DISPROVED);
4088 	}
4089 	if (waiting_results != 0) {
4090 		*pdelay = overall_delay;
4091 		indent();
4092 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT ");
4093 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4094 		out(O_ALTFP|O_VERB|O_NONL, " to ");
4095 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4096 		out(O_ALTFP|O_VERB, NULL);
4097 		indent_pop();
4098 		return (FME_WAIT);
4099 	}
4100 	indent();
4101 	out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE ");
4102 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4103 	out(O_ALTFP|O_VERB, NULL);
4104 	indent_pop();
4105 	return (FME_CREDIBLE);
4106 }
4107 
4108 static enum fme_state
4109 hypothesise(struct fme *fmep, struct event *ep,
4110 	unsigned long long at_latest_by, unsigned long long *pdelay)
4111 {
4112 	enum fme_state rtr, otr;
4113 	unsigned long long my_delay;
4114 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4115 
4116 	stats_counter_bump(fmep->Hcallcount);
4117 	indent_push("  H");
4118 	indent();
4119 	out(O_ALTFP|O_VERB|O_NONL, "->");
4120 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4121 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
4122 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4123 	out(O_ALTFP|O_VERB, NULL);
4124 
4125 	rtr = requirements_test(fmep, ep, at_latest_by, &my_delay);
4126 	if ((rtr == FME_WAIT) && (my_delay < overall_delay))
4127 		overall_delay = my_delay;
4128 	if (rtr != FME_DISPROVED) {
4129 		if (is_problem(ep->t)) {
4130 			otr = effects_test(fmep, ep, at_latest_by, &my_delay);
4131 			if (otr != FME_DISPROVED) {
4132 				if (fmep->peek == 0 && ep->is_suspect == 0) {
4133 					ep->suspects = fmep->suspects;
4134 					ep->is_suspect = 1;
4135 					fmep->suspects = ep;
4136 					fmep->nsuspects++;
4137 					if (!is_fault(ep->t))
4138 						fmep->nonfault++;
4139 				}
4140 			}
4141 		} else
4142 			otr = causes_test(fmep, ep, at_latest_by, &my_delay);
4143 		if ((otr == FME_WAIT) && (my_delay < overall_delay))
4144 			overall_delay = my_delay;
4145 		if ((otr != FME_DISPROVED) &&
4146 		    ((rtr == FME_WAIT) || (otr == FME_WAIT)))
4147 			*pdelay = overall_delay;
4148 	}
4149 	if (rtr == FME_DISPROVED) {
4150 		indent();
4151 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4152 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4153 		out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
4154 		indent_pop();
4155 		return (FME_DISPROVED);
4156 	}
4157 	if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
4158 		indent();
4159 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4160 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4161 		out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
4162 		indent_pop();
4163 		return (FME_DISPROVED);
4164 	}
4165 	if (otr == FME_DISPROVED) {
4166 		indent();
4167 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4168 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4169 		out(O_ALTFP|O_VERB, " (causes are not credible)");
4170 		indent_pop();
4171 		return (FME_DISPROVED);
4172 	}
4173 	if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
4174 		indent();
4175 		out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
4176 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4177 		out(O_ALTFP|O_VERB|O_NONL, " to ");
4178 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
4179 		out(O_ALTFP|O_VERB, NULL);
4180 		indent_pop();
4181 		return (FME_WAIT);
4182 	}
4183 	indent();
4184 	out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
4185 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4186 	out(O_ALTFP|O_VERB, NULL);
4187 	indent_pop();
4188 	return (FME_CREDIBLE);
4189 }
4190 
4191 /*
4192  * fme_istat_load -- reconstitute any persistent istats
4193  */
4194 void
4195 fme_istat_load(fmd_hdl_t *hdl)
4196 {
4197 	int sz;
4198 	char *sbuf;
4199 	char *ptr;
4200 
4201 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) {
4202 		out(O_ALTFP, "fme_istat_load: No stats");
4203 		return;
4204 	}
4205 
4206 	sbuf = alloca(sz);
4207 
4208 	fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz);
4209 
4210 	/*
4211 	 * pick apart the serialized stats
4212 	 *
4213 	 * format is:
4214 	 *	<class-name>, '@', <path>, '\0', <value>, '\0'
4215 	 * for example:
4216 	 *	"stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
4217 	 *
4218 	 * since this is parsing our own serialized data, any parsing issues
4219 	 * are fatal, so we check for them all with ASSERT() below.
4220 	 */
4221 	ptr = sbuf;
4222 	while (ptr < &sbuf[sz]) {
4223 		char *sepptr;
4224 		struct node *np;
4225 		int val;
4226 
4227 		sepptr = strchr(ptr, '@');
4228 		ASSERT(sepptr != NULL);
4229 		*sepptr = '\0';
4230 
4231 		/* construct the event */
4232 		np = newnode(T_EVENT, NULL, 0);
4233 		np->u.event.ename = newnode(T_NAME, NULL, 0);
4234 		np->u.event.ename->u.name.t = N_STAT;
4235 		np->u.event.ename->u.name.s = stable(ptr);
4236 		np->u.event.ename->u.name.it = IT_ENAME;
4237 		np->u.event.ename->u.name.last = np->u.event.ename;
4238 
4239 		ptr = sepptr + 1;
4240 		ASSERT(ptr < &sbuf[sz]);
4241 		ptr += strlen(ptr);
4242 		ptr++;	/* move past the '\0' separating path from value */
4243 		ASSERT(ptr < &sbuf[sz]);
4244 		ASSERT(isdigit(*ptr));
4245 		val = atoi(ptr);
4246 		ASSERT(val > 0);
4247 		ptr += strlen(ptr);
4248 		ptr++;	/* move past the final '\0' for this entry */
4249 
4250 		np->u.event.epname = pathstring2epnamenp(sepptr + 1);
4251 		ASSERT(np->u.event.epname != NULL);
4252 
4253 		istat_bump(np, val);
4254 		tree_free(np);
4255 	}
4256 
4257 	istat_save();
4258 }
4259