xref: /titanic_52/usr/src/cmd/fm/modules/common/eversholt/fme.c (revision aef83d42faaccf25ad8bd8dc892c2fb6fa7efdad)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * fme.c -- fault management exercise module
27  *
28  * this module provides the simulated fault management exercise.
29  */
30 
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <strings.h>
35 #include <ctype.h>
36 #include <alloca.h>
37 #include <libnvpair.h>
38 #include <sys/fm/protocol.h>
39 #include <fm/fmd_api.h>
40 #include "alloc.h"
41 #include "out.h"
42 #include "stats.h"
43 #include "stable.h"
44 #include "literals.h"
45 #include "lut.h"
46 #include "tree.h"
47 #include "ptree.h"
48 #include "itree.h"
49 #include "ipath.h"
50 #include "fme.h"
51 #include "evnv.h"
52 #include "eval.h"
53 #include "config.h"
54 #include "platform.h"
55 #include "esclex.h"
56 
/* imported from eft.c... */
extern hrtime_t Hesitate;
extern char *Serd_Override;
extern nv_alloc_t Eft_nv_hdl;	/* allocator handed to nvlist_x[un]pack() */
extern int Max_fme;
extern fmd_hdl_t *Hdl;

static int Istat_need_save;
static int Serd_need_save;
void istat_save(void);
void serd_save(void);

/* fme under construction is global so we can free it on module abort */
static struct fme *Nfmep;

/*
 * Reason code describing why a case could not be diagnosed.  It is set
 * immediately before an undiagnosable case is published and is read
 * when the defect for that case is built.
 */
static int Undiag_reason = UD_VAL_UNKNOWN;

/*
 * Id given to the next FME created by newfme().  fme_restart() bumps it
 * past the id of every FME it replays.
 */
static int Nextid = 0;

static int Open_fme_count = 0;	/* Count of open FMEs */
77 
/*
 * list of fault management exercises underway
 *
 * FMElist is the head and EFMElist the tail of the list fme_ready()
 * appends to.  ClosedFMEs is a separate list; fme_fini() destroys the
 * entries on both.
 */
static struct fme {
	struct fme *next;		/* next exercise */
	unsigned long long ull;		/* time when fme was created */
	int id;				/* FME id */
	struct config *config;		/* cooked configuration data */
	struct lut *eventtree;		/* propagation tree for this FME */
	/*
	 * The initial error report that created this FME is kept in
	 * two forms.  e0 points to the instance tree node and is used
	 * by fme_eval() as the starting point for the inference
	 * algorithm.  e0r is the event handle FMD passed to us when
	 * the ereport first arrived and is used when setting timers,
	 * which are always relative to the time of this initial
	 * report.
	 */
	struct event *e0;
	fmd_event_t *e0r;

	id_t    timer;			/* for setting an fmd time-out */

	struct event *ecurrent;		/* ereport under consideration */
	struct event *suspects;		/* current suspect list */
	struct event *psuspects;	/* previous suspect list */
	int nsuspects;			/* count of suspects */
	int posted_suspects;		/* true if we've posted a diagnosis */
	int uniqobs;			/* number of unique events observed */
	int peek;			/* just peeking, don't track suspects */
	int overflow;			/* true if overflow FME */
	/* values start at 5000; fme_state2str() maps them to names */
	enum fme_state {
		FME_NOTHING = 5000,	/* not evaluated yet */
		FME_WAIT,		/* need to wait for more info */
		FME_CREDIBLE,		/* suspect list is credible */
		FME_DISPROVED,		/* no valid suspects found */
		FME_DEFERRED		/* don't know yet (k-count not met) */
	} state;

	unsigned long long pull;	/* time passed since created */
	unsigned long long wull;	/* wait until this time for re-eval */
	struct event *observations;	/* observation list */
	struct lut *globals;		/* values of global variables */
	/* fmd interfacing */
	fmd_hdl_t *hdl;			/* handle for talking with fmd */
	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
	/* stats -- created by fme_ready(), deleted by destroy_fme() */
	struct stats *Rcount;
	struct stats *Hcallcount;
	struct stats *Rcallcount;
	struct stats *Ccallcount;
	struct stats *Ecallcount;
	struct stats *Tcallcount;
	struct stats *Marrowcount;
	struct stats *diags;
} *FMElist, *EFMElist, *ClosedFMEs;
132 
/*
 * Singly-linked list of fmd cases that fme_restart() was unable to
 * restart.  New entries are pushed on the front of Undiagablecaselist;
 * fme_fini() frees the list nodes.
 */
static struct case_list {
	fmd_case_t *fmcase;
	struct case_list *next;
} *Undiagablecaselist;
137 
/* forward declarations of file-local functions */
static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
	unsigned long long at_latest_by, unsigned long long *pdelay);
static struct node *eventprop_lookup(struct event *ep, const char *propname);
static struct node *pathstring2epnamenp(char *path);
static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
	fmd_case_t *fmcase, nvlist_t *detector, char *arg);
static char *undiag_2reason_str(int ud, char *arg);
static const char *undiag_2defect_str(int ud);
static void restore_suspects(struct fme *fmep);
static void save_suspects(struct fme *fmep);
static void destroy_fme(struct fme *f);
static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
    const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
static void istat_counter_reset_cb(struct istat_entry *entp,
    struct stats *statp, const struct ipath *ipp);
static void istat_counter_topo_chg_cb(struct istat_entry *entp,
    struct stats *statp, void *unused);
static void serd_reset_cb(struct serd_entry *entp, void *unused,
    const struct ipath *ipp);
static void serd_topo_chg_cb(struct serd_entry *entp, void *unused,
    void *unused2);
static void destroy_fme_bufs(struct fme *fp);
161 
162 static struct fme *
163 alloc_fme(void)
164 {
165 	struct fme *fmep;
166 
167 	fmep = MALLOC(sizeof (*fmep));
168 	bzero(fmep, sizeof (*fmep));
169 	return (fmep);
170 }
171 
172 /*
173  * fme_ready -- called when all initialization of the FME (except for
174  *	stats) has completed successfully.  Adds the fme to global lists
175  *	and establishes its stats.
176  */
177 static struct fme *
178 fme_ready(struct fme *fmep)
179 {
180 	char nbuf[100];
181 
182 	Nfmep = NULL;	/* don't need to free this on module abort now */
183 
184 	if (EFMElist) {
185 		EFMElist->next = fmep;
186 		EFMElist = fmep;
187 	} else
188 		FMElist = EFMElist = fmep;
189 
190 	(void) sprintf(nbuf, "fme%d.Rcount", fmep->id);
191 	fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
192 	(void) sprintf(nbuf, "fme%d.Hcall", fmep->id);
193 	fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1);
194 	(void) sprintf(nbuf, "fme%d.Rcall", fmep->id);
195 	fmep->Rcallcount = stats_new_counter(nbuf,
196 	    "calls to requirements_test()", 1);
197 	(void) sprintf(nbuf, "fme%d.Ccall", fmep->id);
198 	fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1);
199 	(void) sprintf(nbuf, "fme%d.Ecall", fmep->id);
200 	fmep->Ecallcount =
201 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
202 	(void) sprintf(nbuf, "fme%d.Tcall", fmep->id);
203 	fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
204 	(void) sprintf(nbuf, "fme%d.Marrow", fmep->id);
205 	fmep->Marrowcount = stats_new_counter(nbuf,
206 	    "arrows marked by mark_arrows()", 1);
207 	(void) sprintf(nbuf, "fme%d.diags", fmep->id);
208 	fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
209 
210 	out(O_ALTFP|O_VERB2, "newfme: config snapshot contains...");
211 	config_print(O_ALTFP|O_VERB2, fmep->config);
212 
213 	return (fmep);
214 }
215 
/*
 * Defined outside this file.  ipath_dummy_lut() is called by
 * set_needed_arrows() and itree_create_dummy() by prune_propagations().
 */
extern void ipath_dummy_lut(struct arrow *);
extern struct lut *itree_create_dummy(const char *, const struct ipath *);
218 
219 /* ARGSUSED */
220 static void
221 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
222 {
223 	struct bubble *bp;
224 	struct arrowlist *ap;
225 
226 	for (bp = itree_next_bubble(ep, NULL); bp;
227 	    bp = itree_next_bubble(ep, bp)) {
228 		if (bp->t != B_FROM)
229 			continue;
230 		for (ap = itree_next_arrow(bp, NULL); ap;
231 		    ap = itree_next_arrow(bp, ap)) {
232 			ap->arrowp->pnode->u.arrow.needed = 1;
233 			ipath_dummy_lut(ap->arrowp);
234 		}
235 	}
236 }
237 
238 /* ARGSUSED */
239 static void
240 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
241 {
242 	struct bubble *bp;
243 	struct arrowlist *ap;
244 
245 	for (bp = itree_next_bubble(ep, NULL); bp;
246 	    bp = itree_next_bubble(ep, bp)) {
247 		if (bp->t != B_FROM)
248 			continue;
249 		for (ap = itree_next_arrow(bp, NULL); ap;
250 		    ap = itree_next_arrow(bp, ap))
251 			ap->arrowp->pnode->u.arrow.needed = 0;
252 	}
253 }
254 
/* forward declarations needed by prune_propagations() */
static void globals_destructor(void *left, void *right, void *arg);
static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep);
257 
258 static boolean_t
259 prune_propagations(const char *e0class, const struct ipath *e0ipp)
260 {
261 	char nbuf[100];
262 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
263 	extern struct lut *Usednames;
264 
265 	Nfmep = alloc_fme();
266 	Nfmep->id = Nextid;
267 	Nfmep->state = FME_NOTHING;
268 	Nfmep->eventtree = itree_create_dummy(e0class, e0ipp);
269 	if ((Nfmep->e0 =
270 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
271 		itree_free(Nfmep->eventtree);
272 		FREE(Nfmep);
273 		Nfmep = NULL;
274 		return (B_FALSE);
275 	}
276 	Nfmep->ecurrent = Nfmep->observations = Nfmep->e0;
277 	Nfmep->e0->count++;
278 
279 	(void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id);
280 	Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
281 	(void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id);
282 	Nfmep->Hcallcount =
283 	    stats_new_counter(nbuf, "calls to hypothesise()", 1);
284 	(void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id);
285 	Nfmep->Rcallcount = stats_new_counter(nbuf,
286 	    "calls to requirements_test()", 1);
287 	(void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id);
288 	Nfmep->Ccallcount =
289 	    stats_new_counter(nbuf, "calls to causes_test()", 1);
290 	(void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id);
291 	Nfmep->Ecallcount =
292 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
293 	(void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id);
294 	Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
295 	(void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id);
296 	Nfmep->Marrowcount = stats_new_counter(nbuf,
297 	    "arrows marked by mark_arrows()", 1);
298 	(void) sprintf(nbuf, "fme%d.diags", Nfmep->id);
299 	Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
300 
301 	Nfmep->peek = 1;
302 	lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep);
303 	lut_free(Usednames, NULL, NULL);
304 	Usednames = NULL;
305 	lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep);
306 	(void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay);
307 	itree_prune(Nfmep->eventtree);
308 	lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep);
309 
310 	stats_delete(Nfmep->Rcount);
311 	stats_delete(Nfmep->Hcallcount);
312 	stats_delete(Nfmep->Rcallcount);
313 	stats_delete(Nfmep->Ccallcount);
314 	stats_delete(Nfmep->Ecallcount);
315 	stats_delete(Nfmep->Tcallcount);
316 	stats_delete(Nfmep->Marrowcount);
317 	stats_delete(Nfmep->diags);
318 	itree_free(Nfmep->eventtree);
319 	lut_free(Nfmep->globals, globals_destructor, NULL);
320 	FREE(Nfmep);
321 	return (B_TRUE);
322 }
323 
324 static struct fme *
325 newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl,
326 	fmd_case_t *fmcase, fmd_event_t *ffep, nvlist_t *nvl)
327 {
328 	struct cfgdata *cfgdata;
329 	int init_size;
330 	extern int alloc_total();
331 	nvlist_t *detector = NULL;
332 	char *pathstr;
333 	char *arg;
334 
335 	/*
336 	 * First check if e0ipp is actually in the topology so we can give a
337 	 * more useful error message.
338 	 */
339 	ipathlastcomp(e0ipp);
340 	pathstr = ipath2str(NULL, e0ipp);
341 	cfgdata = config_snapshot();
342 	platform_units_translate(0, cfgdata->cooked, NULL, NULL,
343 	    &detector, pathstr);
344 	FREE(pathstr);
345 	structconfig_free(cfgdata->cooked);
346 	config_free(cfgdata);
347 	if (detector == NULL) {
348 		Undiag_reason = UD_VAL_BADEVENTPATH;
349 		(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
350 		    &detector);
351 		arg = ipath2str(e0class, e0ipp);
352 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
353 		FREE(arg);
354 		return (NULL);
355 	}
356 
357 	/*
358 	 * Next run a quick first pass of the rules with a dummy config. This
359 	 * allows us to prune those rules which can't possibly cause this
360 	 * ereport.
361 	 */
362 	if (!prune_propagations(e0class, e0ipp)) {
363 		/*
364 		 * The fault class must have been in the rules or we would
365 		 * not have registered for it (and got a "nosub"), and the
366 		 * pathname must be in the topology or we would have failed the
367 		 * previous test. So to get here means the combination of
368 		 * class and pathname in the ereport must be invalid.
369 		 */
370 		Undiag_reason = UD_VAL_BADEVENTCLASS;
371 		arg = ipath2str(e0class, e0ipp);
372 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
373 		nvlist_free(detector);
374 		FREE(arg);
375 		return (NULL);
376 	}
377 
378 	/*
379 	 * Now go ahead and create the real fme using the pruned rules.
380 	 */
381 	init_size = alloc_total();
382 	out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size);
383 	cfgdata = config_snapshot();
384 	platform_save_config(hdl, fmcase);
385 	out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes",
386 	    alloc_total() - init_size);
387 
388 	Nfmep = alloc_fme();
389 
390 	Nfmep->id = Nextid++;
391 	Nfmep->config = cfgdata->cooked;
392 	config_free(cfgdata);
393 	Nfmep->posted_suspects = 0;
394 	Nfmep->uniqobs = 0;
395 	Nfmep->state = FME_NOTHING;
396 	Nfmep->pull = 0ULL;
397 	Nfmep->overflow = 0;
398 
399 	Nfmep->fmcase = fmcase;
400 	Nfmep->hdl = hdl;
401 
402 	if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) {
403 		Undiag_reason = UD_VAL_INSTFAIL;
404 		arg = ipath2str(e0class, e0ipp);
405 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
406 		nvlist_free(detector);
407 		FREE(arg);
408 		structconfig_free(Nfmep->config);
409 		destroy_fme_bufs(Nfmep);
410 		FREE(Nfmep);
411 		Nfmep = NULL;
412 		return (NULL);
413 	}
414 
415 	itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);
416 
417 	if ((Nfmep->e0 =
418 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
419 		Undiag_reason = UD_VAL_BADEVENTI;
420 		arg = ipath2str(e0class, e0ipp);
421 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
422 		nvlist_free(detector);
423 		FREE(arg);
424 		itree_free(Nfmep->eventtree);
425 		structconfig_free(Nfmep->config);
426 		destroy_fme_bufs(Nfmep);
427 		FREE(Nfmep);
428 		Nfmep = NULL;
429 		return (NULL);
430 	}
431 
432 	nvlist_free(detector);
433 	return (fme_ready(Nfmep));
434 }
435 
436 void
437 fme_fini(void)
438 {
439 	struct fme *sfp, *fp;
440 	struct case_list *ucasep, *nextcasep;
441 
442 	ucasep = Undiagablecaselist;
443 	while (ucasep != NULL) {
444 		nextcasep = ucasep->next;
445 		FREE(ucasep);
446 		ucasep = nextcasep;
447 	}
448 	Undiagablecaselist = NULL;
449 
450 	/* clean up closed fmes */
451 	fp = ClosedFMEs;
452 	while (fp != NULL) {
453 		sfp = fp->next;
454 		destroy_fme(fp);
455 		fp = sfp;
456 	}
457 	ClosedFMEs = NULL;
458 
459 	fp = FMElist;
460 	while (fp != NULL) {
461 		sfp = fp->next;
462 		destroy_fme(fp);
463 		fp = sfp;
464 	}
465 	FMElist = EFMElist = NULL;
466 
467 	/* if we were in the middle of creating an fme, free it now */
468 	if (Nfmep) {
469 		destroy_fme(Nfmep);
470 		Nfmep = NULL;
471 	}
472 }
473 
/*
 * Allocated space for a buffer name.  The longest name built in this
 * file is "observed<N>.nvp" (12 characters plus the digits of N plus the
 * terminating NUL), so 20 bytes allows for a ridiculous 9,999,999 unique
 * observations.
 */
#define	OBBUFNMSZ 20
479 
480 /*
481  *  serialize_observation
482  *
483  *  Create a recoverable version of the current observation
484  *  (f->ecurrent).  We keep a serialized version of each unique
485  *  observation in order that we may resume correctly the fme in the
486  *  correct state if eft or fmd crashes and we're restarted.
487  */
488 static void
489 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
490 {
491 	size_t pkdlen;
492 	char tmpbuf[OBBUFNMSZ];
493 	char *pkd = NULL;
494 	char *estr;
495 
496 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
497 	estr = ipath2str(cls, ipp);
498 	fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
499 	fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
500 	    strlen(estr) + 1);
501 	FREE(estr);
502 
503 	if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
504 		(void) snprintf(tmpbuf,
505 		    OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
506 		if (nvlist_xpack(fp->ecurrent->nvp,
507 		    &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
508 			out(O_DIE|O_SYS, "pack of observed nvl failed");
509 		fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
510 		fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
511 		FREE(pkd);
512 	}
513 
514 	fp->uniqobs++;
515 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
516 	    sizeof (fp->uniqobs));
517 }
518 
519 /*
520  *  init_fme_bufs -- We keep several bits of state about an fme for
521  *	use if eft or fmd crashes and we're restarted.
522  */
523 static void
524 init_fme_bufs(struct fme *fp)
525 {
526 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
527 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
528 	    sizeof (fp->pull));
529 
530 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
531 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
532 	    sizeof (fp->id));
533 
534 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
535 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
536 	    sizeof (fp->uniqobs));
537 
538 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
539 	    sizeof (fp->posted_suspects));
540 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
541 	    (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
542 }
543 
544 static void
545 destroy_fme_bufs(struct fme *fp)
546 {
547 	char tmpbuf[OBBUFNMSZ];
548 	int o;
549 
550 	platform_restore_config(fp->hdl, fp->fmcase);
551 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
552 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
553 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
554 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
555 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
556 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);
557 
558 	for (o = 0; o < fp->uniqobs; o++) {
559 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
560 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
561 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
562 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
563 	}
564 }
565 
566 /*
567  * reconstitute_observations -- convert a case's serialized observations
568  *	back into struct events.  Returns zero if all observations are
569  *	successfully reconstituted.
570  */
571 static int
572 reconstitute_observations(struct fme *fmep)
573 {
574 	struct event *ep;
575 	struct node *epnamenp = NULL;
576 	size_t pkdlen;
577 	char *pkd = NULL;
578 	char *tmpbuf = alloca(OBBUFNMSZ);
579 	char *sepptr;
580 	char *estr;
581 	int ocnt;
582 	int elen;
583 
584 	for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
585 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
586 		elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
587 		if (elen == 0) {
588 			out(O_ALTFP,
589 			    "reconstitute_observation: no %s buffer found.",
590 			    tmpbuf);
591 			Undiag_reason = UD_VAL_MISSINGOBS;
592 			break;
593 		}
594 
595 		estr = MALLOC(elen);
596 		fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
597 		sepptr = strchr(estr, '@');
598 		if (sepptr == NULL) {
599 			out(O_ALTFP,
600 			    "reconstitute_observation: %s: "
601 			    "missing @ separator in %s.",
602 			    tmpbuf, estr);
603 			Undiag_reason = UD_VAL_MISSINGPATH;
604 			FREE(estr);
605 			break;
606 		}
607 
608 		*sepptr = '\0';
609 		if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
610 			out(O_ALTFP,
611 			    "reconstitute_observation: %s: "
612 			    "trouble converting path string \"%s\" "
613 			    "to internal representation.",
614 			    tmpbuf, sepptr + 1);
615 			Undiag_reason = UD_VAL_MISSINGPATH;
616 			FREE(estr);
617 			break;
618 		}
619 
620 		/* construct the event */
621 		ep = itree_lookup(fmep->eventtree,
622 		    stable(estr), ipath(epnamenp));
623 		if (ep == NULL) {
624 			out(O_ALTFP,
625 			    "reconstitute_observation: %s: "
626 			    "lookup of  \"%s\" in itree failed.",
627 			    tmpbuf, ipath2str(estr, ipath(epnamenp)));
628 			Undiag_reason = UD_VAL_BADOBS;
629 			tree_free(epnamenp);
630 			FREE(estr);
631 			break;
632 		}
633 		tree_free(epnamenp);
634 
635 		/*
636 		 * We may or may not have a saved nvlist for the observation
637 		 */
638 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
639 		pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
640 		if (pkdlen != 0) {
641 			pkd = MALLOC(pkdlen);
642 			fmd_buf_read(fmep->hdl,
643 			    fmep->fmcase, tmpbuf, pkd, pkdlen);
644 			ASSERT(ep->nvp == NULL);
645 			if (nvlist_xunpack(pkd,
646 			    pkdlen, &ep->nvp, &Eft_nv_hdl) != 0)
647 				out(O_DIE|O_SYS, "pack of observed nvl failed");
648 			FREE(pkd);
649 		}
650 
651 		if (ocnt == 0)
652 			fmep->e0 = ep;
653 
654 		FREE(estr);
655 		fmep->ecurrent = ep;
656 		ep->count++;
657 
658 		/* link it into list of observations seen */
659 		ep->observations = fmep->observations;
660 		fmep->observations = ep;
661 	}
662 
663 	if (ocnt == fmep->uniqobs) {
664 		(void) fme_ready(fmep);
665 		return (0);
666 	}
667 
668 	return (1);
669 }
670 
671 /*
672  * restart_fme -- called during eft initialization.  Reconstitutes
673  *	an in-progress fme.
674  */
675 void
676 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress)
677 {
678 	nvlist_t *defect;
679 	struct case_list *bad;
680 	struct fme *fmep;
681 	struct cfgdata *cfgdata;
682 	size_t rawsz;
683 	struct event *ep;
684 	char *tmpbuf = alloca(OBBUFNMSZ);
685 	char *sepptr;
686 	char *estr;
687 	int elen;
688 	struct node *epnamenp = NULL;
689 	int init_size;
690 	extern int alloc_total();
691 	char *reason;
692 
693 	/*
694 	 * ignore solved or closed cases
695 	 */
696 	if (fmd_case_solved(hdl, inprogress) ||
697 	    fmd_case_closed(hdl, inprogress))
698 		return;
699 
700 	fmep = alloc_fme();
701 	fmep->fmcase = inprogress;
702 	fmep->hdl = hdl;
703 
704 	if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) {
705 		out(O_ALTFP, "restart_fme: no saved posted status");
706 		Undiag_reason = UD_VAL_MISSINGINFO;
707 		goto badcase;
708 	} else {
709 		fmd_buf_read(hdl, inprogress, WOBUF_POSTD,
710 		    (void *)&fmep->posted_suspects,
711 		    sizeof (fmep->posted_suspects));
712 	}
713 
714 	if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) {
715 		out(O_ALTFP, "restart_fme: no saved id");
716 		Undiag_reason = UD_VAL_MISSINGINFO;
717 		goto badcase;
718 	} else {
719 		fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id,
720 		    sizeof (fmep->id));
721 	}
722 	if (Nextid <= fmep->id)
723 		Nextid = fmep->id + 1;
724 
725 	out(O_ALTFP, "Replay FME %d", fmep->id);
726 
727 	if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) {
728 		out(O_ALTFP, "restart_fme: No config data");
729 		Undiag_reason = UD_VAL_MISSINGINFO;
730 		goto badcase;
731 	}
732 	fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz,
733 	    sizeof (size_t));
734 
735 	if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) {
736 		out(O_ALTFP, "restart_fme: No event zero");
737 		Undiag_reason = UD_VAL_MISSINGZERO;
738 		goto badcase;
739 	}
740 
741 	if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
742 		out(O_ALTFP, "restart_fme: no saved wait time");
743 		Undiag_reason = UD_VAL_MISSINGINFO;
744 		goto badcase;
745 	} else {
746 		fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
747 		    sizeof (fmep->pull));
748 	}
749 
750 	if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
751 		out(O_ALTFP, "restart_fme: no count of observations");
752 		Undiag_reason = UD_VAL_MISSINGINFO;
753 		goto badcase;
754 	} else {
755 		fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
756 		    (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
757 	}
758 
759 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed0");
760 	elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
761 	if (elen == 0) {
762 		out(O_ALTFP, "reconstitute_observation: no %s buffer found.",
763 		    tmpbuf);
764 		Undiag_reason = UD_VAL_MISSINGOBS;
765 		goto badcase;
766 	}
767 	estr = MALLOC(elen);
768 	fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
769 	sepptr = strchr(estr, '@');
770 	if (sepptr == NULL) {
771 		out(O_ALTFP, "reconstitute_observation: %s: "
772 		    "missing @ separator in %s.",
773 		    tmpbuf, estr);
774 		Undiag_reason = UD_VAL_MISSINGPATH;
775 		FREE(estr);
776 		goto badcase;
777 	}
778 	*sepptr = '\0';
779 	if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
780 		out(O_ALTFP, "reconstitute_observation: %s: "
781 		    "trouble converting path string \"%s\" "
782 		    "to internal representation.", tmpbuf, sepptr + 1);
783 		Undiag_reason = UD_VAL_MISSINGPATH;
784 		FREE(estr);
785 		goto badcase;
786 	}
787 	(void) prune_propagations(stable(estr), ipath(epnamenp));
788 	tree_free(epnamenp);
789 	FREE(estr);
790 
791 	init_size = alloc_total();
792 	out(O_ALTFP|O_STAMP, "start config_restore using %d bytes", init_size);
793 	cfgdata = MALLOC(sizeof (struct cfgdata));
794 	cfgdata->cooked = NULL;
795 	cfgdata->devcache = NULL;
796 	cfgdata->devidcache = NULL;
797 	cfgdata->cpucache = NULL;
798 	cfgdata->raw_refcnt = 1;
799 
800 	if (rawsz > 0) {
801 		if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) {
802 			out(O_ALTFP, "restart_fme: Config data size mismatch");
803 			Undiag_reason = UD_VAL_CFGMISMATCH;
804 			goto badcase;
805 		}
806 		cfgdata->begin = MALLOC(rawsz);
807 		cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz;
808 		fmd_buf_read(hdl,
809 		    inprogress, WOBUF_CFG, cfgdata->begin, rawsz);
810 	} else {
811 		cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL;
812 	}
813 
814 	config_cook(cfgdata);
815 	fmep->config = cfgdata->cooked;
816 	config_free(cfgdata);
817 	out(O_ALTFP|O_STAMP, "config_restore added %d bytes",
818 	    alloc_total() - init_size);
819 
820 	if ((fmep->eventtree = itree_create(fmep->config)) == NULL) {
821 		/* case not properly saved or irretrievable */
822 		out(O_ALTFP, "restart_fme: NULL instance tree");
823 		Undiag_reason = UD_VAL_INSTFAIL;
824 		goto badcase;
825 	}
826 
827 	itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree);
828 
829 	if (reconstitute_observations(fmep) != 0)
830 		goto badcase;
831 
832 	out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id);
833 	for (ep = fmep->observations; ep; ep = ep->observations) {
834 		out(O_ALTFP|O_NONL, " ");
835 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
836 	}
837 	out(O_ALTFP, NULL);
838 
839 	Open_fme_count++;
840 
841 	/* give the diagnosis algorithm a shot at the new FME state */
842 	fme_eval(fmep, fmep->e0r);
843 	return;
844 
845 badcase:
846 	if (fmep->eventtree != NULL)
847 		itree_free(fmep->eventtree);
848 	if (fmep->config)
849 		structconfig_free(fmep->config);
850 	destroy_fme_bufs(fmep);
851 	FREE(fmep);
852 
853 	/*
854 	 * Since we're unable to restart the case, add it to the undiagable
855 	 * list and solve and close it as appropriate.
856 	 */
857 	bad = MALLOC(sizeof (struct case_list));
858 	bad->next = NULL;
859 
860 	if (Undiagablecaselist != NULL)
861 		bad->next = Undiagablecaselist;
862 	Undiagablecaselist = bad;
863 	bad->fmcase = inprogress;
864 
865 	out(O_ALTFP|O_NONL, "[case %s (unable to restart), ",
866 	    fmd_case_uuid(hdl, bad->fmcase));
867 
868 	if (fmd_case_solved(hdl, bad->fmcase)) {
869 		out(O_ALTFP|O_NONL, "already solved, ");
870 	} else {
871 		out(O_ALTFP|O_NONL, "solving, ");
872 		defect = fmd_nvl_create_fault(hdl,
873 		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
874 		reason = undiag_2reason_str(Undiag_reason, NULL);
875 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
876 		FREE(reason);
877 		fmd_case_add_suspect(hdl, bad->fmcase, defect);
878 		fmd_case_solve(hdl, bad->fmcase);
879 		Undiag_reason = UD_VAL_UNKNOWN;
880 	}
881 
882 	if (fmd_case_closed(hdl, bad->fmcase)) {
883 		out(O_ALTFP, "already closed ]");
884 	} else {
885 		out(O_ALTFP, "closing ]");
886 		fmd_case_close(hdl, bad->fmcase);
887 	}
888 }
889 
890 /*ARGSUSED*/
891 static void
892 globals_destructor(void *left, void *right, void *arg)
893 {
894 	struct evalue *evp = (struct evalue *)right;
895 	if (evp->t == NODEPTR)
896 		tree_free((struct node *)(uintptr_t)evp->v);
897 	evp->v = (uintptr_t)NULL;
898 	FREE(evp);
899 }
900 
901 void
902 destroy_fme(struct fme *f)
903 {
904 	stats_delete(f->Rcount);
905 	stats_delete(f->Hcallcount);
906 	stats_delete(f->Rcallcount);
907 	stats_delete(f->Ccallcount);
908 	stats_delete(f->Ecallcount);
909 	stats_delete(f->Tcallcount);
910 	stats_delete(f->Marrowcount);
911 	stats_delete(f->diags);
912 
913 	if (f->eventtree != NULL)
914 		itree_free(f->eventtree);
915 	if (f->config)
916 		structconfig_free(f->config);
917 	lut_free(f->globals, globals_destructor, NULL);
918 	FREE(f);
919 }
920 
921 static const char *
922 fme_state2str(enum fme_state s)
923 {
924 	switch (s) {
925 	case FME_NOTHING:	return ("NOTHING");
926 	case FME_WAIT:		return ("WAIT");
927 	case FME_CREDIBLE:	return ("CREDIBLE");
928 	case FME_DISPROVED:	return ("DISPROVED");
929 	case FME_DEFERRED:	return ("DEFERRED");
930 	default:		return ("UNKNOWN");
931 	}
932 }
933 
934 static int
935 is_problem(enum nametype t)
936 {
937 	return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
938 }
939 
940 static int
941 is_defect(enum nametype t)
942 {
943 	return (t == N_DEFECT);
944 }
945 
946 static int
947 is_upset(enum nametype t)
948 {
949 	return (t == N_UPSET);
950 }
951 
952 static void
953 fme_print(int flags, struct fme *fmep)
954 {
955 	struct event *ep;
956 
957 	out(flags, "Fault Management Exercise %d", fmep->id);
958 	out(flags, "\t       State: %s", fme_state2str(fmep->state));
959 	out(flags|O_NONL, "\t  Start time: ");
960 	ptree_timeval(flags|O_NONL, &fmep->ull);
961 	out(flags, NULL);
962 	if (fmep->wull) {
963 		out(flags|O_NONL, "\t   Wait time: ");
964 		ptree_timeval(flags|O_NONL, &fmep->wull);
965 		out(flags, NULL);
966 	}
967 	out(flags|O_NONL, "\t          E0: ");
968 	if (fmep->e0)
969 		itree_pevent_brief(flags|O_NONL, fmep->e0);
970 	else
971 		out(flags|O_NONL, "NULL");
972 	out(flags, NULL);
973 	out(flags|O_NONL, "\tObservations:");
974 	for (ep = fmep->observations; ep; ep = ep->observations) {
975 		out(flags|O_NONL, " ");
976 		itree_pevent_brief(flags|O_NONL, ep);
977 	}
978 	out(flags, NULL);
979 	out(flags|O_NONL, "\tSuspect list:");
980 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
981 		out(flags|O_NONL, " ");
982 		itree_pevent_brief(flags|O_NONL, ep);
983 	}
984 	out(flags, NULL);
985 	if (fmep->eventtree != NULL) {
986 		out(flags|O_VERB2, "\t        Tree:");
987 		itree_ptree(flags|O_VERB2, fmep->eventtree);
988 	}
989 }
990 
991 static struct node *
992 pathstring2epnamenp(char *path)
993 {
994 	char *sep = "/";
995 	struct node *ret;
996 	char *ptr;
997 
998 	if ((ptr = strtok(path, sep)) == NULL)
999 		out(O_DIE, "pathstring2epnamenp: invalid empty class");
1000 
1001 	ret = tree_iname(stable(ptr), NULL, 0);
1002 
1003 	while ((ptr = strtok(NULL, sep)) != NULL)
1004 		ret = tree_name_append(ret,
1005 		    tree_iname(stable(ptr), NULL, 0));
1006 
1007 	return (ret);
1008 }
1009 
1010 /*
1011  * for a given upset sp, increment the corresponding SERD engine.  if the
1012  * SERD engine trips, return the ename and ipp of the resulting ereport.
1013  * returns true if engine tripped and *enamep and *ippp were filled in.
1014  */
1015 static int
1016 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep,
1017     fmd_case_t *fmcase, struct event *sp, const char **enamep,
1018     const struct ipath **ippp)
1019 {
1020 	struct node *serdinst;
1021 	char *serdname;
1022 	char *serdresource;
1023 	char *serdclass;
1024 	struct node *nid;
1025 	struct serd_entry *newentp;
1026 	int i, serdn = -1, serdincrement = 1, len = 0;
1027 	char *serdsuffix = NULL, *serdt = NULL;
1028 	struct evalue *ep;
1029 
1030 	ASSERT(sp->t == N_UPSET);
1031 	ASSERT(ffep != NULL);
1032 
1033 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1034 	    (void *)"n", (lut_cmp)strcmp)) != NULL) {
1035 		ASSERT(ep->t == UINT64);
1036 		serdn = (int)ep->v;
1037 	}
1038 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1039 	    (void *)"t", (lut_cmp)strcmp)) != NULL) {
1040 		ASSERT(ep->t == STRING);
1041 		serdt = (char *)(uintptr_t)ep->v;
1042 	}
1043 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1044 	    (void *)"suffix", (lut_cmp)strcmp)) != NULL) {
1045 		ASSERT(ep->t == STRING);
1046 		serdsuffix = (char *)(uintptr_t)ep->v;
1047 	}
1048 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1049 	    (void *)"increment", (lut_cmp)strcmp)) != NULL) {
1050 		ASSERT(ep->t == UINT64);
1051 		serdincrement = (int)ep->v;
1052 	}
1053 
1054 	/*
1055 	 * obtain instanced SERD engine from the upset sp.  from this
1056 	 * derive serdname, the string used to identify the SERD engine.
1057 	 */
1058 	serdinst = eventprop_lookup(sp, L_engine);
1059 
1060 	if (serdinst == NULL)
1061 		return (-1);
1062 
1063 	len = strlen(serdinst->u.stmt.np->u.event.ename->u.name.s) + 1;
1064 	if (serdsuffix != NULL)
1065 		len += strlen(serdsuffix);
1066 	serdclass = MALLOC(len);
1067 	if (serdsuffix != NULL)
1068 		(void) snprintf(serdclass, len, "%s%s",
1069 		    serdinst->u.stmt.np->u.event.ename->u.name.s, serdsuffix);
1070 	else
1071 		(void) snprintf(serdclass, len, "%s",
1072 		    serdinst->u.stmt.np->u.event.ename->u.name.s);
1073 	serdresource = ipath2str(NULL,
1074 	    ipath(serdinst->u.stmt.np->u.event.epname));
1075 	len += strlen(serdresource) + 1;
1076 	serdname = MALLOC(len);
1077 	(void) snprintf(serdname, len, "%s@%s", serdclass, serdresource);
1078 	FREE(serdresource);
1079 
1080 	/* handle serd engine "id" property, if there is one */
1081 	if ((nid =
1082 	    lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) {
1083 		struct evalue *gval;
1084 		char suffixbuf[200];
1085 		char *suffix;
1086 		char *nserdname;
1087 		size_t nname;
1088 
1089 		out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname);
1090 		ptree_name_iter(O_ALTFP|O_NONL, nid);
1091 
1092 		ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t));
1093 
1094 		if ((gval = lut_lookup(fmep->globals,
1095 		    (void *)nid->u.globid.s, NULL)) == NULL) {
1096 			out(O_ALTFP, " undefined");
1097 		} else if (gval->t == UINT64) {
1098 			out(O_ALTFP, " %llu", gval->v);
1099 			(void) sprintf(suffixbuf, "%llu", gval->v);
1100 			suffix = suffixbuf;
1101 		} else {
1102 			out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v);
1103 			suffix = (char *)(uintptr_t)gval->v;
1104 		}
1105 
1106 		nname = strlen(serdname) + strlen(suffix) + 2;
1107 		nserdname = MALLOC(nname);
1108 		(void) snprintf(nserdname, nname, "%s:%s", serdname, suffix);
1109 		FREE(serdname);
1110 		serdname = nserdname;
1111 	}
1112 
1113 	/*
1114 	 * if the engine is empty, and we have an override for n/t then
1115 	 * destroy and recreate it.
1116 	 */
1117 	if ((serdn != -1 || serdt != NULL) && fmd_serd_exists(hdl, serdname) &&
1118 	    fmd_serd_empty(hdl, serdname))
1119 		fmd_serd_destroy(hdl, serdname);
1120 
1121 	if (!fmd_serd_exists(hdl, serdname)) {
1122 		struct node *nN, *nT;
1123 		const char *s;
1124 		struct node *nodep;
1125 		struct config *cp;
1126 		char *path;
1127 		uint_t nval;
1128 		hrtime_t tval;
1129 		int i;
1130 		char *ptr;
1131 		int got_n_override = 0, got_t_override = 0;
1132 
1133 		/* no SERD engine yet, so create it */
1134 		nodep = serdinst->u.stmt.np->u.event.epname;
1135 		path = ipath2str(NULL, ipath(nodep));
1136 		cp = config_lookup(fmep->config, path, 0);
1137 		FREE((void *)path);
1138 
1139 		/*
1140 		 * We allow serd paramaters to be overridden, either from
1141 		 * eft.conf file values (if Serd_Override is set) or from
1142 		 * driver properties (for "serd.io.device" engines).
1143 		 */
1144 		if (Serd_Override != NULL) {
1145 			char *save_ptr, *ptr1, *ptr2, *ptr3;
1146 			ptr3 = save_ptr = STRDUP(Serd_Override);
1147 			while (*ptr3 != '\0') {
1148 				ptr1 = strchr(ptr3, ',');
1149 				*ptr1 = '\0';
1150 				if (strcmp(ptr3, serdclass) == 0) {
1151 					ptr2 =  strchr(ptr1 + 1, ',');
1152 					*ptr2 = '\0';
1153 					nval = atoi(ptr1 + 1);
1154 					out(O_ALTFP, "serd override %s_n %d",
1155 					    serdclass, nval);
1156 					ptr3 =  strchr(ptr2 + 1, ' ');
1157 					if (ptr3)
1158 						*ptr3 = '\0';
1159 					ptr = STRDUP(ptr2 + 1);
1160 					out(O_ALTFP, "serd override %s_t %s",
1161 					    serdclass, ptr);
1162 					got_n_override = 1;
1163 					got_t_override = 1;
1164 					break;
1165 				} else {
1166 					ptr2 =  strchr(ptr1 + 1, ',');
1167 					ptr3 =  strchr(ptr2 + 1, ' ');
1168 					if (ptr3 == NULL)
1169 						break;
1170 				}
1171 				ptr3++;
1172 			}
1173 			FREE(save_ptr);
1174 		}
1175 
1176 		if (cp && got_n_override == 0) {
1177 			/*
1178 			 * convert serd engine class into property name
1179 			 */
1180 			char *prop_name = MALLOC(strlen(serdclass) + 3);
1181 			for (i = 0; i < strlen(serdclass); i++) {
1182 				if (serdclass[i] == '.')
1183 					prop_name[i] = '_';
1184 				else
1185 					prop_name[i] = serdclass[i];
1186 			}
1187 			prop_name[i++] = '_';
1188 			prop_name[i++] = 'n';
1189 			prop_name[i] = '\0';
1190 			if (s = config_getprop(cp, prop_name)) {
1191 				nval = atoi(s);
1192 				out(O_ALTFP, "serd override %s_n %s",
1193 				    serdclass, s);
1194 				got_n_override = 1;
1195 			}
1196 			prop_name[i - 1] = 't';
1197 			if (s = config_getprop(cp, prop_name)) {
1198 				ptr = STRDUP(s);
1199 				out(O_ALTFP, "serd override %s_t %s",
1200 				    serdclass, s);
1201 				got_t_override = 1;
1202 			}
1203 			FREE(prop_name);
1204 		}
1205 
1206 		if (serdn != -1 && got_n_override == 0) {
1207 			nval = serdn;
1208 			out(O_ALTFP, "serd override %s_n %d", serdclass, serdn);
1209 			got_n_override = 1;
1210 		}
1211 		if (serdt != NULL && got_t_override == 0) {
1212 			ptr = STRDUP(serdt);
1213 			out(O_ALTFP, "serd override %s_t %s", serdclass, serdt);
1214 			got_t_override = 1;
1215 		}
1216 
1217 		if (!got_n_override) {
1218 			nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N,
1219 			    NULL);
1220 			ASSERT(nN->t == T_NUM);
1221 			nval = (uint_t)nN->u.ull;
1222 		}
1223 		if (!got_t_override) {
1224 			nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T,
1225 			    NULL);
1226 			ASSERT(nT->t == T_TIMEVAL);
1227 			tval = (hrtime_t)nT->u.ull;
1228 		} else {
1229 			const unsigned long long *ullp;
1230 			const char *suffix;
1231 			int len;
1232 
1233 			len = strspn(ptr, "0123456789");
1234 			suffix = stable(&ptr[len]);
1235 			ullp = (unsigned long long *)lut_lookup(Timesuffixlut,
1236 			    (void *)suffix, NULL);
1237 			ptr[len] = '\0';
1238 			tval = strtoull(ptr, NULL, 0) * (ullp ? *ullp : 1ll);
1239 			FREE(ptr);
1240 		}
1241 		fmd_serd_create(hdl, serdname, nval, tval);
1242 	}
1243 
1244 	newentp = MALLOC(sizeof (*newentp));
1245 	newentp->ename = stable(serdclass);
1246 	FREE(serdclass);
1247 	newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname);
1248 	newentp->hdl = hdl;
1249 	if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) {
1250 		SerdEngines = lut_add(SerdEngines, (void *)newentp,
1251 		    (void *)newentp, (lut_cmp)serd_cmp);
1252 		Serd_need_save = 1;
1253 		serd_save();
1254 	} else {
1255 		FREE(newentp);
1256 	}
1257 
1258 
1259 	/*
1260 	 * increment SERD engine.  if engine fires, reset serd
1261 	 * engine and return trip_strcode if required.
1262 	 */
1263 	for (i = 0; i < serdincrement; i++) {
1264 		if (fmd_serd_record(hdl, serdname, ffep)) {
1265 			fmd_case_add_serd(hdl, fmcase, serdname);
1266 			fmd_serd_reset(hdl, serdname);
1267 
1268 			if (ippp) {
1269 				struct node *tripinst =
1270 				    lut_lookup(serdinst->u.stmt.lutp,
1271 				    (void *)L_trip, NULL);
1272 				ASSERT(tripinst != NULL);
1273 				*enamep = tripinst->u.event.ename->u.name.s;
1274 				*ippp = ipath(tripinst->u.event.epname);
1275 				out(O_ALTFP|O_NONL,
1276 				    "[engine fired: %s, sending: ", serdname);
1277 				ipath_print(O_ALTFP|O_NONL, *enamep, *ippp);
1278 				out(O_ALTFP, "]");
1279 			} else {
1280 				out(O_ALTFP, "[engine fired: %s, no trip]",
1281 				    serdname);
1282 			}
1283 			FREE(serdname);
1284 			return (1);
1285 		}
1286 	}
1287 
1288 	FREE(serdname);
1289 	return (0);
1290 }
1291 
1292 /*
1293  * search a suspect list for upsets.  feed each upset to serd_eval() and
1294  * build up tripped[], an array of ereports produced by the firing of
1295  * any SERD engines.  then feed each ereport back into
1296  * fme_receive_report().
1297  *
1298  * returns ntrip, the number of these ereports produced.
1299  */
1300 static int
1301 upsets_eval(struct fme *fmep, fmd_event_t *ffep)
1302 {
1303 	/* we build an array of tripped ereports that we send ourselves */
1304 	struct {
1305 		const char *ename;
1306 		const struct ipath *ipp;
1307 	} *tripped;
1308 	struct event *sp;
1309 	int ntrip, nupset, i;
1310 
1311 	/*
1312 	 * count the number of upsets to determine the upper limit on
1313 	 * expected trip ereport strings.  remember that one upset can
1314 	 * lead to at most one ereport.
1315 	 */
1316 	nupset = 0;
1317 	for (sp = fmep->suspects; sp; sp = sp->suspects) {
1318 		if (sp->t == N_UPSET)
1319 			nupset++;
1320 	}
1321 
1322 	if (nupset == 0)
1323 		return (0);
1324 
1325 	/*
1326 	 * get to this point if we have upsets and expect some trip
1327 	 * ereports
1328 	 */
1329 	tripped = alloca(sizeof (*tripped) * nupset);
1330 	bzero((void *)tripped, sizeof (*tripped) * nupset);
1331 
1332 	ntrip = 0;
1333 	for (sp = fmep->suspects; sp; sp = sp->suspects)
1334 		if (sp->t == N_UPSET &&
1335 		    serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp,
1336 		    &tripped[ntrip].ename, &tripped[ntrip].ipp) == 1)
1337 			ntrip++;
1338 
1339 	for (i = 0; i < ntrip; i++) {
1340 		struct event *ep, *nep;
1341 		struct fme *nfmep;
1342 		fmd_case_t *fmcase;
1343 		const struct ipath *ipp;
1344 		const char *eventstring;
1345 		int prev_verbose;
1346 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1347 		enum fme_state state;
1348 
1349 		/*
1350 		 * First try and evaluate a case with the trip ereport plus
1351 		 * all the other ereports that cause the trip. If that fails
1352 		 * to evaluate then try again with just this ereport on its own.
1353 		 */
1354 		out(O_ALTFP|O_NONL, "fme_receive_report_serd: ");
1355 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1356 		out(O_ALTFP|O_STAMP, NULL);
1357 		ep = fmep->e0;
1358 		eventstring = ep->enode->u.event.ename->u.name.s;
1359 		ipp = ep->ipp;
1360 
1361 		/*
1362 		 * create a duplicate fme and case
1363 		 */
1364 		fmcase = fmd_case_open(fmep->hdl, NULL);
1365 		out(O_ALTFP|O_NONL, "duplicate fme for event [");
1366 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1367 		out(O_ALTFP, " ]");
1368 
1369 		if ((nfmep = newfme(eventstring, ipp, fmep->hdl,
1370 		    fmcase, ffep, ep->nvp)) == NULL) {
1371 			out(O_ALTFP|O_NONL, "[");
1372 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1373 			out(O_ALTFP, " CANNOT DIAGNOSE]");
1374 			continue;
1375 		}
1376 
1377 		Open_fme_count++;
1378 		nfmep->pull = fmep->pull;
1379 		init_fme_bufs(nfmep);
1380 		out(O_ALTFP|O_NONL, "[");
1381 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1382 		out(O_ALTFP, " created FME%d, case %s]", nfmep->id,
1383 		    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
1384 		if (ffep) {
1385 			fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep);
1386 			fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep);
1387 			nfmep->e0r = ffep;
1388 		}
1389 
1390 		/*
1391 		 * add the original ereports
1392 		 */
1393 		for (ep = fmep->observations; ep; ep = ep->observations) {
1394 			eventstring = ep->enode->u.event.ename->u.name.s;
1395 			ipp = ep->ipp;
1396 			out(O_ALTFP|O_NONL, "adding event [");
1397 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1398 			out(O_ALTFP, " ]");
1399 			nep = itree_lookup(nfmep->eventtree, eventstring, ipp);
1400 			if (nep->count++ == 0) {
1401 				nep->observations = nfmep->observations;
1402 				nfmep->observations = nep;
1403 				serialize_observation(nfmep, eventstring, ipp);
1404 				nep->nvp = evnv_dupnvl(ep->nvp);
1405 			}
1406 			if (ep->ffep && ep->ffep != ffep)
1407 				fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase,
1408 				    ep->ffep);
1409 			stats_counter_bump(nfmep->Rcount);
1410 		}
1411 
1412 		/*
1413 		 * add the serd trigger ereport
1414 		 */
1415 		if ((ep = itree_lookup(nfmep->eventtree, tripped[i].ename,
1416 		    tripped[i].ipp)) == NULL) {
1417 			/*
1418 			 * The trigger ereport is not in the instance tree. It
1419 			 * was presumably removed by prune_propagations() as
1420 			 * this combination of events is not present in the
1421 			 * rules.
1422 			 */
1423 			out(O_ALTFP, "upsets_eval: e0 not in instance tree");
1424 			Undiag_reason = UD_VAL_BADEVENTI;
1425 			goto retry_lone_ereport;
1426 		}
1427 		out(O_ALTFP|O_NONL, "adding event [");
1428 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1429 		out(O_ALTFP, " ]");
1430 		nfmep->ecurrent = ep;
1431 		ep->nvp = NULL;
1432 		ep->count = 1;
1433 		ep->observations = nfmep->observations;
1434 		nfmep->observations = ep;
1435 
1436 		/*
1437 		 * just peek first.
1438 		 */
1439 		nfmep->peek = 1;
1440 		prev_verbose = Verbose;
1441 		if (Debug == 0)
1442 			Verbose = 0;
1443 		lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep);
1444 		state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay);
1445 		nfmep->peek = 0;
1446 		Verbose = prev_verbose;
1447 		if (state == FME_DISPROVED) {
1448 			out(O_ALTFP, "upsets_eval: hypothesis disproved");
1449 			Undiag_reason = UD_VAL_UNSOLVD;
1450 retry_lone_ereport:
1451 			/*
1452 			 * However the trigger ereport on its own might be
1453 			 * diagnosable, so check for that. Undo the new fme
1454 			 * and case we just created and call fme_receive_report.
1455 			 */
1456 			out(O_ALTFP|O_NONL, "[");
1457 			ipath_print(O_ALTFP|O_NONL, tripped[i].ename,
1458 			    tripped[i].ipp);
1459 			out(O_ALTFP, " retrying with just trigger ereport]");
1460 			itree_free(nfmep->eventtree);
1461 			nfmep->eventtree = NULL;
1462 			structconfig_free(nfmep->config);
1463 			nfmep->config = NULL;
1464 			destroy_fme_bufs(nfmep);
1465 			fmd_case_close(nfmep->hdl, nfmep->fmcase);
1466 			fme_receive_report(fmep->hdl, ffep,
1467 			    tripped[i].ename, tripped[i].ipp, NULL);
1468 			continue;
1469 		}
1470 
1471 		/*
1472 		 * and evaluate
1473 		 */
1474 		serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp);
1475 		fme_eval(nfmep, ffep);
1476 	}
1477 
1478 	return (ntrip);
1479 }
1480 
1481 /*
1482  * fme_receive_external_report -- call when an external ereport comes in
1483  *
1484  * this routine just converts the relevant information from the ereport
1485  * into a format used internally and passes it on to fme_receive_report().
1486  */
1487 void
1488 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1489     const char *class)
1490 {
1491 	struct node		*epnamenp;
1492 	fmd_case_t		*fmcase;
1493 	const struct ipath	*ipp;
1494 	nvlist_t		*detector = NULL;
1495 
1496 	class = stable(class);
1497 
1498 	/* Get the component path from the ereport */
1499 	epnamenp = platform_getpath(nvl);
1500 
1501 	/* See if we ended up without a path. */
1502 	if (epnamenp == NULL) {
1503 		/* See if class permits silent discard on unknown component. */
1504 		if (lut_lookup(Ereportenames_discard, (void *)class, NULL)) {
1505 			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
1506 			    "to component path, but silent discard allowed.",
1507 			    class);
1508 		} else {
1509 			/*
1510 			 * XFILE: Failure to find a component is bad unless
1511 			 * 'discard_if_config_unknown=1' was specified in the
1512 			 * ereport definition. Indicate undiagnosable.
1513 			 */
1514 			Undiag_reason = UD_VAL_NOPATH;
1515 			fmcase = fmd_case_open(hdl, NULL);
1516 
1517 			/*
1518 			 * We don't have a component path here (which means that
1519 			 * the detector was not in hc-scheme and couldn't be
1520 			 * converted to hc-scheme. Report the raw detector as
1521 			 * the suspect resource if there is one.
1522 			 */
1523 			(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
1524 			    &detector);
1525 			publish_undiagnosable(hdl, ffep, fmcase, detector,
1526 			    (char *)class);
1527 		}
1528 		return;
1529 	}
1530 
1531 	ipp = ipath(epnamenp);
1532 	tree_free(epnamenp);
1533 	fme_receive_report(hdl, ffep, class, ipp, nvl);
1534 }
1535 
1536 /*ARGSUSED*/
1537 void
1538 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1539     const char *eventstring)
1540 {
1541 	char *uuid;
1542 	nvlist_t **nva;
1543 	uint_t nvc;
1544 	const struct ipath *ipp;
1545 
1546 	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
1547 	    nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
1548 	    &nva, &nvc) != 0) {
1549 		out(O_ALTFP, "No uuid or fault list for list.repaired event");
1550 		return;
1551 	}
1552 
1553 	out(O_ALTFP, "Processing list.repaired from case %s", uuid);
1554 
1555 	while (nvc-- != 0) {
1556 		/*
1557 		 * Reset any istat or serd engine associated with this path.
1558 		 */
1559 		char *path;
1560 
1561 		if ((ipp = platform_fault2ipath(*nva++)) == NULL)
1562 			continue;
1563 
1564 		path = ipath2str(NULL, ipp);
1565 		out(O_ALTFP, "fme_receive_repair_list: resetting state for %s",
1566 		    path);
1567 		FREE(path);
1568 
1569 		lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp);
1570 		istat_save();
1571 
1572 		lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp);
1573 		serd_save();
1574 	}
1575 }
1576 
1577 /*ARGSUSED*/
1578 void
1579 fme_receive_topology_change(void)
1580 {
1581 	lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL);
1582 	istat_save();
1583 
1584 	lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL);
1585 	serd_save();
1586 }
1587 
1588 static int mark_arrows(struct fme *fmep, struct event *ep, int mark,
1589     unsigned long long at_latest_by, unsigned long long *pdelay, int keep);
1590 
1591 /* ARGSUSED */
1592 static void
1593 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
1594 {
1595 	struct bubble *bp;
1596 	struct arrowlist *ap;
1597 
1598 	ep->cached_state = 0;
1599 	ep->keep_in_tree = 0;
1600 	for (bp = itree_next_bubble(ep, NULL); bp;
1601 	    bp = itree_next_bubble(ep, bp)) {
1602 		if (bp->t != B_FROM)
1603 			continue;
1604 		bp->mark = 0;
1605 		for (ap = itree_next_arrow(bp, NULL); ap;
1606 		    ap = itree_next_arrow(bp, ap))
1607 			ap->arrowp->mark = 0;
1608 	}
1609 }
1610 
1611 static void
1612 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
1613     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
1614 {
1615 	struct event *ep;
1616 	struct fme *fmep = NULL;
1617 	struct fme *ofmep = NULL;
1618 	struct fme *cfmep, *svfmep;
1619 	int matched = 0;
1620 	nvlist_t *defect;
1621 	fmd_case_t *fmcase;
1622 	char *reason;
1623 
1624 	out(O_ALTFP|O_NONL, "fme_receive_report: ");
1625 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1626 	out(O_ALTFP|O_STAMP, NULL);
1627 
1628 	/* decide which FME it goes to */
1629 	for (fmep = FMElist; fmep; fmep = fmep->next) {
1630 		int prev_verbose;
1631 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1632 		enum fme_state state;
1633 		nvlist_t *pre_peek_nvp = NULL;
1634 
1635 		if (fmep->overflow) {
1636 			if (!(fmd_case_closed(fmep->hdl, fmep->fmcase)))
1637 				ofmep = fmep;
1638 
1639 			continue;
1640 		}
1641 
1642 		/*
1643 		 * ignore solved or closed cases
1644 		 */
1645 		if (fmep->posted_suspects ||
1646 		    fmd_case_solved(fmep->hdl, fmep->fmcase) ||
1647 		    fmd_case_closed(fmep->hdl, fmep->fmcase))
1648 			continue;
1649 
1650 		/* look up event in event tree for this FME */
1651 		if ((ep = itree_lookup(fmep->eventtree,
1652 		    eventstring, ipp)) == NULL)
1653 			continue;
1654 
1655 		/* note observation */
1656 		fmep->ecurrent = ep;
1657 		if (ep->count++ == 0) {
1658 			/* link it into list of observations seen */
1659 			ep->observations = fmep->observations;
1660 			fmep->observations = ep;
1661 			ep->nvp = evnv_dupnvl(nvl);
1662 		} else {
1663 			/* use new payload values for peek */
1664 			pre_peek_nvp = ep->nvp;
1665 			ep->nvp = evnv_dupnvl(nvl);
1666 		}
1667 
1668 		/* tell hypothesise() not to mess with suspect list */
1669 		fmep->peek = 1;
1670 
1671 		/* don't want this to be verbose (unless Debug is set) */
1672 		prev_verbose = Verbose;
1673 		if (Debug == 0)
1674 			Verbose = 0;
1675 
1676 		lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
1677 		state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
1678 
1679 		fmep->peek = 0;
1680 
1681 		/* put verbose flag back */
1682 		Verbose = prev_verbose;
1683 
1684 		if (state != FME_DISPROVED) {
1685 			/* found an FME that explains the ereport */
1686 			matched++;
1687 			out(O_ALTFP|O_NONL, "[");
1688 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1689 			out(O_ALTFP, " explained by FME%d]", fmep->id);
1690 
1691 			if (pre_peek_nvp)
1692 				nvlist_free(pre_peek_nvp);
1693 
1694 			if (ep->count == 1)
1695 				serialize_observation(fmep, eventstring, ipp);
1696 
1697 			if (ffep) {
1698 				fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1699 				ep->ffep = ffep;
1700 			}
1701 
1702 			stats_counter_bump(fmep->Rcount);
1703 
1704 			/* re-eval FME */
1705 			fme_eval(fmep, ffep);
1706 		} else {
1707 
1708 			/* not a match, undo noting of observation */
1709 			fmep->ecurrent = NULL;
1710 			if (--ep->count == 0) {
1711 				/* unlink it from observations */
1712 				fmep->observations = ep->observations;
1713 				ep->observations = NULL;
1714 				nvlist_free(ep->nvp);
1715 				ep->nvp = NULL;
1716 			} else {
1717 				nvlist_free(ep->nvp);
1718 				ep->nvp = pre_peek_nvp;
1719 			}
1720 		}
1721 	}
1722 
1723 	if (matched)
1724 		return;	/* explained by at least one existing FME */
1725 
1726 	/* clean up closed fmes */
1727 	cfmep = ClosedFMEs;
1728 	while (cfmep != NULL) {
1729 		svfmep = cfmep->next;
1730 		destroy_fme(cfmep);
1731 		cfmep = svfmep;
1732 	}
1733 	ClosedFMEs = NULL;
1734 
1735 	if (ofmep) {
1736 		out(O_ALTFP|O_NONL, "[");
1737 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1738 		out(O_ALTFP, " ADDING TO OVERFLOW FME]");
1739 		if (ffep)
1740 			fmd_case_add_ereport(hdl, ofmep->fmcase, ffep);
1741 
1742 		return;
1743 
1744 	} else if (Max_fme && (Open_fme_count >= Max_fme)) {
1745 		out(O_ALTFP|O_NONL, "[");
1746 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1747 		out(O_ALTFP, " MAX OPEN FME REACHED]");
1748 
1749 		fmcase = fmd_case_open(hdl, NULL);
1750 
1751 		/* Create overflow fme */
1752 		if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep,
1753 		    nvl)) == NULL) {
1754 			out(O_ALTFP|O_NONL, "[");
1755 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1756 			out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]");
1757 			return;
1758 		}
1759 
1760 		Open_fme_count++;
1761 
1762 		init_fme_bufs(fmep);
1763 		fmep->overflow = B_TRUE;
1764 
1765 		if (ffep)
1766 			fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1767 
1768 		Undiag_reason = UD_VAL_MAXFME;
1769 		defect = fmd_nvl_create_fault(hdl,
1770 		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
1771 		reason = undiag_2reason_str(Undiag_reason, NULL);
1772 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
1773 		FREE(reason);
1774 		fmd_case_add_suspect(hdl, fmep->fmcase, defect);
1775 		fmd_case_solve(hdl, fmep->fmcase);
1776 		Undiag_reason = UD_VAL_UNKNOWN;
1777 		return;
1778 	}
1779 
1780 	/* open a case */
1781 	fmcase = fmd_case_open(hdl, NULL);
1782 
1783 	/* start a new FME */
1784 	if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep, nvl)) == NULL) {
1785 		out(O_ALTFP|O_NONL, "[");
1786 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1787 		out(O_ALTFP, " CANNOT DIAGNOSE]");
1788 		return;
1789 	}
1790 
1791 	Open_fme_count++;
1792 
1793 	init_fme_bufs(fmep);
1794 
1795 	out(O_ALTFP|O_NONL, "[");
1796 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1797 	out(O_ALTFP, " created FME%d, case %s]", fmep->id,
1798 	    fmd_case_uuid(hdl, fmep->fmcase));
1799 
1800 	ep = fmep->e0;
1801 	ASSERT(ep != NULL);
1802 
1803 	/* note observation */
1804 	fmep->ecurrent = ep;
1805 	if (ep->count++ == 0) {
1806 		/* link it into list of observations seen */
1807 		ep->observations = fmep->observations;
1808 		fmep->observations = ep;
1809 		ep->nvp = evnv_dupnvl(nvl);
1810 		serialize_observation(fmep, eventstring, ipp);
1811 	} else {
1812 		/* new payload overrides any previous */
1813 		nvlist_free(ep->nvp);
1814 		ep->nvp = evnv_dupnvl(nvl);
1815 	}
1816 
1817 	stats_counter_bump(fmep->Rcount);
1818 
1819 	if (ffep) {
1820 		fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1821 		fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
1822 		fmep->e0r = ffep;
1823 		ep->ffep = ffep;
1824 	}
1825 
1826 	/* give the diagnosis algorithm a shot at the new FME state */
1827 	fme_eval(fmep, ffep);
1828 }
1829 
1830 void
1831 fme_status(int flags)
1832 {
1833 	struct fme *fmep;
1834 
1835 	if (FMElist == NULL) {
1836 		out(flags, "No fault management exercises underway.");
1837 		return;
1838 	}
1839 
1840 	for (fmep = FMElist; fmep; fmep = fmep->next)
1841 		fme_print(flags, fmep);
1842 }
1843 
1844 /*
1845  * "indent" routines used mostly for nicely formatted debug output, but also
1846  * for sanity checking for infinite recursion bugs.
1847  */
1848 
1849 #define	MAX_INDENT 1024
1850 static const char *indent_s[MAX_INDENT];
1851 static int current_indent;
1852 
1853 static void
1854 indent_push(const char *s)
1855 {
1856 	if (current_indent < MAX_INDENT)
1857 		indent_s[current_indent++] = s;
1858 	else
1859 		out(O_DIE, "unexpected recursion depth (%d)", current_indent);
1860 }
1861 
1862 static void
1863 indent_set(const char *s)
1864 {
1865 	current_indent = 0;
1866 	indent_push(s);
1867 }
1868 
1869 static void
1870 indent_pop(void)
1871 {
1872 	if (current_indent > 0)
1873 		current_indent--;
1874 	else
1875 		out(O_DIE, "recursion underflow");
1876 }
1877 
1878 static void
1879 indent(void)
1880 {
1881 	int i;
1882 	if (!Verbose)
1883 		return;
1884 	for (i = 0; i < current_indent; i++)
1885 		out(O_ALTFP|O_VERB|O_NONL, indent_s[i]);
1886 }
1887 
1888 #define	SLNEW		1
1889 #define	SLCHANGED	2
1890 #define	SLWAIT		3
1891 #define	SLDISPROVED	4
1892 
1893 static void
1894 print_suspects(int circumstance, struct fme *fmep)
1895 {
1896 	struct event *ep;
1897 
1898 	out(O_ALTFP|O_NONL, "[");
1899 	if (circumstance == SLCHANGED) {
1900 		out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, "
1901 		    "suspect list:", fmep->id, fme_state2str(fmep->state));
1902 	} else if (circumstance == SLWAIT) {
1903 		out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id,
1904 		    fmep->timer);
1905 		ptree_timeval(O_ALTFP|O_NONL, &fmep->wull);
1906 	} else if (circumstance == SLDISPROVED) {
1907 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id);
1908 	} else {
1909 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id);
1910 	}
1911 
1912 	if (circumstance == SLWAIT || circumstance == SLDISPROVED) {
1913 		out(O_ALTFP, "]");
1914 		return;
1915 	}
1916 
1917 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
1918 		out(O_ALTFP|O_NONL, " ");
1919 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
1920 	}
1921 	out(O_ALTFP, "]");
1922 }
1923 
1924 static struct node *
1925 eventprop_lookup(struct event *ep, const char *propname)
1926 {
1927 	return (lut_lookup(ep->props, (void *)propname, NULL));
1928 }
1929 
1930 #define	MAXDIGITIDX	23
1931 static char numbuf[MAXDIGITIDX + 1];
1932 
1933 static int
1934 node2uint(struct node *n, uint_t *valp)
1935 {
1936 	struct evalue value;
1937 	struct lut *globals = NULL;
1938 
1939 	if (n == NULL)
1940 		return (1);
1941 
1942 	/*
1943 	 * check value.v since we are being asked to convert an unsigned
1944 	 * long long int to an unsigned int
1945 	 */
1946 	if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) ||
1947 	    value.t != UINT64 || value.v > (1ULL << 32))
1948 		return (1);
1949 
1950 	*valp = (uint_t)value.v;
1951 
1952 	return (0);
1953 }
1954 
1955 static nvlist_t *
1956 node2fmri(struct node *n)
1957 {
1958 	nvlist_t **pa, *f, *p;
1959 	struct node *nc;
1960 	uint_t depth = 0;
1961 	char *numstr, *nullbyte;
1962 	char *failure;
1963 	int err, i;
1964 
1965 	/* XXX do we need to be able to handle a non-T_NAME node? */
1966 	if (n == NULL || n->t != T_NAME)
1967 		return (NULL);
1968 
1969 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
1970 		if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM)
1971 			break;
1972 		depth++;
1973 	}
1974 
1975 	if (nc != NULL) {
1976 		/* We bailed early, something went wrong */
1977 		return (NULL);
1978 	}
1979 
1980 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
1981 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
1982 	pa = alloca(depth * sizeof (nvlist_t *));
1983 	for (i = 0; i < depth; i++)
1984 		pa[i] = NULL;
1985 
1986 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
1987 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
1988 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
1989 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
1990 	if (err != 0) {
1991 		failure = "basic construction of FMRI failed";
1992 		goto boom;
1993 	}
1994 
1995 	numbuf[MAXDIGITIDX] = '\0';
1996 	nullbyte = &numbuf[MAXDIGITIDX];
1997 	i = 0;
1998 
1999 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
2000 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2001 		if (err != 0) {
2002 			failure = "alloc of an hc-pair failed";
2003 			goto boom;
2004 		}
2005 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s);
2006 		numstr = ulltostr(nc->u.name.child->u.ull, nullbyte);
2007 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2008 		if (err != 0) {
2009 			failure = "construction of an hc-pair failed";
2010 			goto boom;
2011 		}
2012 		pa[i++] = p;
2013 	}
2014 
2015 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2016 	if (err == 0) {
2017 		for (i = 0; i < depth; i++)
2018 			if (pa[i] != NULL)
2019 				nvlist_free(pa[i]);
2020 		return (f);
2021 	}
2022 	failure = "addition of hc-pair array to FMRI failed";
2023 
2024 boom:
2025 	for (i = 0; i < depth; i++)
2026 		if (pa[i] != NULL)
2027 			nvlist_free(pa[i]);
2028 	nvlist_free(f);
2029 	out(O_DIE, "%s", failure);
2030 	/*NOTREACHED*/
2031 	return (NULL);
2032 }
2033 
/*
 * an ipath cache entry is an array of these, with s==NULL at the end.
 * code walking an ipath therefore steps through the array until it
 * reaches an element whose s is NULL.
 */
struct ipath {
	const char *s;	/* component name (in stable) */
	int i;		/* instance number */
};
2039 
2040 static nvlist_t *
2041 ipath2fmri(struct ipath *ipath)
2042 {
2043 	nvlist_t **pa, *f, *p;
2044 	uint_t depth = 0;
2045 	char *numstr, *nullbyte;
2046 	char *failure;
2047 	int err, i;
2048 	struct ipath *ipp;
2049 
2050 	for (ipp = ipath; ipp->s != NULL; ipp++)
2051 		depth++;
2052 
2053 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
2054 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
2055 	pa = alloca(depth * sizeof (nvlist_t *));
2056 	for (i = 0; i < depth; i++)
2057 		pa[i] = NULL;
2058 
2059 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
2060 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
2061 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
2062 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
2063 	if (err != 0) {
2064 		failure = "basic construction of FMRI failed";
2065 		goto boom;
2066 	}
2067 
2068 	numbuf[MAXDIGITIDX] = '\0';
2069 	nullbyte = &numbuf[MAXDIGITIDX];
2070 	i = 0;
2071 
2072 	for (ipp = ipath; ipp->s != NULL; ipp++) {
2073 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2074 		if (err != 0) {
2075 			failure = "alloc of an hc-pair failed";
2076 			goto boom;
2077 		}
2078 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s);
2079 		numstr = ulltostr(ipp->i, nullbyte);
2080 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2081 		if (err != 0) {
2082 			failure = "construction of an hc-pair failed";
2083 			goto boom;
2084 		}
2085 		pa[i++] = p;
2086 	}
2087 
2088 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2089 	if (err == 0) {
2090 		for (i = 0; i < depth; i++)
2091 			if (pa[i] != NULL)
2092 				nvlist_free(pa[i]);
2093 		return (f);
2094 	}
2095 	failure = "addition of hc-pair array to FMRI failed";
2096 
2097 boom:
2098 	for (i = 0; i < depth; i++)
2099 		if (pa[i] != NULL)
2100 			nvlist_free(pa[i]);
2101 	nvlist_free(f);
2102 	out(O_DIE, "%s", failure);
2103 	/*NOTREACHED*/
2104 	return (NULL);
2105 }
2106 
/*
 * percentof -- express part as a percentage of whole, rounded to the
 * nearest integer (halves round up).
 *
 * The scaling is done in 64 bits; multiplying the 32-bit part by 1000
 * before widening would wrap for part values above roughly 4.29 million
 * and yield a bogus certainty.  A zero whole yields 0 rather than a
 * division by zero.
 */
static uint8_t
percentof(uint_t part, uint_t whole)
{
	unsigned long long tenths;

	if (whole == 0)
		return (0);

	/* tenths of a percent, truncated */
	tenths = ((unsigned long long)part * 1000) / whole;

	return ((uint8_t)((tenths / 10) + ((tenths % 10 >= 5) ? 1 : 0)));
}
2114 
/*
 * struct rsl -- one entry of the suspect list being prepared for
 * publication: a suspect event together with the FMRI nvlists that are
 * advertised with it.
 */
struct rsl {
	struct event *suspect;	/* the suspect event */
	nvlist_t *asru;		/* ASRU FMRI; may be NULL */
	nvlist_t *fru;		/* FRU FMRI; may be NULL */
	nvlist_t *rsrc;		/* resource FMRI; may be NULL or alias asru */
};

/* forward declaration; the definition appears later in this file */
static void publish_suspects(struct fme *fmep, struct rsl *srl);
2123 
2124 /*
2125  *  rslfree -- free internal members of struct rsl not expected to be
2126  *	freed elsewhere.
2127  */
2128 static void
2129 rslfree(struct rsl *freeme)
2130 {
2131 	if (freeme->asru != NULL)
2132 		nvlist_free(freeme->asru);
2133 	if (freeme->fru != NULL)
2134 		nvlist_free(freeme->fru);
2135 	if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru)
2136 		nvlist_free(freeme->rsrc);
2137 }
2138 
2139 /*
2140  *  rslcmp -- compare two rsl structures.  Use the following
2141  *	comparisons to establish cardinality:
2142  *
2143  *	1. Name of the suspect's class. (simple strcmp)
2144  *	2. Name of the suspect's ASRU. (trickier, since nvlist)
2145  *
2146  */
2147 static int
2148 rslcmp(const void *a, const void *b)
2149 {
2150 	struct rsl *r1 = (struct rsl *)a;
2151 	struct rsl *r2 = (struct rsl *)b;
2152 	int rv;
2153 
2154 	rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s,
2155 	    r2->suspect->enode->u.event.ename->u.name.s);
2156 	if (rv != 0)
2157 		return (rv);
2158 
2159 	if (r1->rsrc == NULL && r2->rsrc == NULL)
2160 		return (0);
2161 	if (r1->rsrc == NULL)
2162 		return (-1);
2163 	if (r2->rsrc == NULL)
2164 		return (1);
2165 	return (evnv_cmpnvl(r1->rsrc, r2->rsrc, 0));
2166 }
2167 
2168 /*
2169  * get_resources -- for a given suspect, determine what ASRU, FRU and
2170  *     RSRC nvlists should be advertised in the final suspect list.
2171  */
2172 void
2173 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot)
2174 {
2175 	struct node *asrudef, *frudef;
2176 	nvlist_t *asru, *fru;
2177 	nvlist_t *rsrc = NULL;
2178 	char *pathstr;
2179 
2180 	/*
2181 	 * First find any ASRU and/or FRU defined in the
2182 	 * initial fault tree.
2183 	 */
2184 	asrudef = eventprop_lookup(sp, L_ASRU);
2185 	frudef = eventprop_lookup(sp, L_FRU);
2186 
2187 	/*
2188 	 * Create FMRIs based on those definitions
2189 	 */
2190 	asru = node2fmri(asrudef);
2191 	fru = node2fmri(frudef);
2192 	pathstr = ipath2str(NULL, sp->ipp);
2193 
2194 	/*
2195 	 *  Allow for platform translations of the FMRIs
2196 	 */
2197 	platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc,
2198 	    pathstr);
2199 
2200 	FREE(pathstr);
2201 	rsrcs->suspect = sp;
2202 	rsrcs->asru = asru;
2203 	rsrcs->fru = fru;
2204 	rsrcs->rsrc = rsrc;
2205 }
2206 
2207 /*
2208  * trim_suspects -- prior to publishing, we may need to remove some
2209  *    suspects from the list.  If we're auto-closing upsets, we don't
2210  *    want any of those in the published list.  If the ASRUs for multiple
2211  *    defects resolve to the same ASRU (driver) we only want to publish
2212  *    that as a single suspect.
2213  */
2214 static int
2215 trim_suspects(struct fme *fmep, struct rsl *begin, struct rsl *begin2,
2216     fmd_event_t *ffep)
2217 {
2218 	struct event *ep;
2219 	struct rsl *rp = begin;
2220 	struct rsl *rp2 = begin2;
2221 	int mess_zero_count = 0;
2222 	int serd_rval;
2223 	uint_t messval;
2224 
2225 	/* remove any unwanted upsets and populate our array */
2226 	for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
2227 		if (is_upset(ep->t))
2228 			continue;
2229 		serd_rval = serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, ep,
2230 		    NULL, NULL);
2231 		if (serd_rval == 0)
2232 			continue;
2233 		if (node2uint(eventprop_lookup(ep, L_message),
2234 		    &messval) == 0 && messval == 0) {
2235 			get_resources(ep, rp2, fmep->config);
2236 			rp2++;
2237 			mess_zero_count++;
2238 		} else {
2239 			get_resources(ep, rp, fmep->config);
2240 			rp++;
2241 			fmep->nsuspects++;
2242 		}
2243 	}
2244 	return (mess_zero_count);
2245 }
2246 
2247 /*
2248  * addpayloadprop -- add a payload prop to a problem
2249  */
2250 static void
2251 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault)
2252 {
2253 	nvlist_t *rsrc, *hcs;
2254 
2255 	ASSERT(fault != NULL);
2256 	ASSERT(lhs != NULL);
2257 	ASSERT(rhs != NULL);
2258 
2259 	if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, &rsrc) != 0)
2260 		out(O_DIE, "cannot add payloadprop \"%s\" to fault", lhs);
2261 
2262 	if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0) {
2263 		out(O_ALTFP|O_VERB2, "addpayloadprop: create hc_specific");
2264 		if (nvlist_xalloc(&hcs, NV_UNIQUE_NAME, &Eft_nv_hdl) != 0)
2265 			out(O_DIE,
2266 			    "cannot add payloadprop \"%s\" to fault", lhs);
2267 		if (nvlist_add_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, hcs) != 0)
2268 			out(O_DIE,
2269 			    "cannot add payloadprop \"%s\" to fault", lhs);
2270 		nvlist_free(hcs);
2271 		if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0)
2272 			out(O_DIE,
2273 			    "cannot add payloadprop \"%s\" to fault", lhs);
2274 	} else
2275 		out(O_ALTFP|O_VERB2, "addpayloadprop: reuse hc_specific");
2276 
2277 	if (rhs->t == UINT64) {
2278 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v);
2279 
2280 		if (nvlist_add_uint64(hcs, lhs, rhs->v) != 0)
2281 			out(O_DIE,
2282 			    "cannot add payloadprop \"%s\" to fault", lhs);
2283 	} else {
2284 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"",
2285 		    lhs, (char *)(uintptr_t)rhs->v);
2286 
2287 		if (nvlist_add_string(hcs, lhs, (char *)(uintptr_t)rhs->v) != 0)
2288 			out(O_DIE,
2289 			    "cannot add payloadprop \"%s\" to fault", lhs);
2290 	}
2291 }
2292 
/* scratch state shared by istat_save() and its lut_walk() callbacks */
static char *Istatbuf = NULL;		/* start of serialization buffer */
static char *Istatbufptr = NULL;	/* next free byte in Istatbuf */
static int Istatsz = 0;			/* size of Istatbuf, in bytes */
2296 
2297 /*
2298  * istataddsize -- calculate size of istat and add it to Istatsz
2299  */
2300 /*ARGSUSED2*/
2301 static void
2302 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2303 {
2304 	int val;
2305 
2306 	ASSERT(lhs != NULL);
2307 	ASSERT(rhs != NULL);
2308 
2309 	if ((val = stats_counter_value(rhs)) == 0)
2310 		return;	/* skip zero-valued stats */
2311 
2312 	/* count up the size of the stat name */
2313 	Istatsz += ipath2strlen(lhs->ename, lhs->ipath);
2314 	Istatsz++;	/* for the trailing NULL byte */
2315 
2316 	/* count up the size of the stat value */
2317 	Istatsz += snprintf(NULL, 0, "%d", val);
2318 	Istatsz++;	/* for the trailing NULL byte */
2319 }
2320 
2321 /*
2322  * istat2str -- serialize an istat, writing result to *Istatbufptr
2323  */
2324 /*ARGSUSED2*/
2325 static void
2326 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2327 {
2328 	char *str;
2329 	int len;
2330 	int val;
2331 
2332 	ASSERT(lhs != NULL);
2333 	ASSERT(rhs != NULL);
2334 
2335 	if ((val = stats_counter_value(rhs)) == 0)
2336 		return;	/* skip zero-valued stats */
2337 
2338 	/* serialize the stat name */
2339 	str = ipath2str(lhs->ename, lhs->ipath);
2340 	len = strlen(str);
2341 
2342 	ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]);
2343 	(void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr);
2344 	Istatbufptr += len;
2345 	FREE(str);
2346 	*Istatbufptr++ = '\0';
2347 
2348 	/* serialize the stat value */
2349 	Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr,
2350 	    "%d", val);
2351 	*Istatbufptr++ = '\0';
2352 
2353 	ASSERT(Istatbufptr <= &Istatbuf[Istatsz]);
2354 }
2355 
2356 void
2357 istat_save()
2358 {
2359 	if (Istat_need_save == 0)
2360 		return;
2361 
2362 	/* figure out how big the serialzed info is */
2363 	Istatsz = 0;
2364 	lut_walk(Istats, (lut_cb)istataddsize, NULL);
2365 
2366 	if (Istatsz == 0) {
2367 		/* no stats to save */
2368 		fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2369 		return;
2370 	}
2371 
2372 	/* create the serialized buffer */
2373 	Istatbufptr = Istatbuf = MALLOC(Istatsz);
2374 	lut_walk(Istats, (lut_cb)istat2str, NULL);
2375 
2376 	/* clear out current saved stats */
2377 	fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2378 
2379 	/* write out the new version */
2380 	fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz);
2381 	FREE(Istatbuf);
2382 
2383 	Istat_need_save = 0;
2384 }
2385 
2386 int
2387 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2)
2388 {
2389 	if (ent1->ename != ent2->ename)
2390 		return (ent2->ename - ent1->ename);
2391 	if (ent1->ipath != ent2->ipath)
2392 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2393 
2394 	return (0);
2395 }
2396 
2397 /*
2398  * istat-verify -- verify the component associated with a stat still exists
2399  *
2400  * if the component no longer exists, this routine resets the stat and
2401  * returns 0.  if the component still exists, it returns 1.
2402  */
2403 static int
2404 istat_verify(struct node *snp, struct istat_entry *entp)
2405 {
2406 	struct stats *statp;
2407 	nvlist_t *fmri;
2408 
2409 	fmri = node2fmri(snp->u.event.epname);
2410 	if (platform_path_exists(fmri)) {
2411 		nvlist_free(fmri);
2412 		return (1);
2413 	}
2414 	nvlist_free(fmri);
2415 
2416 	/* component no longer in system.  zero out the associated stats */
2417 	if ((statp = (struct stats *)
2418 	    lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
2419 	    stats_counter_value(statp) == 0)
2420 		return (0);	/* stat is already reset */
2421 
2422 	Istat_need_save = 1;
2423 	stats_counter_reset(statp);
2424 	return (0);
2425 }
2426 
2427 static void
2428 istat_bump(struct node *snp, int n)
2429 {
2430 	struct stats *statp;
2431 	struct istat_entry ent;
2432 
2433 	ASSERT(snp != NULL);
2434 	ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
2435 	ASSERT(snp->u.event.epname != NULL);
2436 
2437 	/* class name should be hoisted into a single stable entry */
2438 	ASSERT(snp->u.event.ename->u.name.next == NULL);
2439 	ent.ename = snp->u.event.ename->u.name.s;
2440 	ent.ipath = ipath(snp->u.event.epname);
2441 
2442 	if (!istat_verify(snp, &ent)) {
2443 		/* component no longer exists in system, nothing to do */
2444 		return;
2445 	}
2446 
2447 	if ((statp = (struct stats *)
2448 	    lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
2449 		/* need to create the counter */
2450 		int cnt = 0;
2451 		struct node *np;
2452 		char *sname;
2453 		char *snamep;
2454 		struct istat_entry *newentp;
2455 
2456 		/* count up the size of the stat name */
2457 		np = snp->u.event.ename;
2458 		while (np != NULL) {
2459 			cnt += strlen(np->u.name.s);
2460 			cnt++;	/* for the '.' or '@' */
2461 			np = np->u.name.next;
2462 		}
2463 		np = snp->u.event.epname;
2464 		while (np != NULL) {
2465 			cnt += snprintf(NULL, 0, "%s%llu",
2466 			    np->u.name.s, np->u.name.child->u.ull);
2467 			cnt++;	/* for the '/' or trailing NULL byte */
2468 			np = np->u.name.next;
2469 		}
2470 
2471 		/* build the stat name */
2472 		snamep = sname = alloca(cnt);
2473 		np = snp->u.event.ename;
2474 		while (np != NULL) {
2475 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2476 			    "%s", np->u.name.s);
2477 			np = np->u.name.next;
2478 			if (np)
2479 				*snamep++ = '.';
2480 		}
2481 		*snamep++ = '@';
2482 		np = snp->u.event.epname;
2483 		while (np != NULL) {
2484 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2485 			    "%s%llu", np->u.name.s, np->u.name.child->u.ull);
2486 			np = np->u.name.next;
2487 			if (np)
2488 				*snamep++ = '/';
2489 		}
2490 		*snamep++ = '\0';
2491 
2492 		/* create the new stat & add it to our list */
2493 		newentp = MALLOC(sizeof (*newentp));
2494 		*newentp = ent;
2495 		statp = stats_new_counter(NULL, sname, 0);
2496 		Istats = lut_add(Istats, (void *)newentp, (void *)statp,
2497 		    (lut_cmp)istat_cmp);
2498 	}
2499 
2500 	/* if n is non-zero, set that value instead of bumping */
2501 	if (n) {
2502 		stats_counter_reset(statp);
2503 		stats_counter_add(statp, n);
2504 	} else
2505 		stats_counter_bump(statp);
2506 	Istat_need_save = 1;
2507 
2508 	ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
2509 	out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
2510 	    stats_counter_value(statp));
2511 }
2512 
/*
 * istat_destructor -- lut_free() callback for the Istats table: frees
 * the istat_entry key (left) and deletes the stats counter (right).
 */
/*ARGSUSED*/
static void
istat_destructor(void *left, void *right, void *arg)
{
	FREE((struct istat_entry *)left);
	stats_delete((struct stats *)right);
}
2522 
2523 /*
2524  * Callback used in a walk of the Istats to reset matching stat counters.
2525  */
2526 static void
2527 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
2528     const struct ipath *ipp)
2529 {
2530 	char *path;
2531 
2532 	if (entp->ipath == ipp) {
2533 		path = ipath2str(entp->ename, ipp);
2534 		out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
2535 		FREE(path);
2536 		stats_counter_reset(statp);
2537 		Istat_need_save = 1;
2538 	}
2539 }
2540 
2541 /*ARGSUSED*/
2542 static void
2543 istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp,
2544     void *unused)
2545 {
2546 	char *path;
2547 	nvlist_t *fmri;
2548 
2549 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2550 	if (!platform_path_exists(fmri)) {
2551 		path = ipath2str(entp->ename, entp->ipath);
2552 		out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path);
2553 		FREE(path);
2554 		stats_counter_reset(statp);
2555 		Istat_need_save = 1;
2556 	}
2557 	nvlist_free(fmri);
2558 }
2559 
2560 void
2561 istat_fini(void)
2562 {
2563 	lut_free(Istats, istat_destructor, NULL);
2564 }
2565 
/* scratch state shared by serd_save() and its lut_walk() callbacks */
static char *Serdbuf = NULL;	/* start of serialization buffer */
static char *Serdbufptr = NULL;	/* next free byte in Serdbuf */
static int Serdsz = 0;		/* size of Serdbuf, in bytes */
2569 
2570 /*
2571  * serdaddsize -- calculate size of serd and add it to Serdsz
2572  */
2573 /*ARGSUSED*/
2574 static void
2575 serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2576 {
2577 	ASSERT(lhs != NULL);
2578 
2579 	/* count up the size of the stat name */
2580 	Serdsz += ipath2strlen(lhs->ename, lhs->ipath);
2581 	Serdsz++;	/* for the trailing NULL byte */
2582 }
2583 
2584 /*
2585  * serd2str -- serialize a serd engine, writing result to *Serdbufptr
2586  */
2587 /*ARGSUSED*/
2588 static void
2589 serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2590 {
2591 	char *str;
2592 	int len;
2593 
2594 	ASSERT(lhs != NULL);
2595 
2596 	/* serialize the serd engine name */
2597 	str = ipath2str(lhs->ename, lhs->ipath);
2598 	len = strlen(str);
2599 
2600 	ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]);
2601 	(void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr);
2602 	Serdbufptr += len;
2603 	FREE(str);
2604 	*Serdbufptr++ = '\0';
2605 	ASSERT(Serdbufptr <= &Serdbuf[Serdsz]);
2606 }
2607 
2608 void
2609 serd_save()
2610 {
2611 	if (Serd_need_save == 0)
2612 		return;
2613 
2614 	/* figure out how big the serialzed info is */
2615 	Serdsz = 0;
2616 	lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL);
2617 
2618 	if (Serdsz == 0) {
2619 		/* no serd engines to save */
2620 		fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2621 		return;
2622 	}
2623 
2624 	/* create the serialized buffer */
2625 	Serdbufptr = Serdbuf = MALLOC(Serdsz);
2626 	lut_walk(SerdEngines, (lut_cb)serd2str, NULL);
2627 
2628 	/* clear out current saved stats */
2629 	fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2630 
2631 	/* write out the new version */
2632 	fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz);
2633 	FREE(Serdbuf);
2634 	Serd_need_save = 0;
2635 }
2636 
2637 int
2638 serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2)
2639 {
2640 	if (ent1->ename != ent2->ename)
2641 		return (ent2->ename - ent1->ename);
2642 	if (ent1->ipath != ent2->ipath)
2643 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2644 
2645 	return (0);
2646 }
2647 
2648 void
2649 fme_serd_load(fmd_hdl_t *hdl)
2650 {
2651 	int sz;
2652 	char *sbuf;
2653 	char *sepptr;
2654 	char *ptr;
2655 	struct serd_entry *newentp;
2656 	struct node *epname;
2657 	nvlist_t *fmri;
2658 	char *namestring;
2659 
2660 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0)
2661 		return;
2662 	sbuf = alloca(sz);
2663 	fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz);
2664 	ptr = sbuf;
2665 	while (ptr < &sbuf[sz]) {
2666 		sepptr = strchr(ptr, '@');
2667 		*sepptr = '\0';
2668 		namestring = ptr;
2669 		sepptr++;
2670 		ptr = sepptr;
2671 		ptr += strlen(ptr);
2672 		ptr++;	/* move past the '\0' separating paths */
2673 		epname = pathstring2epnamenp(sepptr);
2674 		fmri = node2fmri(epname);
2675 		if (platform_path_exists(fmri)) {
2676 			newentp = MALLOC(sizeof (*newentp));
2677 			newentp->hdl = hdl;
2678 			newentp->ipath = ipath(epname);
2679 			newentp->ename = stable(namestring);
2680 			SerdEngines = lut_add(SerdEngines, (void *)newentp,
2681 			    (void *)newentp, (lut_cmp)serd_cmp);
2682 		} else
2683 			Serd_need_save = 1;
2684 		tree_free(epname);
2685 		nvlist_free(fmri);
2686 	}
2687 	/* save it back again in case some of the paths no longer exist */
2688 	serd_save();
2689 }
2690 
/*
 * serd_destructor -- lut_free() callback for the SerdEngines table:
 * frees the serd_entry stored as the key (left).
 */
/*ARGSUSED*/
static void
serd_destructor(void *left, void *right, void *arg)
{
	FREE((struct serd_entry *)left);
}
2698 
2699 /*
2700  * Callback used in a walk of the SerdEngines to reset matching serd engines.
2701  */
2702 /*ARGSUSED*/
2703 static void
2704 serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp)
2705 {
2706 	char *path;
2707 
2708 	if (entp->ipath == ipp) {
2709 		path = ipath2str(entp->ename, ipp);
2710 		out(O_ALTFP, "serd_reset_cb: resetting %s", path);
2711 		fmd_serd_reset(entp->hdl, path);
2712 		FREE(path);
2713 		Serd_need_save = 1;
2714 	}
2715 }
2716 
2717 /*ARGSUSED*/
2718 static void
2719 serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2)
2720 {
2721 	char *path;
2722 	nvlist_t *fmri;
2723 
2724 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2725 	if (!platform_path_exists(fmri)) {
2726 		path = ipath2str(entp->ename, entp->ipath);
2727 		out(O_ALTFP, "serd_topo_chg_cb: not present %s", path);
2728 		fmd_serd_reset(entp->hdl, path);
2729 		FREE(path);
2730 		Serd_need_save = 1;
2731 	}
2732 	nvlist_free(fmri);
2733 }
2734 
2735 void
2736 serd_fini(void)
2737 {
2738 	lut_free(SerdEngines, serd_destructor, NULL);
2739 }
2740 
2741 static void
2742 publish_suspects(struct fme *fmep, struct rsl *srl)
2743 {
2744 	struct rsl *rp;
2745 	nvlist_t *fault;
2746 	uint8_t cert;
2747 	uint_t *frs;
2748 	uint_t frsum, fr;
2749 	uint_t messval;
2750 	uint_t retireval;
2751 	uint_t responseval;
2752 	struct node *snp;
2753 	int frcnt, fridx;
2754 	boolean_t allfaulty = B_TRUE;
2755 	struct rsl *erl = srl + fmep->nsuspects - 1;
2756 
2757 	/*
2758 	 * sort the array
2759 	 */
2760 	qsort(srl, fmep->nsuspects, sizeof (struct rsl), rslcmp);
2761 
2762 	/* sum the fitrates */
2763 	frs = alloca(fmep->nsuspects * sizeof (uint_t));
2764 	fridx = frcnt = frsum = 0;
2765 
2766 	for (rp = srl; rp <= erl; rp++) {
2767 		struct node *n;
2768 
2769 		n = eventprop_lookup(rp->suspect, L_FITrate);
2770 		if (node2uint(n, &fr) != 0) {
2771 			out(O_DEBUG|O_NONL, "event ");
2772 			ipath_print(O_DEBUG|O_NONL,
2773 			    rp->suspect->enode->u.event.ename->u.name.s,
2774 			    rp->suspect->ipp);
2775 			out(O_DEBUG, " has no FITrate (using 1)");
2776 			fr = 1;
2777 		} else if (fr == 0) {
2778 			out(O_DEBUG|O_NONL, "event ");
2779 			ipath_print(O_DEBUG|O_NONL,
2780 			    rp->suspect->enode->u.event.ename->u.name.s,
2781 			    rp->suspect->ipp);
2782 			out(O_DEBUG, " has zero FITrate (using 1)");
2783 			fr = 1;
2784 		}
2785 
2786 		frs[fridx++] = fr;
2787 		frsum += fr;
2788 		frcnt++;
2789 	}
2790 
2791 	/* Add them in reverse order of our sort, as fmd reverses order */
2792 	for (rp = erl; rp >= srl; rp--) {
2793 		cert = percentof(frs[--fridx], frsum);
2794 		fault = fmd_nvl_create_fault(fmep->hdl,
2795 		    rp->suspect->enode->u.event.ename->u.name.s,
2796 		    cert,
2797 		    rp->asru,
2798 		    rp->fru,
2799 		    rp->rsrc);
2800 		if (fault == NULL)
2801 			out(O_DIE, "fault creation failed");
2802 		/* if "message" property exists, add it to the fault */
2803 		if (node2uint(eventprop_lookup(rp->suspect, L_message),
2804 		    &messval) == 0) {
2805 
2806 			out(O_ALTFP,
2807 			    "[FME%d, %s adds message=%d to suspect list]",
2808 			    fmep->id,
2809 			    rp->suspect->enode->u.event.ename->u.name.s,
2810 			    messval);
2811 			if (nvlist_add_boolean_value(fault,
2812 			    FM_SUSPECT_MESSAGE,
2813 			    (messval) ? B_TRUE : B_FALSE) != 0) {
2814 				out(O_DIE, "cannot add no-message to fault");
2815 			}
2816 		}
2817 
2818 		/* if "retire" property exists, add it to the fault */
2819 		if (node2uint(eventprop_lookup(rp->suspect, L_retire),
2820 		    &retireval) == 0) {
2821 
2822 			out(O_ALTFP,
2823 			    "[FME%d, %s adds retire=%d to suspect list]",
2824 			    fmep->id,
2825 			    rp->suspect->enode->u.event.ename->u.name.s,
2826 			    retireval);
2827 			if (nvlist_add_boolean_value(fault,
2828 			    FM_SUSPECT_RETIRE,
2829 			    (retireval) ? B_TRUE : B_FALSE) != 0) {
2830 				out(O_DIE, "cannot add no-retire to fault");
2831 			}
2832 		}
2833 
2834 		/* if "response" property exists, add it to the fault */
2835 		if (node2uint(eventprop_lookup(rp->suspect, L_response),
2836 		    &responseval) == 0) {
2837 
2838 			out(O_ALTFP,
2839 			    "[FME%d, %s adds response=%d to suspect list]",
2840 			    fmep->id,
2841 			    rp->suspect->enode->u.event.ename->u.name.s,
2842 			    responseval);
2843 			if (nvlist_add_boolean_value(fault,
2844 			    FM_SUSPECT_RESPONSE,
2845 			    (responseval) ? B_TRUE : B_FALSE) != 0) {
2846 				out(O_DIE, "cannot add no-response to fault");
2847 			}
2848 		}
2849 
2850 		/* add any payload properties */
2851 		lut_walk(rp->suspect->payloadprops,
2852 		    (lut_cb)addpayloadprop, (void *)fault);
2853 		rslfree(rp);
2854 
2855 		/*
2856 		 * If "action" property exists, evaluate it;  this must be done
2857 		 * before the allfaulty check below since some actions may
2858 		 * modify the asru to be used in fmd_nvl_fmri_has_fault.  This
2859 		 * needs to be restructured if any new actions are introduced
2860 		 * that have effects that we do not want to be visible if
2861 		 * we decide not to publish in the dupclose check below.
2862 		 */
2863 		if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
2864 			struct evalue evalue;
2865 
2866 			out(O_ALTFP|O_NONL,
2867 			    "[FME%d, %s action ", fmep->id,
2868 			    rp->suspect->enode->u.event.ename->u.name.s);
2869 			ptree_name_iter(O_ALTFP|O_NONL, snp);
2870 			out(O_ALTFP, "]");
2871 			Action_nvl = fault;
2872 			(void) eval_expr(snp, NULL, NULL, NULL, NULL,
2873 			    NULL, 0, &evalue);
2874 		}
2875 
2876 		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);
2877 
2878 		/*
2879 		 * check if the asru is already marked as "faulty".
2880 		 */
2881 		if (allfaulty) {
2882 			nvlist_t *asru;
2883 
2884 			out(O_ALTFP|O_VERB, "FME%d dup check ", fmep->id);
2885 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
2886 			out(O_ALTFP|O_VERB|O_NONL, " ");
2887 			if (nvlist_lookup_nvlist(fault,
2888 			    FM_FAULT_ASRU, &asru) != 0) {
2889 				out(O_ALTFP|O_VERB, "NULL asru");
2890 				allfaulty = B_FALSE;
2891 			} else if (fmd_nvl_fmri_has_fault(fmep->hdl, asru,
2892 			    FMD_HAS_FAULT_ASRU, NULL)) {
2893 				out(O_ALTFP|O_VERB, "faulty");
2894 			} else {
2895 				out(O_ALTFP|O_VERB, "not faulty");
2896 				allfaulty = B_FALSE;
2897 			}
2898 		}
2899 
2900 	}
2901 
2902 	if (!allfaulty) {
2903 		/*
2904 		 * don't update the count stat if all asrus are already
2905 		 * present and unrepaired in the asru cache
2906 		 */
2907 		for (rp = erl; rp >= srl; rp--) {
2908 			struct event *suspect = rp->suspect;
2909 
2910 			if (suspect == NULL)
2911 				continue;
2912 
2913 			/* if "count" exists, increment the appropriate stat */
2914 			if ((snp = eventprop_lookup(suspect,
2915 			    L_count)) != NULL) {
2916 				out(O_ALTFP|O_NONL,
2917 				    "[FME%d, %s count ", fmep->id,
2918 				    suspect->enode->u.event.ename->u.name.s);
2919 				ptree_name_iter(O_ALTFP|O_NONL, snp);
2920 				out(O_ALTFP, "]");
2921 				istat_bump(snp, 0);
2922 
2923 			}
2924 		}
2925 		istat_save();	/* write out any istat changes */
2926 	}
2927 }
2928 
2929 static const char *
2930 undiag_2defect_str(int ud)
2931 {
2932 	switch (ud) {
2933 	case UD_VAL_MISSINGINFO:
2934 	case UD_VAL_MISSINGOBS:
2935 	case UD_VAL_MISSINGPATH:
2936 	case UD_VAL_MISSINGZERO:
2937 	case UD_VAL_BADOBS:
2938 	case UD_VAL_CFGMISMATCH:
2939 		return (UNDIAG_DEFECT_CHKPT);
2940 		break;
2941 
2942 	case UD_VAL_BADEVENTI:
2943 	case UD_VAL_BADEVENTPATH:
2944 	case UD_VAL_BADEVENTCLASS:
2945 	case UD_VAL_INSTFAIL:
2946 	case UD_VAL_NOPATH:
2947 	case UD_VAL_UNSOLVD:
2948 		return (UNDIAG_DEFECT_FME);
2949 		break;
2950 
2951 	case UD_VAL_MAXFME:
2952 		return (UNDIAG_DEFECT_LIMIT);
2953 		break;
2954 
2955 	case UD_VAL_UNKNOWN:
2956 	default:
2957 		return (UNDIAG_DEFECT_UNKNOWN);
2958 		break;
2959 	}
2960 }
2961 
2962 static const char *
2963 undiag_2fault_str(int ud)
2964 {
2965 	switch (ud) {
2966 	case UD_VAL_BADEVENTI:
2967 	case UD_VAL_BADEVENTPATH:
2968 	case UD_VAL_BADEVENTCLASS:
2969 	case UD_VAL_INSTFAIL:
2970 	case UD_VAL_NOPATH:
2971 	case UD_VAL_UNSOLVD:
2972 		return (UNDIAG_FAULT_FME);
2973 	default:
2974 		return (NULL);
2975 	}
2976 }
2977 
2978 static char *
2979 undiag_2reason_str(int ud, char *arg)
2980 {
2981 	const char *ptr;
2982 	char *buf;
2983 	int with_arg = 0;
2984 
2985 	switch (ud) {
2986 	case UD_VAL_BADEVENTPATH:
2987 		ptr = UD_STR_BADEVENTPATH;
2988 		with_arg = 1;
2989 		break;
2990 	case UD_VAL_BADEVENTCLASS:
2991 		ptr = UD_STR_BADEVENTCLASS;
2992 		with_arg = 1;
2993 		break;
2994 	case UD_VAL_BADEVENTI:
2995 		ptr = UD_STR_BADEVENTI;
2996 		with_arg = 1;
2997 		break;
2998 	case UD_VAL_BADOBS:
2999 		ptr = UD_STR_BADOBS;
3000 		break;
3001 	case UD_VAL_CFGMISMATCH:
3002 		ptr = UD_STR_CFGMISMATCH;
3003 		break;
3004 	case UD_VAL_INSTFAIL:
3005 		ptr = UD_STR_INSTFAIL;
3006 		with_arg = 1;
3007 		break;
3008 	case UD_VAL_MAXFME:
3009 		ptr = UD_STR_MAXFME;
3010 		break;
3011 	case UD_VAL_MISSINGINFO:
3012 		ptr = UD_STR_MISSINGINFO;
3013 		break;
3014 	case UD_VAL_MISSINGOBS:
3015 		ptr = UD_STR_MISSINGOBS;
3016 		break;
3017 	case UD_VAL_MISSINGPATH:
3018 		ptr = UD_STR_MISSINGPATH;
3019 		break;
3020 	case UD_VAL_MISSINGZERO:
3021 		ptr = UD_STR_MISSINGZERO;
3022 		break;
3023 	case UD_VAL_NOPATH:
3024 		ptr = UD_STR_NOPATH;
3025 		with_arg = 1;
3026 		break;
3027 	case UD_VAL_UNSOLVD:
3028 		ptr = UD_STR_UNSOLVD;
3029 		break;
3030 	case UD_VAL_UNKNOWN:
3031 	default:
3032 		ptr = UD_STR_UNKNOWN;
3033 		break;
3034 	}
3035 	if (with_arg) {
3036 		buf = MALLOC(strlen(ptr) + strlen(arg) - 1);
3037 		(void) sprintf(buf, ptr, arg);
3038 	} else {
3039 		buf = MALLOC(strlen(ptr) + 1);
3040 		(void) sprintf(buf, ptr);
3041 	}
3042 	return (buf);
3043 }
3044 
3045 static void
3046 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase,
3047     nvlist_t *detector, char *arg)
3048 {
3049 	struct case_list *newcase;
3050 	nvlist_t *defect, *fault;
3051 	const char *faultstr;
3052 	char *reason = undiag_2reason_str(Undiag_reason, arg);
3053 
3054 	out(O_ALTFP,
3055 	    "[undiagnosable ereport received, "
3056 	    "creating and closing a new case (%s)]", reason);
3057 
3058 	newcase = MALLOC(sizeof (struct case_list));
3059 	newcase->next = NULL;
3060 	newcase->fmcase = fmcase;
3061 	if (Undiagablecaselist != NULL)
3062 		newcase->next = Undiagablecaselist;
3063 	Undiagablecaselist = newcase;
3064 
3065 	if (ffep != NULL)
3066 		fmd_case_add_ereport(hdl, newcase->fmcase, ffep);
3067 
3068 	/* add defect */
3069 	defect = fmd_nvl_create_fault(hdl,
3070 	    undiag_2defect_str(Undiag_reason), 50, NULL, NULL, detector);
3071 	(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
3072 	(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE, B_FALSE);
3073 	(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE, B_FALSE);
3074 	fmd_case_add_suspect(hdl, newcase->fmcase, defect);
3075 
3076 	/* add fault if appropriate */
3077 	faultstr = undiag_2fault_str(Undiag_reason);
3078 	if (faultstr != NULL) {
3079 		fault = fmd_nvl_create_fault(hdl, faultstr, 50, NULL, NULL,
3080 		    detector);
3081 		(void) nvlist_add_string(fault, UNDIAG_REASON, reason);
3082 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
3083 		    B_FALSE);
3084 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
3085 		    B_FALSE);
3086 		fmd_case_add_suspect(hdl, newcase->fmcase, fault);
3087 	}
3088 	FREE(reason);
3089 
3090 	/* solve and close case */
3091 	fmd_case_solve(hdl, newcase->fmcase);
3092 	fmd_case_close(hdl, newcase->fmcase);
3093 	Undiag_reason = UD_VAL_UNKNOWN;
3094 }
3095 
3096 static void
3097 fme_undiagnosable(struct fme *f)
3098 {
3099 	nvlist_t *defect, *fault, *detector = NULL;
3100 	struct event *ep;
3101 	char *pathstr;
3102 	const char *faultstr;
3103 	char *reason = undiag_2reason_str(Undiag_reason, NULL);
3104 
3105 	out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
3106 	    f->id, fmd_case_uuid(f->hdl, f->fmcase), reason);
3107 
3108 	for (ep = f->observations; ep; ep = ep->observations) {
3109 
3110 		if (ep->ffep != f->e0r)
3111 			fmd_case_add_ereport(f->hdl, f->fmcase, ep->ffep);
3112 
3113 		pathstr = ipath2str(NULL, ipath(platform_getpath(ep->nvp)));
3114 		platform_units_translate(0, f->config, NULL, NULL, &detector,
3115 		    pathstr);
3116 		FREE(pathstr);
3117 
3118 		/* add defect */
3119 		defect = fmd_nvl_create_fault(f->hdl,
3120 		    undiag_2defect_str(Undiag_reason), 50 / f->uniqobs,
3121 		    NULL, NULL, detector);
3122 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
3123 		(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE,
3124 		    B_FALSE);
3125 		(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE,
3126 		    B_FALSE);
3127 		fmd_case_add_suspect(f->hdl, f->fmcase, defect);
3128 
3129 		/* add fault if appropriate */
3130 		faultstr = undiag_2fault_str(Undiag_reason);
3131 		if (faultstr == NULL)
3132 			continue;
3133 		fault = fmd_nvl_create_fault(f->hdl, faultstr, 50 / f->uniqobs,
3134 		    NULL, NULL, detector);
3135 		(void) nvlist_add_string(fault, UNDIAG_REASON, reason);
3136 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
3137 		    B_FALSE);
3138 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
3139 		    B_FALSE);
3140 		fmd_case_add_suspect(f->hdl, f->fmcase, fault);
3141 		nvlist_free(detector);
3142 	}
3143 	FREE(reason);
3144 	fmd_case_solve(f->hdl, f->fmcase);
3145 	fmd_case_close(f->hdl, f->fmcase);
3146 	Undiag_reason = UD_VAL_UNKNOWN;
3147 }
3148 
3149 /*
3150  * fme_close_case
3151  *
3152  *	Find the requested case amongst our fmes and close it.  Free up
3153  *	the related fme.
3154  */
3155 void
3156 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase)
3157 {
3158 	struct case_list *ucasep, *prevcasep = NULL;
3159 	struct fme *prev = NULL;
3160 	struct fme *fmep;
3161 
3162 	for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) {
3163 		if (fmcase != ucasep->fmcase) {
3164 			prevcasep = ucasep;
3165 			continue;
3166 		}
3167 
3168 		if (prevcasep == NULL)
3169 			Undiagablecaselist = Undiagablecaselist->next;
3170 		else
3171 			prevcasep->next = ucasep->next;
3172 
3173 		FREE(ucasep);
3174 		return;
3175 	}
3176 
3177 	for (fmep = FMElist; fmep; fmep = fmep->next) {
3178 		if (fmep->hdl == hdl && fmep->fmcase == fmcase)
3179 			break;
3180 		prev = fmep;
3181 	}
3182 
3183 	if (fmep == NULL) {
3184 		out(O_WARN, "Eft asked to close unrecognized case [%s].",
3185 		    fmd_case_uuid(hdl, fmcase));
3186 		return;
3187 	}
3188 
3189 	if (EFMElist == fmep)
3190 		EFMElist = prev;
3191 
3192 	if (prev == NULL)
3193 		FMElist = FMElist->next;
3194 	else
3195 		prev->next = fmep->next;
3196 
3197 	fmep->next = NULL;
3198 
3199 	/* Get rid of any timer this fme has set */
3200 	if (fmep->wull != 0)
3201 		fmd_timer_remove(fmep->hdl, fmep->timer);
3202 
3203 	if (ClosedFMEs == NULL) {
3204 		ClosedFMEs = fmep;
3205 	} else {
3206 		fmep->next = ClosedFMEs;
3207 		ClosedFMEs = fmep;
3208 	}
3209 
3210 	Open_fme_count--;
3211 
3212 	/* See if we can close the overflow FME */
3213 	if (Open_fme_count <= Max_fme) {
3214 		for (fmep = FMElist; fmep; fmep = fmep->next) {
3215 			if (fmep->overflow && !(fmd_case_closed(fmep->hdl,
3216 			    fmep->fmcase)))
3217 				break;
3218 		}
3219 
3220 		if (fmep != NULL)
3221 			fmd_case_close(fmep->hdl, fmep->fmcase);
3222 	}
3223 }
3224 
3225 /*
3226  * fme_set_timer()
3227  *	If the time we need to wait for the given FME is less than the
3228  *	current timer, kick that old timer out and establish a new one.
3229  */
3230 static int
3231 fme_set_timer(struct fme *fmep, unsigned long long wull)
3232 {
3233 	out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait ");
3234 	ptree_timeval(O_ALTFP|O_VERB, &wull);
3235 
3236 	if (wull <= fmep->pull) {
3237 		out(O_ALTFP|O_VERB|O_NONL, "already have waited at least ");
3238 		ptree_timeval(O_ALTFP|O_VERB, &fmep->pull);
3239 		out(O_ALTFP|O_VERB, NULL);
3240 		/* we've waited at least wull already, don't need timer */
3241 		return (0);
3242 	}
3243 
3244 	out(O_ALTFP|O_VERB|O_NONL, " currently ");
3245 	if (fmep->wull != 0) {
3246 		out(O_ALTFP|O_VERB|O_NONL, "waiting ");
3247 		ptree_timeval(O_ALTFP|O_VERB, &fmep->wull);
3248 		out(O_ALTFP|O_VERB, NULL);
3249 	} else {
3250 		out(O_ALTFP|O_VERB|O_NONL, "not waiting");
3251 		out(O_ALTFP|O_VERB, NULL);
3252 	}
3253 
3254 	if (fmep->wull != 0)
3255 		if (wull >= fmep->wull)
3256 			/* New timer would fire later than established timer */
3257 			return (0);
3258 
3259 	if (fmep->wull != 0) {
3260 		fmd_timer_remove(fmep->hdl, fmep->timer);
3261 	}
3262 
3263 	fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep,
3264 	    fmep->e0r, wull);
3265 	out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer);
3266 	fmep->wull = wull;
3267 	return (1);
3268 }
3269 
3270 void
3271 fme_timer_fired(struct fme *fmep, id_t tid)
3272 {
3273 	struct fme *ffmep = NULL;
3274 
3275 	for (ffmep = FMElist; ffmep; ffmep = ffmep->next)
3276 		if (ffmep == fmep)
3277 			break;
3278 
3279 	if (ffmep == NULL) {
3280 		out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.",
3281 		    (void *)fmep);
3282 		return;
3283 	}
3284 
3285 	out(O_ALTFP|O_VERB, "Timer fired %lx", tid);
3286 	fmep->pull = fmep->wull;
3287 	fmep->wull = 0;
3288 	fmd_buf_write(fmep->hdl, fmep->fmcase,
3289 	    WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull));
3290 
3291 	fme_eval(fmep, fmep->e0r);
3292 }
3293 
3294 /*
3295  * Preserve the fme's suspect list in its psuspects list, NULLing the
3296  * suspects list in the meantime.
3297  */
3298 static void
3299 save_suspects(struct fme *fmep)
3300 {
3301 	struct event *ep;
3302 	struct event *nextep;
3303 
3304 	/* zero out the previous suspect list */
3305 	for (ep = fmep->psuspects; ep; ep = nextep) {
3306 		nextep = ep->psuspects;
3307 		ep->psuspects = NULL;
3308 	}
3309 	fmep->psuspects = NULL;
3310 
3311 	/* zero out the suspect list, copying it to previous suspect list */
3312 	fmep->psuspects = fmep->suspects;
3313 	for (ep = fmep->suspects; ep; ep = nextep) {
3314 		nextep = ep->suspects;
3315 		ep->psuspects = ep->suspects;
3316 		ep->suspects = NULL;
3317 		ep->is_suspect = 0;
3318 	}
3319 	fmep->suspects = NULL;
3320 	fmep->nsuspects = 0;
3321 }
3322 
3323 /*
3324  * Retrieve the fme's suspect list from its psuspects list.
3325  */
3326 static void
3327 restore_suspects(struct fme *fmep)
3328 {
3329 	struct event *ep;
3330 	struct event *nextep;
3331 
3332 	fmep->nsuspects = 0;
3333 	fmep->suspects = fmep->psuspects;
3334 	for (ep = fmep->psuspects; ep; ep = nextep) {
3335 		fmep->nsuspects++;
3336 		nextep = ep->psuspects;
3337 		ep->suspects = ep->psuspects;
3338 	}
3339 }
3340 
3341 /*
3342  * this is what we use to call the Emrys prototype code instead of main()
3343  */
3344 static void
3345 fme_eval(struct fme *fmep, fmd_event_t *ffep)
3346 {
3347 	struct event *ep;
3348 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
3349 	struct rsl *srl = NULL;
3350 	struct rsl *srl2 = NULL;
3351 	int mess_zero_count;
3352 	int rpcnt;
3353 
3354 	save_suspects(fmep);
3355 
3356 	out(O_ALTFP, "Evaluate FME %d", fmep->id);
3357 	indent_set("  ");
3358 
3359 	lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
3360 	fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
3361 
3362 	out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
3363 	    fme_state2str(fmep->state));
3364 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
3365 		out(O_ALTFP|O_NONL, " ");
3366 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
3367 	}
3368 	out(O_ALTFP, NULL);
3369 
3370 	switch (fmep->state) {
3371 	case FME_CREDIBLE:
3372 		print_suspects(SLNEW, fmep);
3373 		(void) upsets_eval(fmep, ffep);
3374 
3375 		/*
3376 		 * we may have already posted suspects in upsets_eval() which
3377 		 * can recurse into fme_eval() again. If so then just return.
3378 		 */
3379 		if (fmep->posted_suspects)
3380 			return;
3381 
3382 		stats_counter_bump(fmep->diags);
3383 		rpcnt = fmep->nsuspects;
3384 		save_suspects(fmep);
3385 
3386 		/*
3387 		 * create two lists, one for "message=1" faults and one for
3388 		 * "message=0" faults. If we have a mixture we will generate
3389 		 * two separate suspect lists.
3390 		 */
3391 		srl = MALLOC(rpcnt * sizeof (struct rsl));
3392 		bzero(srl, rpcnt * sizeof (struct rsl));
3393 		srl2 = MALLOC(rpcnt * sizeof (struct rsl));
3394 		bzero(srl2, rpcnt * sizeof (struct rsl));
3395 		mess_zero_count = trim_suspects(fmep, srl, srl2, ffep);
3396 
3397 		/*
3398 		 * If the resulting suspect list has no members, we're
3399 		 * done so simply close the case. Otherwise sort and publish.
3400 		 */
3401 		if (fmep->nsuspects == 0 && mess_zero_count == 0) {
3402 			out(O_ALTFP,
3403 			    "[FME%d, case %s (all suspects are upsets)]",
3404 			    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
3405 			fmd_case_close(fmep->hdl, fmep->fmcase);
3406 		} else if (fmep->nsuspects != 0 && mess_zero_count == 0) {
3407 			publish_suspects(fmep, srl);
3408 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3409 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3410 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3411 		} else if (fmep->nsuspects == 0 && mess_zero_count != 0) {
3412 			fmep->nsuspects = mess_zero_count;
3413 			publish_suspects(fmep, srl2);
3414 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3415 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3416 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3417 		} else {
3418 			struct event *obsp;
3419 			struct fme *nfmep;
3420 
3421 			publish_suspects(fmep, srl);
3422 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3423 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3424 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3425 
3426 			/*
3427 			 * Got both message=0 and message=1 so create a
3428 			 * duplicate case. Also need a temporary duplicate fme
3429 			 * structure for use by publish_suspects().
3430 			 */
3431 			nfmep = alloc_fme();
3432 			nfmep->id =  Nextid++;
3433 			nfmep->hdl = fmep->hdl;
3434 			nfmep->nsuspects = mess_zero_count;
3435 			nfmep->fmcase = fmd_case_open(fmep->hdl, NULL);
3436 			out(O_ALTFP|O_STAMP,
3437 			    "[creating parallel FME%d, case %s]", nfmep->id,
3438 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3439 			Open_fme_count++;
3440 			if (ffep) {
3441 				fmd_case_setprincipal(nfmep->hdl,
3442 				    nfmep->fmcase, ffep);
3443 				fmd_case_add_ereport(nfmep->hdl,
3444 				    nfmep->fmcase, ffep);
3445 			}
3446 			for (obsp = fmep->observations; obsp;
3447 			    obsp = obsp->observations)
3448 				if (obsp->ffep && obsp->ffep != ffep)
3449 					fmd_case_add_ereport(nfmep->hdl,
3450 					    nfmep->fmcase, obsp->ffep);
3451 
3452 			publish_suspects(nfmep, srl2);
3453 			out(O_ALTFP, "[solving FME%d, case %s]", nfmep->id,
3454 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3455 			fmd_case_solve(nfmep->hdl, nfmep->fmcase);
3456 			FREE(nfmep);
3457 		}
3458 		FREE(srl);
3459 		FREE(srl2);
3460 		restore_suspects(fmep);
3461 
3462 		fmep->posted_suspects = 1;
3463 		fmd_buf_write(fmep->hdl, fmep->fmcase,
3464 		    WOBUF_POSTD,
3465 		    (void *)&fmep->posted_suspects,
3466 		    sizeof (fmep->posted_suspects));
3467 
3468 		/*
3469 		 * Now the suspects have been posted, we can clear up
3470 		 * the instance tree as we won't be looking at it again.
3471 		 * Also cancel the timer as the case is now solved.
3472 		 */
3473 		if (fmep->wull != 0) {
3474 			fmd_timer_remove(fmep->hdl, fmep->timer);
3475 			fmep->wull = 0;
3476 		}
3477 		break;
3478 
3479 	case FME_WAIT:
3480 		ASSERT(my_delay > fmep->ull);
3481 		(void) fme_set_timer(fmep, my_delay);
3482 		print_suspects(SLWAIT, fmep);
3483 		itree_prune(fmep->eventtree);
3484 		return;
3485 
3486 	case FME_DISPROVED:
3487 		print_suspects(SLDISPROVED, fmep);
3488 		Undiag_reason = UD_VAL_UNSOLVD;
3489 		fme_undiagnosable(fmep);
3490 		break;
3491 	}
3492 
3493 	itree_free(fmep->eventtree);
3494 	fmep->eventtree = NULL;
3495 	structconfig_free(fmep->config);
3496 	fmep->config = NULL;
3497 	destroy_fme_bufs(fmep);
3498 }
3499 
3500 static void indent(void);
3501 static int triggered(struct fme *fmep, struct event *ep, int mark);
3502 static enum fme_state effects_test(struct fme *fmep,
3503     struct event *fault_event, unsigned long long at_latest_by,
3504     unsigned long long *pdelay);
3505 static enum fme_state requirements_test(struct fme *fmep, struct event *ep,
3506     unsigned long long at_latest_by, unsigned long long *pdelay);
3507 static enum fme_state causes_test(struct fme *fmep, struct event *ep,
3508     unsigned long long at_latest_by, unsigned long long *pdelay);
3509 
3510 static int
3511 checkconstraints(struct fme *fmep, struct arrow *arrowp)
3512 {
3513 	struct constraintlist *ctp;
3514 	struct evalue value;
3515 	char *sep = "";
3516 
3517 	if (arrowp->forever_false) {
3518 		indent();
3519 		out(O_ALTFP|O_VERB|O_NONL, "  Forever false constraint: ");
3520 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3521 			out(O_ALTFP|O_VERB|O_NONL, sep);
3522 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3523 			sep = ", ";
3524 		}
3525 		out(O_ALTFP|O_VERB, NULL);
3526 		return (0);
3527 	}
3528 	if (arrowp->forever_true) {
3529 		indent();
3530 		out(O_ALTFP|O_VERB|O_NONL, "  Forever true constraint: ");
3531 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3532 			out(O_ALTFP|O_VERB|O_NONL, sep);
3533 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3534 			sep = ", ";
3535 		}
3536 		out(O_ALTFP|O_VERB, NULL);
3537 		return (1);
3538 	}
3539 
3540 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3541 		if (eval_expr(ctp->cnode, NULL, NULL,
3542 		    &fmep->globals, fmep->config,
3543 		    arrowp, 0, &value)) {
3544 			/* evaluation successful */
3545 			if (value.t == UNDEFINED || value.v == 0) {
3546 				/* known false */
3547 				arrowp->forever_false = 1;
3548 				indent();
3549 				out(O_ALTFP|O_VERB|O_NONL,
3550 				    "  False constraint: ");
3551 				ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3552 				out(O_ALTFP|O_VERB, NULL);
3553 				return (0);
3554 			}
3555 		} else {
3556 			/* evaluation unsuccessful -- unknown value */
3557 			indent();
3558 			out(O_ALTFP|O_VERB|O_NONL,
3559 			    "  Deferred constraint: ");
3560 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3561 			out(O_ALTFP|O_VERB, NULL);
3562 			return (1);
3563 		}
3564 	}
3565 	/* known true */
3566 	arrowp->forever_true = 1;
3567 	indent();
3568 	out(O_ALTFP|O_VERB|O_NONL, "  True constraint: ");
3569 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3570 		out(O_ALTFP|O_VERB|O_NONL, sep);
3571 		ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3572 		sep = ", ";
3573 	}
3574 	out(O_ALTFP|O_VERB, NULL);
3575 	return (1);
3576 }
3577 
3578 static int
3579 triggered(struct fme *fmep, struct event *ep, int mark)
3580 {
3581 	struct bubble *bp;
3582 	struct arrowlist *ap;
3583 	int count = 0;
3584 
3585 	stats_counter_bump(fmep->Tcallcount);
3586 	for (bp = itree_next_bubble(ep, NULL); bp;
3587 	    bp = itree_next_bubble(ep, bp)) {
3588 		if (bp->t != B_TO)
3589 			continue;
3590 		for (ap = itree_next_arrow(bp, NULL); ap;
3591 		    ap = itree_next_arrow(bp, ap)) {
3592 			/* check count of marks against K in the bubble */
3593 			if ((ap->arrowp->mark & mark) &&
3594 			    ++count >= bp->nork)
3595 				return (1);
3596 		}
3597 	}
3598 	return (0);
3599 }
3600 
/*
 * mark_arrows -- propagate (or clear) effect marks outward from an event
 *
 * Walks every arrow of every B_FROM bubble on ep and considers the event
 * at the head of each arrow (ep2).
 *
 * When mark is 0 the walk undoes an earlier marking pass: each arrow that
 * was marked has its arrow_marked flag and EFFECTS_COUNTER bit cleared,
 * the WAIT_EFFECT/CREDIBLE_EFFECT/PARENT_WAIT bits are removed from
 * ep2's cached_state, and the walk recurses from ep2.  If "keep" is set,
 * any ep2 that carried one of those bits is flagged keep_in_tree first.
 * pdelay is not used in this mode (callers pass NULL) and 0 is returned.
 *
 * When mark is non-zero (CREDIBLE_EFFECT or PARENT_WAIT in the calls
 * visible here) each arrow is flagged as marked; heads that are already
 * disproved or already carry a relevant state are skipped, as are arrows
 * whose constraints fail or whose head's K-count is not yet met.  For the
 * rest, requirements_test() is run on ep2 and, unless it is disproved,
 * the walk recurses from ep2.
 *
 * Returns WAIT_EFFECT if any requirements test or recursive walk reported
 * a wait, in which case *pdelay is set to the shortest delay seen;
 * otherwise returns 0 and leaves *pdelay untouched.
 */
static int
mark_arrows(struct fme *fmep, struct event *ep, int mark,
    unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
{
	struct bubble *bp;
	struct arrowlist *ap;
	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
	unsigned long long my_delay;
	enum fme_state result;
	int retval = 0;

	for (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		/* only outgoing bubbles propagate effects */
		if (bp->t != B_FROM)
			continue;
		stats_counter_bump(fmep->Marrowcount);
		for (ap = itree_next_arrow(bp, NULL); ap;
		    ap = itree_next_arrow(bp, ap)) {
			/* ep2 is the event this arrow points at */
			struct event *ep2 = ap->arrowp->head->myevent;
			/*
			 * if we're clearing marks, we can avoid doing
			 * all that work evaluating constraints.
			 */
			if (mark == 0) {
				/*
				 * an unmarked arrow was never followed, so
				 * there is nothing beyond it to undo
				 */
				if (ap->arrowp->arrow_marked == 0)
					continue;
				ap->arrowp->arrow_marked = 0;
				ap->arrowp->mark &= ~EFFECTS_COUNTER;
				if (keep && (ep2->cached_state &
				    (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
					ep2->keep_in_tree = 1;
				ep2->cached_state &=
				    ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
				(void) mark_arrows(fmep, ep2, mark, 0, NULL,
				    keep);
				continue;
			}
			/* remember the arrow was visited so it can be undone */
			ap->arrowp->arrow_marked = 1;
			if (ep2->cached_state & REQMNTS_DISPROVED) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  ALREADY DISPROVED ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			if (ep2->cached_state & WAIT_EFFECT) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  ALREADY EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			if (ep2->cached_state & CREDIBLE_EFFECT) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  ALREADY EFFECTS CREDIBLE ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			/*
			 * a PARENT_WAIT head is only skipped when this pass
			 * is itself a PARENT_WAIT pass
			 */
			if ((ep2->cached_state & PARENT_WAIT) &&
			    (mark & PARENT_WAIT)) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  ALREADY PARENT EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			/*
			 * constraints are checked with ep2's nvp installed
			 * as the payload; it is uninstalled again on both
			 * outcomes
			 */
			platform_set_payloadnvp(ep2->nvp);
			if (checkconstraints(fmep, ap->arrowp) == 0) {
				platform_set_payloadnvp(NULL);
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  CONSTRAINTS FAIL ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			platform_set_payloadnvp(NULL);
			/* count this arrow towards ep2's K requirement */
			ap->arrowp->mark |= EFFECTS_COUNTER;
			if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  K-COUNT NOT YET MET ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			ep2->cached_state &= ~PARENT_WAIT;
			/*
			 * if we've reached an ereport and no propagation time
			 * is specified, use the Hesitate value
			 */
			if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
			    ap->arrowp->maxdelay == 0ULL) {
				out(O_ALTFP|O_VERB|O_NONL, "  default wait ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				result = requirements_test(fmep, ep2, Hesitate,
				    &my_delay);
			} else {
				result = requirements_test(fmep, ep2,
				    at_latest_by + ap->arrowp->maxdelay,
				    &my_delay);
			}
			if (result == FME_WAIT) {
				/* track the shortest delay reported so far */
				retval = WAIT_EFFECT;
				if (overall_delay > my_delay)
					overall_delay = my_delay;
				ep2->cached_state |= WAIT_EFFECT;
				indent();
				out(O_ALTFP|O_VERB|O_NONL, "  EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				indent_push("  E");
				/*
				 * events beyond a waiting event are marked
				 * PARENT_WAIT rather than with the caller's
				 * mark
				 */
				if (mark_arrows(fmep, ep2, PARENT_WAIT,
				    at_latest_by, &my_delay, 0) ==
				    WAIT_EFFECT) {
					retval = WAIT_EFFECT;
					if (overall_delay > my_delay)
						overall_delay = my_delay;
				}
				indent_pop();
			} else if (result == FME_DISPROVED) {
				/* nothing propagates past a disproved event */
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  EFFECTS DISPROVED ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
			} else {
				/*
				 * any other result: record the caller's mark
				 * on ep2 and keep walking with the same mark
				 */
				ep2->cached_state |= mark;
				indent();
				if (mark == CREDIBLE_EFFECT)
					out(O_ALTFP|O_VERB|O_NONL,
					    "  EFFECTS CREDIBLE ");
				else
					out(O_ALTFP|O_VERB|O_NONL,
					    "  PARENT EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				indent_push("  E");
				if (mark_arrows(fmep, ep2, mark, at_latest_by,
				    &my_delay, 0) == WAIT_EFFECT) {
					retval = WAIT_EFFECT;
					if (overall_delay > my_delay)
						overall_delay = my_delay;
				}
				indent_pop();
			}
		}
	}
	/* the delay is only reported when a wait was actually seen */
	if (retval == WAIT_EFFECT)
		*pdelay = overall_delay;
	return (retval);
}
3759 
3760 static enum fme_state
3761 effects_test(struct fme *fmep, struct event *fault_event,
3762     unsigned long long at_latest_by, unsigned long long *pdelay)
3763 {
3764 	struct event *error_event;
3765 	enum fme_state return_value = FME_CREDIBLE;
3766 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3767 	unsigned long long my_delay;
3768 
3769 	stats_counter_bump(fmep->Ecallcount);
3770 	indent_push("  E");
3771 	indent();
3772 	out(O_ALTFP|O_VERB|O_NONL, "->");
3773 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3774 	out(O_ALTFP|O_VERB, NULL);
3775 
3776 	if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
3777 	    &my_delay, 0) == WAIT_EFFECT) {
3778 		return_value = FME_WAIT;
3779 		if (overall_delay > my_delay)
3780 			overall_delay = my_delay;
3781 	}
3782 	for (error_event = fmep->observations;
3783 	    error_event; error_event = error_event->observations) {
3784 		indent();
3785 		out(O_ALTFP|O_VERB|O_NONL, " ");
3786 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
3787 		if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
3788 			if (error_event->cached_state &
3789 			    (PARENT_WAIT|WAIT_EFFECT)) {
3790 				out(O_ALTFP|O_VERB, " NOT YET triggered");
3791 				continue;
3792 			}
3793 			return_value = FME_DISPROVED;
3794 			out(O_ALTFP|O_VERB, " NOT triggered");
3795 			break;
3796 		} else {
3797 			out(O_ALTFP|O_VERB, " triggered");
3798 		}
3799 	}
3800 	if (return_value == FME_DISPROVED) {
3801 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
3802 	} else {
3803 		fault_event->keep_in_tree = 1;
3804 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
3805 	}
3806 
3807 	indent();
3808 	out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
3809 	    fme_state2str(return_value));
3810 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3811 	out(O_ALTFP|O_VERB, NULL);
3812 	indent_pop();
3813 	if (return_value == FME_WAIT)
3814 		*pdelay = overall_delay;
3815 	return (return_value);
3816 }
3817 
/*
 * requirements_test -- can the requirements of event ep be met?
 *
 * Results are cached in ep->cached_state: a REQMNTS_CREDIBLE,
 * REQMNTS_DISPROVED or REQMNTS_WAIT bit left by an earlier call is
 * answered immediately (for a wait, *pdelay is reloaded from
 * ep->cached_delay).
 *
 * For an ereport (N_EREPORT) the answer depends only on whether it has
 * been seen (ep->count) and, if not, on whether the FME has already
 * waited until at_latest_by (fmep->pull).
 *
 * For any other event each B_FROM bubble is examined in turn: the events
 * at the head of its arrows are tested recursively and tallied as
 * credible, deferred or waiting, and the tallies are compared against
 * the bubble's N value.
 *
 * Returns FME_CREDIBLE, FME_DISPROVED, FME_WAIT or FME_DEFERRED.
 * *pdelay is written only on the FME_WAIT paths.
 */
static enum fme_state
requirements_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay)
{
	int waiting_events;
	int credible_events;
	int deferred_events;
	enum fme_state return_value = FME_CREDIBLE;
	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
	unsigned long long arrow_delay;
	unsigned long long my_delay;
	struct event *ep2;
	struct bubble *bp;
	struct arrowlist *ap;

	/* answer from the cache where an earlier call left a result */
	if (ep->cached_state & REQMNTS_CREDIBLE) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY CREDIBLE ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB, NULL);
		return (FME_CREDIBLE);
	}
	if (ep->cached_state & REQMNTS_DISPROVED) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY DISPROVED ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB, NULL);
		return (FME_DISPROVED);
	}
	if (ep->cached_state & REQMNTS_WAIT) {
		indent();
		/* hand back the delay computed when the wait was cached */
		*pdelay = ep->cached_delay;
		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY WAIT ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB|O_NONL, ", wait for: ");
		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
		out(O_ALTFP|O_VERB, NULL);
		return (FME_WAIT);
	}
	stats_counter_bump(fmep->Rcallcount);
	indent_push("  R");
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "->");
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
	out(O_ALTFP|O_VERB, NULL);

	if (ep->t == N_EREPORT) {
		/*
		 * An ereport with a non-zero count is credible.  One with
		 * a zero count is disproved if we have already waited up
		 * to at_latest_by, and otherwise must be waited for.
		 */
		if (ep->count == 0) {
			if (fmep->pull >= at_latest_by) {
				return_value = FME_DISPROVED;
			} else {
				ep->cached_delay = *pdelay = at_latest_by;
				return_value = FME_WAIT;
			}
		}

		indent();
		/* cache the result on the event and trace it */
		switch (return_value) {
		case FME_CREDIBLE:
			ep->cached_state |= REQMNTS_CREDIBLE;
			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE ");
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
			break;
		case FME_DISPROVED:
			ep->cached_state |= REQMNTS_DISPROVED;
			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
			break;
		case FME_WAIT:
			ep->cached_state |= REQMNTS_WAIT;
			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT ");
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
			out(O_ALTFP|O_VERB|O_NONL, " to ");
			ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
			break;
		default:
			out(O_DIE, "requirements_test: unexpected fme_state");
			break;
		}
		out(O_ALTFP|O_VERB, NULL);
		indent_pop();

		return (return_value);
	}

	/* this event is not a report, descend the tree */
	for (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		int n;

		if (bp->t != B_FROM)
			continue;

		n = bp->nork;

		/* the tallies are per bubble */
		credible_events = 0;
		waiting_events = 0;
		deferred_events = 0;
		arrow_delay = TIMEVAL_EVENTUALLY;
		/*
		 * n is -1 for 'A' so adjust it.
		 * XXX just count up the arrows for now.
		 */
		if (n < 0) {
			n = 0;
			for (ap = itree_next_arrow(bp, NULL); ap;
			    ap = itree_next_arrow(bp, ap))
				n++;
			indent();
			out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n);
		} else {
			indent();
			out(O_ALTFP|O_VERB, " Bubble N=%d", n);
		}

		/* a bubble requiring nothing imposes no requirement */
		if (n == 0)
			continue;
		/*
		 * Unless the bubble has already been classified, look for
		 * at least one arrow that is not forever false; finding one
		 * marks the bubble BUBBLE_OK.
		 */
		if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) {
			for (ap = itree_next_arrow(bp, NULL); ap;
			    ap = itree_next_arrow(bp, ap)) {
				ep2 = ap->arrowp->head->myevent;
				platform_set_payloadnvp(ep2->nvp);
				(void) checkconstraints(fmep, ap->arrowp);
				if (!ap->arrowp->forever_false) {
					/*
					 * if all arrows are invalidated by the
					 * constraints, then we should elide the
					 * whole bubble to be consistent with
					 * the tree creation time behaviour
					 */
					bp->mark |= BUBBLE_OK;
					platform_set_payloadnvp(NULL);
					break;
				}
				platform_set_payloadnvp(NULL);
			}
		}
		/* test the event at the head of each arrow and tally */
		for (ap = itree_next_arrow(bp, NULL); ap;
		    ap = itree_next_arrow(bp, ap)) {
			ep2 = ap->arrowp->head->myevent;
			/* stop once enough credible events have been found */
			if (n <= credible_events)
				break;

			ap->arrowp->mark |= REQMNTS_COUNTER;
			/*
			 * a head whose own K-count is not yet met is not
			 * tested; it is tallied as deferred (the else arm
			 * after the switch)
			 */
			if (triggered(fmep, ep2, REQMNTS_COUNTER))
				/* XXX adding max timevals! */
				switch (requirements_test(fmep, ep2,
				    at_latest_by + ap->arrowp->maxdelay,
				    &my_delay)) {
				case FME_DEFERRED:
					deferred_events++;
					break;
				case FME_CREDIBLE:
					credible_events++;
					break;
				case FME_DISPROVED:
					break;
				case FME_WAIT:
					if (my_delay < arrow_delay)
						arrow_delay = my_delay;
					waiting_events++;
					break;
				default:
					out(O_DIE,
					"Bug in requirements_test.");
				}
			else
				deferred_events++;
		}
		/*
		 * a bubble never marked BUBBLE_OK and with nothing waiting
		 * is elided and does not count against the event
		 */
		if (!(bp->mark & BUBBLE_OK) && waiting_events == 0) {
			bp->mark |= BUBBLE_ELIDED;
			continue;
		}
		indent();
		out(O_ALTFP|O_VERB, " Credible: %d Waiting %d",
		    credible_events + deferred_events, waiting_events);
		if (credible_events + deferred_events + waiting_events < n) {
			/* Can never meet requirements */
			ep->cached_state |= REQMNTS_DISPROVED;
			indent();
			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
			out(O_ALTFP|O_VERB, NULL);
			indent_pop();
			return (FME_DISPROVED);
		}
		if (credible_events + deferred_events < n) {
			/* will have to wait */
			/* wait time is shortest known */
			if (arrow_delay < overall_delay)
				overall_delay = arrow_delay;
			return_value = FME_WAIT;
		} else if (credible_events < n) {
			/* met only by counting deferred events */
			if (return_value != FME_WAIT)
				return_value = FME_DEFERRED;
		}
	}

	/*
	 * don't mark as FME_DEFERRED. If this event isn't reached by another
	 * path, then this will be considered FME_CREDIBLE. But if it is
	 * reached by a different path so the K-count is met, then might
	 * get overridden by FME_WAIT or FME_DISPROVED.
	 */
	if (return_value == FME_WAIT) {
		ep->cached_state |= REQMNTS_WAIT;
		ep->cached_delay = *pdelay = overall_delay;
	} else if (return_value == FME_CREDIBLE) {
		ep->cached_state |= REQMNTS_CREDIBLE;
	}
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ",
	    fme_state2str(return_value));
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB, NULL);
	indent_pop();
	return (return_value);
}
4038 
4039 static enum fme_state
4040 causes_test(struct fme *fmep, struct event *ep,
4041     unsigned long long at_latest_by, unsigned long long *pdelay)
4042 {
4043 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4044 	unsigned long long my_delay;
4045 	int credible_results = 0;
4046 	int waiting_results = 0;
4047 	enum fme_state fstate;
4048 	struct event *tail_event;
4049 	struct bubble *bp;
4050 	struct arrowlist *ap;
4051 	int k = 1;
4052 
4053 	stats_counter_bump(fmep->Ccallcount);
4054 	indent_push("  C");
4055 	indent();
4056 	out(O_ALTFP|O_VERB|O_NONL, "->");
4057 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4058 	out(O_ALTFP|O_VERB, NULL);
4059 
4060 	for (bp = itree_next_bubble(ep, NULL); bp;
4061 	    bp = itree_next_bubble(ep, bp)) {
4062 		if (bp->t != B_TO)
4063 			continue;
4064 		k = bp->nork;	/* remember the K value */
4065 		for (ap = itree_next_arrow(bp, NULL); ap;
4066 		    ap = itree_next_arrow(bp, ap)) {
4067 			int do_not_follow = 0;
4068 
4069 			/*
4070 			 * if we get to the same event multiple times
4071 			 * only worry about the first one.
4072 			 */
4073 			if (ap->arrowp->tail->myevent->cached_state &
4074 			    CAUSES_TESTED) {
4075 				indent();
4076 				out(O_ALTFP|O_VERB|O_NONL,
4077 				    "  causes test already run for ");
4078 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4079 				    ap->arrowp->tail->myevent);
4080 				out(O_ALTFP|O_VERB, NULL);
4081 				continue;
4082 			}
4083 
4084 			/*
4085 			 * see if false constraint prevents us
4086 			 * from traversing this arrow
4087 			 */
4088 			platform_set_payloadnvp(ep->nvp);
4089 			if (checkconstraints(fmep, ap->arrowp) == 0)
4090 				do_not_follow = 1;
4091 			platform_set_payloadnvp(NULL);
4092 			if (do_not_follow) {
4093 				indent();
4094 				out(O_ALTFP|O_VERB|O_NONL,
4095 				    "  False arrow from ");
4096 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4097 				    ap->arrowp->tail->myevent);
4098 				out(O_ALTFP|O_VERB, NULL);
4099 				continue;
4100 			}
4101 
4102 			ap->arrowp->tail->myevent->cached_state |=
4103 			    CAUSES_TESTED;
4104 			tail_event = ap->arrowp->tail->myevent;
4105 			fstate = hypothesise(fmep, tail_event, at_latest_by,
4106 			    &my_delay);
4107 
4108 			switch (fstate) {
4109 			case FME_WAIT:
4110 				if (my_delay < overall_delay)
4111 					overall_delay = my_delay;
4112 				waiting_results++;
4113 				break;
4114 			case FME_CREDIBLE:
4115 				credible_results++;
4116 				break;
4117 			case FME_DISPROVED:
4118 				break;
4119 			default:
4120 				out(O_DIE, "Bug in causes_test");
4121 			}
4122 		}
4123 	}
4124 	/* compare against K */
4125 	if (credible_results + waiting_results < k) {
4126 		indent();
4127 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED ");
4128 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4129 		out(O_ALTFP|O_VERB, NULL);
4130 		indent_pop();
4131 		return (FME_DISPROVED);
4132 	}
4133 	if (waiting_results != 0) {
4134 		*pdelay = overall_delay;
4135 		indent();
4136 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT ");
4137 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4138 		out(O_ALTFP|O_VERB|O_NONL, " to ");
4139 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4140 		out(O_ALTFP|O_VERB, NULL);
4141 		indent_pop();
4142 		return (FME_WAIT);
4143 	}
4144 	indent();
4145 	out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE ");
4146 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4147 	out(O_ALTFP|O_VERB, NULL);
4148 	indent_pop();
4149 	return (FME_CREDIBLE);
4150 }
4151 
4152 static enum fme_state
4153 hypothesise(struct fme *fmep, struct event *ep,
4154 	unsigned long long at_latest_by, unsigned long long *pdelay)
4155 {
4156 	enum fme_state rtr, otr;
4157 	unsigned long long my_delay;
4158 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4159 
4160 	stats_counter_bump(fmep->Hcallcount);
4161 	indent_push("  H");
4162 	indent();
4163 	out(O_ALTFP|O_VERB|O_NONL, "->");
4164 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4165 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
4166 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4167 	out(O_ALTFP|O_VERB, NULL);
4168 
4169 	rtr = requirements_test(fmep, ep, at_latest_by, &my_delay);
4170 	if ((rtr == FME_WAIT) && (my_delay < overall_delay))
4171 		overall_delay = my_delay;
4172 	if (rtr != FME_DISPROVED) {
4173 		if (is_problem(ep->t)) {
4174 			otr = effects_test(fmep, ep, at_latest_by, &my_delay);
4175 			if (otr != FME_DISPROVED) {
4176 				if (fmep->peek == 0 && ep->is_suspect == 0) {
4177 					ep->suspects = fmep->suspects;
4178 					ep->is_suspect = 1;
4179 					fmep->suspects = ep;
4180 					fmep->nsuspects++;
4181 				}
4182 			}
4183 		} else
4184 			otr = causes_test(fmep, ep, at_latest_by, &my_delay);
4185 		if ((otr == FME_WAIT) && (my_delay < overall_delay))
4186 			overall_delay = my_delay;
4187 		if ((otr != FME_DISPROVED) &&
4188 		    ((rtr == FME_WAIT) || (otr == FME_WAIT)))
4189 			*pdelay = overall_delay;
4190 	}
4191 	if (rtr == FME_DISPROVED) {
4192 		indent();
4193 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4194 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4195 		out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
4196 		indent_pop();
4197 		return (FME_DISPROVED);
4198 	}
4199 	if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
4200 		indent();
4201 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4202 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4203 		out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
4204 		indent_pop();
4205 		return (FME_DISPROVED);
4206 	}
4207 	if (otr == FME_DISPROVED) {
4208 		indent();
4209 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4210 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4211 		out(O_ALTFP|O_VERB, " (causes are not credible)");
4212 		indent_pop();
4213 		return (FME_DISPROVED);
4214 	}
4215 	if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
4216 		indent();
4217 		out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
4218 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4219 		out(O_ALTFP|O_VERB|O_NONL, " to ");
4220 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
4221 		out(O_ALTFP|O_VERB, NULL);
4222 		indent_pop();
4223 		return (FME_WAIT);
4224 	}
4225 	indent();
4226 	out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
4227 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4228 	out(O_ALTFP|O_VERB, NULL);
4229 	indent_pop();
4230 	return (FME_CREDIBLE);
4231 }
4232 
4233 /*
4234  * fme_istat_load -- reconstitute any persistent istats
4235  */
4236 void
4237 fme_istat_load(fmd_hdl_t *hdl)
4238 {
4239 	int sz;
4240 	char *sbuf;
4241 	char *ptr;
4242 
4243 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) {
4244 		out(O_ALTFP, "fme_istat_load: No stats");
4245 		return;
4246 	}
4247 
4248 	sbuf = alloca(sz);
4249 
4250 	fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz);
4251 
4252 	/*
4253 	 * pick apart the serialized stats
4254 	 *
4255 	 * format is:
4256 	 *	<class-name>, '@', <path>, '\0', <value>, '\0'
4257 	 * for example:
4258 	 *	"stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
4259 	 *
4260 	 * since this is parsing our own serialized data, any parsing issues
4261 	 * are fatal, so we check for them all with ASSERT() below.
4262 	 */
4263 	ptr = sbuf;
4264 	while (ptr < &sbuf[sz]) {
4265 		char *sepptr;
4266 		struct node *np;
4267 		int val;
4268 
4269 		sepptr = strchr(ptr, '@');
4270 		ASSERT(sepptr != NULL);
4271 		*sepptr = '\0';
4272 
4273 		/* construct the event */
4274 		np = newnode(T_EVENT, NULL, 0);
4275 		np->u.event.ename = newnode(T_NAME, NULL, 0);
4276 		np->u.event.ename->u.name.t = N_STAT;
4277 		np->u.event.ename->u.name.s = stable(ptr);
4278 		np->u.event.ename->u.name.it = IT_ENAME;
4279 		np->u.event.ename->u.name.last = np->u.event.ename;
4280 
4281 		ptr = sepptr + 1;
4282 		ASSERT(ptr < &sbuf[sz]);
4283 		ptr += strlen(ptr);
4284 		ptr++;	/* move past the '\0' separating path from value */
4285 		ASSERT(ptr < &sbuf[sz]);
4286 		ASSERT(isdigit(*ptr));
4287 		val = atoi(ptr);
4288 		ASSERT(val > 0);
4289 		ptr += strlen(ptr);
4290 		ptr++;	/* move past the final '\0' for this entry */
4291 
4292 		np->u.event.epname = pathstring2epnamenp(sepptr + 1);
4293 		ASSERT(np->u.event.epname != NULL);
4294 
4295 		istat_bump(np, val);
4296 		tree_free(np);
4297 	}
4298 
4299 	istat_save();
4300 }
4301