xref: /titanic_52/usr/src/cmd/fm/modules/common/eversholt/fme.c (revision 5a45682c3e7b01faa1761ab8d86f0bed4cc1d363)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  *
25  * fme.c -- fault management exercise module
26  *
27  * this module provides the simulated fault management exercise.
28  */
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <ctype.h>
35 #include <alloca.h>
36 #include <libnvpair.h>
37 #include <sys/fm/protocol.h>
38 #include <fm/fmd_api.h>
39 #include "alloc.h"
40 #include "out.h"
41 #include "stats.h"
42 #include "stable.h"
43 #include "literals.h"
44 #include "lut.h"
45 #include "tree.h"
46 #include "ptree.h"
47 #include "itree.h"
48 #include "ipath.h"
49 #include "fme.h"
50 #include "evnv.h"
51 #include "eval.h"
52 #include "config.h"
53 #include "platform.h"
54 #include "esclex.h"
55 
/* imported from eft.c... */
extern hrtime_t Hesitate;
extern char *Serd_Override;
extern nv_alloc_t Eft_nv_hdl;	/* allocator passed to nvlist_xpack/xunpack */
extern int Max_fme;
extern fmd_hdl_t *Hdl;

static int Istat_need_save;
static int Serd_need_save;
void istat_save(void);
void serd_save(void);

/* fme under construction is global so we can free it on module abort */
static struct fme *Nfmep;

/*
 * UD_VAL_* code describing why the case at hand cannot be diagnosed;
 * set just before an undiagnosable case is published or solved.
 */
static int Undiag_reason = UD_VAL_UNKNOWN;

/*
 * id handed to the next FME created by newfme(); fme_restart() bumps
 * it past the id of any FME it replays.
 */
static int Nextid = 0;

static int Open_fme_count = 0;	/* Count of open FMEs */
76 
/*
 * list of fault management exercises underway
 *
 * FMElist is the head and EFMElist the tail of the list that
 * fme_ready() appends to.  ClosedFMEs is a separate list that is
 * torn down in fme_fini().
 */
static struct fme {
	struct fme *next;		/* next exercise */
	unsigned long long ull;		/* time when fme was created */
	int id;				/* FME id */
	struct config *config;		/* cooked configuration data */
	struct lut *eventtree;		/* propagation tree for this FME */
	/*
	 * The initial error report that created this FME is kept in
	 * two forms.  e0 points to the instance tree node and is used
	 * by fme_eval() as the starting point for the inference
	 * algorithm.  e0r is the event handle FMD passed to us when
	 * the ereport first arrived and is used when setting timers,
	 * which are always relative to the time of this initial
	 * report.
	 */
	struct event *e0;
	fmd_event_t *e0r;

	id_t    timer;			/* for setting an fmd time-out */

	struct event *ecurrent;		/* ereport under consideration */
	struct event *suspects;		/* current suspect list */
	struct event *psuspects;	/* previous suspect list */
	int nsuspects;			/* count of suspects */
	int posted_suspects;		/* true if we've posted a diagnosis */
	int uniqobs;			/* number of unique events observed */
	int peek;			/* just peeking, don't track suspects */
	int overflow;			/* true if overflow FME */
	enum fme_state {
		FME_NOTHING = 5000,	/* not evaluated yet */
		FME_WAIT,		/* need to wait for more info */
		FME_CREDIBLE,		/* suspect list is credible */
		FME_DISPROVED,		/* no valid suspects found */
		FME_DEFERRED		/* don't know yet (k-count not met) */
	} state;

	unsigned long long pull;	/* time passed since created */
	unsigned long long wull;	/* wait until this time for re-eval */
	struct event *observations;	/* observation list */
	struct lut *globals;		/* values of global variables */
	/* fmd interfacing */
	fmd_hdl_t *hdl;			/* handle for talking with fmd */
	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
	/* stats (named "fme<id>.<suffix>" when created in fme_ready()) */
	struct stats *Rcount;		/* ereports received */
	struct stats *Hcallcount;	/* calls to hypothesise() */
	struct stats *Rcallcount;	/* calls to requirements_test() */
	struct stats *Ccallcount;	/* calls to causes_test() */
	struct stats *Ecallcount;	/* calls to effects_test() */
	struct stats *Tcallcount;	/* calls to triggered() */
	struct stats *Marrowcount;	/* arrows marked by mark_arrows() */
	struct stats *diags;		/* suspect lists diagnosed */
} *FMElist, *EFMElist, *ClosedFMEs;
131 
/*
 * Singly linked list of fmd cases that fme_restart() was unable to
 * restart.  New entries are pushed on the front; fme_fini() frees the
 * list nodes.
 */
static struct case_list {
	fmd_case_t *fmcase;
	struct case_list *next;
} *Undiagablecaselist;
136 
/* forward declarations of static functions used before their definition */
static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
	unsigned long long at_latest_by, unsigned long long *pdelay);
static struct node *eventprop_lookup(struct event *ep, const char *propname);
static struct node *pathstring2epnamenp(char *path);
static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
	fmd_case_t *fmcase, nvlist_t *detector, char *arg);
static char *undiag_2reason_str(int ud, char *arg);
static const char *undiag_2defect_str(int ud);
static void restore_suspects(struct fme *fmep);
static void save_suspects(struct fme *fmep);
static void destroy_fme(struct fme *f);
static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
    const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
static void istat_counter_reset_cb(struct istat_entry *entp,
    struct stats *statp, const struct ipath *ipp);
static void istat_counter_topo_chg_cb(struct istat_entry *entp,
    struct stats *statp, void *unused);
static void serd_reset_cb(struct serd_entry *entp, void *unused,
    const struct ipath *ipp);
static void serd_topo_chg_cb(struct serd_entry *entp, void *unused,
    void *unused2);
static void destroy_fme_bufs(struct fme *fp);
160 
161 static struct fme *
162 alloc_fme(void)
163 {
164 	struct fme *fmep;
165 
166 	fmep = MALLOC(sizeof (*fmep));
167 	bzero(fmep, sizeof (*fmep));
168 	return (fmep);
169 }
170 
171 /*
172  * fme_ready -- called when all initialization of the FME (except for
173  *	stats) has completed successfully.  Adds the fme to global lists
174  *	and establishes its stats.
175  */
176 static struct fme *
177 fme_ready(struct fme *fmep)
178 {
179 	char nbuf[100];
180 
181 	Nfmep = NULL;	/* don't need to free this on module abort now */
182 
183 	if (EFMElist) {
184 		EFMElist->next = fmep;
185 		EFMElist = fmep;
186 	} else
187 		FMElist = EFMElist = fmep;
188 
189 	(void) sprintf(nbuf, "fme%d.Rcount", fmep->id);
190 	fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
191 	(void) sprintf(nbuf, "fme%d.Hcall", fmep->id);
192 	fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1);
193 	(void) sprintf(nbuf, "fme%d.Rcall", fmep->id);
194 	fmep->Rcallcount = stats_new_counter(nbuf,
195 	    "calls to requirements_test()", 1);
196 	(void) sprintf(nbuf, "fme%d.Ccall", fmep->id);
197 	fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1);
198 	(void) sprintf(nbuf, "fme%d.Ecall", fmep->id);
199 	fmep->Ecallcount =
200 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
201 	(void) sprintf(nbuf, "fme%d.Tcall", fmep->id);
202 	fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
203 	(void) sprintf(nbuf, "fme%d.Marrow", fmep->id);
204 	fmep->Marrowcount = stats_new_counter(nbuf,
205 	    "arrows marked by mark_arrows()", 1);
206 	(void) sprintf(nbuf, "fme%d.diags", fmep->id);
207 	fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
208 
209 	out(O_ALTFP|O_VERB2, "newfme: config snapshot contains...");
210 	config_print(O_ALTFP|O_VERB2, fmep->config);
211 
212 	return (fmep);
213 }
214 
/*
 * Declared here rather than pulled in from a header; used below by
 * set_needed_arrows() and prune_propagations() respectively.
 */
extern void ipath_dummy_lut(struct arrow *);
extern struct lut *itree_create_dummy(const char *, const struct ipath *);
217 
218 /* ARGSUSED */
219 static void
220 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
221 {
222 	struct bubble *bp;
223 	struct arrowlist *ap;
224 
225 	for (bp = itree_next_bubble(ep, NULL); bp;
226 	    bp = itree_next_bubble(ep, bp)) {
227 		if (bp->t != B_FROM)
228 			continue;
229 		for (ap = itree_next_arrow(bp, NULL); ap;
230 		    ap = itree_next_arrow(bp, ap)) {
231 			ap->arrowp->pnode->u.arrow.needed = 1;
232 			ipath_dummy_lut(ap->arrowp);
233 		}
234 	}
235 }
236 
237 /* ARGSUSED */
238 static void
239 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
240 {
241 	struct bubble *bp;
242 	struct arrowlist *ap;
243 
244 	for (bp = itree_next_bubble(ep, NULL); bp;
245 	    bp = itree_next_bubble(ep, bp)) {
246 		if (bp->t != B_FROM)
247 			continue;
248 		for (ap = itree_next_arrow(bp, NULL); ap;
249 		    ap = itree_next_arrow(bp, ap))
250 			ap->arrowp->pnode->u.arrow.needed = 0;
251 	}
252 }
253 
/* forward declarations needed by prune_propagations() below */
static void globals_destructor(void *left, void *right, void *arg);
static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep);
256 
257 static boolean_t
258 prune_propagations(const char *e0class, const struct ipath *e0ipp)
259 {
260 	char nbuf[100];
261 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
262 	extern struct lut *Usednames;
263 
264 	Nfmep = alloc_fme();
265 	Nfmep->id = Nextid;
266 	Nfmep->state = FME_NOTHING;
267 	Nfmep->eventtree = itree_create_dummy(e0class, e0ipp);
268 	if ((Nfmep->e0 =
269 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
270 		itree_free(Nfmep->eventtree);
271 		FREE(Nfmep);
272 		Nfmep = NULL;
273 		return (B_FALSE);
274 	}
275 	Nfmep->ecurrent = Nfmep->observations = Nfmep->e0;
276 	Nfmep->e0->count++;
277 
278 	(void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id);
279 	Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
280 	(void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id);
281 	Nfmep->Hcallcount =
282 	    stats_new_counter(nbuf, "calls to hypothesise()", 1);
283 	(void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id);
284 	Nfmep->Rcallcount = stats_new_counter(nbuf,
285 	    "calls to requirements_test()", 1);
286 	(void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id);
287 	Nfmep->Ccallcount =
288 	    stats_new_counter(nbuf, "calls to causes_test()", 1);
289 	(void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id);
290 	Nfmep->Ecallcount =
291 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
292 	(void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id);
293 	Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
294 	(void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id);
295 	Nfmep->Marrowcount = stats_new_counter(nbuf,
296 	    "arrows marked by mark_arrows()", 1);
297 	(void) sprintf(nbuf, "fme%d.diags", Nfmep->id);
298 	Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
299 
300 	Nfmep->peek = 1;
301 	lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep);
302 	lut_free(Usednames, NULL, NULL);
303 	Usednames = NULL;
304 	lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep);
305 	(void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay);
306 	itree_prune(Nfmep->eventtree);
307 	lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep);
308 
309 	stats_delete(Nfmep->Rcount);
310 	stats_delete(Nfmep->Hcallcount);
311 	stats_delete(Nfmep->Rcallcount);
312 	stats_delete(Nfmep->Ccallcount);
313 	stats_delete(Nfmep->Ecallcount);
314 	stats_delete(Nfmep->Tcallcount);
315 	stats_delete(Nfmep->Marrowcount);
316 	stats_delete(Nfmep->diags);
317 	itree_free(Nfmep->eventtree);
318 	lut_free(Nfmep->globals, globals_destructor, NULL);
319 	FREE(Nfmep);
320 	return (B_TRUE);
321 }
322 
323 static struct fme *
324 newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl,
325 	fmd_case_t *fmcase, fmd_event_t *ffep, nvlist_t *nvl)
326 {
327 	struct cfgdata *cfgdata;
328 	int init_size;
329 	extern int alloc_total();
330 	nvlist_t *detector = NULL;
331 	char *pathstr;
332 	char *arg;
333 
334 	/*
335 	 * First check if e0ipp is actually in the topology so we can give a
336 	 * more useful error message.
337 	 */
338 	ipathlastcomp(e0ipp);
339 	pathstr = ipath2str(NULL, e0ipp);
340 	cfgdata = config_snapshot();
341 	platform_units_translate(0, cfgdata->cooked, NULL, NULL,
342 	    &detector, pathstr);
343 	FREE(pathstr);
344 	structconfig_free(cfgdata->cooked);
345 	config_free(cfgdata);
346 	if (detector == NULL) {
347 		/* See if class permits silent discard on unknown component. */
348 		if (lut_lookup(Ereportenames_discard, (void *)e0class, NULL)) {
349 			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
350 			    "to component path, but silent discard allowed.",
351 			    e0class);
352 		} else {
353 			Undiag_reason = UD_VAL_BADEVENTPATH;
354 			(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
355 			    &detector);
356 			arg = ipath2str(e0class, e0ipp);
357 			publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
358 			FREE(arg);
359 		}
360 		return (NULL);
361 	}
362 
363 	/*
364 	 * Next run a quick first pass of the rules with a dummy config. This
365 	 * allows us to prune those rules which can't possibly cause this
366 	 * ereport.
367 	 */
368 	if (!prune_propagations(e0class, e0ipp)) {
369 		/*
370 		 * The fault class must have been in the rules or we would
371 		 * not have registered for it (and got a "nosub"), and the
372 		 * pathname must be in the topology or we would have failed the
373 		 * previous test. So to get here means the combination of
374 		 * class and pathname in the ereport must be invalid.
375 		 */
376 		Undiag_reason = UD_VAL_BADEVENTCLASS;
377 		arg = ipath2str(e0class, e0ipp);
378 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
379 		nvlist_free(detector);
380 		FREE(arg);
381 		return (NULL);
382 	}
383 
384 	/*
385 	 * Now go ahead and create the real fme using the pruned rules.
386 	 */
387 	init_size = alloc_total();
388 	out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size);
389 	nvlist_free(detector);
390 	pathstr = ipath2str(NULL, e0ipp);
391 	cfgdata = config_snapshot();
392 	platform_units_translate(0, cfgdata->cooked, NULL, NULL,
393 	    &detector, pathstr);
394 	FREE(pathstr);
395 	platform_save_config(hdl, fmcase);
396 	out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes",
397 	    alloc_total() - init_size);
398 
399 	Nfmep = alloc_fme();
400 
401 	Nfmep->id = Nextid++;
402 	Nfmep->config = cfgdata->cooked;
403 	config_free(cfgdata);
404 	Nfmep->posted_suspects = 0;
405 	Nfmep->uniqobs = 0;
406 	Nfmep->state = FME_NOTHING;
407 	Nfmep->pull = 0ULL;
408 	Nfmep->overflow = 0;
409 
410 	Nfmep->fmcase = fmcase;
411 	Nfmep->hdl = hdl;
412 
413 	if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) {
414 		Undiag_reason = UD_VAL_INSTFAIL;
415 		arg = ipath2str(e0class, e0ipp);
416 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
417 		nvlist_free(detector);
418 		FREE(arg);
419 		structconfig_free(Nfmep->config);
420 		destroy_fme_bufs(Nfmep);
421 		FREE(Nfmep);
422 		Nfmep = NULL;
423 		return (NULL);
424 	}
425 
426 	itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);
427 
428 	if ((Nfmep->e0 =
429 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
430 		Undiag_reason = UD_VAL_BADEVENTI;
431 		arg = ipath2str(e0class, e0ipp);
432 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
433 		nvlist_free(detector);
434 		FREE(arg);
435 		itree_free(Nfmep->eventtree);
436 		structconfig_free(Nfmep->config);
437 		destroy_fme_bufs(Nfmep);
438 		FREE(Nfmep);
439 		Nfmep = NULL;
440 		return (NULL);
441 	}
442 
443 	nvlist_free(detector);
444 	return (fme_ready(Nfmep));
445 }
446 
447 void
448 fme_fini(void)
449 {
450 	struct fme *sfp, *fp;
451 	struct case_list *ucasep, *nextcasep;
452 
453 	ucasep = Undiagablecaselist;
454 	while (ucasep != NULL) {
455 		nextcasep = ucasep->next;
456 		FREE(ucasep);
457 		ucasep = nextcasep;
458 	}
459 	Undiagablecaselist = NULL;
460 
461 	/* clean up closed fmes */
462 	fp = ClosedFMEs;
463 	while (fp != NULL) {
464 		sfp = fp->next;
465 		destroy_fme(fp);
466 		fp = sfp;
467 	}
468 	ClosedFMEs = NULL;
469 
470 	fp = FMElist;
471 	while (fp != NULL) {
472 		sfp = fp->next;
473 		destroy_fme(fp);
474 		fp = sfp;
475 	}
476 	FMElist = EFMElist = NULL;
477 
478 	/* if we were in the middle of creating an fme, free it now */
479 	if (Nfmep) {
480 		destroy_fme(Nfmep);
481 		Nfmep = NULL;
482 	}
483 }
484 
/*
 * Allocated space for a buffer name.  The names built in this file
 * are "observed<N>" and "observed<N>.nvp", so 20 bytes allows for
 * a ridiculous 9,999,999 unique observations.
 */
#define	OBBUFNMSZ 20
490 
491 /*
492  *  serialize_observation
493  *
494  *  Create a recoverable version of the current observation
495  *  (f->ecurrent).  We keep a serialized version of each unique
496  *  observation in order that we may resume correctly the fme in the
497  *  correct state if eft or fmd crashes and we're restarted.
498  */
499 static void
500 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
501 {
502 	size_t pkdlen;
503 	char tmpbuf[OBBUFNMSZ];
504 	char *pkd = NULL;
505 	char *estr;
506 
507 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
508 	estr = ipath2str(cls, ipp);
509 	fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
510 	fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
511 	    strlen(estr) + 1);
512 	FREE(estr);
513 
514 	if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
515 		(void) snprintf(tmpbuf,
516 		    OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
517 		if (nvlist_xpack(fp->ecurrent->nvp,
518 		    &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
519 			out(O_DIE|O_SYS, "pack of observed nvl failed");
520 		fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
521 		fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
522 		FREE(pkd);
523 	}
524 
525 	fp->uniqobs++;
526 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
527 	    sizeof (fp->uniqobs));
528 }
529 
530 /*
531  *  init_fme_bufs -- We keep several bits of state about an fme for
532  *	use if eft or fmd crashes and we're restarted.
533  */
534 static void
535 init_fme_bufs(struct fme *fp)
536 {
537 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
538 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
539 	    sizeof (fp->pull));
540 
541 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
542 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
543 	    sizeof (fp->id));
544 
545 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
546 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
547 	    sizeof (fp->uniqobs));
548 
549 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
550 	    sizeof (fp->posted_suspects));
551 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
552 	    (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
553 }
554 
555 static void
556 destroy_fme_bufs(struct fme *fp)
557 {
558 	char tmpbuf[OBBUFNMSZ];
559 	int o;
560 
561 	platform_restore_config(fp->hdl, fp->fmcase);
562 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
563 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
564 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
565 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
566 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
567 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);
568 
569 	for (o = 0; o < fp->uniqobs; o++) {
570 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
571 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
572 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
573 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
574 	}
575 }
576 
577 /*
578  * reconstitute_observations -- convert a case's serialized observations
579  *	back into struct events.  Returns zero if all observations are
580  *	successfully reconstituted.
581  */
582 static int
583 reconstitute_observations(struct fme *fmep)
584 {
585 	struct event *ep;
586 	struct node *epnamenp = NULL;
587 	size_t pkdlen;
588 	char *pkd = NULL;
589 	char *tmpbuf = alloca(OBBUFNMSZ);
590 	char *sepptr;
591 	char *estr;
592 	int ocnt;
593 	int elen;
594 
595 	for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
596 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
597 		elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
598 		if (elen == 0) {
599 			out(O_ALTFP,
600 			    "reconstitute_observation: no %s buffer found.",
601 			    tmpbuf);
602 			Undiag_reason = UD_VAL_MISSINGOBS;
603 			break;
604 		}
605 
606 		estr = MALLOC(elen);
607 		fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
608 		sepptr = strchr(estr, '@');
609 		if (sepptr == NULL) {
610 			out(O_ALTFP,
611 			    "reconstitute_observation: %s: "
612 			    "missing @ separator in %s.",
613 			    tmpbuf, estr);
614 			Undiag_reason = UD_VAL_MISSINGPATH;
615 			FREE(estr);
616 			break;
617 		}
618 
619 		*sepptr = '\0';
620 		if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
621 			out(O_ALTFP,
622 			    "reconstitute_observation: %s: "
623 			    "trouble converting path string \"%s\" "
624 			    "to internal representation.",
625 			    tmpbuf, sepptr + 1);
626 			Undiag_reason = UD_VAL_MISSINGPATH;
627 			FREE(estr);
628 			break;
629 		}
630 
631 		/* construct the event */
632 		ep = itree_lookup(fmep->eventtree,
633 		    stable(estr), ipath(epnamenp));
634 		if (ep == NULL) {
635 			out(O_ALTFP,
636 			    "reconstitute_observation: %s: "
637 			    "lookup of  \"%s\" in itree failed.",
638 			    tmpbuf, ipath2str(estr, ipath(epnamenp)));
639 			Undiag_reason = UD_VAL_BADOBS;
640 			tree_free(epnamenp);
641 			FREE(estr);
642 			break;
643 		}
644 		tree_free(epnamenp);
645 
646 		/*
647 		 * We may or may not have a saved nvlist for the observation
648 		 */
649 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
650 		pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
651 		if (pkdlen != 0) {
652 			pkd = MALLOC(pkdlen);
653 			fmd_buf_read(fmep->hdl,
654 			    fmep->fmcase, tmpbuf, pkd, pkdlen);
655 			ASSERT(ep->nvp == NULL);
656 			if (nvlist_xunpack(pkd,
657 			    pkdlen, &ep->nvp, &Eft_nv_hdl) != 0)
658 				out(O_DIE|O_SYS, "pack of observed nvl failed");
659 			FREE(pkd);
660 		}
661 
662 		if (ocnt == 0)
663 			fmep->e0 = ep;
664 
665 		FREE(estr);
666 		fmep->ecurrent = ep;
667 		ep->count++;
668 
669 		/* link it into list of observations seen */
670 		ep->observations = fmep->observations;
671 		fmep->observations = ep;
672 	}
673 
674 	if (ocnt == fmep->uniqobs) {
675 		(void) fme_ready(fmep);
676 		return (0);
677 	}
678 
679 	return (1);
680 }
681 
682 /*
683  * restart_fme -- called during eft initialization.  Reconstitutes
684  *	an in-progress fme.
685  */
686 void
687 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress)
688 {
689 	nvlist_t *defect;
690 	struct case_list *bad;
691 	struct fme *fmep;
692 	struct cfgdata *cfgdata;
693 	size_t rawsz;
694 	struct event *ep;
695 	char *tmpbuf = alloca(OBBUFNMSZ);
696 	char *sepptr;
697 	char *estr;
698 	int elen;
699 	struct node *epnamenp = NULL;
700 	int init_size;
701 	extern int alloc_total();
702 	char *reason;
703 
704 	/*
705 	 * ignore solved or closed cases
706 	 */
707 	if (fmd_case_solved(hdl, inprogress) ||
708 	    fmd_case_closed(hdl, inprogress))
709 		return;
710 
711 	fmep = alloc_fme();
712 	fmep->fmcase = inprogress;
713 	fmep->hdl = hdl;
714 
715 	if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) {
716 		out(O_ALTFP, "restart_fme: no saved posted status");
717 		Undiag_reason = UD_VAL_MISSINGINFO;
718 		goto badcase;
719 	} else {
720 		fmd_buf_read(hdl, inprogress, WOBUF_POSTD,
721 		    (void *)&fmep->posted_suspects,
722 		    sizeof (fmep->posted_suspects));
723 	}
724 
725 	if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) {
726 		out(O_ALTFP, "restart_fme: no saved id");
727 		Undiag_reason = UD_VAL_MISSINGINFO;
728 		goto badcase;
729 	} else {
730 		fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id,
731 		    sizeof (fmep->id));
732 	}
733 	if (Nextid <= fmep->id)
734 		Nextid = fmep->id + 1;
735 
736 	out(O_ALTFP, "Replay FME %d", fmep->id);
737 
738 	if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) {
739 		out(O_ALTFP, "restart_fme: No config data");
740 		Undiag_reason = UD_VAL_MISSINGINFO;
741 		goto badcase;
742 	}
743 	fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz,
744 	    sizeof (size_t));
745 
746 	if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) {
747 		out(O_ALTFP, "restart_fme: No event zero");
748 		Undiag_reason = UD_VAL_MISSINGZERO;
749 		goto badcase;
750 	}
751 
752 	if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
753 		out(O_ALTFP, "restart_fme: no saved wait time");
754 		Undiag_reason = UD_VAL_MISSINGINFO;
755 		goto badcase;
756 	} else {
757 		fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
758 		    sizeof (fmep->pull));
759 	}
760 
761 	if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
762 		out(O_ALTFP, "restart_fme: no count of observations");
763 		Undiag_reason = UD_VAL_MISSINGINFO;
764 		goto badcase;
765 	} else {
766 		fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
767 		    (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
768 	}
769 
770 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed0");
771 	elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
772 	if (elen == 0) {
773 		out(O_ALTFP, "reconstitute_observation: no %s buffer found.",
774 		    tmpbuf);
775 		Undiag_reason = UD_VAL_MISSINGOBS;
776 		goto badcase;
777 	}
778 	estr = MALLOC(elen);
779 	fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
780 	sepptr = strchr(estr, '@');
781 	if (sepptr == NULL) {
782 		out(O_ALTFP, "reconstitute_observation: %s: "
783 		    "missing @ separator in %s.",
784 		    tmpbuf, estr);
785 		Undiag_reason = UD_VAL_MISSINGPATH;
786 		FREE(estr);
787 		goto badcase;
788 	}
789 	*sepptr = '\0';
790 	if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
791 		out(O_ALTFP, "reconstitute_observation: %s: "
792 		    "trouble converting path string \"%s\" "
793 		    "to internal representation.", tmpbuf, sepptr + 1);
794 		Undiag_reason = UD_VAL_MISSINGPATH;
795 		FREE(estr);
796 		goto badcase;
797 	}
798 	(void) prune_propagations(stable(estr), ipath(epnamenp));
799 	tree_free(epnamenp);
800 	FREE(estr);
801 
802 	init_size = alloc_total();
803 	out(O_ALTFP|O_STAMP, "start config_restore using %d bytes", init_size);
804 	cfgdata = MALLOC(sizeof (struct cfgdata));
805 	cfgdata->cooked = NULL;
806 	cfgdata->devcache = NULL;
807 	cfgdata->devidcache = NULL;
808 	cfgdata->tpcache = NULL;
809 	cfgdata->cpucache = NULL;
810 	cfgdata->raw_refcnt = 1;
811 
812 	if (rawsz > 0) {
813 		if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) {
814 			out(O_ALTFP, "restart_fme: Config data size mismatch");
815 			Undiag_reason = UD_VAL_CFGMISMATCH;
816 			goto badcase;
817 		}
818 		cfgdata->begin = MALLOC(rawsz);
819 		cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz;
820 		fmd_buf_read(hdl,
821 		    inprogress, WOBUF_CFG, cfgdata->begin, rawsz);
822 	} else {
823 		cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL;
824 	}
825 
826 	config_cook(cfgdata);
827 	fmep->config = cfgdata->cooked;
828 	config_free(cfgdata);
829 	out(O_ALTFP|O_STAMP, "config_restore added %d bytes",
830 	    alloc_total() - init_size);
831 
832 	if ((fmep->eventtree = itree_create(fmep->config)) == NULL) {
833 		/* case not properly saved or irretrievable */
834 		out(O_ALTFP, "restart_fme: NULL instance tree");
835 		Undiag_reason = UD_VAL_INSTFAIL;
836 		goto badcase;
837 	}
838 
839 	itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree);
840 
841 	if (reconstitute_observations(fmep) != 0)
842 		goto badcase;
843 
844 	out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id);
845 	for (ep = fmep->observations; ep; ep = ep->observations) {
846 		out(O_ALTFP|O_NONL, " ");
847 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
848 	}
849 	out(O_ALTFP, NULL);
850 
851 	Open_fme_count++;
852 
853 	/* give the diagnosis algorithm a shot at the new FME state */
854 	fme_eval(fmep, fmep->e0r);
855 	return;
856 
857 badcase:
858 	if (fmep->eventtree != NULL)
859 		itree_free(fmep->eventtree);
860 	if (fmep->config)
861 		structconfig_free(fmep->config);
862 	destroy_fme_bufs(fmep);
863 	FREE(fmep);
864 
865 	/*
866 	 * Since we're unable to restart the case, add it to the undiagable
867 	 * list and solve and close it as appropriate.
868 	 */
869 	bad = MALLOC(sizeof (struct case_list));
870 	bad->next = NULL;
871 
872 	if (Undiagablecaselist != NULL)
873 		bad->next = Undiagablecaselist;
874 	Undiagablecaselist = bad;
875 	bad->fmcase = inprogress;
876 
877 	out(O_ALTFP|O_NONL, "[case %s (unable to restart), ",
878 	    fmd_case_uuid(hdl, bad->fmcase));
879 
880 	if (fmd_case_solved(hdl, bad->fmcase)) {
881 		out(O_ALTFP|O_NONL, "already solved, ");
882 	} else {
883 		out(O_ALTFP|O_NONL, "solving, ");
884 		defect = fmd_nvl_create_fault(hdl,
885 		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
886 		reason = undiag_2reason_str(Undiag_reason, NULL);
887 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
888 		FREE(reason);
889 		fmd_case_add_suspect(hdl, bad->fmcase, defect);
890 		fmd_case_solve(hdl, bad->fmcase);
891 		Undiag_reason = UD_VAL_UNKNOWN;
892 	}
893 
894 	if (fmd_case_closed(hdl, bad->fmcase)) {
895 		out(O_ALTFP, "already closed ]");
896 	} else {
897 		out(O_ALTFP, "closing ]");
898 		fmd_case_close(hdl, bad->fmcase);
899 	}
900 }
901 
902 /*ARGSUSED*/
903 static void
904 globals_destructor(void *left, void *right, void *arg)
905 {
906 	struct evalue *evp = (struct evalue *)right;
907 	if (evp->t == NODEPTR)
908 		tree_free((struct node *)(uintptr_t)evp->v);
909 	evp->v = (uintptr_t)NULL;
910 	FREE(evp);
911 }
912 
913 void
914 destroy_fme(struct fme *f)
915 {
916 	stats_delete(f->Rcount);
917 	stats_delete(f->Hcallcount);
918 	stats_delete(f->Rcallcount);
919 	stats_delete(f->Ccallcount);
920 	stats_delete(f->Ecallcount);
921 	stats_delete(f->Tcallcount);
922 	stats_delete(f->Marrowcount);
923 	stats_delete(f->diags);
924 
925 	if (f->eventtree != NULL)
926 		itree_free(f->eventtree);
927 	if (f->config)
928 		structconfig_free(f->config);
929 	lut_free(f->globals, globals_destructor, NULL);
930 	FREE(f);
931 }
932 
933 static const char *
934 fme_state2str(enum fme_state s)
935 {
936 	switch (s) {
937 	case FME_NOTHING:	return ("NOTHING");
938 	case FME_WAIT:		return ("WAIT");
939 	case FME_CREDIBLE:	return ("CREDIBLE");
940 	case FME_DISPROVED:	return ("DISPROVED");
941 	case FME_DEFERRED:	return ("DEFERRED");
942 	default:		return ("UNKNOWN");
943 	}
944 }
945 
946 static int
947 is_problem(enum nametype t)
948 {
949 	return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
950 }
951 
952 static int
953 is_defect(enum nametype t)
954 {
955 	return (t == N_DEFECT);
956 }
957 
958 static int
959 is_upset(enum nametype t)
960 {
961 	return (t == N_UPSET);
962 }
963 
964 static void
965 fme_print(int flags, struct fme *fmep)
966 {
967 	struct event *ep;
968 
969 	out(flags, "Fault Management Exercise %d", fmep->id);
970 	out(flags, "\t       State: %s", fme_state2str(fmep->state));
971 	out(flags|O_NONL, "\t  Start time: ");
972 	ptree_timeval(flags|O_NONL, &fmep->ull);
973 	out(flags, NULL);
974 	if (fmep->wull) {
975 		out(flags|O_NONL, "\t   Wait time: ");
976 		ptree_timeval(flags|O_NONL, &fmep->wull);
977 		out(flags, NULL);
978 	}
979 	out(flags|O_NONL, "\t          E0: ");
980 	if (fmep->e0)
981 		itree_pevent_brief(flags|O_NONL, fmep->e0);
982 	else
983 		out(flags|O_NONL, "NULL");
984 	out(flags, NULL);
985 	out(flags|O_NONL, "\tObservations:");
986 	for (ep = fmep->observations; ep; ep = ep->observations) {
987 		out(flags|O_NONL, " ");
988 		itree_pevent_brief(flags|O_NONL, ep);
989 	}
990 	out(flags, NULL);
991 	out(flags|O_NONL, "\tSuspect list:");
992 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
993 		out(flags|O_NONL, " ");
994 		itree_pevent_brief(flags|O_NONL, ep);
995 	}
996 	out(flags, NULL);
997 	if (fmep->eventtree != NULL) {
998 		out(flags|O_VERB2, "\t        Tree:");
999 		itree_ptree(flags|O_VERB2, fmep->eventtree);
1000 	}
1001 }
1002 
1003 static struct node *
1004 pathstring2epnamenp(char *path)
1005 {
1006 	char *sep = "/";
1007 	struct node *ret;
1008 	char *ptr;
1009 
1010 	if ((ptr = strtok(path, sep)) == NULL)
1011 		out(O_DIE, "pathstring2epnamenp: invalid empty class");
1012 
1013 	ret = tree_iname(stable(ptr), NULL, 0);
1014 
1015 	while ((ptr = strtok(NULL, sep)) != NULL)
1016 		ret = tree_name_append(ret,
1017 		    tree_iname(stable(ptr), NULL, 0));
1018 
1019 	return (ret);
1020 }
1021 
1022 /*
1023  * for a given upset sp, increment the corresponding SERD engine.  if the
1024  * SERD engine trips, return the ename and ipp of the resulting ereport.
1025  * returns true if engine tripped and *enamep and *ippp were filled in.
1026  */
1027 static int
1028 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep,
1029     fmd_case_t *fmcase, struct event *sp, const char **enamep,
1030     const struct ipath **ippp)
1031 {
1032 	struct node *serdinst;
1033 	char *serdname;
1034 	char *serdresource;
1035 	char *serdclass;
1036 	struct node *nid;
1037 	struct serd_entry *newentp;
1038 	int i, serdn = -1, serdincrement = 1, len = 0;
1039 	char *serdsuffix = NULL, *serdt = NULL;
1040 	struct evalue *ep;
1041 
1042 	ASSERT(sp->t == N_UPSET);
1043 	ASSERT(ffep != NULL);
1044 
1045 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1046 	    (void *)"n", (lut_cmp)strcmp)) != NULL) {
1047 		ASSERT(ep->t == UINT64);
1048 		serdn = (int)ep->v;
1049 	}
1050 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1051 	    (void *)"t", (lut_cmp)strcmp)) != NULL) {
1052 		ASSERT(ep->t == STRING);
1053 		serdt = (char *)(uintptr_t)ep->v;
1054 	}
1055 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1056 	    (void *)"suffix", (lut_cmp)strcmp)) != NULL) {
1057 		ASSERT(ep->t == STRING);
1058 		serdsuffix = (char *)(uintptr_t)ep->v;
1059 	}
1060 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1061 	    (void *)"increment", (lut_cmp)strcmp)) != NULL) {
1062 		ASSERT(ep->t == UINT64);
1063 		serdincrement = (int)ep->v;
1064 	}
1065 
1066 	/*
1067 	 * obtain instanced SERD engine from the upset sp.  from this
1068 	 * derive serdname, the string used to identify the SERD engine.
1069 	 */
1070 	serdinst = eventprop_lookup(sp, L_engine);
1071 
1072 	if (serdinst == NULL)
1073 		return (-1);
1074 
1075 	len = strlen(serdinst->u.stmt.np->u.event.ename->u.name.s) + 1;
1076 	if (serdsuffix != NULL)
1077 		len += strlen(serdsuffix);
1078 	serdclass = MALLOC(len);
1079 	if (serdsuffix != NULL)
1080 		(void) snprintf(serdclass, len, "%s%s",
1081 		    serdinst->u.stmt.np->u.event.ename->u.name.s, serdsuffix);
1082 	else
1083 		(void) snprintf(serdclass, len, "%s",
1084 		    serdinst->u.stmt.np->u.event.ename->u.name.s);
1085 	serdresource = ipath2str(NULL,
1086 	    ipath(serdinst->u.stmt.np->u.event.epname));
1087 	len += strlen(serdresource) + 1;
1088 	serdname = MALLOC(len);
1089 	(void) snprintf(serdname, len, "%s@%s", serdclass, serdresource);
1090 	FREE(serdresource);
1091 
1092 	/* handle serd engine "id" property, if there is one */
1093 	if ((nid =
1094 	    lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) {
1095 		struct evalue *gval;
1096 		char suffixbuf[200];
1097 		char *suffix;
1098 		char *nserdname;
1099 		size_t nname;
1100 
1101 		out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname);
1102 		ptree_name_iter(O_ALTFP|O_NONL, nid);
1103 
1104 		ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t));
1105 
1106 		if ((gval = lut_lookup(fmep->globals,
1107 		    (void *)nid->u.globid.s, NULL)) == NULL) {
1108 			out(O_ALTFP, " undefined");
1109 		} else if (gval->t == UINT64) {
1110 			out(O_ALTFP, " %llu", gval->v);
1111 			(void) sprintf(suffixbuf, "%llu", gval->v);
1112 			suffix = suffixbuf;
1113 		} else {
1114 			out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v);
1115 			suffix = (char *)(uintptr_t)gval->v;
1116 		}
1117 
1118 		nname = strlen(serdname) + strlen(suffix) + 2;
1119 		nserdname = MALLOC(nname);
1120 		(void) snprintf(nserdname, nname, "%s:%s", serdname, suffix);
1121 		FREE(serdname);
1122 		serdname = nserdname;
1123 	}
1124 
1125 	/*
1126 	 * if the engine is empty, and we have an override for n/t then
1127 	 * destroy and recreate it.
1128 	 */
1129 	if ((serdn != -1 || serdt != NULL) && fmd_serd_exists(hdl, serdname) &&
1130 	    fmd_serd_empty(hdl, serdname))
1131 		fmd_serd_destroy(hdl, serdname);
1132 
1133 	if (!fmd_serd_exists(hdl, serdname)) {
1134 		struct node *nN, *nT;
1135 		const char *s;
1136 		struct node *nodep;
1137 		struct config *cp;
1138 		char *path;
1139 		uint_t nval;
1140 		hrtime_t tval;
1141 		int i;
1142 		char *ptr;
1143 		int got_n_override = 0, got_t_override = 0;
1144 
1145 		/* no SERD engine yet, so create it */
1146 		nodep = serdinst->u.stmt.np->u.event.epname;
1147 		path = ipath2str(NULL, ipath(nodep));
1148 		cp = config_lookup(fmep->config, path, 0);
1149 		FREE((void *)path);
1150 
1151 		/*
1152 		 * We allow serd paramaters to be overridden, either from
1153 		 * eft.conf file values (if Serd_Override is set) or from
1154 		 * driver properties (for "serd.io.device" engines).
1155 		 */
1156 		if (Serd_Override != NULL) {
1157 			char *save_ptr, *ptr1, *ptr2, *ptr3;
1158 			ptr3 = save_ptr = STRDUP(Serd_Override);
1159 			while (*ptr3 != '\0') {
1160 				ptr1 = strchr(ptr3, ',');
1161 				*ptr1 = '\0';
1162 				if (strcmp(ptr3, serdclass) == 0) {
1163 					ptr2 =  strchr(ptr1 + 1, ',');
1164 					*ptr2 = '\0';
1165 					nval = atoi(ptr1 + 1);
1166 					out(O_ALTFP, "serd override %s_n %d",
1167 					    serdclass, nval);
1168 					ptr3 =  strchr(ptr2 + 1, ' ');
1169 					if (ptr3)
1170 						*ptr3 = '\0';
1171 					ptr = STRDUP(ptr2 + 1);
1172 					out(O_ALTFP, "serd override %s_t %s",
1173 					    serdclass, ptr);
1174 					got_n_override = 1;
1175 					got_t_override = 1;
1176 					break;
1177 				} else {
1178 					ptr2 =  strchr(ptr1 + 1, ',');
1179 					ptr3 =  strchr(ptr2 + 1, ' ');
1180 					if (ptr3 == NULL)
1181 						break;
1182 				}
1183 				ptr3++;
1184 			}
1185 			FREE(save_ptr);
1186 		}
1187 
1188 		if (cp && got_n_override == 0) {
1189 			/*
1190 			 * convert serd engine class into property name
1191 			 */
1192 			char *prop_name = MALLOC(strlen(serdclass) + 3);
1193 			for (i = 0; i < strlen(serdclass); i++) {
1194 				if (serdclass[i] == '.')
1195 					prop_name[i] = '_';
1196 				else
1197 					prop_name[i] = serdclass[i];
1198 			}
1199 			prop_name[i++] = '_';
1200 			prop_name[i++] = 'n';
1201 			prop_name[i] = '\0';
1202 			if (s = config_getprop(cp, prop_name)) {
1203 				nval = atoi(s);
1204 				out(O_ALTFP, "serd override %s_n %s",
1205 				    serdclass, s);
1206 				got_n_override = 1;
1207 			}
1208 			prop_name[i - 1] = 't';
1209 			if (s = config_getprop(cp, prop_name)) {
1210 				ptr = STRDUP(s);
1211 				out(O_ALTFP, "serd override %s_t %s",
1212 				    serdclass, s);
1213 				got_t_override = 1;
1214 			}
1215 			FREE(prop_name);
1216 		}
1217 
1218 		if (serdn != -1 && got_n_override == 0) {
1219 			nval = serdn;
1220 			out(O_ALTFP, "serd override %s_n %d", serdclass, serdn);
1221 			got_n_override = 1;
1222 		}
1223 		if (serdt != NULL && got_t_override == 0) {
1224 			ptr = STRDUP(serdt);
1225 			out(O_ALTFP, "serd override %s_t %s", serdclass, serdt);
1226 			got_t_override = 1;
1227 		}
1228 
1229 		if (!got_n_override) {
1230 			nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N,
1231 			    NULL);
1232 			ASSERT(nN->t == T_NUM);
1233 			nval = (uint_t)nN->u.ull;
1234 		}
1235 		if (!got_t_override) {
1236 			nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T,
1237 			    NULL);
1238 			ASSERT(nT->t == T_TIMEVAL);
1239 			tval = (hrtime_t)nT->u.ull;
1240 		} else {
1241 			const unsigned long long *ullp;
1242 			const char *suffix;
1243 			int len;
1244 
1245 			len = strspn(ptr, "0123456789");
1246 			suffix = stable(&ptr[len]);
1247 			ullp = (unsigned long long *)lut_lookup(Timesuffixlut,
1248 			    (void *)suffix, NULL);
1249 			ptr[len] = '\0';
1250 			tval = strtoull(ptr, NULL, 0) * (ullp ? *ullp : 1ll);
1251 			FREE(ptr);
1252 		}
1253 		fmd_serd_create(hdl, serdname, nval, tval);
1254 	}
1255 
1256 	newentp = MALLOC(sizeof (*newentp));
1257 	newentp->ename = stable(serdclass);
1258 	FREE(serdclass);
1259 	newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname);
1260 	newentp->hdl = hdl;
1261 	if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) {
1262 		SerdEngines = lut_add(SerdEngines, (void *)newentp,
1263 		    (void *)newentp, (lut_cmp)serd_cmp);
1264 		Serd_need_save = 1;
1265 		serd_save();
1266 	} else {
1267 		FREE(newentp);
1268 	}
1269 
1270 
1271 	/*
1272 	 * increment SERD engine.  if engine fires, reset serd
1273 	 * engine and return trip_strcode if required.
1274 	 */
1275 	for (i = 0; i < serdincrement; i++) {
1276 		if (fmd_serd_record(hdl, serdname, ffep)) {
1277 			fmd_case_add_serd(hdl, fmcase, serdname);
1278 			fmd_serd_reset(hdl, serdname);
1279 
1280 			if (ippp) {
1281 				struct node *tripinst =
1282 				    lut_lookup(serdinst->u.stmt.lutp,
1283 				    (void *)L_trip, NULL);
1284 				ASSERT(tripinst != NULL);
1285 				*enamep = tripinst->u.event.ename->u.name.s;
1286 				*ippp = ipath(tripinst->u.event.epname);
1287 				out(O_ALTFP|O_NONL,
1288 				    "[engine fired: %s, sending: ", serdname);
1289 				ipath_print(O_ALTFP|O_NONL, *enamep, *ippp);
1290 				out(O_ALTFP, "]");
1291 			} else {
1292 				out(O_ALTFP, "[engine fired: %s, no trip]",
1293 				    serdname);
1294 			}
1295 			FREE(serdname);
1296 			return (1);
1297 		}
1298 	}
1299 
1300 	FREE(serdname);
1301 	return (0);
1302 }
1303 
1304 /*
1305  * search a suspect list for upsets.  feed each upset to serd_eval() and
1306  * build up tripped[], an array of ereports produced by the firing of
1307  * any SERD engines.  then feed each ereport back into
1308  * fme_receive_report().
1309  *
1310  * returns ntrip, the number of these ereports produced.
1311  */
1312 static int
1313 upsets_eval(struct fme *fmep, fmd_event_t *ffep)
1314 {
1315 	/* we build an array of tripped ereports that we send ourselves */
1316 	struct {
1317 		const char *ename;
1318 		const struct ipath *ipp;
1319 	} *tripped;
1320 	struct event *sp;
1321 	int ntrip, nupset, i;
1322 
1323 	/*
1324 	 * count the number of upsets to determine the upper limit on
1325 	 * expected trip ereport strings.  remember that one upset can
1326 	 * lead to at most one ereport.
1327 	 */
1328 	nupset = 0;
1329 	for (sp = fmep->suspects; sp; sp = sp->suspects) {
1330 		if (sp->t == N_UPSET)
1331 			nupset++;
1332 	}
1333 
1334 	if (nupset == 0)
1335 		return (0);
1336 
1337 	/*
1338 	 * get to this point if we have upsets and expect some trip
1339 	 * ereports
1340 	 */
1341 	tripped = alloca(sizeof (*tripped) * nupset);
1342 	bzero((void *)tripped, sizeof (*tripped) * nupset);
1343 
1344 	ntrip = 0;
1345 	for (sp = fmep->suspects; sp; sp = sp->suspects)
1346 		if (sp->t == N_UPSET &&
1347 		    serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp,
1348 		    &tripped[ntrip].ename, &tripped[ntrip].ipp) == 1)
1349 			ntrip++;
1350 
1351 	for (i = 0; i < ntrip; i++) {
1352 		struct event *ep, *nep;
1353 		struct fme *nfmep;
1354 		fmd_case_t *fmcase;
1355 		const struct ipath *ipp;
1356 		const char *eventstring;
1357 		int prev_verbose;
1358 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1359 		enum fme_state state;
1360 
1361 		/*
1362 		 * First try and evaluate a case with the trip ereport plus
1363 		 * all the other ereports that cause the trip. If that fails
1364 		 * to evaluate then try again with just this ereport on its own.
1365 		 */
1366 		out(O_ALTFP|O_NONL, "fme_receive_report_serd: ");
1367 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1368 		out(O_ALTFP|O_STAMP, NULL);
1369 		ep = fmep->e0;
1370 		eventstring = ep->enode->u.event.ename->u.name.s;
1371 		ipp = ep->ipp;
1372 
1373 		/*
1374 		 * create a duplicate fme and case
1375 		 */
1376 		fmcase = fmd_case_open(fmep->hdl, NULL);
1377 		out(O_ALTFP|O_NONL, "duplicate fme for event [");
1378 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1379 		out(O_ALTFP, " ]");
1380 
1381 		if ((nfmep = newfme(eventstring, ipp, fmep->hdl,
1382 		    fmcase, ffep, ep->nvp)) == NULL) {
1383 			out(O_ALTFP|O_NONL, "[");
1384 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1385 			out(O_ALTFP, " CANNOT DIAGNOSE]");
1386 			continue;
1387 		}
1388 
1389 		Open_fme_count++;
1390 		nfmep->pull = fmep->pull;
1391 		init_fme_bufs(nfmep);
1392 		out(O_ALTFP|O_NONL, "[");
1393 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1394 		out(O_ALTFP, " created FME%d, case %s]", nfmep->id,
1395 		    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
1396 		if (ffep) {
1397 			fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep);
1398 			fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep);
1399 			nfmep->e0r = ffep;
1400 		}
1401 
1402 		/*
1403 		 * add the original ereports
1404 		 */
1405 		for (ep = fmep->observations; ep; ep = ep->observations) {
1406 			eventstring = ep->enode->u.event.ename->u.name.s;
1407 			ipp = ep->ipp;
1408 			out(O_ALTFP|O_NONL, "adding event [");
1409 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1410 			out(O_ALTFP, " ]");
1411 			nep = itree_lookup(nfmep->eventtree, eventstring, ipp);
1412 			if (nep->count++ == 0) {
1413 				nep->observations = nfmep->observations;
1414 				nfmep->observations = nep;
1415 				serialize_observation(nfmep, eventstring, ipp);
1416 				nep->nvp = evnv_dupnvl(ep->nvp);
1417 			}
1418 			if (ep->ffep && ep->ffep != ffep)
1419 				fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase,
1420 				    ep->ffep);
1421 			stats_counter_bump(nfmep->Rcount);
1422 		}
1423 
1424 		/*
1425 		 * add the serd trigger ereport
1426 		 */
1427 		if ((ep = itree_lookup(nfmep->eventtree, tripped[i].ename,
1428 		    tripped[i].ipp)) == NULL) {
1429 			/*
1430 			 * The trigger ereport is not in the instance tree. It
1431 			 * was presumably removed by prune_propagations() as
1432 			 * this combination of events is not present in the
1433 			 * rules.
1434 			 */
1435 			out(O_ALTFP, "upsets_eval: e0 not in instance tree");
1436 			Undiag_reason = UD_VAL_BADEVENTI;
1437 			goto retry_lone_ereport;
1438 		}
1439 		out(O_ALTFP|O_NONL, "adding event [");
1440 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1441 		out(O_ALTFP, " ]");
1442 		nfmep->ecurrent = ep;
1443 		ep->nvp = NULL;
1444 		ep->count = 1;
1445 		ep->observations = nfmep->observations;
1446 		nfmep->observations = ep;
1447 
1448 		/*
1449 		 * just peek first.
1450 		 */
1451 		nfmep->peek = 1;
1452 		prev_verbose = Verbose;
1453 		if (Debug == 0)
1454 			Verbose = 0;
1455 		lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep);
1456 		state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay);
1457 		nfmep->peek = 0;
1458 		Verbose = prev_verbose;
1459 		if (state == FME_DISPROVED) {
1460 			out(O_ALTFP, "upsets_eval: hypothesis disproved");
1461 			Undiag_reason = UD_VAL_UNSOLVD;
1462 retry_lone_ereport:
1463 			/*
1464 			 * However the trigger ereport on its own might be
1465 			 * diagnosable, so check for that. Undo the new fme
1466 			 * and case we just created and call fme_receive_report.
1467 			 */
1468 			out(O_ALTFP|O_NONL, "[");
1469 			ipath_print(O_ALTFP|O_NONL, tripped[i].ename,
1470 			    tripped[i].ipp);
1471 			out(O_ALTFP, " retrying with just trigger ereport]");
1472 			itree_free(nfmep->eventtree);
1473 			nfmep->eventtree = NULL;
1474 			structconfig_free(nfmep->config);
1475 			nfmep->config = NULL;
1476 			destroy_fme_bufs(nfmep);
1477 			fmd_case_close(nfmep->hdl, nfmep->fmcase);
1478 			fme_receive_report(fmep->hdl, ffep,
1479 			    tripped[i].ename, tripped[i].ipp, NULL);
1480 			continue;
1481 		}
1482 
1483 		/*
1484 		 * and evaluate
1485 		 */
1486 		serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp);
1487 		fme_eval(nfmep, ffep);
1488 	}
1489 
1490 	return (ntrip);
1491 }
1492 
1493 /*
1494  * fme_receive_external_report -- call when an external ereport comes in
1495  *
1496  * this routine just converts the relevant information from the ereport
1497  * into a format used internally and passes it on to fme_receive_report().
1498  */
1499 void
1500 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1501     const char *class)
1502 {
1503 	struct node		*epnamenp;
1504 	fmd_case_t		*fmcase;
1505 	const struct ipath	*ipp;
1506 	nvlist_t		*detector = NULL;
1507 
1508 	class = stable(class);
1509 
1510 	/* Get the component path from the ereport */
1511 	epnamenp = platform_getpath(nvl);
1512 
1513 	/* See if we ended up without a path. */
1514 	if (epnamenp == NULL) {
1515 		/* See if class permits silent discard on unknown component. */
1516 		if (lut_lookup(Ereportenames_discard, (void *)class, NULL)) {
1517 			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
1518 			    "to component path, but silent discard allowed.",
1519 			    class);
1520 		} else {
1521 			/*
1522 			 * XFILE: Failure to find a component is bad unless
1523 			 * 'discard_if_config_unknown=1' was specified in the
1524 			 * ereport definition. Indicate undiagnosable.
1525 			 */
1526 			Undiag_reason = UD_VAL_NOPATH;
1527 			fmcase = fmd_case_open(hdl, NULL);
1528 
1529 			/*
1530 			 * We don't have a component path here (which means that
1531 			 * the detector was not in hc-scheme and couldn't be
1532 			 * converted to hc-scheme. Report the raw detector as
1533 			 * the suspect resource if there is one.
1534 			 */
1535 			(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
1536 			    &detector);
1537 			publish_undiagnosable(hdl, ffep, fmcase, detector,
1538 			    (char *)class);
1539 		}
1540 		return;
1541 	}
1542 
1543 	ipp = ipath(epnamenp);
1544 	tree_free(epnamenp);
1545 	fme_receive_report(hdl, ffep, class, ipp, nvl);
1546 }
1547 
1548 /*ARGSUSED*/
1549 void
1550 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1551     const char *eventstring)
1552 {
1553 	char *uuid;
1554 	nvlist_t **nva;
1555 	uint_t nvc;
1556 	const struct ipath *ipp;
1557 
1558 	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
1559 	    nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
1560 	    &nva, &nvc) != 0) {
1561 		out(O_ALTFP, "No uuid or fault list for list.repaired event");
1562 		return;
1563 	}
1564 
1565 	out(O_ALTFP, "Processing list.repaired from case %s", uuid);
1566 
1567 	while (nvc-- != 0) {
1568 		/*
1569 		 * Reset any istat or serd engine associated with this path.
1570 		 */
1571 		char *path;
1572 
1573 		if ((ipp = platform_fault2ipath(*nva++)) == NULL)
1574 			continue;
1575 
1576 		path = ipath2str(NULL, ipp);
1577 		out(O_ALTFP, "fme_receive_repair_list: resetting state for %s",
1578 		    path);
1579 		FREE(path);
1580 
1581 		lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp);
1582 		istat_save();
1583 
1584 		lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp);
1585 		serd_save();
1586 	}
1587 }
1588 
/*
 * fme_receive_topology_change -- react to a topology change
 *
 * walks every entry of Istats with istat_counter_topo_chg_cb and every
 * entry of SerdEngines with serd_topo_chg_cb, calling the matching
 * save routine after each walk.  what the callbacks do to an entry is
 * defined elsewhere in this file.
 */
/*ARGSUSED*/
void
fme_receive_topology_change(void)
{
	/* istat counters first, then persist them */
	lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL);
	istat_save();

	/* then the SERD engines, likewise persisted */
	lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL);
	serd_save();
}
1599 
1600 static int mark_arrows(struct fme *fmep, struct event *ep, int mark,
1601     unsigned long long at_latest_by, unsigned long long *pdelay, int keep);
1602 
1603 /* ARGSUSED */
1604 static void
1605 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
1606 {
1607 	struct bubble *bp;
1608 	struct arrowlist *ap;
1609 
1610 	ep->cached_state = 0;
1611 	ep->keep_in_tree = 0;
1612 	for (bp = itree_next_bubble(ep, NULL); bp;
1613 	    bp = itree_next_bubble(ep, bp)) {
1614 		if (bp->t != B_FROM)
1615 			continue;
1616 		bp->mark = 0;
1617 		for (ap = itree_next_arrow(bp, NULL); ap;
1618 		    ap = itree_next_arrow(bp, ap))
1619 			ap->arrowp->mark = 0;
1620 	}
1621 }
1622 
1623 static void
1624 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
1625     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
1626 {
1627 	struct event *ep;
1628 	struct fme *fmep = NULL;
1629 	struct fme *ofmep = NULL;
1630 	struct fme *cfmep, *svfmep;
1631 	int matched = 0;
1632 	nvlist_t *defect;
1633 	fmd_case_t *fmcase;
1634 	char *reason;
1635 
1636 	out(O_ALTFP|O_NONL, "fme_receive_report: ");
1637 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1638 	out(O_ALTFP|O_STAMP, NULL);
1639 
1640 	/* decide which FME it goes to */
1641 	for (fmep = FMElist; fmep; fmep = fmep->next) {
1642 		int prev_verbose;
1643 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1644 		enum fme_state state;
1645 		nvlist_t *pre_peek_nvp = NULL;
1646 
1647 		if (fmep->overflow) {
1648 			if (!(fmd_case_closed(fmep->hdl, fmep->fmcase)))
1649 				ofmep = fmep;
1650 
1651 			continue;
1652 		}
1653 
1654 		/*
1655 		 * ignore solved or closed cases
1656 		 */
1657 		if (fmep->posted_suspects ||
1658 		    fmd_case_solved(fmep->hdl, fmep->fmcase) ||
1659 		    fmd_case_closed(fmep->hdl, fmep->fmcase))
1660 			continue;
1661 
1662 		/* look up event in event tree for this FME */
1663 		if ((ep = itree_lookup(fmep->eventtree,
1664 		    eventstring, ipp)) == NULL)
1665 			continue;
1666 
1667 		/* note observation */
1668 		fmep->ecurrent = ep;
1669 		if (ep->count++ == 0) {
1670 			/* link it into list of observations seen */
1671 			ep->observations = fmep->observations;
1672 			fmep->observations = ep;
1673 			ep->nvp = evnv_dupnvl(nvl);
1674 		} else {
1675 			/* use new payload values for peek */
1676 			pre_peek_nvp = ep->nvp;
1677 			ep->nvp = evnv_dupnvl(nvl);
1678 		}
1679 
1680 		/* tell hypothesise() not to mess with suspect list */
1681 		fmep->peek = 1;
1682 
1683 		/* don't want this to be verbose (unless Debug is set) */
1684 		prev_verbose = Verbose;
1685 		if (Debug == 0)
1686 			Verbose = 0;
1687 
1688 		lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
1689 		state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
1690 
1691 		fmep->peek = 0;
1692 
1693 		/* put verbose flag back */
1694 		Verbose = prev_verbose;
1695 
1696 		if (state != FME_DISPROVED) {
1697 			/* found an FME that explains the ereport */
1698 			matched++;
1699 			out(O_ALTFP|O_NONL, "[");
1700 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1701 			out(O_ALTFP, " explained by FME%d]", fmep->id);
1702 
1703 			if (pre_peek_nvp)
1704 				nvlist_free(pre_peek_nvp);
1705 
1706 			if (ep->count == 1)
1707 				serialize_observation(fmep, eventstring, ipp);
1708 
1709 			if (ffep) {
1710 				fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1711 				ep->ffep = ffep;
1712 			}
1713 
1714 			stats_counter_bump(fmep->Rcount);
1715 
1716 			/* re-eval FME */
1717 			fme_eval(fmep, ffep);
1718 		} else {
1719 
1720 			/* not a match, undo noting of observation */
1721 			fmep->ecurrent = NULL;
1722 			if (--ep->count == 0) {
1723 				/* unlink it from observations */
1724 				fmep->observations = ep->observations;
1725 				ep->observations = NULL;
1726 				nvlist_free(ep->nvp);
1727 				ep->nvp = NULL;
1728 			} else {
1729 				nvlist_free(ep->nvp);
1730 				ep->nvp = pre_peek_nvp;
1731 			}
1732 		}
1733 	}
1734 
1735 	if (matched)
1736 		return;	/* explained by at least one existing FME */
1737 
1738 	/* clean up closed fmes */
1739 	cfmep = ClosedFMEs;
1740 	while (cfmep != NULL) {
1741 		svfmep = cfmep->next;
1742 		destroy_fme(cfmep);
1743 		cfmep = svfmep;
1744 	}
1745 	ClosedFMEs = NULL;
1746 
1747 	if (ofmep) {
1748 		out(O_ALTFP|O_NONL, "[");
1749 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1750 		out(O_ALTFP, " ADDING TO OVERFLOW FME]");
1751 		if (ffep)
1752 			fmd_case_add_ereport(hdl, ofmep->fmcase, ffep);
1753 
1754 		return;
1755 
1756 	} else if (Max_fme && (Open_fme_count >= Max_fme)) {
1757 		out(O_ALTFP|O_NONL, "[");
1758 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1759 		out(O_ALTFP, " MAX OPEN FME REACHED]");
1760 
1761 		fmcase = fmd_case_open(hdl, NULL);
1762 
1763 		/* Create overflow fme */
1764 		if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep,
1765 		    nvl)) == NULL) {
1766 			out(O_ALTFP|O_NONL, "[");
1767 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1768 			out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]");
1769 			return;
1770 		}
1771 
1772 		Open_fme_count++;
1773 
1774 		init_fme_bufs(fmep);
1775 		fmep->overflow = B_TRUE;
1776 
1777 		if (ffep)
1778 			fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1779 
1780 		Undiag_reason = UD_VAL_MAXFME;
1781 		defect = fmd_nvl_create_fault(hdl,
1782 		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
1783 		reason = undiag_2reason_str(Undiag_reason, NULL);
1784 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
1785 		FREE(reason);
1786 		fmd_case_add_suspect(hdl, fmep->fmcase, defect);
1787 		fmd_case_solve(hdl, fmep->fmcase);
1788 		Undiag_reason = UD_VAL_UNKNOWN;
1789 		return;
1790 	}
1791 
1792 	/* open a case */
1793 	fmcase = fmd_case_open(hdl, NULL);
1794 
1795 	/* start a new FME */
1796 	if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep, nvl)) == NULL) {
1797 		out(O_ALTFP|O_NONL, "[");
1798 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1799 		out(O_ALTFP, " CANNOT DIAGNOSE]");
1800 		return;
1801 	}
1802 
1803 	Open_fme_count++;
1804 
1805 	init_fme_bufs(fmep);
1806 
1807 	out(O_ALTFP|O_NONL, "[");
1808 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1809 	out(O_ALTFP, " created FME%d, case %s]", fmep->id,
1810 	    fmd_case_uuid(hdl, fmep->fmcase));
1811 
1812 	ep = fmep->e0;
1813 	ASSERT(ep != NULL);
1814 
1815 	/* note observation */
1816 	fmep->ecurrent = ep;
1817 	if (ep->count++ == 0) {
1818 		/* link it into list of observations seen */
1819 		ep->observations = fmep->observations;
1820 		fmep->observations = ep;
1821 		ep->nvp = evnv_dupnvl(nvl);
1822 		serialize_observation(fmep, eventstring, ipp);
1823 	} else {
1824 		/* new payload overrides any previous */
1825 		nvlist_free(ep->nvp);
1826 		ep->nvp = evnv_dupnvl(nvl);
1827 	}
1828 
1829 	stats_counter_bump(fmep->Rcount);
1830 
1831 	if (ffep) {
1832 		fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1833 		fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
1834 		fmep->e0r = ffep;
1835 		ep->ffep = ffep;
1836 	}
1837 
1838 	/* give the diagnosis algorithm a shot at the new FME state */
1839 	fme_eval(fmep, ffep);
1840 }
1841 
1842 void
1843 fme_status(int flags)
1844 {
1845 	struct fme *fmep;
1846 
1847 	if (FMElist == NULL) {
1848 		out(flags, "No fault management exercises underway.");
1849 		return;
1850 	}
1851 
1852 	for (fmep = FMElist; fmep; fmep = fmep->next)
1853 		fme_print(flags, fmep);
1854 }
1855 
1856 /*
1857  * "indent" routines used mostly for nicely formatted debug output, but also
1858  * for sanity checking for infinite recursion bugs.
1859  */
1860 
1861 #define	MAX_INDENT 1024
1862 static const char *indent_s[MAX_INDENT];
1863 static int current_indent;
1864 
1865 static void
1866 indent_push(const char *s)
1867 {
1868 	if (current_indent < MAX_INDENT)
1869 		indent_s[current_indent++] = s;
1870 	else
1871 		out(O_DIE, "unexpected recursion depth (%d)", current_indent);
1872 }
1873 
1874 static void
1875 indent_set(const char *s)
1876 {
1877 	current_indent = 0;
1878 	indent_push(s);
1879 }
1880 
1881 static void
1882 indent_pop(void)
1883 {
1884 	if (current_indent > 0)
1885 		current_indent--;
1886 	else
1887 		out(O_DIE, "recursion underflow");
1888 }
1889 
1890 static void
1891 indent(void)
1892 {
1893 	int i;
1894 	if (!Verbose)
1895 		return;
1896 	for (i = 0; i < current_indent; i++)
1897 		out(O_ALTFP|O_VERB|O_NONL, indent_s[i]);
1898 }
1899 
1900 #define	SLNEW		1
1901 #define	SLCHANGED	2
1902 #define	SLWAIT		3
1903 #define	SLDISPROVED	4
1904 
1905 static void
1906 print_suspects(int circumstance, struct fme *fmep)
1907 {
1908 	struct event *ep;
1909 
1910 	out(O_ALTFP|O_NONL, "[");
1911 	if (circumstance == SLCHANGED) {
1912 		out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, "
1913 		    "suspect list:", fmep->id, fme_state2str(fmep->state));
1914 	} else if (circumstance == SLWAIT) {
1915 		out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id,
1916 		    fmep->timer);
1917 		ptree_timeval(O_ALTFP|O_NONL, &fmep->wull);
1918 	} else if (circumstance == SLDISPROVED) {
1919 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id);
1920 	} else {
1921 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id);
1922 	}
1923 
1924 	if (circumstance == SLWAIT || circumstance == SLDISPROVED) {
1925 		out(O_ALTFP, "]");
1926 		return;
1927 	}
1928 
1929 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
1930 		out(O_ALTFP|O_NONL, " ");
1931 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
1932 	}
1933 	out(O_ALTFP, "]");
1934 }
1935 
1936 static struct node *
1937 eventprop_lookup(struct event *ep, const char *propname)
1938 {
1939 	return (lut_lookup(ep->props, (void *)propname, NULL));
1940 }
1941 
1942 #define	MAXDIGITIDX	23
1943 static char numbuf[MAXDIGITIDX + 1];
1944 
1945 static int
1946 node2uint(struct node *n, uint_t *valp)
1947 {
1948 	struct evalue value;
1949 	struct lut *globals = NULL;
1950 
1951 	if (n == NULL)
1952 		return (1);
1953 
1954 	/*
1955 	 * check value.v since we are being asked to convert an unsigned
1956 	 * long long int to an unsigned int
1957 	 */
1958 	if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) ||
1959 	    value.t != UINT64 || value.v > (1ULL << 32))
1960 		return (1);
1961 
1962 	*valp = (uint_t)value.v;
1963 
1964 	return (0);
1965 }
1966 
1967 static nvlist_t *
1968 node2fmri(struct node *n)
1969 {
1970 	nvlist_t **pa, *f, *p;
1971 	struct node *nc;
1972 	uint_t depth = 0;
1973 	char *numstr, *nullbyte;
1974 	char *failure;
1975 	int err, i;
1976 
1977 	/* XXX do we need to be able to handle a non-T_NAME node? */
1978 	if (n == NULL || n->t != T_NAME)
1979 		return (NULL);
1980 
1981 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
1982 		if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM)
1983 			break;
1984 		depth++;
1985 	}
1986 
1987 	if (nc != NULL) {
1988 		/* We bailed early, something went wrong */
1989 		return (NULL);
1990 	}
1991 
1992 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
1993 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
1994 	pa = alloca(depth * sizeof (nvlist_t *));
1995 	for (i = 0; i < depth; i++)
1996 		pa[i] = NULL;
1997 
1998 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
1999 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
2000 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
2001 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
2002 	if (err != 0) {
2003 		failure = "basic construction of FMRI failed";
2004 		goto boom;
2005 	}
2006 
2007 	numbuf[MAXDIGITIDX] = '\0';
2008 	nullbyte = &numbuf[MAXDIGITIDX];
2009 	i = 0;
2010 
2011 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
2012 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2013 		if (err != 0) {
2014 			failure = "alloc of an hc-pair failed";
2015 			goto boom;
2016 		}
2017 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s);
2018 		numstr = ulltostr(nc->u.name.child->u.ull, nullbyte);
2019 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2020 		if (err != 0) {
2021 			failure = "construction of an hc-pair failed";
2022 			goto boom;
2023 		}
2024 		pa[i++] = p;
2025 	}
2026 
2027 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2028 	if (err == 0) {
2029 		for (i = 0; i < depth; i++)
2030 			if (pa[i] != NULL)
2031 				nvlist_free(pa[i]);
2032 		return (f);
2033 	}
2034 	failure = "addition of hc-pair array to FMRI failed";
2035 
2036 boom:
2037 	for (i = 0; i < depth; i++)
2038 		if (pa[i] != NULL)
2039 			nvlist_free(pa[i]);
2040 	nvlist_free(f);
2041 	out(O_DIE, "%s", failure);
2042 	/*NOTREACHED*/
2043 	return (NULL);
2044 }
2045 
/*
 * One element of an ipath cache entry.  An entry is an array of these,
 * terminated by an element whose s member is NULL.
 */
struct ipath {
	const char *s;		/* name of the component (in stable) */
	int i;			/* instance number of the component */
};
2051 
2052 static nvlist_t *
2053 ipath2fmri(struct ipath *ipath)
2054 {
2055 	nvlist_t **pa, *f, *p;
2056 	uint_t depth = 0;
2057 	char *numstr, *nullbyte;
2058 	char *failure;
2059 	int err, i;
2060 	struct ipath *ipp;
2061 
2062 	for (ipp = ipath; ipp->s != NULL; ipp++)
2063 		depth++;
2064 
2065 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
2066 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
2067 	pa = alloca(depth * sizeof (nvlist_t *));
2068 	for (i = 0; i < depth; i++)
2069 		pa[i] = NULL;
2070 
2071 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
2072 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
2073 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
2074 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
2075 	if (err != 0) {
2076 		failure = "basic construction of FMRI failed";
2077 		goto boom;
2078 	}
2079 
2080 	numbuf[MAXDIGITIDX] = '\0';
2081 	nullbyte = &numbuf[MAXDIGITIDX];
2082 	i = 0;
2083 
2084 	for (ipp = ipath; ipp->s != NULL; ipp++) {
2085 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2086 		if (err != 0) {
2087 			failure = "alloc of an hc-pair failed";
2088 			goto boom;
2089 		}
2090 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s);
2091 		numstr = ulltostr(ipp->i, nullbyte);
2092 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2093 		if (err != 0) {
2094 			failure = "construction of an hc-pair failed";
2095 			goto boom;
2096 		}
2097 		pa[i++] = p;
2098 	}
2099 
2100 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2101 	if (err == 0) {
2102 		for (i = 0; i < depth; i++)
2103 			if (pa[i] != NULL)
2104 				nvlist_free(pa[i]);
2105 		return (f);
2106 	}
2107 	failure = "addition of hc-pair array to FMRI failed";
2108 
2109 boom:
2110 	for (i = 0; i < depth; i++)
2111 		if (pa[i] != NULL)
2112 			nvlist_free(pa[i]);
2113 	nvlist_free(f);
2114 	out(O_DIE, "%s", failure);
2115 	/*NOTREACHED*/
2116 	return (NULL);
2117 }
2118 
/*
 * percentof -- express part as a percentage of whole, rounded to the
 *	nearest integer (halves round up).
 *
 * The scaling is done in 64-bit arithmetic so a large part cannot wrap
 * before the division.  A whole of zero yields 0 instead of dividing by
 * zero.  The result is narrowed to uint8_t on return.
 */
static uint8_t
percentof(uint_t part, uint_t whole)
{
	unsigned long long tenths;

	if (whole == 0)
		return (0);

	/* widen first: part * 1000 overflows a 32-bit uint_t above ~4.29M */
	tenths = ((unsigned long long)part * 1000) / whole;

	return ((uint8_t)((tenths / 10) + (((tenths % 10) >= 5) ? 1 : 0)));
}
2126 
2127 struct rsl {
2128 	struct event *suspect;
2129 	nvlist_t *asru;
2130 	nvlist_t *fru;
2131 	nvlist_t *rsrc;
2132 };
2133 
2134 static void publish_suspects(struct fme *fmep, struct rsl *srl);
2135 
2136 /*
2137  *  rslfree -- free internal members of struct rsl not expected to be
2138  *	freed elsewhere.
2139  */
2140 static void
2141 rslfree(struct rsl *freeme)
2142 {
2143 	if (freeme->asru != NULL)
2144 		nvlist_free(freeme->asru);
2145 	if (freeme->fru != NULL)
2146 		nvlist_free(freeme->fru);
2147 	if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru)
2148 		nvlist_free(freeme->rsrc);
2149 }
2150 
2151 /*
2152  *  rslcmp -- compare two rsl structures.  Use the following
2153  *	comparisons to establish cardinality:
2154  *
2155  *	1. Name of the suspect's class. (simple strcmp)
2156  *	2. Name of the suspect's ASRU. (trickier, since nvlist)
2157  *
2158  */
2159 static int
2160 rslcmp(const void *a, const void *b)
2161 {
2162 	struct rsl *r1 = (struct rsl *)a;
2163 	struct rsl *r2 = (struct rsl *)b;
2164 	int rv;
2165 
2166 	rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s,
2167 	    r2->suspect->enode->u.event.ename->u.name.s);
2168 	if (rv != 0)
2169 		return (rv);
2170 
2171 	if (r1->rsrc == NULL && r2->rsrc == NULL)
2172 		return (0);
2173 	if (r1->rsrc == NULL)
2174 		return (-1);
2175 	if (r2->rsrc == NULL)
2176 		return (1);
2177 	return (evnv_cmpnvl(r1->rsrc, r2->rsrc, 0));
2178 }
2179 
2180 /*
2181  * get_resources -- for a given suspect, determine what ASRU, FRU and
2182  *     RSRC nvlists should be advertised in the final suspect list.
2183  */
2184 void
2185 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot)
2186 {
2187 	struct node *asrudef, *frudef;
2188 	nvlist_t *asru, *fru;
2189 	nvlist_t *rsrc = NULL;
2190 	char *pathstr;
2191 
2192 	/*
2193 	 * First find any ASRU and/or FRU defined in the
2194 	 * initial fault tree.
2195 	 */
2196 	asrudef = eventprop_lookup(sp, L_ASRU);
2197 	frudef = eventprop_lookup(sp, L_FRU);
2198 
2199 	/*
2200 	 * Create FMRIs based on those definitions
2201 	 */
2202 	asru = node2fmri(asrudef);
2203 	fru = node2fmri(frudef);
2204 	pathstr = ipath2str(NULL, sp->ipp);
2205 
2206 	/*
2207 	 *  Allow for platform translations of the FMRIs
2208 	 */
2209 	platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc,
2210 	    pathstr);
2211 
2212 	FREE(pathstr);
2213 	rsrcs->suspect = sp;
2214 	rsrcs->asru = asru;
2215 	rsrcs->fru = fru;
2216 	rsrcs->rsrc = rsrc;
2217 }
2218 
2219 /*
2220  * trim_suspects -- prior to publishing, we may need to remove some
2221  *    suspects from the list.  If we're auto-closing upsets, we don't
2222  *    want any of those in the published list.  If the ASRUs for multiple
2223  *    defects resolve to the same ASRU (driver) we only want to publish
2224  *    that as a single suspect.
2225  */
2226 static int
2227 trim_suspects(struct fme *fmep, struct rsl *begin, struct rsl *begin2,
2228     fmd_event_t *ffep)
2229 {
2230 	struct event *ep;
2231 	struct rsl *rp = begin;
2232 	struct rsl *rp2 = begin2;
2233 	int mess_zero_count = 0;
2234 	int serd_rval;
2235 	uint_t messval;
2236 
2237 	/* remove any unwanted upsets and populate our array */
2238 	for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
2239 		if (is_upset(ep->t))
2240 			continue;
2241 		serd_rval = serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, ep,
2242 		    NULL, NULL);
2243 		if (serd_rval == 0)
2244 			continue;
2245 		if (node2uint(eventprop_lookup(ep, L_message),
2246 		    &messval) == 0 && messval == 0) {
2247 			get_resources(ep, rp2, fmep->config);
2248 			rp2++;
2249 			mess_zero_count++;
2250 		} else {
2251 			get_resources(ep, rp, fmep->config);
2252 			rp++;
2253 			fmep->nsuspects++;
2254 		}
2255 	}
2256 	return (mess_zero_count);
2257 }
2258 
2259 /*
2260  * addpayloadprop -- add a payload prop to a problem
2261  */
2262 static void
2263 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault)
2264 {
2265 	nvlist_t *rsrc, *hcs;
2266 
2267 	ASSERT(fault != NULL);
2268 	ASSERT(lhs != NULL);
2269 	ASSERT(rhs != NULL);
2270 
2271 	if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, &rsrc) != 0)
2272 		out(O_DIE, "cannot add payloadprop \"%s\" to fault", lhs);
2273 
2274 	if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0) {
2275 		out(O_ALTFP|O_VERB2, "addpayloadprop: create hc_specific");
2276 		if (nvlist_xalloc(&hcs, NV_UNIQUE_NAME, &Eft_nv_hdl) != 0)
2277 			out(O_DIE,
2278 			    "cannot add payloadprop \"%s\" to fault", lhs);
2279 		if (nvlist_add_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, hcs) != 0)
2280 			out(O_DIE,
2281 			    "cannot add payloadprop \"%s\" to fault", lhs);
2282 		nvlist_free(hcs);
2283 		if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0)
2284 			out(O_DIE,
2285 			    "cannot add payloadprop \"%s\" to fault", lhs);
2286 	} else
2287 		out(O_ALTFP|O_VERB2, "addpayloadprop: reuse hc_specific");
2288 
2289 	if (rhs->t == UINT64) {
2290 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v);
2291 
2292 		if (nvlist_add_uint64(hcs, lhs, rhs->v) != 0)
2293 			out(O_DIE,
2294 			    "cannot add payloadprop \"%s\" to fault", lhs);
2295 	} else {
2296 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"",
2297 		    lhs, (char *)(uintptr_t)rhs->v);
2298 
2299 		if (nvlist_add_string(hcs, lhs, (char *)(uintptr_t)rhs->v) != 0)
2300 			out(O_DIE,
2301 			    "cannot add payloadprop \"%s\" to fault", lhs);
2302 	}
2303 }
2304 
/* scratch state shared by istat_save() and its lut_walk() callbacks */
static char *Istatbuf, *Istatbufptr;	/* buffer start and write position */
static int Istatsz;			/* serialized size in bytes */
2308 
2309 /*
2310  * istataddsize -- calculate size of istat and add it to Istatsz
2311  */
2312 /*ARGSUSED2*/
2313 static void
2314 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2315 {
2316 	int val;
2317 
2318 	ASSERT(lhs != NULL);
2319 	ASSERT(rhs != NULL);
2320 
2321 	if ((val = stats_counter_value(rhs)) == 0)
2322 		return;	/* skip zero-valued stats */
2323 
2324 	/* count up the size of the stat name */
2325 	Istatsz += ipath2strlen(lhs->ename, lhs->ipath);
2326 	Istatsz++;	/* for the trailing NULL byte */
2327 
2328 	/* count up the size of the stat value */
2329 	Istatsz += snprintf(NULL, 0, "%d", val);
2330 	Istatsz++;	/* for the trailing NULL byte */
2331 }
2332 
2333 /*
2334  * istat2str -- serialize an istat, writing result to *Istatbufptr
2335  */
2336 /*ARGSUSED2*/
2337 static void
2338 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2339 {
2340 	char *str;
2341 	int len;
2342 	int val;
2343 
2344 	ASSERT(lhs != NULL);
2345 	ASSERT(rhs != NULL);
2346 
2347 	if ((val = stats_counter_value(rhs)) == 0)
2348 		return;	/* skip zero-valued stats */
2349 
2350 	/* serialize the stat name */
2351 	str = ipath2str(lhs->ename, lhs->ipath);
2352 	len = strlen(str);
2353 
2354 	ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]);
2355 	(void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr);
2356 	Istatbufptr += len;
2357 	FREE(str);
2358 	*Istatbufptr++ = '\0';
2359 
2360 	/* serialize the stat value */
2361 	Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr,
2362 	    "%d", val);
2363 	*Istatbufptr++ = '\0';
2364 
2365 	ASSERT(Istatbufptr <= &Istatbuf[Istatsz]);
2366 }
2367 
2368 void
2369 istat_save()
2370 {
2371 	if (Istat_need_save == 0)
2372 		return;
2373 
2374 	/* figure out how big the serialzed info is */
2375 	Istatsz = 0;
2376 	lut_walk(Istats, (lut_cb)istataddsize, NULL);
2377 
2378 	if (Istatsz == 0) {
2379 		/* no stats to save */
2380 		fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2381 		return;
2382 	}
2383 
2384 	/* create the serialized buffer */
2385 	Istatbufptr = Istatbuf = MALLOC(Istatsz);
2386 	lut_walk(Istats, (lut_cb)istat2str, NULL);
2387 
2388 	/* clear out current saved stats */
2389 	fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2390 
2391 	/* write out the new version */
2392 	fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz);
2393 	FREE(Istatbuf);
2394 
2395 	Istat_need_save = 0;
2396 }
2397 
2398 int
2399 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2)
2400 {
2401 	if (ent1->ename != ent2->ename)
2402 		return (ent2->ename - ent1->ename);
2403 	if (ent1->ipath != ent2->ipath)
2404 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2405 
2406 	return (0);
2407 }
2408 
2409 /*
2410  * istat-verify -- verify the component associated with a stat still exists
2411  *
2412  * if the component no longer exists, this routine resets the stat and
2413  * returns 0.  if the component still exists, it returns 1.
2414  */
2415 static int
2416 istat_verify(struct node *snp, struct istat_entry *entp)
2417 {
2418 	struct stats *statp;
2419 	nvlist_t *fmri;
2420 
2421 	fmri = node2fmri(snp->u.event.epname);
2422 	if (platform_path_exists(fmri)) {
2423 		nvlist_free(fmri);
2424 		return (1);
2425 	}
2426 	nvlist_free(fmri);
2427 
2428 	/* component no longer in system.  zero out the associated stats */
2429 	if ((statp = (struct stats *)
2430 	    lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
2431 	    stats_counter_value(statp) == 0)
2432 		return (0);	/* stat is already reset */
2433 
2434 	Istat_need_save = 1;
2435 	stats_counter_reset(statp);
2436 	return (0);
2437 }
2438 
2439 static void
2440 istat_bump(struct node *snp, int n)
2441 {
2442 	struct stats *statp;
2443 	struct istat_entry ent;
2444 
2445 	ASSERT(snp != NULL);
2446 	ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
2447 	ASSERT(snp->u.event.epname != NULL);
2448 
2449 	/* class name should be hoisted into a single stable entry */
2450 	ASSERT(snp->u.event.ename->u.name.next == NULL);
2451 	ent.ename = snp->u.event.ename->u.name.s;
2452 	ent.ipath = ipath(snp->u.event.epname);
2453 
2454 	if (!istat_verify(snp, &ent)) {
2455 		/* component no longer exists in system, nothing to do */
2456 		return;
2457 	}
2458 
2459 	if ((statp = (struct stats *)
2460 	    lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
2461 		/* need to create the counter */
2462 		int cnt = 0;
2463 		struct node *np;
2464 		char *sname;
2465 		char *snamep;
2466 		struct istat_entry *newentp;
2467 
2468 		/* count up the size of the stat name */
2469 		np = snp->u.event.ename;
2470 		while (np != NULL) {
2471 			cnt += strlen(np->u.name.s);
2472 			cnt++;	/* for the '.' or '@' */
2473 			np = np->u.name.next;
2474 		}
2475 		np = snp->u.event.epname;
2476 		while (np != NULL) {
2477 			cnt += snprintf(NULL, 0, "%s%llu",
2478 			    np->u.name.s, np->u.name.child->u.ull);
2479 			cnt++;	/* for the '/' or trailing NULL byte */
2480 			np = np->u.name.next;
2481 		}
2482 
2483 		/* build the stat name */
2484 		snamep = sname = alloca(cnt);
2485 		np = snp->u.event.ename;
2486 		while (np != NULL) {
2487 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2488 			    "%s", np->u.name.s);
2489 			np = np->u.name.next;
2490 			if (np)
2491 				*snamep++ = '.';
2492 		}
2493 		*snamep++ = '@';
2494 		np = snp->u.event.epname;
2495 		while (np != NULL) {
2496 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2497 			    "%s%llu", np->u.name.s, np->u.name.child->u.ull);
2498 			np = np->u.name.next;
2499 			if (np)
2500 				*snamep++ = '/';
2501 		}
2502 		*snamep++ = '\0';
2503 
2504 		/* create the new stat & add it to our list */
2505 		newentp = MALLOC(sizeof (*newentp));
2506 		*newentp = ent;
2507 		statp = stats_new_counter(NULL, sname, 0);
2508 		Istats = lut_add(Istats, (void *)newentp, (void *)statp,
2509 		    (lut_cmp)istat_cmp);
2510 	}
2511 
2512 	/* if n is non-zero, set that value instead of bumping */
2513 	if (n) {
2514 		stats_counter_reset(statp);
2515 		stats_counter_add(statp, n);
2516 	} else
2517 		stats_counter_bump(statp);
2518 	Istat_need_save = 1;
2519 
2520 	ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
2521 	out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
2522 	    stats_counter_value(statp));
2523 }
2524 
/*
 * istat_destructor -- lut_free() callback for the Istats table: frees
 *	the istat_entry key (left) and deletes the stats value (right).
 */
/*ARGSUSED*/
static void
istat_destructor(void *left, void *right, void *arg)
{
	struct istat_entry *key = left;
	struct stats *value = right;

	FREE(key);
	stats_delete(value);
}
2534 
2535 /*
2536  * Callback used in a walk of the Istats to reset matching stat counters.
2537  */
2538 static void
2539 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
2540     const struct ipath *ipp)
2541 {
2542 	char *path;
2543 
2544 	if (entp->ipath == ipp) {
2545 		path = ipath2str(entp->ename, ipp);
2546 		out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
2547 		FREE(path);
2548 		stats_counter_reset(statp);
2549 		Istat_need_save = 1;
2550 	}
2551 }
2552 
2553 /*ARGSUSED*/
2554 static void
2555 istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp,
2556     void *unused)
2557 {
2558 	char *path;
2559 	nvlist_t *fmri;
2560 
2561 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2562 	if (!platform_path_exists(fmri)) {
2563 		path = ipath2str(entp->ename, entp->ipath);
2564 		out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path);
2565 		FREE(path);
2566 		stats_counter_reset(statp);
2567 		Istat_need_save = 1;
2568 	}
2569 	nvlist_free(fmri);
2570 }
2571 
2572 void
2573 istat_fini(void)
2574 {
2575 	lut_free(Istats, istat_destructor, NULL);
2576 }
2577 
/* scratch state shared by serd_save() and its lut_walk() callbacks */
static char *Serdbuf, *Serdbufptr;	/* buffer start and write position */
static int Serdsz;			/* serialized size in bytes */
2581 
2582 /*
2583  * serdaddsize -- calculate size of serd and add it to Serdsz
2584  */
2585 /*ARGSUSED*/
2586 static void
2587 serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2588 {
2589 	ASSERT(lhs != NULL);
2590 
2591 	/* count up the size of the stat name */
2592 	Serdsz += ipath2strlen(lhs->ename, lhs->ipath);
2593 	Serdsz++;	/* for the trailing NULL byte */
2594 }
2595 
2596 /*
2597  * serd2str -- serialize a serd engine, writing result to *Serdbufptr
2598  */
2599 /*ARGSUSED*/
2600 static void
2601 serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2602 {
2603 	char *str;
2604 	int len;
2605 
2606 	ASSERT(lhs != NULL);
2607 
2608 	/* serialize the serd engine name */
2609 	str = ipath2str(lhs->ename, lhs->ipath);
2610 	len = strlen(str);
2611 
2612 	ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]);
2613 	(void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr);
2614 	Serdbufptr += len;
2615 	FREE(str);
2616 	*Serdbufptr++ = '\0';
2617 	ASSERT(Serdbufptr <= &Serdbuf[Serdsz]);
2618 }
2619 
2620 void
2621 serd_save()
2622 {
2623 	if (Serd_need_save == 0)
2624 		return;
2625 
2626 	/* figure out how big the serialzed info is */
2627 	Serdsz = 0;
2628 	lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL);
2629 
2630 	if (Serdsz == 0) {
2631 		/* no serd engines to save */
2632 		fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2633 		return;
2634 	}
2635 
2636 	/* create the serialized buffer */
2637 	Serdbufptr = Serdbuf = MALLOC(Serdsz);
2638 	lut_walk(SerdEngines, (lut_cb)serd2str, NULL);
2639 
2640 	/* clear out current saved stats */
2641 	fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2642 
2643 	/* write out the new version */
2644 	fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz);
2645 	FREE(Serdbuf);
2646 	Serd_need_save = 0;
2647 }
2648 
2649 int
2650 serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2)
2651 {
2652 	if (ent1->ename != ent2->ename)
2653 		return (ent2->ename - ent1->ename);
2654 	if (ent1->ipath != ent2->ipath)
2655 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2656 
2657 	return (0);
2658 }
2659 
2660 void
2661 fme_serd_load(fmd_hdl_t *hdl)
2662 {
2663 	int sz;
2664 	char *sbuf;
2665 	char *sepptr;
2666 	char *ptr;
2667 	struct serd_entry *newentp;
2668 	struct node *epname;
2669 	nvlist_t *fmri;
2670 	char *namestring;
2671 
2672 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0)
2673 		return;
2674 	sbuf = alloca(sz);
2675 	fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz);
2676 	ptr = sbuf;
2677 	while (ptr < &sbuf[sz]) {
2678 		sepptr = strchr(ptr, '@');
2679 		*sepptr = '\0';
2680 		namestring = ptr;
2681 		sepptr++;
2682 		ptr = sepptr;
2683 		ptr += strlen(ptr);
2684 		ptr++;	/* move past the '\0' separating paths */
2685 		epname = pathstring2epnamenp(sepptr);
2686 		fmri = node2fmri(epname);
2687 		if (platform_path_exists(fmri)) {
2688 			newentp = MALLOC(sizeof (*newentp));
2689 			newentp->hdl = hdl;
2690 			newentp->ipath = ipath(epname);
2691 			newentp->ename = stable(namestring);
2692 			SerdEngines = lut_add(SerdEngines, (void *)newentp,
2693 			    (void *)newentp, (lut_cmp)serd_cmp);
2694 		} else
2695 			Serd_need_save = 1;
2696 		tree_free(epname);
2697 		nvlist_free(fmri);
2698 	}
2699 	/* save it back again in case some of the paths no longer exist */
2700 	serd_save();
2701 }
2702 
/*
 * serd_destructor -- lut_free() callback for the SerdEngines table:
 *	frees the serd_entry passed as the key (left).
 */
/*ARGSUSED*/
static void
serd_destructor(void *left, void *right, void *arg)
{
	struct serd_entry *key = left;

	FREE(key);
}
2710 
2711 /*
2712  * Callback used in a walk of the SerdEngines to reset matching serd engines.
2713  */
2714 /*ARGSUSED*/
2715 static void
2716 serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp)
2717 {
2718 	char *path;
2719 
2720 	if (entp->ipath == ipp) {
2721 		path = ipath2str(entp->ename, ipp);
2722 		out(O_ALTFP, "serd_reset_cb: resetting %s", path);
2723 		fmd_serd_reset(entp->hdl, path);
2724 		FREE(path);
2725 		Serd_need_save = 1;
2726 	}
2727 }
2728 
2729 /*ARGSUSED*/
2730 static void
2731 serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2)
2732 {
2733 	char *path;
2734 	nvlist_t *fmri;
2735 
2736 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2737 	if (!platform_path_exists(fmri)) {
2738 		path = ipath2str(entp->ename, entp->ipath);
2739 		out(O_ALTFP, "serd_topo_chg_cb: not present %s", path);
2740 		fmd_serd_reset(entp->hdl, path);
2741 		FREE(path);
2742 		Serd_need_save = 1;
2743 	}
2744 	nvlist_free(fmri);
2745 }
2746 
2747 void
2748 serd_fini(void)
2749 {
2750 	lut_free(SerdEngines, serd_destructor, NULL);
2751 }
2752 
2753 static void
2754 publish_suspects(struct fme *fmep, struct rsl *srl)
2755 {
2756 	struct rsl *rp;
2757 	nvlist_t *fault;
2758 	uint8_t cert;
2759 	uint_t *frs;
2760 	uint_t frsum, fr;
2761 	uint_t messval;
2762 	uint_t retireval;
2763 	uint_t responseval;
2764 	struct node *snp;
2765 	int frcnt, fridx;
2766 	boolean_t allfaulty = B_TRUE;
2767 	struct rsl *erl = srl + fmep->nsuspects - 1;
2768 
2769 	/*
2770 	 * sort the array
2771 	 */
2772 	qsort(srl, fmep->nsuspects, sizeof (struct rsl), rslcmp);
2773 
2774 	/* sum the fitrates */
2775 	frs = alloca(fmep->nsuspects * sizeof (uint_t));
2776 	fridx = frcnt = frsum = 0;
2777 
2778 	for (rp = srl; rp <= erl; rp++) {
2779 		struct node *n;
2780 
2781 		n = eventprop_lookup(rp->suspect, L_FITrate);
2782 		if (node2uint(n, &fr) != 0) {
2783 			out(O_DEBUG|O_NONL, "event ");
2784 			ipath_print(O_DEBUG|O_NONL,
2785 			    rp->suspect->enode->u.event.ename->u.name.s,
2786 			    rp->suspect->ipp);
2787 			out(O_VERB, " has no FITrate (using 1)");
2788 			fr = 1;
2789 		} else if (fr == 0) {
2790 			out(O_DEBUG|O_NONL, "event ");
2791 			ipath_print(O_DEBUG|O_NONL,
2792 			    rp->suspect->enode->u.event.ename->u.name.s,
2793 			    rp->suspect->ipp);
2794 			out(O_VERB, " has zero FITrate (using 1)");
2795 			fr = 1;
2796 		}
2797 
2798 		frs[fridx++] = fr;
2799 		frsum += fr;
2800 		frcnt++;
2801 	}
2802 
2803 	/* Add them in reverse order of our sort, as fmd reverses order */
2804 	for (rp = erl; rp >= srl; rp--) {
2805 		cert = percentof(frs[--fridx], frsum);
2806 		fault = fmd_nvl_create_fault(fmep->hdl,
2807 		    rp->suspect->enode->u.event.ename->u.name.s,
2808 		    cert,
2809 		    rp->asru,
2810 		    rp->fru,
2811 		    rp->rsrc);
2812 		if (fault == NULL)
2813 			out(O_DIE, "fault creation failed");
2814 		/* if "message" property exists, add it to the fault */
2815 		if (node2uint(eventprop_lookup(rp->suspect, L_message),
2816 		    &messval) == 0) {
2817 
2818 			out(O_ALTFP,
2819 			    "[FME%d, %s adds message=%d to suspect list]",
2820 			    fmep->id,
2821 			    rp->suspect->enode->u.event.ename->u.name.s,
2822 			    messval);
2823 			if (nvlist_add_boolean_value(fault,
2824 			    FM_SUSPECT_MESSAGE,
2825 			    (messval) ? B_TRUE : B_FALSE) != 0) {
2826 				out(O_DIE, "cannot add no-message to fault");
2827 			}
2828 		}
2829 
2830 		/* if "retire" property exists, add it to the fault */
2831 		if (node2uint(eventprop_lookup(rp->suspect, L_retire),
2832 		    &retireval) == 0) {
2833 
2834 			out(O_ALTFP,
2835 			    "[FME%d, %s adds retire=%d to suspect list]",
2836 			    fmep->id,
2837 			    rp->suspect->enode->u.event.ename->u.name.s,
2838 			    retireval);
2839 			if (nvlist_add_boolean_value(fault,
2840 			    FM_SUSPECT_RETIRE,
2841 			    (retireval) ? B_TRUE : B_FALSE) != 0) {
2842 				out(O_DIE, "cannot add no-retire to fault");
2843 			}
2844 		}
2845 
2846 		/* if "response" property exists, add it to the fault */
2847 		if (node2uint(eventprop_lookup(rp->suspect, L_response),
2848 		    &responseval) == 0) {
2849 
2850 			out(O_ALTFP,
2851 			    "[FME%d, %s adds response=%d to suspect list]",
2852 			    fmep->id,
2853 			    rp->suspect->enode->u.event.ename->u.name.s,
2854 			    responseval);
2855 			if (nvlist_add_boolean_value(fault,
2856 			    FM_SUSPECT_RESPONSE,
2857 			    (responseval) ? B_TRUE : B_FALSE) != 0) {
2858 				out(O_DIE, "cannot add no-response to fault");
2859 			}
2860 		}
2861 
2862 		/* add any payload properties */
2863 		lut_walk(rp->suspect->payloadprops,
2864 		    (lut_cb)addpayloadprop, (void *)fault);
2865 		rslfree(rp);
2866 
2867 		/*
2868 		 * If "action" property exists, evaluate it;  this must be done
2869 		 * before the allfaulty check below since some actions may
2870 		 * modify the asru to be used in fmd_nvl_fmri_has_fault.  This
2871 		 * needs to be restructured if any new actions are introduced
2872 		 * that have effects that we do not want to be visible if
2873 		 * we decide not to publish in the dupclose check below.
2874 		 */
2875 		if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
2876 			struct evalue evalue;
2877 
2878 			out(O_ALTFP|O_NONL,
2879 			    "[FME%d, %s action ", fmep->id,
2880 			    rp->suspect->enode->u.event.ename->u.name.s);
2881 			ptree_name_iter(O_ALTFP|O_NONL, snp);
2882 			out(O_ALTFP, "]");
2883 			Action_nvl = fault;
2884 			(void) eval_expr(snp, NULL, NULL, NULL, NULL,
2885 			    NULL, 0, &evalue);
2886 		}
2887 
2888 		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);
2889 
2890 		/*
2891 		 * check if the asru is already marked as "faulty".
2892 		 */
2893 		if (allfaulty) {
2894 			nvlist_t *asru;
2895 
2896 			out(O_ALTFP|O_VERB, "FME%d dup check ", fmep->id);
2897 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
2898 			out(O_ALTFP|O_VERB|O_NONL, " ");
2899 			if (nvlist_lookup_nvlist(fault,
2900 			    FM_FAULT_ASRU, &asru) != 0) {
2901 				out(O_ALTFP|O_VERB, "NULL asru");
2902 				allfaulty = B_FALSE;
2903 			} else if (fmd_nvl_fmri_has_fault(fmep->hdl, asru,
2904 			    FMD_HAS_FAULT_ASRU, NULL)) {
2905 				out(O_ALTFP|O_VERB, "faulty");
2906 			} else {
2907 				out(O_ALTFP|O_VERB, "not faulty");
2908 				allfaulty = B_FALSE;
2909 			}
2910 		}
2911 
2912 	}
2913 
2914 	if (!allfaulty) {
2915 		/*
2916 		 * don't update the count stat if all asrus are already
2917 		 * present and unrepaired in the asru cache
2918 		 */
2919 		for (rp = erl; rp >= srl; rp--) {
2920 			struct event *suspect = rp->suspect;
2921 
2922 			if (suspect == NULL)
2923 				continue;
2924 
2925 			/* if "count" exists, increment the appropriate stat */
2926 			if ((snp = eventprop_lookup(suspect,
2927 			    L_count)) != NULL) {
2928 				out(O_ALTFP|O_NONL,
2929 				    "[FME%d, %s count ", fmep->id,
2930 				    suspect->enode->u.event.ename->u.name.s);
2931 				ptree_name_iter(O_ALTFP|O_NONL, snp);
2932 				out(O_ALTFP, "]");
2933 				istat_bump(snp, 0);
2934 
2935 			}
2936 		}
2937 		istat_save();	/* write out any istat changes */
2938 	}
2939 }
2940 
2941 static const char *
2942 undiag_2defect_str(int ud)
2943 {
2944 	switch (ud) {
2945 	case UD_VAL_MISSINGINFO:
2946 	case UD_VAL_MISSINGOBS:
2947 	case UD_VAL_MISSINGPATH:
2948 	case UD_VAL_MISSINGZERO:
2949 	case UD_VAL_BADOBS:
2950 	case UD_VAL_CFGMISMATCH:
2951 		return (UNDIAG_DEFECT_CHKPT);
2952 		break;
2953 
2954 	case UD_VAL_BADEVENTI:
2955 	case UD_VAL_BADEVENTPATH:
2956 	case UD_VAL_BADEVENTCLASS:
2957 	case UD_VAL_INSTFAIL:
2958 	case UD_VAL_NOPATH:
2959 	case UD_VAL_UNSOLVD:
2960 		return (UNDIAG_DEFECT_FME);
2961 		break;
2962 
2963 	case UD_VAL_MAXFME:
2964 		return (UNDIAG_DEFECT_LIMIT);
2965 		break;
2966 
2967 	case UD_VAL_UNKNOWN:
2968 	default:
2969 		return (UNDIAG_DEFECT_UNKNOWN);
2970 		break;
2971 	}
2972 }
2973 
2974 static const char *
2975 undiag_2fault_str(int ud)
2976 {
2977 	switch (ud) {
2978 	case UD_VAL_BADEVENTI:
2979 	case UD_VAL_BADEVENTPATH:
2980 	case UD_VAL_BADEVENTCLASS:
2981 	case UD_VAL_INSTFAIL:
2982 	case UD_VAL_NOPATH:
2983 	case UD_VAL_UNSOLVD:
2984 		return (UNDIAG_FAULT_FME);
2985 	default:
2986 		return (NULL);
2987 	}
2988 }
2989 
2990 static char *
2991 undiag_2reason_str(int ud, char *arg)
2992 {
2993 	const char *ptr;
2994 	char *buf;
2995 	int with_arg = 0;
2996 
2997 	switch (ud) {
2998 	case UD_VAL_BADEVENTPATH:
2999 		ptr = UD_STR_BADEVENTPATH;
3000 		with_arg = 1;
3001 		break;
3002 	case UD_VAL_BADEVENTCLASS:
3003 		ptr = UD_STR_BADEVENTCLASS;
3004 		with_arg = 1;
3005 		break;
3006 	case UD_VAL_BADEVENTI:
3007 		ptr = UD_STR_BADEVENTI;
3008 		with_arg = 1;
3009 		break;
3010 	case UD_VAL_BADOBS:
3011 		ptr = UD_STR_BADOBS;
3012 		break;
3013 	case UD_VAL_CFGMISMATCH:
3014 		ptr = UD_STR_CFGMISMATCH;
3015 		break;
3016 	case UD_VAL_INSTFAIL:
3017 		ptr = UD_STR_INSTFAIL;
3018 		with_arg = 1;
3019 		break;
3020 	case UD_VAL_MAXFME:
3021 		ptr = UD_STR_MAXFME;
3022 		break;
3023 	case UD_VAL_MISSINGINFO:
3024 		ptr = UD_STR_MISSINGINFO;
3025 		break;
3026 	case UD_VAL_MISSINGOBS:
3027 		ptr = UD_STR_MISSINGOBS;
3028 		break;
3029 	case UD_VAL_MISSINGPATH:
3030 		ptr = UD_STR_MISSINGPATH;
3031 		break;
3032 	case UD_VAL_MISSINGZERO:
3033 		ptr = UD_STR_MISSINGZERO;
3034 		break;
3035 	case UD_VAL_NOPATH:
3036 		ptr = UD_STR_NOPATH;
3037 		with_arg = 1;
3038 		break;
3039 	case UD_VAL_UNSOLVD:
3040 		ptr = UD_STR_UNSOLVD;
3041 		break;
3042 	case UD_VAL_UNKNOWN:
3043 	default:
3044 		ptr = UD_STR_UNKNOWN;
3045 		break;
3046 	}
3047 	if (with_arg) {
3048 		buf = MALLOC(strlen(ptr) + strlen(arg) - 1);
3049 		(void) sprintf(buf, ptr, arg);
3050 	} else {
3051 		buf = MALLOC(strlen(ptr) + 1);
3052 		(void) sprintf(buf, ptr);
3053 	}
3054 	return (buf);
3055 }
3056 
3057 static void
3058 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase,
3059     nvlist_t *detector, char *arg)
3060 {
3061 	struct case_list *newcase;
3062 	nvlist_t *defect, *fault;
3063 	const char *faultstr;
3064 	char *reason = undiag_2reason_str(Undiag_reason, arg);
3065 
3066 	out(O_ALTFP,
3067 	    "[undiagnosable ereport received, "
3068 	    "creating and closing a new case (%s)]", reason);
3069 
3070 	newcase = MALLOC(sizeof (struct case_list));
3071 	newcase->next = NULL;
3072 	newcase->fmcase = fmcase;
3073 	if (Undiagablecaselist != NULL)
3074 		newcase->next = Undiagablecaselist;
3075 	Undiagablecaselist = newcase;
3076 
3077 	if (ffep != NULL)
3078 		fmd_case_add_ereport(hdl, newcase->fmcase, ffep);
3079 
3080 	/* add defect */
3081 	defect = fmd_nvl_create_fault(hdl,
3082 	    undiag_2defect_str(Undiag_reason), 50, NULL, NULL, detector);
3083 	(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
3084 	(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE, B_FALSE);
3085 	(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE, B_FALSE);
3086 	fmd_case_add_suspect(hdl, newcase->fmcase, defect);
3087 
3088 	/* add fault if appropriate */
3089 	faultstr = undiag_2fault_str(Undiag_reason);
3090 	if (faultstr != NULL) {
3091 		fault = fmd_nvl_create_fault(hdl, faultstr, 50, NULL, NULL,
3092 		    detector);
3093 		(void) nvlist_add_string(fault, UNDIAG_REASON, reason);
3094 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
3095 		    B_FALSE);
3096 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
3097 		    B_FALSE);
3098 		fmd_case_add_suspect(hdl, newcase->fmcase, fault);
3099 	}
3100 	FREE(reason);
3101 
3102 	/* solve and close case */
3103 	fmd_case_solve(hdl, newcase->fmcase);
3104 	fmd_case_close(hdl, newcase->fmcase);
3105 	Undiag_reason = UD_VAL_UNKNOWN;
3106 }
3107 
3108 static void
3109 fme_undiagnosable(struct fme *f)
3110 {
3111 	nvlist_t *defect, *fault, *detector = NULL;
3112 	struct event *ep;
3113 	char *pathstr;
3114 	const char *faultstr;
3115 	char *reason = undiag_2reason_str(Undiag_reason, NULL);
3116 
3117 	out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
3118 	    f->id, fmd_case_uuid(f->hdl, f->fmcase), reason);
3119 
3120 	for (ep = f->observations; ep; ep = ep->observations) {
3121 
3122 		if (ep->ffep != f->e0r)
3123 			fmd_case_add_ereport(f->hdl, f->fmcase, ep->ffep);
3124 
3125 		pathstr = ipath2str(NULL, ipath(platform_getpath(ep->nvp)));
3126 		platform_units_translate(0, f->config, NULL, NULL, &detector,
3127 		    pathstr);
3128 		FREE(pathstr);
3129 
3130 		/* add defect */
3131 		defect = fmd_nvl_create_fault(f->hdl,
3132 		    undiag_2defect_str(Undiag_reason), 50 / f->uniqobs,
3133 		    NULL, NULL, detector);
3134 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
3135 		(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE,
3136 		    B_FALSE);
3137 		(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE,
3138 		    B_FALSE);
3139 		fmd_case_add_suspect(f->hdl, f->fmcase, defect);
3140 
3141 		/* add fault if appropriate */
3142 		faultstr = undiag_2fault_str(Undiag_reason);
3143 		if (faultstr == NULL)
3144 			continue;
3145 		fault = fmd_nvl_create_fault(f->hdl, faultstr, 50 / f->uniqobs,
3146 		    NULL, NULL, detector);
3147 		(void) nvlist_add_string(fault, UNDIAG_REASON, reason);
3148 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
3149 		    B_FALSE);
3150 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
3151 		    B_FALSE);
3152 		fmd_case_add_suspect(f->hdl, f->fmcase, fault);
3153 		nvlist_free(detector);
3154 	}
3155 	FREE(reason);
3156 	fmd_case_solve(f->hdl, f->fmcase);
3157 	fmd_case_close(f->hdl, f->fmcase);
3158 	Undiag_reason = UD_VAL_UNKNOWN;
3159 }
3160 
3161 /*
3162  * fme_close_case
3163  *
3164  *	Find the requested case amongst our fmes and close it.  Free up
3165  *	the related fme.
3166  */
3167 void
3168 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase)
3169 {
3170 	struct case_list *ucasep, *prevcasep = NULL;
3171 	struct fme *prev = NULL;
3172 	struct fme *fmep;
3173 
3174 	for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) {
3175 		if (fmcase != ucasep->fmcase) {
3176 			prevcasep = ucasep;
3177 			continue;
3178 		}
3179 
3180 		if (prevcasep == NULL)
3181 			Undiagablecaselist = Undiagablecaselist->next;
3182 		else
3183 			prevcasep->next = ucasep->next;
3184 
3185 		FREE(ucasep);
3186 		return;
3187 	}
3188 
3189 	for (fmep = FMElist; fmep; fmep = fmep->next) {
3190 		if (fmep->hdl == hdl && fmep->fmcase == fmcase)
3191 			break;
3192 		prev = fmep;
3193 	}
3194 
3195 	if (fmep == NULL) {
3196 		out(O_WARN, "Eft asked to close unrecognized case [%s].",
3197 		    fmd_case_uuid(hdl, fmcase));
3198 		return;
3199 	}
3200 
3201 	if (EFMElist == fmep)
3202 		EFMElist = prev;
3203 
3204 	if (prev == NULL)
3205 		FMElist = FMElist->next;
3206 	else
3207 		prev->next = fmep->next;
3208 
3209 	fmep->next = NULL;
3210 
3211 	/* Get rid of any timer this fme has set */
3212 	if (fmep->wull != 0)
3213 		fmd_timer_remove(fmep->hdl, fmep->timer);
3214 
3215 	if (ClosedFMEs == NULL) {
3216 		ClosedFMEs = fmep;
3217 	} else {
3218 		fmep->next = ClosedFMEs;
3219 		ClosedFMEs = fmep;
3220 	}
3221 
3222 	Open_fme_count--;
3223 
3224 	/* See if we can close the overflow FME */
3225 	if (Open_fme_count <= Max_fme) {
3226 		for (fmep = FMElist; fmep; fmep = fmep->next) {
3227 			if (fmep->overflow && !(fmd_case_closed(fmep->hdl,
3228 			    fmep->fmcase)))
3229 				break;
3230 		}
3231 
3232 		if (fmep != NULL)
3233 			fmd_case_close(fmep->hdl, fmep->fmcase);
3234 	}
3235 }
3236 
3237 /*
3238  * fme_set_timer()
3239  *	If the time we need to wait for the given FME is less than the
3240  *	current timer, kick that old timer out and establish a new one.
3241  */
3242 static int
3243 fme_set_timer(struct fme *fmep, unsigned long long wull)
3244 {
3245 	out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait ");
3246 	ptree_timeval(O_ALTFP|O_VERB, &wull);
3247 
3248 	if (wull <= fmep->pull) {
3249 		out(O_ALTFP|O_VERB|O_NONL, "already have waited at least ");
3250 		ptree_timeval(O_ALTFP|O_VERB, &fmep->pull);
3251 		out(O_ALTFP|O_VERB, NULL);
3252 		/* we've waited at least wull already, don't need timer */
3253 		return (0);
3254 	}
3255 
3256 	out(O_ALTFP|O_VERB|O_NONL, " currently ");
3257 	if (fmep->wull != 0) {
3258 		out(O_ALTFP|O_VERB|O_NONL, "waiting ");
3259 		ptree_timeval(O_ALTFP|O_VERB, &fmep->wull);
3260 		out(O_ALTFP|O_VERB, NULL);
3261 	} else {
3262 		out(O_ALTFP|O_VERB|O_NONL, "not waiting");
3263 		out(O_ALTFP|O_VERB, NULL);
3264 	}
3265 
3266 	if (fmep->wull != 0)
3267 		if (wull >= fmep->wull)
3268 			/* New timer would fire later than established timer */
3269 			return (0);
3270 
3271 	if (fmep->wull != 0) {
3272 		fmd_timer_remove(fmep->hdl, fmep->timer);
3273 	}
3274 
3275 	fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep,
3276 	    fmep->e0r, wull);
3277 	out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer);
3278 	fmep->wull = wull;
3279 	return (1);
3280 }
3281 
3282 void
3283 fme_timer_fired(struct fme *fmep, id_t tid)
3284 {
3285 	struct fme *ffmep = NULL;
3286 
3287 	for (ffmep = FMElist; ffmep; ffmep = ffmep->next)
3288 		if (ffmep == fmep)
3289 			break;
3290 
3291 	if (ffmep == NULL) {
3292 		out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.",
3293 		    (void *)fmep);
3294 		return;
3295 	}
3296 
3297 	out(O_ALTFP|O_VERB, "Timer fired %lx", tid);
3298 	fmep->pull = fmep->wull;
3299 	fmep->wull = 0;
3300 	fmd_buf_write(fmep->hdl, fmep->fmcase,
3301 	    WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull));
3302 
3303 	fme_eval(fmep, fmep->e0r);
3304 }
3305 
3306 /*
3307  * Preserve the fme's suspect list in its psuspects list, NULLing the
3308  * suspects list in the meantime.
3309  */
3310 static void
3311 save_suspects(struct fme *fmep)
3312 {
3313 	struct event *ep;
3314 	struct event *nextep;
3315 
3316 	/* zero out the previous suspect list */
3317 	for (ep = fmep->psuspects; ep; ep = nextep) {
3318 		nextep = ep->psuspects;
3319 		ep->psuspects = NULL;
3320 	}
3321 	fmep->psuspects = NULL;
3322 
3323 	/* zero out the suspect list, copying it to previous suspect list */
3324 	fmep->psuspects = fmep->suspects;
3325 	for (ep = fmep->suspects; ep; ep = nextep) {
3326 		nextep = ep->suspects;
3327 		ep->psuspects = ep->suspects;
3328 		ep->suspects = NULL;
3329 		ep->is_suspect = 0;
3330 	}
3331 	fmep->suspects = NULL;
3332 	fmep->nsuspects = 0;
3333 }
3334 
3335 /*
3336  * Retrieve the fme's suspect list from its psuspects list.
3337  */
3338 static void
3339 restore_suspects(struct fme *fmep)
3340 {
3341 	struct event *ep;
3342 	struct event *nextep;
3343 
3344 	fmep->nsuspects = 0;
3345 	fmep->suspects = fmep->psuspects;
3346 	for (ep = fmep->psuspects; ep; ep = nextep) {
3347 		fmep->nsuspects++;
3348 		nextep = ep->psuspects;
3349 		ep->suspects = ep->psuspects;
3350 	}
3351 }
3352 
3353 /*
3354  * this is what we use to call the Emrys prototype code instead of main()
3355  */
3356 static void
3357 fme_eval(struct fme *fmep, fmd_event_t *ffep)
3358 {
3359 	struct event *ep;
3360 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
3361 	struct rsl *srl = NULL;
3362 	struct rsl *srl2 = NULL;
3363 	int mess_zero_count;
3364 	int rpcnt;
3365 
3366 	save_suspects(fmep);
3367 
3368 	out(O_ALTFP, "Evaluate FME %d", fmep->id);
3369 	indent_set("  ");
3370 
3371 	lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
3372 	fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
3373 
3374 	out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
3375 	    fme_state2str(fmep->state));
3376 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
3377 		out(O_ALTFP|O_NONL, " ");
3378 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
3379 	}
3380 	out(O_ALTFP, NULL);
3381 
3382 	switch (fmep->state) {
3383 	case FME_CREDIBLE:
3384 		print_suspects(SLNEW, fmep);
3385 		(void) upsets_eval(fmep, ffep);
3386 
3387 		/*
3388 		 * we may have already posted suspects in upsets_eval() which
3389 		 * can recurse into fme_eval() again. If so then just return.
3390 		 */
3391 		if (fmep->posted_suspects)
3392 			return;
3393 
3394 		stats_counter_bump(fmep->diags);
3395 		rpcnt = fmep->nsuspects;
3396 		save_suspects(fmep);
3397 
3398 		/*
3399 		 * create two lists, one for "message=1" faults and one for
3400 		 * "message=0" faults. If we have a mixture we will generate
3401 		 * two separate suspect lists.
3402 		 */
3403 		srl = MALLOC(rpcnt * sizeof (struct rsl));
3404 		bzero(srl, rpcnt * sizeof (struct rsl));
3405 		srl2 = MALLOC(rpcnt * sizeof (struct rsl));
3406 		bzero(srl2, rpcnt * sizeof (struct rsl));
3407 		mess_zero_count = trim_suspects(fmep, srl, srl2, ffep);
3408 
3409 		/*
3410 		 * If the resulting suspect list has no members, we're
3411 		 * done so simply close the case. Otherwise sort and publish.
3412 		 */
3413 		if (fmep->nsuspects == 0 && mess_zero_count == 0) {
3414 			out(O_ALTFP,
3415 			    "[FME%d, case %s (all suspects are upsets)]",
3416 			    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
3417 			fmd_case_close(fmep->hdl, fmep->fmcase);
3418 		} else if (fmep->nsuspects != 0 && mess_zero_count == 0) {
3419 			publish_suspects(fmep, srl);
3420 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3421 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3422 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3423 		} else if (fmep->nsuspects == 0 && mess_zero_count != 0) {
3424 			fmep->nsuspects = mess_zero_count;
3425 			publish_suspects(fmep, srl2);
3426 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3427 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3428 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3429 		} else {
3430 			struct event *obsp;
3431 			struct fme *nfmep;
3432 
3433 			publish_suspects(fmep, srl);
3434 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3435 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3436 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3437 
3438 			/*
3439 			 * Got both message=0 and message=1 so create a
3440 			 * duplicate case. Also need a temporary duplicate fme
3441 			 * structure for use by publish_suspects().
3442 			 */
3443 			nfmep = alloc_fme();
3444 			nfmep->id =  Nextid++;
3445 			nfmep->hdl = fmep->hdl;
3446 			nfmep->nsuspects = mess_zero_count;
3447 			nfmep->fmcase = fmd_case_open(fmep->hdl, NULL);
3448 			out(O_ALTFP|O_STAMP,
3449 			    "[creating parallel FME%d, case %s]", nfmep->id,
3450 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3451 			Open_fme_count++;
3452 			if (ffep) {
3453 				fmd_case_setprincipal(nfmep->hdl,
3454 				    nfmep->fmcase, ffep);
3455 				fmd_case_add_ereport(nfmep->hdl,
3456 				    nfmep->fmcase, ffep);
3457 			}
3458 			for (obsp = fmep->observations; obsp;
3459 			    obsp = obsp->observations)
3460 				if (obsp->ffep && obsp->ffep != ffep)
3461 					fmd_case_add_ereport(nfmep->hdl,
3462 					    nfmep->fmcase, obsp->ffep);
3463 
3464 			publish_suspects(nfmep, srl2);
3465 			out(O_ALTFP, "[solving FME%d, case %s]", nfmep->id,
3466 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3467 			fmd_case_solve(nfmep->hdl, nfmep->fmcase);
3468 			FREE(nfmep);
3469 		}
3470 		FREE(srl);
3471 		FREE(srl2);
3472 		restore_suspects(fmep);
3473 
3474 		fmep->posted_suspects = 1;
3475 		fmd_buf_write(fmep->hdl, fmep->fmcase,
3476 		    WOBUF_POSTD,
3477 		    (void *)&fmep->posted_suspects,
3478 		    sizeof (fmep->posted_suspects));
3479 
3480 		/*
3481 		 * Now the suspects have been posted, we can clear up
3482 		 * the instance tree as we won't be looking at it again.
3483 		 * Also cancel the timer as the case is now solved.
3484 		 */
3485 		if (fmep->wull != 0) {
3486 			fmd_timer_remove(fmep->hdl, fmep->timer);
3487 			fmep->wull = 0;
3488 		}
3489 		break;
3490 
3491 	case FME_WAIT:
3492 		ASSERT(my_delay > fmep->ull);
3493 		(void) fme_set_timer(fmep, my_delay);
3494 		print_suspects(SLWAIT, fmep);
3495 		itree_prune(fmep->eventtree);
3496 		return;
3497 
3498 	case FME_DISPROVED:
3499 		print_suspects(SLDISPROVED, fmep);
3500 		Undiag_reason = UD_VAL_UNSOLVD;
3501 		fme_undiagnosable(fmep);
3502 		break;
3503 	}
3504 
3505 	itree_free(fmep->eventtree);
3506 	fmep->eventtree = NULL;
3507 	structconfig_free(fmep->config);
3508 	fmep->config = NULL;
3509 	destroy_fme_bufs(fmep);
3510 }
3511 
/*
 * Forward declarations for the trace-indentation helper and for the
 * inference routines below that are called before they are defined.
 */
static void indent(void);
static int triggered(struct fme *fmep, struct event *ep, int mark);
static enum fme_state effects_test(struct fme *fmep, struct event *fault_event,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state requirements_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state causes_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
3521 
3522 static int
3523 checkconstraints(struct fme *fmep, struct arrow *arrowp)
3524 {
3525 	struct constraintlist *ctp;
3526 	struct evalue value;
3527 	char *sep = "";
3528 
3529 	if (arrowp->forever_false) {
3530 		indent();
3531 		out(O_ALTFP|O_VERB|O_NONL, "  Forever false constraint: ");
3532 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3533 			out(O_ALTFP|O_VERB|O_NONL, sep);
3534 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3535 			sep = ", ";
3536 		}
3537 		out(O_ALTFP|O_VERB, NULL);
3538 		return (0);
3539 	}
3540 	if (arrowp->forever_true) {
3541 		indent();
3542 		out(O_ALTFP|O_VERB|O_NONL, "  Forever true constraint: ");
3543 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3544 			out(O_ALTFP|O_VERB|O_NONL, sep);
3545 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3546 			sep = ", ";
3547 		}
3548 		out(O_ALTFP|O_VERB, NULL);
3549 		return (1);
3550 	}
3551 
3552 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3553 		if (eval_expr(ctp->cnode, NULL, NULL,
3554 		    &fmep->globals, fmep->config,
3555 		    arrowp, 0, &value)) {
3556 			/* evaluation successful */
3557 			if (value.t == UNDEFINED || value.v == 0) {
3558 				/* known false */
3559 				arrowp->forever_false = 1;
3560 				indent();
3561 				out(O_ALTFP|O_VERB|O_NONL,
3562 				    "  False constraint: ");
3563 				ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3564 				out(O_ALTFP|O_VERB, NULL);
3565 				return (0);
3566 			}
3567 		} else {
3568 			/* evaluation unsuccessful -- unknown value */
3569 			indent();
3570 			out(O_ALTFP|O_VERB|O_NONL,
3571 			    "  Deferred constraint: ");
3572 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3573 			out(O_ALTFP|O_VERB, NULL);
3574 			return (1);
3575 		}
3576 	}
3577 	/* known true */
3578 	arrowp->forever_true = 1;
3579 	indent();
3580 	out(O_ALTFP|O_VERB|O_NONL, "  True constraint: ");
3581 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3582 		out(O_ALTFP|O_VERB|O_NONL, sep);
3583 		ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3584 		sep = ", ";
3585 	}
3586 	out(O_ALTFP|O_VERB, NULL);
3587 	return (1);
3588 }
3589 
3590 static int
3591 triggered(struct fme *fmep, struct event *ep, int mark)
3592 {
3593 	struct bubble *bp;
3594 	struct arrowlist *ap;
3595 	int count = 0;
3596 
3597 	stats_counter_bump(fmep->Tcallcount);
3598 	for (bp = itree_next_bubble(ep, NULL); bp;
3599 	    bp = itree_next_bubble(ep, bp)) {
3600 		if (bp->t != B_TO)
3601 			continue;
3602 		for (ap = itree_next_arrow(bp, NULL); ap;
3603 		    ap = itree_next_arrow(bp, ap)) {
3604 			/* check count of marks against K in the bubble */
3605 			if ((ap->arrowp->mark & mark) &&
3606 			    ++count >= bp->nork)
3607 				return (1);
3608 		}
3609 	}
3610 	return (0);
3611 }
3612 
3613 static int
3614 mark_arrows(struct fme *fmep, struct event *ep, int mark,
3615     unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
3616 {
3617 	struct bubble *bp;
3618 	struct arrowlist *ap;
3619 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3620 	unsigned long long my_delay;
3621 	enum fme_state result;
3622 	int retval = 0;
3623 
3624 	for (bp = itree_next_bubble(ep, NULL); bp;
3625 	    bp = itree_next_bubble(ep, bp)) {
3626 		if (bp->t != B_FROM)
3627 			continue;
3628 		stats_counter_bump(fmep->Marrowcount);
3629 		for (ap = itree_next_arrow(bp, NULL); ap;
3630 		    ap = itree_next_arrow(bp, ap)) {
3631 			struct event *ep2 = ap->arrowp->head->myevent;
3632 			/*
3633 			 * if we're clearing marks, we can avoid doing
3634 			 * all that work evaluating constraints.
3635 			 */
3636 			if (mark == 0) {
3637 				if (ap->arrowp->arrow_marked == 0)
3638 					continue;
3639 				ap->arrowp->arrow_marked = 0;
3640 				ap->arrowp->mark &= ~EFFECTS_COUNTER;
3641 				if (keep && (ep2->cached_state &
3642 				    (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
3643 					ep2->keep_in_tree = 1;
3644 				ep2->cached_state &=
3645 				    ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
3646 				(void) mark_arrows(fmep, ep2, mark, 0, NULL,
3647 				    keep);
3648 				continue;
3649 			}
3650 			ap->arrowp->arrow_marked = 1;
3651 			if (ep2->cached_state & REQMNTS_DISPROVED) {
3652 				indent();
3653 				out(O_ALTFP|O_VERB|O_NONL,
3654 				    "  ALREADY DISPROVED ");
3655 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3656 				out(O_ALTFP|O_VERB, NULL);
3657 				continue;
3658 			}
3659 			if (ep2->cached_state & WAIT_EFFECT) {
3660 				indent();
3661 				out(O_ALTFP|O_VERB|O_NONL,
3662 				    "  ALREADY EFFECTS WAIT ");
3663 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3664 				out(O_ALTFP|O_VERB, NULL);
3665 				continue;
3666 			}
3667 			if (ep2->cached_state & CREDIBLE_EFFECT) {
3668 				indent();
3669 				out(O_ALTFP|O_VERB|O_NONL,
3670 				    "  ALREADY EFFECTS CREDIBLE ");
3671 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3672 				out(O_ALTFP|O_VERB, NULL);
3673 				continue;
3674 			}
3675 			if ((ep2->cached_state & PARENT_WAIT) &&
3676 			    (mark & PARENT_WAIT)) {
3677 				indent();
3678 				out(O_ALTFP|O_VERB|O_NONL,
3679 				    "  ALREADY PARENT EFFECTS WAIT ");
3680 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3681 				out(O_ALTFP|O_VERB, NULL);
3682 				continue;
3683 			}
3684 			platform_set_payloadnvp(ep2->nvp);
3685 			if (checkconstraints(fmep, ap->arrowp) == 0) {
3686 				platform_set_payloadnvp(NULL);
3687 				indent();
3688 				out(O_ALTFP|O_VERB|O_NONL,
3689 				    "  CONSTRAINTS FAIL ");
3690 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3691 				out(O_ALTFP|O_VERB, NULL);
3692 				continue;
3693 			}
3694 			platform_set_payloadnvp(NULL);
3695 			ap->arrowp->mark |= EFFECTS_COUNTER;
3696 			if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
3697 				indent();
3698 				out(O_ALTFP|O_VERB|O_NONL,
3699 				    "  K-COUNT NOT YET MET ");
3700 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3701 				out(O_ALTFP|O_VERB, NULL);
3702 				continue;
3703 			}
3704 			ep2->cached_state &= ~PARENT_WAIT;
3705 			/*
3706 			 * if we've reached an ereport and no propagation time
3707 			 * is specified, use the Hesitate value
3708 			 */
3709 			if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
3710 			    ap->arrowp->maxdelay == 0ULL) {
3711 				out(O_ALTFP|O_VERB|O_NONL, "  default wait ");
3712 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3713 				out(O_ALTFP|O_VERB, NULL);
3714 				result = requirements_test(fmep, ep2, Hesitate,
3715 				    &my_delay);
3716 			} else {
3717 				result = requirements_test(fmep, ep2,
3718 				    at_latest_by + ap->arrowp->maxdelay,
3719 				    &my_delay);
3720 			}
3721 			if (result == FME_WAIT) {
3722 				retval = WAIT_EFFECT;
3723 				if (overall_delay > my_delay)
3724 					overall_delay = my_delay;
3725 				ep2->cached_state |= WAIT_EFFECT;
3726 				indent();
3727 				out(O_ALTFP|O_VERB|O_NONL, "  EFFECTS WAIT ");
3728 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3729 				out(O_ALTFP|O_VERB, NULL);
3730 				indent_push("  E");
3731 				if (mark_arrows(fmep, ep2, PARENT_WAIT,
3732 				    at_latest_by, &my_delay, 0) ==
3733 				    WAIT_EFFECT) {
3734 					retval = WAIT_EFFECT;
3735 					if (overall_delay > my_delay)
3736 						overall_delay = my_delay;
3737 				}
3738 				indent_pop();
3739 			} else if (result == FME_DISPROVED) {
3740 				indent();
3741 				out(O_ALTFP|O_VERB|O_NONL,
3742 				    "  EFFECTS DISPROVED ");
3743 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3744 				out(O_ALTFP|O_VERB, NULL);
3745 			} else {
3746 				ep2->cached_state |= mark;
3747 				indent();
3748 				if (mark == CREDIBLE_EFFECT)
3749 					out(O_ALTFP|O_VERB|O_NONL,
3750 					    "  EFFECTS CREDIBLE ");
3751 				else
3752 					out(O_ALTFP|O_VERB|O_NONL,
3753 					    "  PARENT EFFECTS WAIT ");
3754 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3755 				out(O_ALTFP|O_VERB, NULL);
3756 				indent_push("  E");
3757 				if (mark_arrows(fmep, ep2, mark, at_latest_by,
3758 				    &my_delay, 0) == WAIT_EFFECT) {
3759 					retval = WAIT_EFFECT;
3760 					if (overall_delay > my_delay)
3761 						overall_delay = my_delay;
3762 				}
3763 				indent_pop();
3764 			}
3765 		}
3766 	}
3767 	if (retval == WAIT_EFFECT)
3768 		*pdelay = overall_delay;
3769 	return (retval);
3770 }
3771 
3772 static enum fme_state
3773 effects_test(struct fme *fmep, struct event *fault_event,
3774     unsigned long long at_latest_by, unsigned long long *pdelay)
3775 {
3776 	struct event *error_event;
3777 	enum fme_state return_value = FME_CREDIBLE;
3778 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3779 	unsigned long long my_delay;
3780 
3781 	stats_counter_bump(fmep->Ecallcount);
3782 	indent_push("  E");
3783 	indent();
3784 	out(O_ALTFP|O_VERB|O_NONL, "->");
3785 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3786 	out(O_ALTFP|O_VERB, NULL);
3787 
3788 	if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
3789 	    &my_delay, 0) == WAIT_EFFECT) {
3790 		return_value = FME_WAIT;
3791 		if (overall_delay > my_delay)
3792 			overall_delay = my_delay;
3793 	}
3794 	for (error_event = fmep->observations;
3795 	    error_event; error_event = error_event->observations) {
3796 		indent();
3797 		out(O_ALTFP|O_VERB|O_NONL, " ");
3798 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
3799 		if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
3800 			if (error_event->cached_state &
3801 			    (PARENT_WAIT|WAIT_EFFECT)) {
3802 				out(O_ALTFP|O_VERB, " NOT YET triggered");
3803 				continue;
3804 			}
3805 			return_value = FME_DISPROVED;
3806 			out(O_ALTFP|O_VERB, " NOT triggered");
3807 			break;
3808 		} else {
3809 			out(O_ALTFP|O_VERB, " triggered");
3810 		}
3811 	}
3812 	if (return_value == FME_DISPROVED) {
3813 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
3814 	} else {
3815 		fault_event->keep_in_tree = 1;
3816 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
3817 	}
3818 
3819 	indent();
3820 	out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
3821 	    fme_state2str(return_value));
3822 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3823 	out(O_ALTFP|O_VERB, NULL);
3824 	indent_pop();
3825 	if (return_value == FME_WAIT)
3826 		*pdelay = overall_delay;
3827 	return (return_value);
3828 }
3829 
3830 static enum fme_state
3831 requirements_test(struct fme *fmep, struct event *ep,
3832     unsigned long long at_latest_by, unsigned long long *pdelay)
3833 {
3834 	int waiting_events;
3835 	int credible_events;
3836 	int deferred_events;
3837 	enum fme_state return_value = FME_CREDIBLE;
3838 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3839 	unsigned long long arrow_delay;
3840 	unsigned long long my_delay;
3841 	struct event *ep2;
3842 	struct bubble *bp;
3843 	struct arrowlist *ap;
3844 
3845 	if (ep->cached_state & REQMNTS_CREDIBLE) {
3846 		indent();
3847 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY CREDIBLE ");
3848 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3849 		out(O_ALTFP|O_VERB, NULL);
3850 		return (FME_CREDIBLE);
3851 	}
3852 	if (ep->cached_state & REQMNTS_DISPROVED) {
3853 		indent();
3854 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY DISPROVED ");
3855 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3856 		out(O_ALTFP|O_VERB, NULL);
3857 		return (FME_DISPROVED);
3858 	}
3859 	if (ep->cached_state & REQMNTS_WAIT) {
3860 		indent();
3861 		*pdelay = ep->cached_delay;
3862 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY WAIT ");
3863 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3864 		out(O_ALTFP|O_VERB|O_NONL, ", wait for: ");
3865 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3866 		out(O_ALTFP|O_VERB, NULL);
3867 		return (FME_WAIT);
3868 	}
3869 	stats_counter_bump(fmep->Rcallcount);
3870 	indent_push("  R");
3871 	indent();
3872 	out(O_ALTFP|O_VERB|O_NONL, "->");
3873 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3874 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
3875 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3876 	out(O_ALTFP|O_VERB, NULL);
3877 
3878 	if (ep->t == N_EREPORT) {
3879 		if (ep->count == 0) {
3880 			if (fmep->pull >= at_latest_by) {
3881 				return_value = FME_DISPROVED;
3882 			} else {
3883 				ep->cached_delay = *pdelay = at_latest_by;
3884 				return_value = FME_WAIT;
3885 			}
3886 		}
3887 
3888 		indent();
3889 		switch (return_value) {
3890 		case FME_CREDIBLE:
3891 			ep->cached_state |= REQMNTS_CREDIBLE;
3892 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE ");
3893 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3894 			break;
3895 		case FME_DISPROVED:
3896 			ep->cached_state |= REQMNTS_DISPROVED;
3897 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
3898 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3899 			break;
3900 		case FME_WAIT:
3901 			ep->cached_state |= REQMNTS_WAIT;
3902 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT ");
3903 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3904 			out(O_ALTFP|O_VERB|O_NONL, " to ");
3905 			ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3906 			break;
3907 		default:
3908 			out(O_DIE, "requirements_test: unexpected fme_state");
3909 			break;
3910 		}
3911 		out(O_ALTFP|O_VERB, NULL);
3912 		indent_pop();
3913 
3914 		return (return_value);
3915 	}
3916 
3917 	/* this event is not a report, descend the tree */
3918 	for (bp = itree_next_bubble(ep, NULL); bp;
3919 	    bp = itree_next_bubble(ep, bp)) {
3920 		int n;
3921 
3922 		if (bp->t != B_FROM)
3923 			continue;
3924 
3925 		n = bp->nork;
3926 
3927 		credible_events = 0;
3928 		waiting_events = 0;
3929 		deferred_events = 0;
3930 		arrow_delay = TIMEVAL_EVENTUALLY;
3931 		/*
3932 		 * n is -1 for 'A' so adjust it.
3933 		 * XXX just count up the arrows for now.
3934 		 */
3935 		if (n < 0) {
3936 			n = 0;
3937 			for (ap = itree_next_arrow(bp, NULL); ap;
3938 			    ap = itree_next_arrow(bp, ap))
3939 				n++;
3940 			indent();
3941 			out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n);
3942 		} else {
3943 			indent();
3944 			out(O_ALTFP|O_VERB, " Bubble N=%d", n);
3945 		}
3946 
3947 		if (n == 0)
3948 			continue;
3949 		if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) {
3950 			for (ap = itree_next_arrow(bp, NULL); ap;
3951 			    ap = itree_next_arrow(bp, ap)) {
3952 				ep2 = ap->arrowp->head->myevent;
3953 				platform_set_payloadnvp(ep2->nvp);
3954 				(void) checkconstraints(fmep, ap->arrowp);
3955 				if (!ap->arrowp->forever_false) {
3956 					/*
3957 					 * if all arrows are invalidated by the
3958 					 * constraints, then we should elide the
3959 					 * whole bubble to be consistant with
3960 					 * the tree creation time behaviour
3961 					 */
3962 					bp->mark |= BUBBLE_OK;
3963 					platform_set_payloadnvp(NULL);
3964 					break;
3965 				}
3966 				platform_set_payloadnvp(NULL);
3967 			}
3968 		}
3969 		for (ap = itree_next_arrow(bp, NULL); ap;
3970 		    ap = itree_next_arrow(bp, ap)) {
3971 			ep2 = ap->arrowp->head->myevent;
3972 			if (n <= credible_events)
3973 				break;
3974 
3975 			ap->arrowp->mark |= REQMNTS_COUNTER;
3976 			if (triggered(fmep, ep2, REQMNTS_COUNTER))
3977 				/* XXX adding max timevals! */
3978 				switch (requirements_test(fmep, ep2,
3979 				    at_latest_by + ap->arrowp->maxdelay,
3980 				    &my_delay)) {
3981 				case FME_DEFERRED:
3982 					deferred_events++;
3983 					break;
3984 				case FME_CREDIBLE:
3985 					credible_events++;
3986 					break;
3987 				case FME_DISPROVED:
3988 					break;
3989 				case FME_WAIT:
3990 					if (my_delay < arrow_delay)
3991 						arrow_delay = my_delay;
3992 					waiting_events++;
3993 					break;
3994 				default:
3995 					out(O_DIE,
3996 					"Bug in requirements_test.");
3997 				}
3998 			else
3999 				deferred_events++;
4000 		}
4001 		if (!(bp->mark & BUBBLE_OK) && waiting_events == 0) {
4002 			bp->mark |= BUBBLE_ELIDED;
4003 			continue;
4004 		}
4005 		indent();
4006 		out(O_ALTFP|O_VERB, " Credible: %d Waiting %d",
4007 		    credible_events + deferred_events, waiting_events);
4008 		if (credible_events + deferred_events + waiting_events < n) {
4009 			/* Can never meet requirements */
4010 			ep->cached_state |= REQMNTS_DISPROVED;
4011 			indent();
4012 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
4013 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4014 			out(O_ALTFP|O_VERB, NULL);
4015 			indent_pop();
4016 			return (FME_DISPROVED);
4017 		}
4018 		if (credible_events + deferred_events < n) {
4019 			/* will have to wait */
4020 			/* wait time is shortest known */
4021 			if (arrow_delay < overall_delay)
4022 				overall_delay = arrow_delay;
4023 			return_value = FME_WAIT;
4024 		} else if (credible_events < n) {
4025 			if (return_value != FME_WAIT)
4026 				return_value = FME_DEFERRED;
4027 		}
4028 	}
4029 
4030 	/*
4031 	 * don't mark as FME_DEFERRED. If this event isn't reached by another
4032 	 * path, then this will be considered FME_CREDIBLE. But if it is
4033 	 * reached by a different path so the K-count is met, then might
4034 	 * get overridden by FME_WAIT or FME_DISPROVED.
4035 	 */
4036 	if (return_value == FME_WAIT) {
4037 		ep->cached_state |= REQMNTS_WAIT;
4038 		ep->cached_delay = *pdelay = overall_delay;
4039 	} else if (return_value == FME_CREDIBLE) {
4040 		ep->cached_state |= REQMNTS_CREDIBLE;
4041 	}
4042 	indent();
4043 	out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ",
4044 	    fme_state2str(return_value));
4045 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4046 	out(O_ALTFP|O_VERB, NULL);
4047 	indent_pop();
4048 	return (return_value);
4049 }
4050 
4051 static enum fme_state
4052 causes_test(struct fme *fmep, struct event *ep,
4053     unsigned long long at_latest_by, unsigned long long *pdelay)
4054 {
4055 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4056 	unsigned long long my_delay;
4057 	int credible_results = 0;
4058 	int waiting_results = 0;
4059 	enum fme_state fstate;
4060 	struct event *tail_event;
4061 	struct bubble *bp;
4062 	struct arrowlist *ap;
4063 	int k = 1;
4064 
4065 	stats_counter_bump(fmep->Ccallcount);
4066 	indent_push("  C");
4067 	indent();
4068 	out(O_ALTFP|O_VERB|O_NONL, "->");
4069 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4070 	out(O_ALTFP|O_VERB, NULL);
4071 
4072 	for (bp = itree_next_bubble(ep, NULL); bp;
4073 	    bp = itree_next_bubble(ep, bp)) {
4074 		if (bp->t != B_TO)
4075 			continue;
4076 		k = bp->nork;	/* remember the K value */
4077 		for (ap = itree_next_arrow(bp, NULL); ap;
4078 		    ap = itree_next_arrow(bp, ap)) {
4079 			int do_not_follow = 0;
4080 
4081 			/*
4082 			 * if we get to the same event multiple times
4083 			 * only worry about the first one.
4084 			 */
4085 			if (ap->arrowp->tail->myevent->cached_state &
4086 			    CAUSES_TESTED) {
4087 				indent();
4088 				out(O_ALTFP|O_VERB|O_NONL,
4089 				    "  causes test already run for ");
4090 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4091 				    ap->arrowp->tail->myevent);
4092 				out(O_ALTFP|O_VERB, NULL);
4093 				continue;
4094 			}
4095 
4096 			/*
4097 			 * see if false constraint prevents us
4098 			 * from traversing this arrow
4099 			 */
4100 			platform_set_payloadnvp(ep->nvp);
4101 			if (checkconstraints(fmep, ap->arrowp) == 0)
4102 				do_not_follow = 1;
4103 			platform_set_payloadnvp(NULL);
4104 			if (do_not_follow) {
4105 				indent();
4106 				out(O_ALTFP|O_VERB|O_NONL,
4107 				    "  False arrow from ");
4108 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4109 				    ap->arrowp->tail->myevent);
4110 				out(O_ALTFP|O_VERB, NULL);
4111 				continue;
4112 			}
4113 
4114 			ap->arrowp->tail->myevent->cached_state |=
4115 			    CAUSES_TESTED;
4116 			tail_event = ap->arrowp->tail->myevent;
4117 			fstate = hypothesise(fmep, tail_event, at_latest_by,
4118 			    &my_delay);
4119 
4120 			switch (fstate) {
4121 			case FME_WAIT:
4122 				if (my_delay < overall_delay)
4123 					overall_delay = my_delay;
4124 				waiting_results++;
4125 				break;
4126 			case FME_CREDIBLE:
4127 				credible_results++;
4128 				break;
4129 			case FME_DISPROVED:
4130 				break;
4131 			default:
4132 				out(O_DIE, "Bug in causes_test");
4133 			}
4134 		}
4135 	}
4136 	/* compare against K */
4137 	if (credible_results + waiting_results < k) {
4138 		indent();
4139 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED ");
4140 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4141 		out(O_ALTFP|O_VERB, NULL);
4142 		indent_pop();
4143 		return (FME_DISPROVED);
4144 	}
4145 	if (waiting_results != 0) {
4146 		*pdelay = overall_delay;
4147 		indent();
4148 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT ");
4149 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4150 		out(O_ALTFP|O_VERB|O_NONL, " to ");
4151 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4152 		out(O_ALTFP|O_VERB, NULL);
4153 		indent_pop();
4154 		return (FME_WAIT);
4155 	}
4156 	indent();
4157 	out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE ");
4158 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4159 	out(O_ALTFP|O_VERB, NULL);
4160 	indent_pop();
4161 	return (FME_CREDIBLE);
4162 }
4163 
4164 static enum fme_state
4165 hypothesise(struct fme *fmep, struct event *ep,
4166 	unsigned long long at_latest_by, unsigned long long *pdelay)
4167 {
4168 	enum fme_state rtr, otr;
4169 	unsigned long long my_delay;
4170 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4171 
4172 	stats_counter_bump(fmep->Hcallcount);
4173 	indent_push("  H");
4174 	indent();
4175 	out(O_ALTFP|O_VERB|O_NONL, "->");
4176 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4177 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
4178 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4179 	out(O_ALTFP|O_VERB, NULL);
4180 
4181 	rtr = requirements_test(fmep, ep, at_latest_by, &my_delay);
4182 	if ((rtr == FME_WAIT) && (my_delay < overall_delay))
4183 		overall_delay = my_delay;
4184 	if (rtr != FME_DISPROVED) {
4185 		if (is_problem(ep->t)) {
4186 			otr = effects_test(fmep, ep, at_latest_by, &my_delay);
4187 			if (otr != FME_DISPROVED) {
4188 				if (fmep->peek == 0 && ep->is_suspect == 0) {
4189 					ep->suspects = fmep->suspects;
4190 					ep->is_suspect = 1;
4191 					fmep->suspects = ep;
4192 					fmep->nsuspects++;
4193 				}
4194 			}
4195 		} else
4196 			otr = causes_test(fmep, ep, at_latest_by, &my_delay);
4197 		if ((otr == FME_WAIT) && (my_delay < overall_delay))
4198 			overall_delay = my_delay;
4199 		if ((otr != FME_DISPROVED) &&
4200 		    ((rtr == FME_WAIT) || (otr == FME_WAIT)))
4201 			*pdelay = overall_delay;
4202 	}
4203 	if (rtr == FME_DISPROVED) {
4204 		indent();
4205 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4206 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4207 		out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
4208 		indent_pop();
4209 		return (FME_DISPROVED);
4210 	}
4211 	if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
4212 		indent();
4213 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4214 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4215 		out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
4216 		indent_pop();
4217 		return (FME_DISPROVED);
4218 	}
4219 	if (otr == FME_DISPROVED) {
4220 		indent();
4221 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4222 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4223 		out(O_ALTFP|O_VERB, " (causes are not credible)");
4224 		indent_pop();
4225 		return (FME_DISPROVED);
4226 	}
4227 	if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
4228 		indent();
4229 		out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
4230 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4231 		out(O_ALTFP|O_VERB|O_NONL, " to ");
4232 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
4233 		out(O_ALTFP|O_VERB, NULL);
4234 		indent_pop();
4235 		return (FME_WAIT);
4236 	}
4237 	indent();
4238 	out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
4239 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4240 	out(O_ALTFP|O_VERB, NULL);
4241 	indent_pop();
4242 	return (FME_CREDIBLE);
4243 }
4244 
4245 /*
4246  * fme_istat_load -- reconstitute any persistent istats
4247  */
4248 void
4249 fme_istat_load(fmd_hdl_t *hdl)
4250 {
4251 	int sz;
4252 	char *sbuf;
4253 	char *ptr;
4254 
4255 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) {
4256 		out(O_ALTFP, "fme_istat_load: No stats");
4257 		return;
4258 	}
4259 
4260 	sbuf = alloca(sz);
4261 
4262 	fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz);
4263 
4264 	/*
4265 	 * pick apart the serialized stats
4266 	 *
4267 	 * format is:
4268 	 *	<class-name>, '@', <path>, '\0', <value>, '\0'
4269 	 * for example:
4270 	 *	"stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
4271 	 *
4272 	 * since this is parsing our own serialized data, any parsing issues
4273 	 * are fatal, so we check for them all with ASSERT() below.
4274 	 */
4275 	ptr = sbuf;
4276 	while (ptr < &sbuf[sz]) {
4277 		char *sepptr;
4278 		struct node *np;
4279 		int val;
4280 
4281 		sepptr = strchr(ptr, '@');
4282 		ASSERT(sepptr != NULL);
4283 		*sepptr = '\0';
4284 
4285 		/* construct the event */
4286 		np = newnode(T_EVENT, NULL, 0);
4287 		np->u.event.ename = newnode(T_NAME, NULL, 0);
4288 		np->u.event.ename->u.name.t = N_STAT;
4289 		np->u.event.ename->u.name.s = stable(ptr);
4290 		np->u.event.ename->u.name.it = IT_ENAME;
4291 		np->u.event.ename->u.name.last = np->u.event.ename;
4292 
4293 		ptr = sepptr + 1;
4294 		ASSERT(ptr < &sbuf[sz]);
4295 		ptr += strlen(ptr);
4296 		ptr++;	/* move past the '\0' separating path from value */
4297 		ASSERT(ptr < &sbuf[sz]);
4298 		ASSERT(isdigit(*ptr));
4299 		val = atoi(ptr);
4300 		ASSERT(val > 0);
4301 		ptr += strlen(ptr);
4302 		ptr++;	/* move past the final '\0' for this entry */
4303 
4304 		np->u.event.epname = pathstring2epnamenp(sepptr + 1);
4305 		ASSERT(np->u.event.epname != NULL);
4306 
4307 		istat_bump(np, val);
4308 		tree_free(np);
4309 	}
4310 
4311 	istat_save();
4312 }
4313