xref: /titanic_51/usr/src/cmd/fm/modules/common/eversholt/fme.c (revision 6f25ad7ffa9acba13c9da0cb230544442ab650ce)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  *
25  * fme.c -- fault management exercise module
26  *
27  * this module provides the simulated fault management exercise.
28  */
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <ctype.h>
35 #include <alloca.h>
36 #include <libnvpair.h>
37 #include <sys/fm/protocol.h>
38 #include <fm/fmd_api.h>
39 #include "alloc.h"
40 #include "out.h"
41 #include "stats.h"
42 #include "stable.h"
43 #include "literals.h"
44 #include "lut.h"
45 #include "tree.h"
46 #include "ptree.h"
47 #include "itree.h"
48 #include "ipath.h"
49 #include "fme.h"
50 #include "evnv.h"
51 #include "eval.h"
52 #include "config.h"
53 #include "platform.h"
54 #include "esclex.h"
55 
56 /* imported from eft.c... */
57 extern hrtime_t Hesitate;
58 extern char *Serd_Override;
59 extern nv_alloc_t Eft_nv_hdl;
60 extern int Max_fme;
61 extern fmd_hdl_t *Hdl;
62 
63 static int Istat_need_save;
64 static int Serd_need_save;
65 void istat_save(void);
66 void serd_save(void);
67 
68 /* fme under construction is global so we can free it on module abort */
69 static struct fme *Nfmep;
70 
71 static int Undiag_reason = UD_VAL_UNKNOWN;
72 
73 static int Nextid = 0;
74 
75 static int Open_fme_count = 0;	/* Count of open FMEs */
76 
77 /* list of fault management exercises underway */
78 static struct fme {
79 	struct fme *next;		/* next exercise */
80 	unsigned long long ull;		/* time when fme was created */
81 	int id;				/* FME id */
82 	struct config *config;		/* cooked configuration data */
83 	struct lut *eventtree;		/* propagation tree for this FME */
84 	/*
85 	 * The initial error report that created this FME is kept in
86 	 * two forms.  e0 points to the instance tree node and is used
87 	 * by fme_eval() as the starting point for the inference
88 	 * algorithm.  e0r is the event handle FMD passed to us when
89 	 * the ereport first arrived and is used when setting timers,
90 	 * which are always relative to the time of this initial
91 	 * report.
92 	 */
93 	struct event *e0;
94 	fmd_event_t *e0r;
95 
96 	id_t    timer;			/* for setting an fmd time-out */
97 
98 	struct event *ecurrent;		/* ereport under consideration */
99 	struct event *suspects;		/* current suspect list */
100 	struct event *psuspects;	/* previous suspect list */
101 	int nsuspects;			/* count of suspects */
102 	int posted_suspects;		/* true if we've posted a diagnosis */
103 	int uniqobs;			/* number of unique events observed */
104 	int peek;			/* just peeking, don't track suspects */
105 	int overflow;			/* true if overflow FME */
106 	enum fme_state {
107 		FME_NOTHING = 5000,	/* not evaluated yet */
108 		FME_WAIT,		/* need to wait for more info */
109 		FME_CREDIBLE,		/* suspect list is credible */
110 		FME_DISPROVED,		/* no valid suspects found */
111 		FME_DEFERRED		/* don't know yet (k-count not met) */
112 	} state;
113 
114 	unsigned long long pull;	/* time passed since created */
115 	unsigned long long wull;	/* wait until this time for re-eval */
116 	struct event *observations;	/* observation list */
117 	struct lut *globals;		/* values of global variables */
118 	/* fmd interfacing */
119 	fmd_hdl_t *hdl;			/* handle for talking with fmd */
120 	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
121 	/* stats */
122 	struct stats *Rcount;
123 	struct stats *Hcallcount;
124 	struct stats *Rcallcount;
125 	struct stats *Ccallcount;
126 	struct stats *Ecallcount;
127 	struct stats *Tcallcount;
128 	struct stats *Marrowcount;
129 	struct stats *diags;
130 } *FMElist, *EFMElist, *ClosedFMEs;
131 
132 static struct case_list {
133 	fmd_case_t *fmcase;
134 	struct case_list *next;
135 } *Undiagablecaselist;
136 
137 static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
138 static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
139 	unsigned long long at_latest_by, unsigned long long *pdelay);
140 static struct node *eventprop_lookup(struct event *ep, const char *propname);
141 static struct node *pathstring2epnamenp(char *path);
142 static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
143 	fmd_case_t *fmcase, nvlist_t *detector, char *arg);
144 static char *undiag_2reason_str(int ud, char *arg);
145 static const char *undiag_2defect_str(int ud);
146 static void restore_suspects(struct fme *fmep);
147 static void save_suspects(struct fme *fmep);
148 static void destroy_fme(struct fme *f);
149 static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
150     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
151 static void istat_counter_reset_cb(struct istat_entry *entp,
152     struct stats *statp, const struct ipath *ipp);
153 static void istat_counter_topo_chg_cb(struct istat_entry *entp,
154     struct stats *statp, void *unused);
155 static void serd_reset_cb(struct serd_entry *entp, void *unused,
156     const struct ipath *ipp);
157 static void serd_topo_chg_cb(struct serd_entry *entp, void *unused,
158     void *unused2);
159 static void destroy_fme_bufs(struct fme *fp);
160 
161 static struct fme *
162 alloc_fme(void)
163 {
164 	struct fme *fmep;
165 
166 	fmep = MALLOC(sizeof (*fmep));
167 	bzero(fmep, sizeof (*fmep));
168 	return (fmep);
169 }
170 
171 /*
172  * fme_ready -- called when all initialization of the FME (except for
173  *	stats) has completed successfully.  Adds the fme to global lists
174  *	and establishes its stats.
175  */
176 static struct fme *
177 fme_ready(struct fme *fmep)
178 {
179 	char nbuf[100];
180 
181 	Nfmep = NULL;	/* don't need to free this on module abort now */
182 
183 	if (EFMElist) {
184 		EFMElist->next = fmep;
185 		EFMElist = fmep;
186 	} else
187 		FMElist = EFMElist = fmep;
188 
189 	(void) sprintf(nbuf, "fme%d.Rcount", fmep->id);
190 	fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
191 	(void) sprintf(nbuf, "fme%d.Hcall", fmep->id);
192 	fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1);
193 	(void) sprintf(nbuf, "fme%d.Rcall", fmep->id);
194 	fmep->Rcallcount = stats_new_counter(nbuf,
195 	    "calls to requirements_test()", 1);
196 	(void) sprintf(nbuf, "fme%d.Ccall", fmep->id);
197 	fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1);
198 	(void) sprintf(nbuf, "fme%d.Ecall", fmep->id);
199 	fmep->Ecallcount =
200 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
201 	(void) sprintf(nbuf, "fme%d.Tcall", fmep->id);
202 	fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
203 	(void) sprintf(nbuf, "fme%d.Marrow", fmep->id);
204 	fmep->Marrowcount = stats_new_counter(nbuf,
205 	    "arrows marked by mark_arrows()", 1);
206 	(void) sprintf(nbuf, "fme%d.diags", fmep->id);
207 	fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
208 
209 	out(O_ALTFP|O_VERB2, "newfme: config snapshot contains...");
210 	config_print(O_ALTFP|O_VERB2, fmep->config);
211 
212 	return (fmep);
213 }
214 
215 extern void ipath_dummy_lut(struct arrow *);
216 extern struct lut *itree_create_dummy(const char *, const struct ipath *);
217 
218 /* ARGSUSED */
219 static void
220 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
221 {
222 	struct bubble *bp;
223 	struct arrowlist *ap;
224 
225 	for (bp = itree_next_bubble(ep, NULL); bp;
226 	    bp = itree_next_bubble(ep, bp)) {
227 		if (bp->t != B_FROM)
228 			continue;
229 		for (ap = itree_next_arrow(bp, NULL); ap;
230 		    ap = itree_next_arrow(bp, ap)) {
231 			ap->arrowp->pnode->u.arrow.needed = 1;
232 			ipath_dummy_lut(ap->arrowp);
233 		}
234 	}
235 }
236 
237 /* ARGSUSED */
238 static void
239 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
240 {
241 	struct bubble *bp;
242 	struct arrowlist *ap;
243 
244 	for (bp = itree_next_bubble(ep, NULL); bp;
245 	    bp = itree_next_bubble(ep, bp)) {
246 		if (bp->t != B_FROM)
247 			continue;
248 		for (ap = itree_next_arrow(bp, NULL); ap;
249 		    ap = itree_next_arrow(bp, ap))
250 			ap->arrowp->pnode->u.arrow.needed = 0;
251 	}
252 }
253 
254 static void globals_destructor(void *left, void *right, void *arg);
255 static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep);
256 
257 static boolean_t
258 prune_propagations(const char *e0class, const struct ipath *e0ipp)
259 {
260 	char nbuf[100];
261 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
262 	extern struct lut *Usednames;
263 
264 	Nfmep = alloc_fme();
265 	Nfmep->id = Nextid;
266 	Nfmep->state = FME_NOTHING;
267 	Nfmep->eventtree = itree_create_dummy(e0class, e0ipp);
268 	if ((Nfmep->e0 =
269 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
270 		itree_free(Nfmep->eventtree);
271 		FREE(Nfmep);
272 		Nfmep = NULL;
273 		return (B_FALSE);
274 	}
275 	Nfmep->ecurrent = Nfmep->observations = Nfmep->e0;
276 	Nfmep->e0->count++;
277 
278 	(void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id);
279 	Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
280 	(void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id);
281 	Nfmep->Hcallcount =
282 	    stats_new_counter(nbuf, "calls to hypothesise()", 1);
283 	(void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id);
284 	Nfmep->Rcallcount = stats_new_counter(nbuf,
285 	    "calls to requirements_test()", 1);
286 	(void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id);
287 	Nfmep->Ccallcount =
288 	    stats_new_counter(nbuf, "calls to causes_test()", 1);
289 	(void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id);
290 	Nfmep->Ecallcount =
291 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
292 	(void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id);
293 	Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
294 	(void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id);
295 	Nfmep->Marrowcount = stats_new_counter(nbuf,
296 	    "arrows marked by mark_arrows()", 1);
297 	(void) sprintf(nbuf, "fme%d.diags", Nfmep->id);
298 	Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
299 
300 	Nfmep->peek = 1;
301 	lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep);
302 	lut_free(Usednames, NULL, NULL);
303 	Usednames = NULL;
304 	lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep);
305 	(void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay);
306 	itree_prune(Nfmep->eventtree);
307 	lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep);
308 
309 	stats_delete(Nfmep->Rcount);
310 	stats_delete(Nfmep->Hcallcount);
311 	stats_delete(Nfmep->Rcallcount);
312 	stats_delete(Nfmep->Ccallcount);
313 	stats_delete(Nfmep->Ecallcount);
314 	stats_delete(Nfmep->Tcallcount);
315 	stats_delete(Nfmep->Marrowcount);
316 	stats_delete(Nfmep->diags);
317 	itree_free(Nfmep->eventtree);
318 	lut_free(Nfmep->globals, globals_destructor, NULL);
319 	FREE(Nfmep);
320 	return (B_TRUE);
321 }
322 
323 static struct fme *
324 newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl,
325 	fmd_case_t *fmcase, fmd_event_t *ffep, nvlist_t *nvl)
326 {
327 	struct cfgdata *cfgdata;
328 	int init_size;
329 	extern int alloc_total();
330 	nvlist_t *detector = NULL;
331 	char *pathstr;
332 	char *arg;
333 
334 	/*
335 	 * First check if e0ipp is actually in the topology so we can give a
336 	 * more useful error message.
337 	 */
338 	ipathlastcomp(e0ipp);
339 	pathstr = ipath2str(NULL, e0ipp);
340 	cfgdata = config_snapshot();
341 	platform_units_translate(0, cfgdata->cooked, NULL, NULL,
342 	    &detector, pathstr);
343 	FREE(pathstr);
344 	structconfig_free(cfgdata->cooked);
345 	config_free(cfgdata);
346 	if (detector == NULL) {
347 		/* See if class permits silent discard on unknown component. */
348 		if (lut_lookup(Ereportenames_discard, (void *)e0class, NULL)) {
349 			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
350 			    "to component path, but silent discard allowed.",
351 			    e0class);
352 		} else {
353 			Undiag_reason = UD_VAL_BADEVENTPATH;
354 			(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
355 			    &detector);
356 			arg = ipath2str(e0class, e0ipp);
357 			publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
358 			FREE(arg);
359 		}
360 		return (NULL);
361 	}
362 
363 	/*
364 	 * Next run a quick first pass of the rules with a dummy config. This
365 	 * allows us to prune those rules which can't possibly cause this
366 	 * ereport.
367 	 */
368 	if (!prune_propagations(e0class, e0ipp)) {
369 		/*
370 		 * The fault class must have been in the rules or we would
371 		 * not have registered for it (and got a "nosub"), and the
372 		 * pathname must be in the topology or we would have failed the
373 		 * previous test. So to get here means the combination of
374 		 * class and pathname in the ereport must be invalid.
375 		 */
376 		Undiag_reason = UD_VAL_BADEVENTCLASS;
377 		arg = ipath2str(e0class, e0ipp);
378 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
379 		nvlist_free(detector);
380 		FREE(arg);
381 		return (NULL);
382 	}
383 
384 	/*
385 	 * Now go ahead and create the real fme using the pruned rules.
386 	 */
387 	init_size = alloc_total();
388 	out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size);
389 	cfgdata = config_snapshot();
390 	platform_save_config(hdl, fmcase);
391 	out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes",
392 	    alloc_total() - init_size);
393 
394 	Nfmep = alloc_fme();
395 
396 	Nfmep->id = Nextid++;
397 	Nfmep->config = cfgdata->cooked;
398 	config_free(cfgdata);
399 	Nfmep->posted_suspects = 0;
400 	Nfmep->uniqobs = 0;
401 	Nfmep->state = FME_NOTHING;
402 	Nfmep->pull = 0ULL;
403 	Nfmep->overflow = 0;
404 
405 	Nfmep->fmcase = fmcase;
406 	Nfmep->hdl = hdl;
407 
408 	if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) {
409 		Undiag_reason = UD_VAL_INSTFAIL;
410 		arg = ipath2str(e0class, e0ipp);
411 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
412 		nvlist_free(detector);
413 		FREE(arg);
414 		structconfig_free(Nfmep->config);
415 		destroy_fme_bufs(Nfmep);
416 		FREE(Nfmep);
417 		Nfmep = NULL;
418 		return (NULL);
419 	}
420 
421 	itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);
422 
423 	if ((Nfmep->e0 =
424 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
425 		Undiag_reason = UD_VAL_BADEVENTI;
426 		arg = ipath2str(e0class, e0ipp);
427 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
428 		nvlist_free(detector);
429 		FREE(arg);
430 		itree_free(Nfmep->eventtree);
431 		structconfig_free(Nfmep->config);
432 		destroy_fme_bufs(Nfmep);
433 		FREE(Nfmep);
434 		Nfmep = NULL;
435 		return (NULL);
436 	}
437 
438 	nvlist_free(detector);
439 	return (fme_ready(Nfmep));
440 }
441 
442 void
443 fme_fini(void)
444 {
445 	struct fme *sfp, *fp;
446 	struct case_list *ucasep, *nextcasep;
447 
448 	ucasep = Undiagablecaselist;
449 	while (ucasep != NULL) {
450 		nextcasep = ucasep->next;
451 		FREE(ucasep);
452 		ucasep = nextcasep;
453 	}
454 	Undiagablecaselist = NULL;
455 
456 	/* clean up closed fmes */
457 	fp = ClosedFMEs;
458 	while (fp != NULL) {
459 		sfp = fp->next;
460 		destroy_fme(fp);
461 		fp = sfp;
462 	}
463 	ClosedFMEs = NULL;
464 
465 	fp = FMElist;
466 	while (fp != NULL) {
467 		sfp = fp->next;
468 		destroy_fme(fp);
469 		fp = sfp;
470 	}
471 	FMElist = EFMElist = NULL;
472 
473 	/* if we were in the middle of creating an fme, free it now */
474 	if (Nfmep) {
475 		destroy_fme(Nfmep);
476 		Nfmep = NULL;
477 	}
478 }
479 
480 /*
481  * Allocated space for a buffer name.  20 bytes allows for
482  * a ridiculous 9,999,999 unique observations.
483  */
484 #define	OBBUFNMSZ 20
485 
486 /*
487  *  serialize_observation
488  *
489  *  Create a recoverable version of the current observation
490  *  (f->ecurrent).  We keep a serialized version of each unique
491  *  observation in order that we may resume correctly the fme in the
492  *  correct state if eft or fmd crashes and we're restarted.
493  */
494 static void
495 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
496 {
497 	size_t pkdlen;
498 	char tmpbuf[OBBUFNMSZ];
499 	char *pkd = NULL;
500 	char *estr;
501 
502 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
503 	estr = ipath2str(cls, ipp);
504 	fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
505 	fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
506 	    strlen(estr) + 1);
507 	FREE(estr);
508 
509 	if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
510 		(void) snprintf(tmpbuf,
511 		    OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
512 		if (nvlist_xpack(fp->ecurrent->nvp,
513 		    &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
514 			out(O_DIE|O_SYS, "pack of observed nvl failed");
515 		fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
516 		fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
517 		FREE(pkd);
518 	}
519 
520 	fp->uniqobs++;
521 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
522 	    sizeof (fp->uniqobs));
523 }
524 
525 /*
526  *  init_fme_bufs -- We keep several bits of state about an fme for
527  *	use if eft or fmd crashes and we're restarted.
528  */
529 static void
530 init_fme_bufs(struct fme *fp)
531 {
532 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
533 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
534 	    sizeof (fp->pull));
535 
536 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
537 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
538 	    sizeof (fp->id));
539 
540 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
541 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
542 	    sizeof (fp->uniqobs));
543 
544 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
545 	    sizeof (fp->posted_suspects));
546 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
547 	    (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
548 }
549 
550 static void
551 destroy_fme_bufs(struct fme *fp)
552 {
553 	char tmpbuf[OBBUFNMSZ];
554 	int o;
555 
556 	platform_restore_config(fp->hdl, fp->fmcase);
557 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
558 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
559 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
560 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
561 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
562 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);
563 
564 	for (o = 0; o < fp->uniqobs; o++) {
565 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
566 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
567 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
568 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
569 	}
570 }
571 
572 /*
573  * reconstitute_observations -- convert a case's serialized observations
574  *	back into struct events.  Returns zero if all observations are
575  *	successfully reconstituted.
576  */
577 static int
578 reconstitute_observations(struct fme *fmep)
579 {
580 	struct event *ep;
581 	struct node *epnamenp = NULL;
582 	size_t pkdlen;
583 	char *pkd = NULL;
584 	char *tmpbuf = alloca(OBBUFNMSZ);
585 	char *sepptr;
586 	char *estr;
587 	int ocnt;
588 	int elen;
589 
590 	for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
591 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
592 		elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
593 		if (elen == 0) {
594 			out(O_ALTFP,
595 			    "reconstitute_observation: no %s buffer found.",
596 			    tmpbuf);
597 			Undiag_reason = UD_VAL_MISSINGOBS;
598 			break;
599 		}
600 
601 		estr = MALLOC(elen);
602 		fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
603 		sepptr = strchr(estr, '@');
604 		if (sepptr == NULL) {
605 			out(O_ALTFP,
606 			    "reconstitute_observation: %s: "
607 			    "missing @ separator in %s.",
608 			    tmpbuf, estr);
609 			Undiag_reason = UD_VAL_MISSINGPATH;
610 			FREE(estr);
611 			break;
612 		}
613 
614 		*sepptr = '\0';
615 		if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
616 			out(O_ALTFP,
617 			    "reconstitute_observation: %s: "
618 			    "trouble converting path string \"%s\" "
619 			    "to internal representation.",
620 			    tmpbuf, sepptr + 1);
621 			Undiag_reason = UD_VAL_MISSINGPATH;
622 			FREE(estr);
623 			break;
624 		}
625 
626 		/* construct the event */
627 		ep = itree_lookup(fmep->eventtree,
628 		    stable(estr), ipath(epnamenp));
629 		if (ep == NULL) {
630 			out(O_ALTFP,
631 			    "reconstitute_observation: %s: "
632 			    "lookup of  \"%s\" in itree failed.",
633 			    tmpbuf, ipath2str(estr, ipath(epnamenp)));
634 			Undiag_reason = UD_VAL_BADOBS;
635 			tree_free(epnamenp);
636 			FREE(estr);
637 			break;
638 		}
639 		tree_free(epnamenp);
640 
641 		/*
642 		 * We may or may not have a saved nvlist for the observation
643 		 */
644 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
645 		pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
646 		if (pkdlen != 0) {
647 			pkd = MALLOC(pkdlen);
648 			fmd_buf_read(fmep->hdl,
649 			    fmep->fmcase, tmpbuf, pkd, pkdlen);
650 			ASSERT(ep->nvp == NULL);
651 			if (nvlist_xunpack(pkd,
652 			    pkdlen, &ep->nvp, &Eft_nv_hdl) != 0)
653 				out(O_DIE|O_SYS, "pack of observed nvl failed");
654 			FREE(pkd);
655 		}
656 
657 		if (ocnt == 0)
658 			fmep->e0 = ep;
659 
660 		FREE(estr);
661 		fmep->ecurrent = ep;
662 		ep->count++;
663 
664 		/* link it into list of observations seen */
665 		ep->observations = fmep->observations;
666 		fmep->observations = ep;
667 	}
668 
669 	if (ocnt == fmep->uniqobs) {
670 		(void) fme_ready(fmep);
671 		return (0);
672 	}
673 
674 	return (1);
675 }
676 
677 /*
678  * restart_fme -- called during eft initialization.  Reconstitutes
679  *	an in-progress fme.
680  */
681 void
682 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress)
683 {
684 	nvlist_t *defect;
685 	struct case_list *bad;
686 	struct fme *fmep;
687 	struct cfgdata *cfgdata;
688 	size_t rawsz;
689 	struct event *ep;
690 	char *tmpbuf = alloca(OBBUFNMSZ);
691 	char *sepptr;
692 	char *estr;
693 	int elen;
694 	struct node *epnamenp = NULL;
695 	int init_size;
696 	extern int alloc_total();
697 	char *reason;
698 
699 	/*
700 	 * ignore solved or closed cases
701 	 */
702 	if (fmd_case_solved(hdl, inprogress) ||
703 	    fmd_case_closed(hdl, inprogress))
704 		return;
705 
706 	fmep = alloc_fme();
707 	fmep->fmcase = inprogress;
708 	fmep->hdl = hdl;
709 
710 	if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) {
711 		out(O_ALTFP, "restart_fme: no saved posted status");
712 		Undiag_reason = UD_VAL_MISSINGINFO;
713 		goto badcase;
714 	} else {
715 		fmd_buf_read(hdl, inprogress, WOBUF_POSTD,
716 		    (void *)&fmep->posted_suspects,
717 		    sizeof (fmep->posted_suspects));
718 	}
719 
720 	if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) {
721 		out(O_ALTFP, "restart_fme: no saved id");
722 		Undiag_reason = UD_VAL_MISSINGINFO;
723 		goto badcase;
724 	} else {
725 		fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id,
726 		    sizeof (fmep->id));
727 	}
728 	if (Nextid <= fmep->id)
729 		Nextid = fmep->id + 1;
730 
731 	out(O_ALTFP, "Replay FME %d", fmep->id);
732 
733 	if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) {
734 		out(O_ALTFP, "restart_fme: No config data");
735 		Undiag_reason = UD_VAL_MISSINGINFO;
736 		goto badcase;
737 	}
738 	fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz,
739 	    sizeof (size_t));
740 
741 	if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) {
742 		out(O_ALTFP, "restart_fme: No event zero");
743 		Undiag_reason = UD_VAL_MISSINGZERO;
744 		goto badcase;
745 	}
746 
747 	if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
748 		out(O_ALTFP, "restart_fme: no saved wait time");
749 		Undiag_reason = UD_VAL_MISSINGINFO;
750 		goto badcase;
751 	} else {
752 		fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
753 		    sizeof (fmep->pull));
754 	}
755 
756 	if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
757 		out(O_ALTFP, "restart_fme: no count of observations");
758 		Undiag_reason = UD_VAL_MISSINGINFO;
759 		goto badcase;
760 	} else {
761 		fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
762 		    (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
763 	}
764 
765 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed0");
766 	elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
767 	if (elen == 0) {
768 		out(O_ALTFP, "reconstitute_observation: no %s buffer found.",
769 		    tmpbuf);
770 		Undiag_reason = UD_VAL_MISSINGOBS;
771 		goto badcase;
772 	}
773 	estr = MALLOC(elen);
774 	fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
775 	sepptr = strchr(estr, '@');
776 	if (sepptr == NULL) {
777 		out(O_ALTFP, "reconstitute_observation: %s: "
778 		    "missing @ separator in %s.",
779 		    tmpbuf, estr);
780 		Undiag_reason = UD_VAL_MISSINGPATH;
781 		FREE(estr);
782 		goto badcase;
783 	}
784 	*sepptr = '\0';
785 	if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
786 		out(O_ALTFP, "reconstitute_observation: %s: "
787 		    "trouble converting path string \"%s\" "
788 		    "to internal representation.", tmpbuf, sepptr + 1);
789 		Undiag_reason = UD_VAL_MISSINGPATH;
790 		FREE(estr);
791 		goto badcase;
792 	}
793 	(void) prune_propagations(stable(estr), ipath(epnamenp));
794 	tree_free(epnamenp);
795 	FREE(estr);
796 
797 	init_size = alloc_total();
798 	out(O_ALTFP|O_STAMP, "start config_restore using %d bytes", init_size);
799 	cfgdata = MALLOC(sizeof (struct cfgdata));
800 	cfgdata->cooked = NULL;
801 	cfgdata->devcache = NULL;
802 	cfgdata->devidcache = NULL;
803 	cfgdata->tpcache = NULL;
804 	cfgdata->cpucache = NULL;
805 	cfgdata->raw_refcnt = 1;
806 
807 	if (rawsz > 0) {
808 		if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) {
809 			out(O_ALTFP, "restart_fme: Config data size mismatch");
810 			Undiag_reason = UD_VAL_CFGMISMATCH;
811 			goto badcase;
812 		}
813 		cfgdata->begin = MALLOC(rawsz);
814 		cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz;
815 		fmd_buf_read(hdl,
816 		    inprogress, WOBUF_CFG, cfgdata->begin, rawsz);
817 	} else {
818 		cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL;
819 	}
820 
821 	config_cook(cfgdata);
822 	fmep->config = cfgdata->cooked;
823 	config_free(cfgdata);
824 	out(O_ALTFP|O_STAMP, "config_restore added %d bytes",
825 	    alloc_total() - init_size);
826 
827 	if ((fmep->eventtree = itree_create(fmep->config)) == NULL) {
828 		/* case not properly saved or irretrievable */
829 		out(O_ALTFP, "restart_fme: NULL instance tree");
830 		Undiag_reason = UD_VAL_INSTFAIL;
831 		goto badcase;
832 	}
833 
834 	itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree);
835 
836 	if (reconstitute_observations(fmep) != 0)
837 		goto badcase;
838 
839 	out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id);
840 	for (ep = fmep->observations; ep; ep = ep->observations) {
841 		out(O_ALTFP|O_NONL, " ");
842 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
843 	}
844 	out(O_ALTFP, NULL);
845 
846 	Open_fme_count++;
847 
848 	/* give the diagnosis algorithm a shot at the new FME state */
849 	fme_eval(fmep, fmep->e0r);
850 	return;
851 
852 badcase:
853 	if (fmep->eventtree != NULL)
854 		itree_free(fmep->eventtree);
855 	if (fmep->config)
856 		structconfig_free(fmep->config);
857 	destroy_fme_bufs(fmep);
858 	FREE(fmep);
859 
860 	/*
861 	 * Since we're unable to restart the case, add it to the undiagable
862 	 * list and solve and close it as appropriate.
863 	 */
864 	bad = MALLOC(sizeof (struct case_list));
865 	bad->next = NULL;
866 
867 	if (Undiagablecaselist != NULL)
868 		bad->next = Undiagablecaselist;
869 	Undiagablecaselist = bad;
870 	bad->fmcase = inprogress;
871 
872 	out(O_ALTFP|O_NONL, "[case %s (unable to restart), ",
873 	    fmd_case_uuid(hdl, bad->fmcase));
874 
875 	if (fmd_case_solved(hdl, bad->fmcase)) {
876 		out(O_ALTFP|O_NONL, "already solved, ");
877 	} else {
878 		out(O_ALTFP|O_NONL, "solving, ");
879 		defect = fmd_nvl_create_fault(hdl,
880 		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
881 		reason = undiag_2reason_str(Undiag_reason, NULL);
882 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
883 		FREE(reason);
884 		fmd_case_add_suspect(hdl, bad->fmcase, defect);
885 		fmd_case_solve(hdl, bad->fmcase);
886 		Undiag_reason = UD_VAL_UNKNOWN;
887 	}
888 
889 	if (fmd_case_closed(hdl, bad->fmcase)) {
890 		out(O_ALTFP, "already closed ]");
891 	} else {
892 		out(O_ALTFP, "closing ]");
893 		fmd_case_close(hdl, bad->fmcase);
894 	}
895 }
896 
897 /*ARGSUSED*/
898 static void
899 globals_destructor(void *left, void *right, void *arg)
900 {
901 	struct evalue *evp = (struct evalue *)right;
902 	if (evp->t == NODEPTR)
903 		tree_free((struct node *)(uintptr_t)evp->v);
904 	evp->v = (uintptr_t)NULL;
905 	FREE(evp);
906 }
907 
908 void
909 destroy_fme(struct fme *f)
910 {
911 	stats_delete(f->Rcount);
912 	stats_delete(f->Hcallcount);
913 	stats_delete(f->Rcallcount);
914 	stats_delete(f->Ccallcount);
915 	stats_delete(f->Ecallcount);
916 	stats_delete(f->Tcallcount);
917 	stats_delete(f->Marrowcount);
918 	stats_delete(f->diags);
919 
920 	if (f->eventtree != NULL)
921 		itree_free(f->eventtree);
922 	if (f->config)
923 		structconfig_free(f->config);
924 	lut_free(f->globals, globals_destructor, NULL);
925 	FREE(f);
926 }
927 
928 static const char *
929 fme_state2str(enum fme_state s)
930 {
931 	switch (s) {
932 	case FME_NOTHING:	return ("NOTHING");
933 	case FME_WAIT:		return ("WAIT");
934 	case FME_CREDIBLE:	return ("CREDIBLE");
935 	case FME_DISPROVED:	return ("DISPROVED");
936 	case FME_DEFERRED:	return ("DEFERRED");
937 	default:		return ("UNKNOWN");
938 	}
939 }
940 
941 static int
942 is_problem(enum nametype t)
943 {
944 	return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
945 }
946 
947 static int
948 is_defect(enum nametype t)
949 {
950 	return (t == N_DEFECT);
951 }
952 
953 static int
954 is_upset(enum nametype t)
955 {
956 	return (t == N_UPSET);
957 }
958 
959 static void
960 fme_print(int flags, struct fme *fmep)
961 {
962 	struct event *ep;
963 
964 	out(flags, "Fault Management Exercise %d", fmep->id);
965 	out(flags, "\t       State: %s", fme_state2str(fmep->state));
966 	out(flags|O_NONL, "\t  Start time: ");
967 	ptree_timeval(flags|O_NONL, &fmep->ull);
968 	out(flags, NULL);
969 	if (fmep->wull) {
970 		out(flags|O_NONL, "\t   Wait time: ");
971 		ptree_timeval(flags|O_NONL, &fmep->wull);
972 		out(flags, NULL);
973 	}
974 	out(flags|O_NONL, "\t          E0: ");
975 	if (fmep->e0)
976 		itree_pevent_brief(flags|O_NONL, fmep->e0);
977 	else
978 		out(flags|O_NONL, "NULL");
979 	out(flags, NULL);
980 	out(flags|O_NONL, "\tObservations:");
981 	for (ep = fmep->observations; ep; ep = ep->observations) {
982 		out(flags|O_NONL, " ");
983 		itree_pevent_brief(flags|O_NONL, ep);
984 	}
985 	out(flags, NULL);
986 	out(flags|O_NONL, "\tSuspect list:");
987 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
988 		out(flags|O_NONL, " ");
989 		itree_pevent_brief(flags|O_NONL, ep);
990 	}
991 	out(flags, NULL);
992 	if (fmep->eventtree != NULL) {
993 		out(flags|O_VERB2, "\t        Tree:");
994 		itree_ptree(flags|O_VERB2, fmep->eventtree);
995 	}
996 }
997 
998 static struct node *
999 pathstring2epnamenp(char *path)
1000 {
1001 	char *sep = "/";
1002 	struct node *ret;
1003 	char *ptr;
1004 
1005 	if ((ptr = strtok(path, sep)) == NULL)
1006 		out(O_DIE, "pathstring2epnamenp: invalid empty class");
1007 
1008 	ret = tree_iname(stable(ptr), NULL, 0);
1009 
1010 	while ((ptr = strtok(NULL, sep)) != NULL)
1011 		ret = tree_name_append(ret,
1012 		    tree_iname(stable(ptr), NULL, 0));
1013 
1014 	return (ret);
1015 }
1016 
1017 /*
1018  * for a given upset sp, increment the corresponding SERD engine.  if the
1019  * SERD engine trips, return the ename and ipp of the resulting ereport.
1020  * returns true if engine tripped and *enamep and *ippp were filled in.
1021  */
1022 static int
1023 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep,
1024     fmd_case_t *fmcase, struct event *sp, const char **enamep,
1025     const struct ipath **ippp)
1026 {
1027 	struct node *serdinst;
1028 	char *serdname;
1029 	char *serdresource;
1030 	char *serdclass;
1031 	struct node *nid;
1032 	struct serd_entry *newentp;
1033 	int i, serdn = -1, serdincrement = 1, len = 0;
1034 	char *serdsuffix = NULL, *serdt = NULL;
1035 	struct evalue *ep;
1036 
1037 	ASSERT(sp->t == N_UPSET);
1038 	ASSERT(ffep != NULL);
1039 
1040 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1041 	    (void *)"n", (lut_cmp)strcmp)) != NULL) {
1042 		ASSERT(ep->t == UINT64);
1043 		serdn = (int)ep->v;
1044 	}
1045 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1046 	    (void *)"t", (lut_cmp)strcmp)) != NULL) {
1047 		ASSERT(ep->t == STRING);
1048 		serdt = (char *)(uintptr_t)ep->v;
1049 	}
1050 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1051 	    (void *)"suffix", (lut_cmp)strcmp)) != NULL) {
1052 		ASSERT(ep->t == STRING);
1053 		serdsuffix = (char *)(uintptr_t)ep->v;
1054 	}
1055 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1056 	    (void *)"increment", (lut_cmp)strcmp)) != NULL) {
1057 		ASSERT(ep->t == UINT64);
1058 		serdincrement = (int)ep->v;
1059 	}
1060 
1061 	/*
1062 	 * obtain instanced SERD engine from the upset sp.  from this
1063 	 * derive serdname, the string used to identify the SERD engine.
1064 	 */
1065 	serdinst = eventprop_lookup(sp, L_engine);
1066 
1067 	if (serdinst == NULL)
1068 		return (-1);
1069 
1070 	len = strlen(serdinst->u.stmt.np->u.event.ename->u.name.s) + 1;
1071 	if (serdsuffix != NULL)
1072 		len += strlen(serdsuffix);
1073 	serdclass = MALLOC(len);
1074 	if (serdsuffix != NULL)
1075 		(void) snprintf(serdclass, len, "%s%s",
1076 		    serdinst->u.stmt.np->u.event.ename->u.name.s, serdsuffix);
1077 	else
1078 		(void) snprintf(serdclass, len, "%s",
1079 		    serdinst->u.stmt.np->u.event.ename->u.name.s);
1080 	serdresource = ipath2str(NULL,
1081 	    ipath(serdinst->u.stmt.np->u.event.epname));
1082 	len += strlen(serdresource) + 1;
1083 	serdname = MALLOC(len);
1084 	(void) snprintf(serdname, len, "%s@%s", serdclass, serdresource);
1085 	FREE(serdresource);
1086 
1087 	/* handle serd engine "id" property, if there is one */
1088 	if ((nid =
1089 	    lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) {
1090 		struct evalue *gval;
1091 		char suffixbuf[200];
1092 		char *suffix;
1093 		char *nserdname;
1094 		size_t nname;
1095 
1096 		out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname);
1097 		ptree_name_iter(O_ALTFP|O_NONL, nid);
1098 
1099 		ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t));
1100 
1101 		if ((gval = lut_lookup(fmep->globals,
1102 		    (void *)nid->u.globid.s, NULL)) == NULL) {
1103 			out(O_ALTFP, " undefined");
1104 		} else if (gval->t == UINT64) {
1105 			out(O_ALTFP, " %llu", gval->v);
1106 			(void) sprintf(suffixbuf, "%llu", gval->v);
1107 			suffix = suffixbuf;
1108 		} else {
1109 			out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v);
1110 			suffix = (char *)(uintptr_t)gval->v;
1111 		}
1112 
1113 		nname = strlen(serdname) + strlen(suffix) + 2;
1114 		nserdname = MALLOC(nname);
1115 		(void) snprintf(nserdname, nname, "%s:%s", serdname, suffix);
1116 		FREE(serdname);
1117 		serdname = nserdname;
1118 	}
1119 
1120 	/*
1121 	 * if the engine is empty, and we have an override for n/t then
1122 	 * destroy and recreate it.
1123 	 */
1124 	if ((serdn != -1 || serdt != NULL) && fmd_serd_exists(hdl, serdname) &&
1125 	    fmd_serd_empty(hdl, serdname))
1126 		fmd_serd_destroy(hdl, serdname);
1127 
1128 	if (!fmd_serd_exists(hdl, serdname)) {
1129 		struct node *nN, *nT;
1130 		const char *s;
1131 		struct node *nodep;
1132 		struct config *cp;
1133 		char *path;
1134 		uint_t nval;
1135 		hrtime_t tval;
1136 		int i;
1137 		char *ptr;
1138 		int got_n_override = 0, got_t_override = 0;
1139 
1140 		/* no SERD engine yet, so create it */
1141 		nodep = serdinst->u.stmt.np->u.event.epname;
1142 		path = ipath2str(NULL, ipath(nodep));
1143 		cp = config_lookup(fmep->config, path, 0);
1144 		FREE((void *)path);
1145 
1146 		/*
1147 		 * We allow serd paramaters to be overridden, either from
1148 		 * eft.conf file values (if Serd_Override is set) or from
1149 		 * driver properties (for "serd.io.device" engines).
1150 		 */
1151 		if (Serd_Override != NULL) {
1152 			char *save_ptr, *ptr1, *ptr2, *ptr3;
1153 			ptr3 = save_ptr = STRDUP(Serd_Override);
1154 			while (*ptr3 != '\0') {
1155 				ptr1 = strchr(ptr3, ',');
1156 				*ptr1 = '\0';
1157 				if (strcmp(ptr3, serdclass) == 0) {
1158 					ptr2 =  strchr(ptr1 + 1, ',');
1159 					*ptr2 = '\0';
1160 					nval = atoi(ptr1 + 1);
1161 					out(O_ALTFP, "serd override %s_n %d",
1162 					    serdclass, nval);
1163 					ptr3 =  strchr(ptr2 + 1, ' ');
1164 					if (ptr3)
1165 						*ptr3 = '\0';
1166 					ptr = STRDUP(ptr2 + 1);
1167 					out(O_ALTFP, "serd override %s_t %s",
1168 					    serdclass, ptr);
1169 					got_n_override = 1;
1170 					got_t_override = 1;
1171 					break;
1172 				} else {
1173 					ptr2 =  strchr(ptr1 + 1, ',');
1174 					ptr3 =  strchr(ptr2 + 1, ' ');
1175 					if (ptr3 == NULL)
1176 						break;
1177 				}
1178 				ptr3++;
1179 			}
1180 			FREE(save_ptr);
1181 		}
1182 
1183 		if (cp && got_n_override == 0) {
1184 			/*
1185 			 * convert serd engine class into property name
1186 			 */
1187 			char *prop_name = MALLOC(strlen(serdclass) + 3);
1188 			for (i = 0; i < strlen(serdclass); i++) {
1189 				if (serdclass[i] == '.')
1190 					prop_name[i] = '_';
1191 				else
1192 					prop_name[i] = serdclass[i];
1193 			}
1194 			prop_name[i++] = '_';
1195 			prop_name[i++] = 'n';
1196 			prop_name[i] = '\0';
1197 			if (s = config_getprop(cp, prop_name)) {
1198 				nval = atoi(s);
1199 				out(O_ALTFP, "serd override %s_n %s",
1200 				    serdclass, s);
1201 				got_n_override = 1;
1202 			}
1203 			prop_name[i - 1] = 't';
1204 			if (s = config_getprop(cp, prop_name)) {
1205 				ptr = STRDUP(s);
1206 				out(O_ALTFP, "serd override %s_t %s",
1207 				    serdclass, s);
1208 				got_t_override = 1;
1209 			}
1210 			FREE(prop_name);
1211 		}
1212 
1213 		if (serdn != -1 && got_n_override == 0) {
1214 			nval = serdn;
1215 			out(O_ALTFP, "serd override %s_n %d", serdclass, serdn);
1216 			got_n_override = 1;
1217 		}
1218 		if (serdt != NULL && got_t_override == 0) {
1219 			ptr = STRDUP(serdt);
1220 			out(O_ALTFP, "serd override %s_t %s", serdclass, serdt);
1221 			got_t_override = 1;
1222 		}
1223 
1224 		if (!got_n_override) {
1225 			nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N,
1226 			    NULL);
1227 			ASSERT(nN->t == T_NUM);
1228 			nval = (uint_t)nN->u.ull;
1229 		}
1230 		if (!got_t_override) {
1231 			nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T,
1232 			    NULL);
1233 			ASSERT(nT->t == T_TIMEVAL);
1234 			tval = (hrtime_t)nT->u.ull;
1235 		} else {
1236 			const unsigned long long *ullp;
1237 			const char *suffix;
1238 			int len;
1239 
1240 			len = strspn(ptr, "0123456789");
1241 			suffix = stable(&ptr[len]);
1242 			ullp = (unsigned long long *)lut_lookup(Timesuffixlut,
1243 			    (void *)suffix, NULL);
1244 			ptr[len] = '\0';
1245 			tval = strtoull(ptr, NULL, 0) * (ullp ? *ullp : 1ll);
1246 			FREE(ptr);
1247 		}
1248 		fmd_serd_create(hdl, serdname, nval, tval);
1249 	}
1250 
1251 	newentp = MALLOC(sizeof (*newentp));
1252 	newentp->ename = stable(serdclass);
1253 	FREE(serdclass);
1254 	newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname);
1255 	newentp->hdl = hdl;
1256 	if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) {
1257 		SerdEngines = lut_add(SerdEngines, (void *)newentp,
1258 		    (void *)newentp, (lut_cmp)serd_cmp);
1259 		Serd_need_save = 1;
1260 		serd_save();
1261 	} else {
1262 		FREE(newentp);
1263 	}
1264 
1265 
1266 	/*
1267 	 * increment SERD engine.  if engine fires, reset serd
1268 	 * engine and return trip_strcode if required.
1269 	 */
1270 	for (i = 0; i < serdincrement; i++) {
1271 		if (fmd_serd_record(hdl, serdname, ffep)) {
1272 			fmd_case_add_serd(hdl, fmcase, serdname);
1273 			fmd_serd_reset(hdl, serdname);
1274 
1275 			if (ippp) {
1276 				struct node *tripinst =
1277 				    lut_lookup(serdinst->u.stmt.lutp,
1278 				    (void *)L_trip, NULL);
1279 				ASSERT(tripinst != NULL);
1280 				*enamep = tripinst->u.event.ename->u.name.s;
1281 				*ippp = ipath(tripinst->u.event.epname);
1282 				out(O_ALTFP|O_NONL,
1283 				    "[engine fired: %s, sending: ", serdname);
1284 				ipath_print(O_ALTFP|O_NONL, *enamep, *ippp);
1285 				out(O_ALTFP, "]");
1286 			} else {
1287 				out(O_ALTFP, "[engine fired: %s, no trip]",
1288 				    serdname);
1289 			}
1290 			FREE(serdname);
1291 			return (1);
1292 		}
1293 	}
1294 
1295 	FREE(serdname);
1296 	return (0);
1297 }
1298 
1299 /*
1300  * search a suspect list for upsets.  feed each upset to serd_eval() and
1301  * build up tripped[], an array of ereports produced by the firing of
1302  * any SERD engines.  then feed each ereport back into
1303  * fme_receive_report().
1304  *
1305  * returns ntrip, the number of these ereports produced.
1306  */
1307 static int
1308 upsets_eval(struct fme *fmep, fmd_event_t *ffep)
1309 {
1310 	/* we build an array of tripped ereports that we send ourselves */
1311 	struct {
1312 		const char *ename;
1313 		const struct ipath *ipp;
1314 	} *tripped;
1315 	struct event *sp;
1316 	int ntrip, nupset, i;
1317 
1318 	/*
1319 	 * count the number of upsets to determine the upper limit on
1320 	 * expected trip ereport strings.  remember that one upset can
1321 	 * lead to at most one ereport.
1322 	 */
1323 	nupset = 0;
1324 	for (sp = fmep->suspects; sp; sp = sp->suspects) {
1325 		if (sp->t == N_UPSET)
1326 			nupset++;
1327 	}
1328 
1329 	if (nupset == 0)
1330 		return (0);
1331 
1332 	/*
1333 	 * get to this point if we have upsets and expect some trip
1334 	 * ereports
1335 	 */
1336 	tripped = alloca(sizeof (*tripped) * nupset);
1337 	bzero((void *)tripped, sizeof (*tripped) * nupset);
1338 
1339 	ntrip = 0;
1340 	for (sp = fmep->suspects; sp; sp = sp->suspects)
1341 		if (sp->t == N_UPSET &&
1342 		    serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp,
1343 		    &tripped[ntrip].ename, &tripped[ntrip].ipp) == 1)
1344 			ntrip++;
1345 
1346 	for (i = 0; i < ntrip; i++) {
1347 		struct event *ep, *nep;
1348 		struct fme *nfmep;
1349 		fmd_case_t *fmcase;
1350 		const struct ipath *ipp;
1351 		const char *eventstring;
1352 		int prev_verbose;
1353 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1354 		enum fme_state state;
1355 
1356 		/*
1357 		 * First try and evaluate a case with the trip ereport plus
1358 		 * all the other ereports that cause the trip. If that fails
1359 		 * to evaluate then try again with just this ereport on its own.
1360 		 */
1361 		out(O_ALTFP|O_NONL, "fme_receive_report_serd: ");
1362 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1363 		out(O_ALTFP|O_STAMP, NULL);
1364 		ep = fmep->e0;
1365 		eventstring = ep->enode->u.event.ename->u.name.s;
1366 		ipp = ep->ipp;
1367 
1368 		/*
1369 		 * create a duplicate fme and case
1370 		 */
1371 		fmcase = fmd_case_open(fmep->hdl, NULL);
1372 		out(O_ALTFP|O_NONL, "duplicate fme for event [");
1373 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1374 		out(O_ALTFP, " ]");
1375 
1376 		if ((nfmep = newfme(eventstring, ipp, fmep->hdl,
1377 		    fmcase, ffep, ep->nvp)) == NULL) {
1378 			out(O_ALTFP|O_NONL, "[");
1379 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1380 			out(O_ALTFP, " CANNOT DIAGNOSE]");
1381 			continue;
1382 		}
1383 
1384 		Open_fme_count++;
1385 		nfmep->pull = fmep->pull;
1386 		init_fme_bufs(nfmep);
1387 		out(O_ALTFP|O_NONL, "[");
1388 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1389 		out(O_ALTFP, " created FME%d, case %s]", nfmep->id,
1390 		    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
1391 		if (ffep) {
1392 			fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep);
1393 			fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep);
1394 			nfmep->e0r = ffep;
1395 		}
1396 
1397 		/*
1398 		 * add the original ereports
1399 		 */
1400 		for (ep = fmep->observations; ep; ep = ep->observations) {
1401 			eventstring = ep->enode->u.event.ename->u.name.s;
1402 			ipp = ep->ipp;
1403 			out(O_ALTFP|O_NONL, "adding event [");
1404 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1405 			out(O_ALTFP, " ]");
1406 			nep = itree_lookup(nfmep->eventtree, eventstring, ipp);
1407 			if (nep->count++ == 0) {
1408 				nep->observations = nfmep->observations;
1409 				nfmep->observations = nep;
1410 				serialize_observation(nfmep, eventstring, ipp);
1411 				nep->nvp = evnv_dupnvl(ep->nvp);
1412 			}
1413 			if (ep->ffep && ep->ffep != ffep)
1414 				fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase,
1415 				    ep->ffep);
1416 			stats_counter_bump(nfmep->Rcount);
1417 		}
1418 
1419 		/*
1420 		 * add the serd trigger ereport
1421 		 */
1422 		if ((ep = itree_lookup(nfmep->eventtree, tripped[i].ename,
1423 		    tripped[i].ipp)) == NULL) {
1424 			/*
1425 			 * The trigger ereport is not in the instance tree. It
1426 			 * was presumably removed by prune_propagations() as
1427 			 * this combination of events is not present in the
1428 			 * rules.
1429 			 */
1430 			out(O_ALTFP, "upsets_eval: e0 not in instance tree");
1431 			Undiag_reason = UD_VAL_BADEVENTI;
1432 			goto retry_lone_ereport;
1433 		}
1434 		out(O_ALTFP|O_NONL, "adding event [");
1435 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1436 		out(O_ALTFP, " ]");
1437 		nfmep->ecurrent = ep;
1438 		ep->nvp = NULL;
1439 		ep->count = 1;
1440 		ep->observations = nfmep->observations;
1441 		nfmep->observations = ep;
1442 
1443 		/*
1444 		 * just peek first.
1445 		 */
1446 		nfmep->peek = 1;
1447 		prev_verbose = Verbose;
1448 		if (Debug == 0)
1449 			Verbose = 0;
1450 		lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep);
1451 		state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay);
1452 		nfmep->peek = 0;
1453 		Verbose = prev_verbose;
1454 		if (state == FME_DISPROVED) {
1455 			out(O_ALTFP, "upsets_eval: hypothesis disproved");
1456 			Undiag_reason = UD_VAL_UNSOLVD;
1457 retry_lone_ereport:
1458 			/*
1459 			 * However the trigger ereport on its own might be
1460 			 * diagnosable, so check for that. Undo the new fme
1461 			 * and case we just created and call fme_receive_report.
1462 			 */
1463 			out(O_ALTFP|O_NONL, "[");
1464 			ipath_print(O_ALTFP|O_NONL, tripped[i].ename,
1465 			    tripped[i].ipp);
1466 			out(O_ALTFP, " retrying with just trigger ereport]");
1467 			itree_free(nfmep->eventtree);
1468 			nfmep->eventtree = NULL;
1469 			structconfig_free(nfmep->config);
1470 			nfmep->config = NULL;
1471 			destroy_fme_bufs(nfmep);
1472 			fmd_case_close(nfmep->hdl, nfmep->fmcase);
1473 			fme_receive_report(fmep->hdl, ffep,
1474 			    tripped[i].ename, tripped[i].ipp, NULL);
1475 			continue;
1476 		}
1477 
1478 		/*
1479 		 * and evaluate
1480 		 */
1481 		serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp);
1482 		fme_eval(nfmep, ffep);
1483 	}
1484 
1485 	return (ntrip);
1486 }
1487 
1488 /*
1489  * fme_receive_external_report -- call when an external ereport comes in
1490  *
1491  * this routine just converts the relevant information from the ereport
1492  * into a format used internally and passes it on to fme_receive_report().
1493  */
1494 void
1495 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1496     const char *class)
1497 {
1498 	struct node		*epnamenp;
1499 	fmd_case_t		*fmcase;
1500 	const struct ipath	*ipp;
1501 	nvlist_t		*detector = NULL;
1502 
1503 	class = stable(class);
1504 
1505 	/* Get the component path from the ereport */
1506 	epnamenp = platform_getpath(nvl);
1507 
1508 	/* See if we ended up without a path. */
1509 	if (epnamenp == NULL) {
1510 		/* See if class permits silent discard on unknown component. */
1511 		if (lut_lookup(Ereportenames_discard, (void *)class, NULL)) {
1512 			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
1513 			    "to component path, but silent discard allowed.",
1514 			    class);
1515 		} else {
1516 			/*
1517 			 * XFILE: Failure to find a component is bad unless
1518 			 * 'discard_if_config_unknown=1' was specified in the
1519 			 * ereport definition. Indicate undiagnosable.
1520 			 */
1521 			Undiag_reason = UD_VAL_NOPATH;
1522 			fmcase = fmd_case_open(hdl, NULL);
1523 
1524 			/*
1525 			 * We don't have a component path here (which means that
1526 			 * the detector was not in hc-scheme and couldn't be
1527 			 * converted to hc-scheme. Report the raw detector as
1528 			 * the suspect resource if there is one.
1529 			 */
1530 			(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
1531 			    &detector);
1532 			publish_undiagnosable(hdl, ffep, fmcase, detector,
1533 			    (char *)class);
1534 		}
1535 		return;
1536 	}
1537 
1538 	ipp = ipath(epnamenp);
1539 	tree_free(epnamenp);
1540 	fme_receive_report(hdl, ffep, class, ipp, nvl);
1541 }
1542 
1543 /*ARGSUSED*/
1544 void
1545 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1546     const char *eventstring)
1547 {
1548 	char *uuid;
1549 	nvlist_t **nva;
1550 	uint_t nvc;
1551 	const struct ipath *ipp;
1552 
1553 	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
1554 	    nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
1555 	    &nva, &nvc) != 0) {
1556 		out(O_ALTFP, "No uuid or fault list for list.repaired event");
1557 		return;
1558 	}
1559 
1560 	out(O_ALTFP, "Processing list.repaired from case %s", uuid);
1561 
1562 	while (nvc-- != 0) {
1563 		/*
1564 		 * Reset any istat or serd engine associated with this path.
1565 		 */
1566 		char *path;
1567 
1568 		if ((ipp = platform_fault2ipath(*nva++)) == NULL)
1569 			continue;
1570 
1571 		path = ipath2str(NULL, ipp);
1572 		out(O_ALTFP, "fme_receive_repair_list: resetting state for %s",
1573 		    path);
1574 		FREE(path);
1575 
1576 		lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp);
1577 		istat_save();
1578 
1579 		lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp);
1580 		serd_save();
1581 	}
1582 }
1583 
1584 /*ARGSUSED*/
1585 void
1586 fme_receive_topology_change(void)
1587 {
1588 	lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL);
1589 	istat_save();
1590 
1591 	lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL);
1592 	serd_save();
1593 }
1594 
1595 static int mark_arrows(struct fme *fmep, struct event *ep, int mark,
1596     unsigned long long at_latest_by, unsigned long long *pdelay, int keep);
1597 
1598 /* ARGSUSED */
1599 static void
1600 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
1601 {
1602 	struct bubble *bp;
1603 	struct arrowlist *ap;
1604 
1605 	ep->cached_state = 0;
1606 	ep->keep_in_tree = 0;
1607 	for (bp = itree_next_bubble(ep, NULL); bp;
1608 	    bp = itree_next_bubble(ep, bp)) {
1609 		if (bp->t != B_FROM)
1610 			continue;
1611 		bp->mark = 0;
1612 		for (ap = itree_next_arrow(bp, NULL); ap;
1613 		    ap = itree_next_arrow(bp, ap))
1614 			ap->arrowp->mark = 0;
1615 	}
1616 }
1617 
1618 static void
1619 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
1620     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
1621 {
1622 	struct event *ep;
1623 	struct fme *fmep = NULL;
1624 	struct fme *ofmep = NULL;
1625 	struct fme *cfmep, *svfmep;
1626 	int matched = 0;
1627 	nvlist_t *defect;
1628 	fmd_case_t *fmcase;
1629 	char *reason;
1630 
1631 	out(O_ALTFP|O_NONL, "fme_receive_report: ");
1632 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1633 	out(O_ALTFP|O_STAMP, NULL);
1634 
1635 	/* decide which FME it goes to */
1636 	for (fmep = FMElist; fmep; fmep = fmep->next) {
1637 		int prev_verbose;
1638 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1639 		enum fme_state state;
1640 		nvlist_t *pre_peek_nvp = NULL;
1641 
1642 		if (fmep->overflow) {
1643 			if (!(fmd_case_closed(fmep->hdl, fmep->fmcase)))
1644 				ofmep = fmep;
1645 
1646 			continue;
1647 		}
1648 
1649 		/*
1650 		 * ignore solved or closed cases
1651 		 */
1652 		if (fmep->posted_suspects ||
1653 		    fmd_case_solved(fmep->hdl, fmep->fmcase) ||
1654 		    fmd_case_closed(fmep->hdl, fmep->fmcase))
1655 			continue;
1656 
1657 		/* look up event in event tree for this FME */
1658 		if ((ep = itree_lookup(fmep->eventtree,
1659 		    eventstring, ipp)) == NULL)
1660 			continue;
1661 
1662 		/* note observation */
1663 		fmep->ecurrent = ep;
1664 		if (ep->count++ == 0) {
1665 			/* link it into list of observations seen */
1666 			ep->observations = fmep->observations;
1667 			fmep->observations = ep;
1668 			ep->nvp = evnv_dupnvl(nvl);
1669 		} else {
1670 			/* use new payload values for peek */
1671 			pre_peek_nvp = ep->nvp;
1672 			ep->nvp = evnv_dupnvl(nvl);
1673 		}
1674 
1675 		/* tell hypothesise() not to mess with suspect list */
1676 		fmep->peek = 1;
1677 
1678 		/* don't want this to be verbose (unless Debug is set) */
1679 		prev_verbose = Verbose;
1680 		if (Debug == 0)
1681 			Verbose = 0;
1682 
1683 		lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
1684 		state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
1685 
1686 		fmep->peek = 0;
1687 
1688 		/* put verbose flag back */
1689 		Verbose = prev_verbose;
1690 
1691 		if (state != FME_DISPROVED) {
1692 			/* found an FME that explains the ereport */
1693 			matched++;
1694 			out(O_ALTFP|O_NONL, "[");
1695 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1696 			out(O_ALTFP, " explained by FME%d]", fmep->id);
1697 
1698 			if (pre_peek_nvp)
1699 				nvlist_free(pre_peek_nvp);
1700 
1701 			if (ep->count == 1)
1702 				serialize_observation(fmep, eventstring, ipp);
1703 
1704 			if (ffep) {
1705 				fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1706 				ep->ffep = ffep;
1707 			}
1708 
1709 			stats_counter_bump(fmep->Rcount);
1710 
1711 			/* re-eval FME */
1712 			fme_eval(fmep, ffep);
1713 		} else {
1714 
1715 			/* not a match, undo noting of observation */
1716 			fmep->ecurrent = NULL;
1717 			if (--ep->count == 0) {
1718 				/* unlink it from observations */
1719 				fmep->observations = ep->observations;
1720 				ep->observations = NULL;
1721 				nvlist_free(ep->nvp);
1722 				ep->nvp = NULL;
1723 			} else {
1724 				nvlist_free(ep->nvp);
1725 				ep->nvp = pre_peek_nvp;
1726 			}
1727 		}
1728 	}
1729 
1730 	if (matched)
1731 		return;	/* explained by at least one existing FME */
1732 
1733 	/* clean up closed fmes */
1734 	cfmep = ClosedFMEs;
1735 	while (cfmep != NULL) {
1736 		svfmep = cfmep->next;
1737 		destroy_fme(cfmep);
1738 		cfmep = svfmep;
1739 	}
1740 	ClosedFMEs = NULL;
1741 
1742 	if (ofmep) {
1743 		out(O_ALTFP|O_NONL, "[");
1744 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1745 		out(O_ALTFP, " ADDING TO OVERFLOW FME]");
1746 		if (ffep)
1747 			fmd_case_add_ereport(hdl, ofmep->fmcase, ffep);
1748 
1749 		return;
1750 
1751 	} else if (Max_fme && (Open_fme_count >= Max_fme)) {
1752 		out(O_ALTFP|O_NONL, "[");
1753 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1754 		out(O_ALTFP, " MAX OPEN FME REACHED]");
1755 
1756 		fmcase = fmd_case_open(hdl, NULL);
1757 
1758 		/* Create overflow fme */
1759 		if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep,
1760 		    nvl)) == NULL) {
1761 			out(O_ALTFP|O_NONL, "[");
1762 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1763 			out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]");
1764 			return;
1765 		}
1766 
1767 		Open_fme_count++;
1768 
1769 		init_fme_bufs(fmep);
1770 		fmep->overflow = B_TRUE;
1771 
1772 		if (ffep)
1773 			fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1774 
1775 		Undiag_reason = UD_VAL_MAXFME;
1776 		defect = fmd_nvl_create_fault(hdl,
1777 		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
1778 		reason = undiag_2reason_str(Undiag_reason, NULL);
1779 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
1780 		FREE(reason);
1781 		fmd_case_add_suspect(hdl, fmep->fmcase, defect);
1782 		fmd_case_solve(hdl, fmep->fmcase);
1783 		Undiag_reason = UD_VAL_UNKNOWN;
1784 		return;
1785 	}
1786 
1787 	/* open a case */
1788 	fmcase = fmd_case_open(hdl, NULL);
1789 
1790 	/* start a new FME */
1791 	if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep, nvl)) == NULL) {
1792 		out(O_ALTFP|O_NONL, "[");
1793 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1794 		out(O_ALTFP, " CANNOT DIAGNOSE]");
1795 		return;
1796 	}
1797 
1798 	Open_fme_count++;
1799 
1800 	init_fme_bufs(fmep);
1801 
1802 	out(O_ALTFP|O_NONL, "[");
1803 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1804 	out(O_ALTFP, " created FME%d, case %s]", fmep->id,
1805 	    fmd_case_uuid(hdl, fmep->fmcase));
1806 
1807 	ep = fmep->e0;
1808 	ASSERT(ep != NULL);
1809 
1810 	/* note observation */
1811 	fmep->ecurrent = ep;
1812 	if (ep->count++ == 0) {
1813 		/* link it into list of observations seen */
1814 		ep->observations = fmep->observations;
1815 		fmep->observations = ep;
1816 		ep->nvp = evnv_dupnvl(nvl);
1817 		serialize_observation(fmep, eventstring, ipp);
1818 	} else {
1819 		/* new payload overrides any previous */
1820 		nvlist_free(ep->nvp);
1821 		ep->nvp = evnv_dupnvl(nvl);
1822 	}
1823 
1824 	stats_counter_bump(fmep->Rcount);
1825 
1826 	if (ffep) {
1827 		fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1828 		fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
1829 		fmep->e0r = ffep;
1830 		ep->ffep = ffep;
1831 	}
1832 
1833 	/* give the diagnosis algorithm a shot at the new FME state */
1834 	fme_eval(fmep, ffep);
1835 }
1836 
1837 void
1838 fme_status(int flags)
1839 {
1840 	struct fme *fmep;
1841 
1842 	if (FMElist == NULL) {
1843 		out(flags, "No fault management exercises underway.");
1844 		return;
1845 	}
1846 
1847 	for (fmep = FMElist; fmep; fmep = fmep->next)
1848 		fme_print(flags, fmep);
1849 }
1850 
1851 /*
1852  * "indent" routines used mostly for nicely formatted debug output, but also
1853  * for sanity checking for infinite recursion bugs.
1854  */
1855 
1856 #define	MAX_INDENT 1024
1857 static const char *indent_s[MAX_INDENT];
1858 static int current_indent;
1859 
1860 static void
1861 indent_push(const char *s)
1862 {
1863 	if (current_indent < MAX_INDENT)
1864 		indent_s[current_indent++] = s;
1865 	else
1866 		out(O_DIE, "unexpected recursion depth (%d)", current_indent);
1867 }
1868 
1869 static void
1870 indent_set(const char *s)
1871 {
1872 	current_indent = 0;
1873 	indent_push(s);
1874 }
1875 
1876 static void
1877 indent_pop(void)
1878 {
1879 	if (current_indent > 0)
1880 		current_indent--;
1881 	else
1882 		out(O_DIE, "recursion underflow");
1883 }
1884 
1885 static void
1886 indent(void)
1887 {
1888 	int i;
1889 	if (!Verbose)
1890 		return;
1891 	for (i = 0; i < current_indent; i++)
1892 		out(O_ALTFP|O_VERB|O_NONL, indent_s[i]);
1893 }
1894 
1895 #define	SLNEW		1
1896 #define	SLCHANGED	2
1897 #define	SLWAIT		3
1898 #define	SLDISPROVED	4
1899 
1900 static void
1901 print_suspects(int circumstance, struct fme *fmep)
1902 {
1903 	struct event *ep;
1904 
1905 	out(O_ALTFP|O_NONL, "[");
1906 	if (circumstance == SLCHANGED) {
1907 		out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, "
1908 		    "suspect list:", fmep->id, fme_state2str(fmep->state));
1909 	} else if (circumstance == SLWAIT) {
1910 		out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id,
1911 		    fmep->timer);
1912 		ptree_timeval(O_ALTFP|O_NONL, &fmep->wull);
1913 	} else if (circumstance == SLDISPROVED) {
1914 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id);
1915 	} else {
1916 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id);
1917 	}
1918 
1919 	if (circumstance == SLWAIT || circumstance == SLDISPROVED) {
1920 		out(O_ALTFP, "]");
1921 		return;
1922 	}
1923 
1924 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
1925 		out(O_ALTFP|O_NONL, " ");
1926 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
1927 	}
1928 	out(O_ALTFP, "]");
1929 }
1930 
1931 static struct node *
1932 eventprop_lookup(struct event *ep, const char *propname)
1933 {
1934 	return (lut_lookup(ep->props, (void *)propname, NULL));
1935 }
1936 
1937 #define	MAXDIGITIDX	23
1938 static char numbuf[MAXDIGITIDX + 1];
1939 
1940 static int
1941 node2uint(struct node *n, uint_t *valp)
1942 {
1943 	struct evalue value;
1944 	struct lut *globals = NULL;
1945 
1946 	if (n == NULL)
1947 		return (1);
1948 
1949 	/*
1950 	 * check value.v since we are being asked to convert an unsigned
1951 	 * long long int to an unsigned int
1952 	 */
1953 	if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) ||
1954 	    value.t != UINT64 || value.v > (1ULL << 32))
1955 		return (1);
1956 
1957 	*valp = (uint_t)value.v;
1958 
1959 	return (0);
1960 }
1961 
1962 static nvlist_t *
1963 node2fmri(struct node *n)
1964 {
1965 	nvlist_t **pa, *f, *p;
1966 	struct node *nc;
1967 	uint_t depth = 0;
1968 	char *numstr, *nullbyte;
1969 	char *failure;
1970 	int err, i;
1971 
1972 	/* XXX do we need to be able to handle a non-T_NAME node? */
1973 	if (n == NULL || n->t != T_NAME)
1974 		return (NULL);
1975 
1976 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
1977 		if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM)
1978 			break;
1979 		depth++;
1980 	}
1981 
1982 	if (nc != NULL) {
1983 		/* We bailed early, something went wrong */
1984 		return (NULL);
1985 	}
1986 
1987 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
1988 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
1989 	pa = alloca(depth * sizeof (nvlist_t *));
1990 	for (i = 0; i < depth; i++)
1991 		pa[i] = NULL;
1992 
1993 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
1994 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
1995 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
1996 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
1997 	if (err != 0) {
1998 		failure = "basic construction of FMRI failed";
1999 		goto boom;
2000 	}
2001 
2002 	numbuf[MAXDIGITIDX] = '\0';
2003 	nullbyte = &numbuf[MAXDIGITIDX];
2004 	i = 0;
2005 
2006 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
2007 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2008 		if (err != 0) {
2009 			failure = "alloc of an hc-pair failed";
2010 			goto boom;
2011 		}
2012 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s);
2013 		numstr = ulltostr(nc->u.name.child->u.ull, nullbyte);
2014 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2015 		if (err != 0) {
2016 			failure = "construction of an hc-pair failed";
2017 			goto boom;
2018 		}
2019 		pa[i++] = p;
2020 	}
2021 
2022 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2023 	if (err == 0) {
2024 		for (i = 0; i < depth; i++)
2025 			if (pa[i] != NULL)
2026 				nvlist_free(pa[i]);
2027 		return (f);
2028 	}
2029 	failure = "addition of hc-pair array to FMRI failed";
2030 
2031 boom:
2032 	for (i = 0; i < depth; i++)
2033 		if (pa[i] != NULL)
2034 			nvlist_free(pa[i]);
2035 	nvlist_free(f);
2036 	out(O_DIE, "%s", failure);
2037 	/*NOTREACHED*/
2038 	return (NULL);
2039 }
2040 
/*
 * an ipath cache entry is an array of these, with s==NULL at the end
 *
 * each element describes one path component; ipath2fmri() walks such an
 * array up to the s==NULL terminator and turns every element into an
 * hc-pair.
 */
struct ipath {
	const char *s;	/* component name (in stable) */
	int i;		/* instance number */
};
2046 
2047 static nvlist_t *
2048 ipath2fmri(struct ipath *ipath)
2049 {
2050 	nvlist_t **pa, *f, *p;
2051 	uint_t depth = 0;
2052 	char *numstr, *nullbyte;
2053 	char *failure;
2054 	int err, i;
2055 	struct ipath *ipp;
2056 
2057 	for (ipp = ipath; ipp->s != NULL; ipp++)
2058 		depth++;
2059 
2060 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
2061 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
2062 	pa = alloca(depth * sizeof (nvlist_t *));
2063 	for (i = 0; i < depth; i++)
2064 		pa[i] = NULL;
2065 
2066 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
2067 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
2068 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
2069 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
2070 	if (err != 0) {
2071 		failure = "basic construction of FMRI failed";
2072 		goto boom;
2073 	}
2074 
2075 	numbuf[MAXDIGITIDX] = '\0';
2076 	nullbyte = &numbuf[MAXDIGITIDX];
2077 	i = 0;
2078 
2079 	for (ipp = ipath; ipp->s != NULL; ipp++) {
2080 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2081 		if (err != 0) {
2082 			failure = "alloc of an hc-pair failed";
2083 			goto boom;
2084 		}
2085 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s);
2086 		numstr = ulltostr(ipp->i, nullbyte);
2087 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2088 		if (err != 0) {
2089 			failure = "construction of an hc-pair failed";
2090 			goto boom;
2091 		}
2092 		pa[i++] = p;
2093 	}
2094 
2095 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2096 	if (err == 0) {
2097 		for (i = 0; i < depth; i++)
2098 			if (pa[i] != NULL)
2099 				nvlist_free(pa[i]);
2100 		return (f);
2101 	}
2102 	failure = "addition of hc-pair array to FMRI failed";
2103 
2104 boom:
2105 	for (i = 0; i < depth; i++)
2106 		if (pa[i] != NULL)
2107 			nvlist_free(pa[i]);
2108 	nvlist_free(f);
2109 	out(O_DIE, "%s", failure);
2110 	/*NOTREACHED*/
2111 	return (NULL);
2112 }
2113 
/*
 * percentof -- return "part" as a percentage of "whole", rounded to the
 *	nearest whole percent (a remainder of .5 or more rounds up).
 *
 * The scaling is carried out in unsigned long long so that a large "part"
 * cannot wrap in 32-bit arithmetic before the division.  A "whole" of
 * zero yields 0 instead of dividing by zero.  The result is narrowed to
 * uint8_t on return.
 */
static uint8_t
percentof(uint_t part, uint_t whole)
{
	unsigned long long permille;

	if (whole == 0)
		return (0);

	/* widen before multiplying: part * 1000 can exceed a uint_t */
	permille = ((unsigned long long)part * 1000) / whole;

	return ((permille / 10) + (((permille % 10) >= 5) ? 1 : 0));
}
2121 
/*
 * rsl -- one entry of the suspect list being assembled for publication.
 * get_resources() fills in every member; rslfree() releases the nvlists.
 */
struct rsl {
	struct event *suspect;	/* the suspect event (not freed by rslfree) */
	nvlist_t *asru;		/* ASRU FMRI, may be NULL */
	nvlist_t *fru;		/* FRU FMRI, may be NULL */
	nvlist_t *rsrc;		/* resource FMRI, may be NULL or same as asru */
};
2128 
2129 static void publish_suspects(struct fme *fmep, struct rsl *srl);
2130 
2131 /*
2132  *  rslfree -- free internal members of struct rsl not expected to be
2133  *	freed elsewhere.
2134  */
2135 static void
2136 rslfree(struct rsl *freeme)
2137 {
2138 	if (freeme->asru != NULL)
2139 		nvlist_free(freeme->asru);
2140 	if (freeme->fru != NULL)
2141 		nvlist_free(freeme->fru);
2142 	if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru)
2143 		nvlist_free(freeme->rsrc);
2144 }
2145 
2146 /*
2147  *  rslcmp -- compare two rsl structures.  Use the following
2148  *	comparisons to establish cardinality:
2149  *
2150  *	1. Name of the suspect's class. (simple strcmp)
2151  *	2. Name of the suspect's ASRU. (trickier, since nvlist)
2152  *
2153  */
2154 static int
2155 rslcmp(const void *a, const void *b)
2156 {
2157 	struct rsl *r1 = (struct rsl *)a;
2158 	struct rsl *r2 = (struct rsl *)b;
2159 	int rv;
2160 
2161 	rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s,
2162 	    r2->suspect->enode->u.event.ename->u.name.s);
2163 	if (rv != 0)
2164 		return (rv);
2165 
2166 	if (r1->rsrc == NULL && r2->rsrc == NULL)
2167 		return (0);
2168 	if (r1->rsrc == NULL)
2169 		return (-1);
2170 	if (r2->rsrc == NULL)
2171 		return (1);
2172 	return (evnv_cmpnvl(r1->rsrc, r2->rsrc, 0));
2173 }
2174 
2175 /*
2176  * get_resources -- for a given suspect, determine what ASRU, FRU and
2177  *     RSRC nvlists should be advertised in the final suspect list.
2178  */
2179 void
2180 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot)
2181 {
2182 	struct node *asrudef, *frudef;
2183 	nvlist_t *asru, *fru;
2184 	nvlist_t *rsrc = NULL;
2185 	char *pathstr;
2186 
2187 	/*
2188 	 * First find any ASRU and/or FRU defined in the
2189 	 * initial fault tree.
2190 	 */
2191 	asrudef = eventprop_lookup(sp, L_ASRU);
2192 	frudef = eventprop_lookup(sp, L_FRU);
2193 
2194 	/*
2195 	 * Create FMRIs based on those definitions
2196 	 */
2197 	asru = node2fmri(asrudef);
2198 	fru = node2fmri(frudef);
2199 	pathstr = ipath2str(NULL, sp->ipp);
2200 
2201 	/*
2202 	 *  Allow for platform translations of the FMRIs
2203 	 */
2204 	platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc,
2205 	    pathstr);
2206 
2207 	FREE(pathstr);
2208 	rsrcs->suspect = sp;
2209 	rsrcs->asru = asru;
2210 	rsrcs->fru = fru;
2211 	rsrcs->rsrc = rsrc;
2212 }
2213 
2214 /*
2215  * trim_suspects -- prior to publishing, we may need to remove some
2216  *    suspects from the list.  If we're auto-closing upsets, we don't
2217  *    want any of those in the published list.  If the ASRUs for multiple
2218  *    defects resolve to the same ASRU (driver) we only want to publish
2219  *    that as a single suspect.
2220  */
2221 static int
2222 trim_suspects(struct fme *fmep, struct rsl *begin, struct rsl *begin2,
2223     fmd_event_t *ffep)
2224 {
2225 	struct event *ep;
2226 	struct rsl *rp = begin;
2227 	struct rsl *rp2 = begin2;
2228 	int mess_zero_count = 0;
2229 	int serd_rval;
2230 	uint_t messval;
2231 
2232 	/* remove any unwanted upsets and populate our array */
2233 	for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
2234 		if (is_upset(ep->t))
2235 			continue;
2236 		serd_rval = serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, ep,
2237 		    NULL, NULL);
2238 		if (serd_rval == 0)
2239 			continue;
2240 		if (node2uint(eventprop_lookup(ep, L_message),
2241 		    &messval) == 0 && messval == 0) {
2242 			get_resources(ep, rp2, fmep->config);
2243 			rp2++;
2244 			mess_zero_count++;
2245 		} else {
2246 			get_resources(ep, rp, fmep->config);
2247 			rp++;
2248 			fmep->nsuspects++;
2249 		}
2250 	}
2251 	return (mess_zero_count);
2252 }
2253 
2254 /*
2255  * addpayloadprop -- add a payload prop to a problem
2256  */
2257 static void
2258 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault)
2259 {
2260 	nvlist_t *rsrc, *hcs;
2261 
2262 	ASSERT(fault != NULL);
2263 	ASSERT(lhs != NULL);
2264 	ASSERT(rhs != NULL);
2265 
2266 	if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, &rsrc) != 0)
2267 		out(O_DIE, "cannot add payloadprop \"%s\" to fault", lhs);
2268 
2269 	if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0) {
2270 		out(O_ALTFP|O_VERB2, "addpayloadprop: create hc_specific");
2271 		if (nvlist_xalloc(&hcs, NV_UNIQUE_NAME, &Eft_nv_hdl) != 0)
2272 			out(O_DIE,
2273 			    "cannot add payloadprop \"%s\" to fault", lhs);
2274 		if (nvlist_add_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, hcs) != 0)
2275 			out(O_DIE,
2276 			    "cannot add payloadprop \"%s\" to fault", lhs);
2277 		nvlist_free(hcs);
2278 		if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0)
2279 			out(O_DIE,
2280 			    "cannot add payloadprop \"%s\" to fault", lhs);
2281 	} else
2282 		out(O_ALTFP|O_VERB2, "addpayloadprop: reuse hc_specific");
2283 
2284 	if (rhs->t == UINT64) {
2285 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v);
2286 
2287 		if (nvlist_add_uint64(hcs, lhs, rhs->v) != 0)
2288 			out(O_DIE,
2289 			    "cannot add payloadprop \"%s\" to fault", lhs);
2290 	} else {
2291 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"",
2292 		    lhs, (char *)(uintptr_t)rhs->v);
2293 
2294 		if (nvlist_add_string(hcs, lhs, (char *)(uintptr_t)rhs->v) != 0)
2295 			out(O_DIE,
2296 			    "cannot add payloadprop \"%s\" to fault", lhs);
2297 	}
2298 }
2299 
/*
 * Scratch state shared by istataddsize(), istat2str() and istat_save()
 * while the istats are being serialized.
 */
static char *Istatbuf;		/* start of the serialization buffer */
static char *Istatbufptr;	/* next byte to be written in Istatbuf */
static int Istatsz;		/* total size of the serialized istats */
2303 
2304 /*
2305  * istataddsize -- calculate size of istat and add it to Istatsz
2306  */
2307 /*ARGSUSED2*/
2308 static void
2309 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2310 {
2311 	int val;
2312 
2313 	ASSERT(lhs != NULL);
2314 	ASSERT(rhs != NULL);
2315 
2316 	if ((val = stats_counter_value(rhs)) == 0)
2317 		return;	/* skip zero-valued stats */
2318 
2319 	/* count up the size of the stat name */
2320 	Istatsz += ipath2strlen(lhs->ename, lhs->ipath);
2321 	Istatsz++;	/* for the trailing NULL byte */
2322 
2323 	/* count up the size of the stat value */
2324 	Istatsz += snprintf(NULL, 0, "%d", val);
2325 	Istatsz++;	/* for the trailing NULL byte */
2326 }
2327 
2328 /*
2329  * istat2str -- serialize an istat, writing result to *Istatbufptr
2330  */
2331 /*ARGSUSED2*/
2332 static void
2333 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2334 {
2335 	char *str;
2336 	int len;
2337 	int val;
2338 
2339 	ASSERT(lhs != NULL);
2340 	ASSERT(rhs != NULL);
2341 
2342 	if ((val = stats_counter_value(rhs)) == 0)
2343 		return;	/* skip zero-valued stats */
2344 
2345 	/* serialize the stat name */
2346 	str = ipath2str(lhs->ename, lhs->ipath);
2347 	len = strlen(str);
2348 
2349 	ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]);
2350 	(void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr);
2351 	Istatbufptr += len;
2352 	FREE(str);
2353 	*Istatbufptr++ = '\0';
2354 
2355 	/* serialize the stat value */
2356 	Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr,
2357 	    "%d", val);
2358 	*Istatbufptr++ = '\0';
2359 
2360 	ASSERT(Istatbufptr <= &Istatbuf[Istatsz]);
2361 }
2362 
2363 void
2364 istat_save()
2365 {
2366 	if (Istat_need_save == 0)
2367 		return;
2368 
2369 	/* figure out how big the serialzed info is */
2370 	Istatsz = 0;
2371 	lut_walk(Istats, (lut_cb)istataddsize, NULL);
2372 
2373 	if (Istatsz == 0) {
2374 		/* no stats to save */
2375 		fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2376 		return;
2377 	}
2378 
2379 	/* create the serialized buffer */
2380 	Istatbufptr = Istatbuf = MALLOC(Istatsz);
2381 	lut_walk(Istats, (lut_cb)istat2str, NULL);
2382 
2383 	/* clear out current saved stats */
2384 	fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2385 
2386 	/* write out the new version */
2387 	fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz);
2388 	FREE(Istatbuf);
2389 
2390 	Istat_need_save = 0;
2391 }
2392 
2393 int
2394 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2)
2395 {
2396 	if (ent1->ename != ent2->ename)
2397 		return (ent2->ename - ent1->ename);
2398 	if (ent1->ipath != ent2->ipath)
2399 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2400 
2401 	return (0);
2402 }
2403 
2404 /*
2405  * istat-verify -- verify the component associated with a stat still exists
2406  *
2407  * if the component no longer exists, this routine resets the stat and
2408  * returns 0.  if the component still exists, it returns 1.
2409  */
2410 static int
2411 istat_verify(struct node *snp, struct istat_entry *entp)
2412 {
2413 	struct stats *statp;
2414 	nvlist_t *fmri;
2415 
2416 	fmri = node2fmri(snp->u.event.epname);
2417 	if (platform_path_exists(fmri)) {
2418 		nvlist_free(fmri);
2419 		return (1);
2420 	}
2421 	nvlist_free(fmri);
2422 
2423 	/* component no longer in system.  zero out the associated stats */
2424 	if ((statp = (struct stats *)
2425 	    lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
2426 	    stats_counter_value(statp) == 0)
2427 		return (0);	/* stat is already reset */
2428 
2429 	Istat_need_save = 1;
2430 	stats_counter_reset(statp);
2431 	return (0);
2432 }
2433 
2434 static void
2435 istat_bump(struct node *snp, int n)
2436 {
2437 	struct stats *statp;
2438 	struct istat_entry ent;
2439 
2440 	ASSERT(snp != NULL);
2441 	ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
2442 	ASSERT(snp->u.event.epname != NULL);
2443 
2444 	/* class name should be hoisted into a single stable entry */
2445 	ASSERT(snp->u.event.ename->u.name.next == NULL);
2446 	ent.ename = snp->u.event.ename->u.name.s;
2447 	ent.ipath = ipath(snp->u.event.epname);
2448 
2449 	if (!istat_verify(snp, &ent)) {
2450 		/* component no longer exists in system, nothing to do */
2451 		return;
2452 	}
2453 
2454 	if ((statp = (struct stats *)
2455 	    lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
2456 		/* need to create the counter */
2457 		int cnt = 0;
2458 		struct node *np;
2459 		char *sname;
2460 		char *snamep;
2461 		struct istat_entry *newentp;
2462 
2463 		/* count up the size of the stat name */
2464 		np = snp->u.event.ename;
2465 		while (np != NULL) {
2466 			cnt += strlen(np->u.name.s);
2467 			cnt++;	/* for the '.' or '@' */
2468 			np = np->u.name.next;
2469 		}
2470 		np = snp->u.event.epname;
2471 		while (np != NULL) {
2472 			cnt += snprintf(NULL, 0, "%s%llu",
2473 			    np->u.name.s, np->u.name.child->u.ull);
2474 			cnt++;	/* for the '/' or trailing NULL byte */
2475 			np = np->u.name.next;
2476 		}
2477 
2478 		/* build the stat name */
2479 		snamep = sname = alloca(cnt);
2480 		np = snp->u.event.ename;
2481 		while (np != NULL) {
2482 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2483 			    "%s", np->u.name.s);
2484 			np = np->u.name.next;
2485 			if (np)
2486 				*snamep++ = '.';
2487 		}
2488 		*snamep++ = '@';
2489 		np = snp->u.event.epname;
2490 		while (np != NULL) {
2491 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2492 			    "%s%llu", np->u.name.s, np->u.name.child->u.ull);
2493 			np = np->u.name.next;
2494 			if (np)
2495 				*snamep++ = '/';
2496 		}
2497 		*snamep++ = '\0';
2498 
2499 		/* create the new stat & add it to our list */
2500 		newentp = MALLOC(sizeof (*newentp));
2501 		*newentp = ent;
2502 		statp = stats_new_counter(NULL, sname, 0);
2503 		Istats = lut_add(Istats, (void *)newentp, (void *)statp,
2504 		    (lut_cmp)istat_cmp);
2505 	}
2506 
2507 	/* if n is non-zero, set that value instead of bumping */
2508 	if (n) {
2509 		stats_counter_reset(statp);
2510 		stats_counter_add(statp, n);
2511 	} else
2512 		stats_counter_bump(statp);
2513 	Istat_need_save = 1;
2514 
2515 	ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
2516 	out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
2517 	    stats_counter_value(statp));
2518 }
2519 
/*
 * istat_destructor -- lut_free() callback for Istats: free the entry used
 *	as the key and delete the stat stored as the value.
 */
/*ARGSUSED*/
static void
istat_destructor(void *left, void *right, void *arg)
{
	FREE((struct istat_entry *)left);
	stats_delete((struct stats *)right);
}
2529 
2530 /*
2531  * Callback used in a walk of the Istats to reset matching stat counters.
2532  */
2533 static void
2534 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
2535     const struct ipath *ipp)
2536 {
2537 	char *path;
2538 
2539 	if (entp->ipath == ipp) {
2540 		path = ipath2str(entp->ename, ipp);
2541 		out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
2542 		FREE(path);
2543 		stats_counter_reset(statp);
2544 		Istat_need_save = 1;
2545 	}
2546 }
2547 
2548 /*ARGSUSED*/
2549 static void
2550 istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp,
2551     void *unused)
2552 {
2553 	char *path;
2554 	nvlist_t *fmri;
2555 
2556 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2557 	if (!platform_path_exists(fmri)) {
2558 		path = ipath2str(entp->ename, entp->ipath);
2559 		out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path);
2560 		FREE(path);
2561 		stats_counter_reset(statp);
2562 		Istat_need_save = 1;
2563 	}
2564 	nvlist_free(fmri);
2565 }
2566 
/*
 * istat_fini -- tear down the Istats table, handing every entry to
 *	istat_destructor().
 */
void
istat_fini(void)
{
	lut_free(Istats, istat_destructor, NULL);
}
2572 
/*
 * Scratch state shared by serdaddsize(), serd2str() and serd_save()
 * while the serd engine names are being serialized.
 */
static char *Serdbuf;		/* start of the serialization buffer */
static char *Serdbufptr;	/* next byte to be written in Serdbuf */
static int Serdsz;		/* total size of the serialized names */
2576 
2577 /*
2578  * serdaddsize -- calculate size of serd and add it to Serdsz
2579  */
2580 /*ARGSUSED*/
2581 static void
2582 serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2583 {
2584 	ASSERT(lhs != NULL);
2585 
2586 	/* count up the size of the stat name */
2587 	Serdsz += ipath2strlen(lhs->ename, lhs->ipath);
2588 	Serdsz++;	/* for the trailing NULL byte */
2589 }
2590 
2591 /*
2592  * serd2str -- serialize a serd engine, writing result to *Serdbufptr
2593  */
2594 /*ARGSUSED*/
2595 static void
2596 serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2597 {
2598 	char *str;
2599 	int len;
2600 
2601 	ASSERT(lhs != NULL);
2602 
2603 	/* serialize the serd engine name */
2604 	str = ipath2str(lhs->ename, lhs->ipath);
2605 	len = strlen(str);
2606 
2607 	ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]);
2608 	(void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr);
2609 	Serdbufptr += len;
2610 	FREE(str);
2611 	*Serdbufptr++ = '\0';
2612 	ASSERT(Serdbufptr <= &Serdbuf[Serdsz]);
2613 }
2614 
2615 void
2616 serd_save()
2617 {
2618 	if (Serd_need_save == 0)
2619 		return;
2620 
2621 	/* figure out how big the serialzed info is */
2622 	Serdsz = 0;
2623 	lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL);
2624 
2625 	if (Serdsz == 0) {
2626 		/* no serd engines to save */
2627 		fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2628 		return;
2629 	}
2630 
2631 	/* create the serialized buffer */
2632 	Serdbufptr = Serdbuf = MALLOC(Serdsz);
2633 	lut_walk(SerdEngines, (lut_cb)serd2str, NULL);
2634 
2635 	/* clear out current saved stats */
2636 	fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2637 
2638 	/* write out the new version */
2639 	fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz);
2640 	FREE(Serdbuf);
2641 	Serd_need_save = 0;
2642 }
2643 
2644 int
2645 serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2)
2646 {
2647 	if (ent1->ename != ent2->ename)
2648 		return (ent2->ename - ent1->ename);
2649 	if (ent1->ipath != ent2->ipath)
2650 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2651 
2652 	return (0);
2653 }
2654 
2655 void
2656 fme_serd_load(fmd_hdl_t *hdl)
2657 {
2658 	int sz;
2659 	char *sbuf;
2660 	char *sepptr;
2661 	char *ptr;
2662 	struct serd_entry *newentp;
2663 	struct node *epname;
2664 	nvlist_t *fmri;
2665 	char *namestring;
2666 
2667 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0)
2668 		return;
2669 	sbuf = alloca(sz);
2670 	fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz);
2671 	ptr = sbuf;
2672 	while (ptr < &sbuf[sz]) {
2673 		sepptr = strchr(ptr, '@');
2674 		*sepptr = '\0';
2675 		namestring = ptr;
2676 		sepptr++;
2677 		ptr = sepptr;
2678 		ptr += strlen(ptr);
2679 		ptr++;	/* move past the '\0' separating paths */
2680 		epname = pathstring2epnamenp(sepptr);
2681 		fmri = node2fmri(epname);
2682 		if (platform_path_exists(fmri)) {
2683 			newentp = MALLOC(sizeof (*newentp));
2684 			newentp->hdl = hdl;
2685 			newentp->ipath = ipath(epname);
2686 			newentp->ename = stable(namestring);
2687 			SerdEngines = lut_add(SerdEngines, (void *)newentp,
2688 			    (void *)newentp, (lut_cmp)serd_cmp);
2689 		} else
2690 			Serd_need_save = 1;
2691 		tree_free(epname);
2692 		nvlist_free(fmri);
2693 	}
2694 	/* save it back again in case some of the paths no longer exist */
2695 	serd_save();
2696 }
2697 
/*
 * serd_destructor -- lut_free() callback for SerdEngines: free the entry
 *	that was stored as the key.
 */
/*ARGSUSED*/
static void
serd_destructor(void *left, void *right, void *arg)
{
	FREE((struct serd_entry *)left);
}
2705 
2706 /*
2707  * Callback used in a walk of the SerdEngines to reset matching serd engines.
2708  */
2709 /*ARGSUSED*/
2710 static void
2711 serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp)
2712 {
2713 	char *path;
2714 
2715 	if (entp->ipath == ipp) {
2716 		path = ipath2str(entp->ename, ipp);
2717 		out(O_ALTFP, "serd_reset_cb: resetting %s", path);
2718 		fmd_serd_reset(entp->hdl, path);
2719 		FREE(path);
2720 		Serd_need_save = 1;
2721 	}
2722 }
2723 
2724 /*ARGSUSED*/
2725 static void
2726 serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2)
2727 {
2728 	char *path;
2729 	nvlist_t *fmri;
2730 
2731 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2732 	if (!platform_path_exists(fmri)) {
2733 		path = ipath2str(entp->ename, entp->ipath);
2734 		out(O_ALTFP, "serd_topo_chg_cb: not present %s", path);
2735 		fmd_serd_reset(entp->hdl, path);
2736 		FREE(path);
2737 		Serd_need_save = 1;
2738 	}
2739 	nvlist_free(fmri);
2740 }
2741 
/*
 * serd_fini -- tear down the SerdEngines table, handing every entry to
 *	serd_destructor().
 */
void
serd_fini(void)
{
	lut_free(SerdEngines, serd_destructor, NULL);
}
2747 
/*
 * publish_suspects -- turn the suspect array srl (fmep->nsuspects
 *	entries) into fault nvlists and add them to the FME's case.
 *
 * The array is sorted with rslcmp().  Each suspect's certainty is its
 * FITrate as a percentage of the sum of all FITrates; a missing or zero
 * FITrate counts as 1.  For every suspect the optional "message",
 * "retire" and "response" properties and any payload properties are
 * added to the fault, an "action" property is evaluated, and the fault
 * is added to the case.  Unless every fault's ASRU is reported by
 * fmd_nvl_fmri_has_fault() as already faulty, the "count" property of
 * each suspect is bumped via istat_bump() and the istats are saved.
 */
static void
publish_suspects(struct fme *fmep, struct rsl *srl)
{
	struct rsl *rp;
	nvlist_t *fault;
	uint8_t cert;
	uint_t *frs;
	uint_t frsum, fr;
	uint_t messval;
	uint_t retireval;
	uint_t responseval;
	struct node *snp;
	int frcnt, fridx;
	boolean_t allfaulty = B_TRUE;
	/* last entry of the array; the loops below run srl..erl inclusive */
	struct rsl *erl = srl + fmep->nsuspects - 1;

	/*
	 * sort the array
	 */
	qsort(srl, fmep->nsuspects, sizeof (struct rsl), rslcmp);

	/* sum the fitrates */
	frs = alloca(fmep->nsuspects * sizeof (uint_t));
	fridx = frcnt = frsum = 0;

	/* frs[] receives one FITrate per suspect, in sorted order */
	for (rp = srl; rp <= erl; rp++) {
		struct node *n;

		n = eventprop_lookup(rp->suspect, L_FITrate);
		if (node2uint(n, &fr) != 0) {
			out(O_DEBUG|O_NONL, "event ");
			ipath_print(O_DEBUG|O_NONL,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    rp->suspect->ipp);
			out(O_DEBUG, " has no FITrate (using 1)");
			fr = 1;
		} else if (fr == 0) {
			out(O_DEBUG|O_NONL, "event ");
			ipath_print(O_DEBUG|O_NONL,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    rp->suspect->ipp);
			out(O_DEBUG, " has zero FITrate (using 1)");
			fr = 1;
		}

		frs[fridx++] = fr;
		frsum += fr;
		frcnt++;
	}

	/* Add them in reverse order of our sort, as fmd reverses order */
	for (rp = erl; rp >= srl; rp--) {
		/* frs[] is consumed from the end, in step with rp */
		cert = percentof(frs[--fridx], frsum);
		fault = fmd_nvl_create_fault(fmep->hdl,
		    rp->suspect->enode->u.event.ename->u.name.s,
		    cert,
		    rp->asru,
		    rp->fru,
		    rp->rsrc);
		if (fault == NULL)
			out(O_DIE, "fault creation failed");
		/* if "message" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_message),
		    &messval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds message=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    messval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_MESSAGE,
			    (messval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-message to fault");
			}
		}

		/* if "retire" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_retire),
		    &retireval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds retire=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    retireval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_RETIRE,
			    (retireval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-retire to fault");
			}
		}

		/* if "response" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_response),
		    &responseval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds response=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    responseval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_RESPONSE,
			    (responseval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-response to fault");
			}
		}

		/* add any payload properties */
		lut_walk(rp->suspect->payloadprops,
		    (lut_cb)addpayloadprop, (void *)fault);
		/*
		 * The entry's asru/fru/rsrc nvlists are released here; from
		 * this point on only rp->suspect is used.
		 * NOTE(review): this presumes fmd_nvl_create_fault() kept
		 * its own copies of those nvlists -- confirm.
		 */
		rslfree(rp);

		/*
		 * If "action" property exists, evaluate it;  this must be done
		 * before the allfaulty check below since some actions may
		 * modify the asru to be used in fmd_nvl_fmri_has_fault.  This
		 * needs to be restructured if any new actions are introduced
		 * that have effects that we do not want to be visible if
		 * we decide not to publish in the dupclose check below.
		 */
		if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
			struct evalue evalue;

			out(O_ALTFP|O_NONL,
			    "[FME%d, %s action ", fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s);
			ptree_name_iter(O_ALTFP|O_NONL, snp);
			out(O_ALTFP, "]");
			/* Action_nvl is set to this fault for the evaluation */
			Action_nvl = fault;
			(void) eval_expr(snp, NULL, NULL, NULL, NULL,
			    NULL, 0, &evalue);
		}

		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);

		/*
		 * check if the asru is already marked as "faulty".
		 */
		if (allfaulty) {
			nvlist_t *asru;

			out(O_ALTFP|O_VERB, "FME%d dup check ", fmep->id);
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
			out(O_ALTFP|O_VERB|O_NONL, " ");
			if (nvlist_lookup_nvlist(fault,
			    FM_FAULT_ASRU, &asru) != 0) {
				out(O_ALTFP|O_VERB, "NULL asru");
				allfaulty = B_FALSE;
			} else if (fmd_nvl_fmri_has_fault(fmep->hdl, asru,
			    FMD_HAS_FAULT_ASRU, NULL)) {
				out(O_ALTFP|O_VERB, "faulty");
			} else {
				out(O_ALTFP|O_VERB, "not faulty");
				allfaulty = B_FALSE;
			}
		}

	}

	if (!allfaulty) {
		/*
		 * don't update the count stat if all asrus are already
		 * present and unrepaired in the asru cache
		 */
		for (rp = erl; rp >= srl; rp--) {
			struct event *suspect = rp->suspect;

			if (suspect == NULL)
				continue;

			/* if "count" exists, increment the appropriate stat */
			if ((snp = eventprop_lookup(suspect,
			    L_count)) != NULL) {
				out(O_ALTFP|O_NONL,
				    "[FME%d, %s count ", fmep->id,
				    suspect->enode->u.event.ename->u.name.s);
				ptree_name_iter(O_ALTFP|O_NONL, snp);
				out(O_ALTFP, "]");
				istat_bump(snp, 0);

			}
		}
		istat_save();	/* write out any istat changes */
	}
}
2935 
2936 static const char *
2937 undiag_2defect_str(int ud)
2938 {
2939 	switch (ud) {
2940 	case UD_VAL_MISSINGINFO:
2941 	case UD_VAL_MISSINGOBS:
2942 	case UD_VAL_MISSINGPATH:
2943 	case UD_VAL_MISSINGZERO:
2944 	case UD_VAL_BADOBS:
2945 	case UD_VAL_CFGMISMATCH:
2946 		return (UNDIAG_DEFECT_CHKPT);
2947 		break;
2948 
2949 	case UD_VAL_BADEVENTI:
2950 	case UD_VAL_BADEVENTPATH:
2951 	case UD_VAL_BADEVENTCLASS:
2952 	case UD_VAL_INSTFAIL:
2953 	case UD_VAL_NOPATH:
2954 	case UD_VAL_UNSOLVD:
2955 		return (UNDIAG_DEFECT_FME);
2956 		break;
2957 
2958 	case UD_VAL_MAXFME:
2959 		return (UNDIAG_DEFECT_LIMIT);
2960 		break;
2961 
2962 	case UD_VAL_UNKNOWN:
2963 	default:
2964 		return (UNDIAG_DEFECT_UNKNOWN);
2965 		break;
2966 	}
2967 }
2968 
2969 static const char *
2970 undiag_2fault_str(int ud)
2971 {
2972 	switch (ud) {
2973 	case UD_VAL_BADEVENTI:
2974 	case UD_VAL_BADEVENTPATH:
2975 	case UD_VAL_BADEVENTCLASS:
2976 	case UD_VAL_INSTFAIL:
2977 	case UD_VAL_NOPATH:
2978 	case UD_VAL_UNSOLVD:
2979 		return (UNDIAG_FAULT_FME);
2980 	default:
2981 		return (NULL);
2982 	}
2983 }
2984 
2985 static char *
2986 undiag_2reason_str(int ud, char *arg)
2987 {
2988 	const char *ptr;
2989 	char *buf;
2990 	int with_arg = 0;
2991 
2992 	switch (ud) {
2993 	case UD_VAL_BADEVENTPATH:
2994 		ptr = UD_STR_BADEVENTPATH;
2995 		with_arg = 1;
2996 		break;
2997 	case UD_VAL_BADEVENTCLASS:
2998 		ptr = UD_STR_BADEVENTCLASS;
2999 		with_arg = 1;
3000 		break;
3001 	case UD_VAL_BADEVENTI:
3002 		ptr = UD_STR_BADEVENTI;
3003 		with_arg = 1;
3004 		break;
3005 	case UD_VAL_BADOBS:
3006 		ptr = UD_STR_BADOBS;
3007 		break;
3008 	case UD_VAL_CFGMISMATCH:
3009 		ptr = UD_STR_CFGMISMATCH;
3010 		break;
3011 	case UD_VAL_INSTFAIL:
3012 		ptr = UD_STR_INSTFAIL;
3013 		with_arg = 1;
3014 		break;
3015 	case UD_VAL_MAXFME:
3016 		ptr = UD_STR_MAXFME;
3017 		break;
3018 	case UD_VAL_MISSINGINFO:
3019 		ptr = UD_STR_MISSINGINFO;
3020 		break;
3021 	case UD_VAL_MISSINGOBS:
3022 		ptr = UD_STR_MISSINGOBS;
3023 		break;
3024 	case UD_VAL_MISSINGPATH:
3025 		ptr = UD_STR_MISSINGPATH;
3026 		break;
3027 	case UD_VAL_MISSINGZERO:
3028 		ptr = UD_STR_MISSINGZERO;
3029 		break;
3030 	case UD_VAL_NOPATH:
3031 		ptr = UD_STR_NOPATH;
3032 		with_arg = 1;
3033 		break;
3034 	case UD_VAL_UNSOLVD:
3035 		ptr = UD_STR_UNSOLVD;
3036 		break;
3037 	case UD_VAL_UNKNOWN:
3038 	default:
3039 		ptr = UD_STR_UNKNOWN;
3040 		break;
3041 	}
3042 	if (with_arg) {
3043 		buf = MALLOC(strlen(ptr) + strlen(arg) - 1);
3044 		(void) sprintf(buf, ptr, arg);
3045 	} else {
3046 		buf = MALLOC(strlen(ptr) + 1);
3047 		(void) sprintf(buf, ptr);
3048 	}
3049 	return (buf);
3050 }
3051 
3052 static void
3053 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase,
3054     nvlist_t *detector, char *arg)
3055 {
3056 	struct case_list *newcase;
3057 	nvlist_t *defect, *fault;
3058 	const char *faultstr;
3059 	char *reason = undiag_2reason_str(Undiag_reason, arg);
3060 
3061 	out(O_ALTFP,
3062 	    "[undiagnosable ereport received, "
3063 	    "creating and closing a new case (%s)]", reason);
3064 
3065 	newcase = MALLOC(sizeof (struct case_list));
3066 	newcase->next = NULL;
3067 	newcase->fmcase = fmcase;
3068 	if (Undiagablecaselist != NULL)
3069 		newcase->next = Undiagablecaselist;
3070 	Undiagablecaselist = newcase;
3071 
3072 	if (ffep != NULL)
3073 		fmd_case_add_ereport(hdl, newcase->fmcase, ffep);
3074 
3075 	/* add defect */
3076 	defect = fmd_nvl_create_fault(hdl,
3077 	    undiag_2defect_str(Undiag_reason), 50, NULL, NULL, detector);
3078 	(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
3079 	(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE, B_FALSE);
3080 	(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE, B_FALSE);
3081 	fmd_case_add_suspect(hdl, newcase->fmcase, defect);
3082 
3083 	/* add fault if appropriate */
3084 	faultstr = undiag_2fault_str(Undiag_reason);
3085 	if (faultstr != NULL) {
3086 		fault = fmd_nvl_create_fault(hdl, faultstr, 50, NULL, NULL,
3087 		    detector);
3088 		(void) nvlist_add_string(fault, UNDIAG_REASON, reason);
3089 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
3090 		    B_FALSE);
3091 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
3092 		    B_FALSE);
3093 		fmd_case_add_suspect(hdl, newcase->fmcase, fault);
3094 	}
3095 	FREE(reason);
3096 
3097 	/* solve and close case */
3098 	fmd_case_solve(hdl, newcase->fmcase);
3099 	fmd_case_close(hdl, newcase->fmcase);
3100 	Undiag_reason = UD_VAL_UNKNOWN;
3101 }
3102 
3103 static void
3104 fme_undiagnosable(struct fme *f)
3105 {
3106 	nvlist_t *defect, *fault, *detector = NULL;
3107 	struct event *ep;
3108 	char *pathstr;
3109 	const char *faultstr;
3110 	char *reason = undiag_2reason_str(Undiag_reason, NULL);
3111 
3112 	out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
3113 	    f->id, fmd_case_uuid(f->hdl, f->fmcase), reason);
3114 
3115 	for (ep = f->observations; ep; ep = ep->observations) {
3116 
3117 		if (ep->ffep != f->e0r)
3118 			fmd_case_add_ereport(f->hdl, f->fmcase, ep->ffep);
3119 
3120 		pathstr = ipath2str(NULL, ipath(platform_getpath(ep->nvp)));
3121 		platform_units_translate(0, f->config, NULL, NULL, &detector,
3122 		    pathstr);
3123 		FREE(pathstr);
3124 
3125 		/* add defect */
3126 		defect = fmd_nvl_create_fault(f->hdl,
3127 		    undiag_2defect_str(Undiag_reason), 50 / f->uniqobs,
3128 		    NULL, NULL, detector);
3129 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
3130 		(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE,
3131 		    B_FALSE);
3132 		(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE,
3133 		    B_FALSE);
3134 		fmd_case_add_suspect(f->hdl, f->fmcase, defect);
3135 
3136 		/* add fault if appropriate */
3137 		faultstr = undiag_2fault_str(Undiag_reason);
3138 		if (faultstr == NULL)
3139 			continue;
3140 		fault = fmd_nvl_create_fault(f->hdl, faultstr, 50 / f->uniqobs,
3141 		    NULL, NULL, detector);
3142 		(void) nvlist_add_string(fault, UNDIAG_REASON, reason);
3143 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
3144 		    B_FALSE);
3145 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
3146 		    B_FALSE);
3147 		fmd_case_add_suspect(f->hdl, f->fmcase, fault);
3148 		nvlist_free(detector);
3149 	}
3150 	FREE(reason);
3151 	fmd_case_solve(f->hdl, f->fmcase);
3152 	fmd_case_close(f->hdl, f->fmcase);
3153 	Undiag_reason = UD_VAL_UNKNOWN;
3154 }
3155 
3156 /*
3157  * fme_close_case
3158  *
3159  *	Find the requested case amongst our fmes and close it.  Free up
3160  *	the related fme.
3161  */
3162 void
3163 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase)
3164 {
3165 	struct case_list *ucasep, *prevcasep = NULL;
3166 	struct fme *prev = NULL;
3167 	struct fme *fmep;
3168 
3169 	for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) {
3170 		if (fmcase != ucasep->fmcase) {
3171 			prevcasep = ucasep;
3172 			continue;
3173 		}
3174 
3175 		if (prevcasep == NULL)
3176 			Undiagablecaselist = Undiagablecaselist->next;
3177 		else
3178 			prevcasep->next = ucasep->next;
3179 
3180 		FREE(ucasep);
3181 		return;
3182 	}
3183 
3184 	for (fmep = FMElist; fmep; fmep = fmep->next) {
3185 		if (fmep->hdl == hdl && fmep->fmcase == fmcase)
3186 			break;
3187 		prev = fmep;
3188 	}
3189 
3190 	if (fmep == NULL) {
3191 		out(O_WARN, "Eft asked to close unrecognized case [%s].",
3192 		    fmd_case_uuid(hdl, fmcase));
3193 		return;
3194 	}
3195 
3196 	if (EFMElist == fmep)
3197 		EFMElist = prev;
3198 
3199 	if (prev == NULL)
3200 		FMElist = FMElist->next;
3201 	else
3202 		prev->next = fmep->next;
3203 
3204 	fmep->next = NULL;
3205 
3206 	/* Get rid of any timer this fme has set */
3207 	if (fmep->wull != 0)
3208 		fmd_timer_remove(fmep->hdl, fmep->timer);
3209 
3210 	if (ClosedFMEs == NULL) {
3211 		ClosedFMEs = fmep;
3212 	} else {
3213 		fmep->next = ClosedFMEs;
3214 		ClosedFMEs = fmep;
3215 	}
3216 
3217 	Open_fme_count--;
3218 
3219 	/* See if we can close the overflow FME */
3220 	if (Open_fme_count <= Max_fme) {
3221 		for (fmep = FMElist; fmep; fmep = fmep->next) {
3222 			if (fmep->overflow && !(fmd_case_closed(fmep->hdl,
3223 			    fmep->fmcase)))
3224 				break;
3225 		}
3226 
3227 		if (fmep != NULL)
3228 			fmd_case_close(fmep->hdl, fmep->fmcase);
3229 	}
3230 }
3231 
3232 /*
3233  * fme_set_timer()
3234  *	If the time we need to wait for the given FME is less than the
3235  *	current timer, kick that old timer out and establish a new one.
3236  */
3237 static int
3238 fme_set_timer(struct fme *fmep, unsigned long long wull)
3239 {
3240 	out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait ");
3241 	ptree_timeval(O_ALTFP|O_VERB, &wull);
3242 
3243 	if (wull <= fmep->pull) {
3244 		out(O_ALTFP|O_VERB|O_NONL, "already have waited at least ");
3245 		ptree_timeval(O_ALTFP|O_VERB, &fmep->pull);
3246 		out(O_ALTFP|O_VERB, NULL);
3247 		/* we've waited at least wull already, don't need timer */
3248 		return (0);
3249 	}
3250 
3251 	out(O_ALTFP|O_VERB|O_NONL, " currently ");
3252 	if (fmep->wull != 0) {
3253 		out(O_ALTFP|O_VERB|O_NONL, "waiting ");
3254 		ptree_timeval(O_ALTFP|O_VERB, &fmep->wull);
3255 		out(O_ALTFP|O_VERB, NULL);
3256 	} else {
3257 		out(O_ALTFP|O_VERB|O_NONL, "not waiting");
3258 		out(O_ALTFP|O_VERB, NULL);
3259 	}
3260 
3261 	if (fmep->wull != 0)
3262 		if (wull >= fmep->wull)
3263 			/* New timer would fire later than established timer */
3264 			return (0);
3265 
3266 	if (fmep->wull != 0) {
3267 		fmd_timer_remove(fmep->hdl, fmep->timer);
3268 	}
3269 
3270 	fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep,
3271 	    fmep->e0r, wull);
3272 	out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer);
3273 	fmep->wull = wull;
3274 	return (1);
3275 }
3276 
3277 void
3278 fme_timer_fired(struct fme *fmep, id_t tid)
3279 {
3280 	struct fme *ffmep = NULL;
3281 
3282 	for (ffmep = FMElist; ffmep; ffmep = ffmep->next)
3283 		if (ffmep == fmep)
3284 			break;
3285 
3286 	if (ffmep == NULL) {
3287 		out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.",
3288 		    (void *)fmep);
3289 		return;
3290 	}
3291 
3292 	out(O_ALTFP|O_VERB, "Timer fired %lx", tid);
3293 	fmep->pull = fmep->wull;
3294 	fmep->wull = 0;
3295 	fmd_buf_write(fmep->hdl, fmep->fmcase,
3296 	    WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull));
3297 
3298 	fme_eval(fmep, fmep->e0r);
3299 }
3300 
3301 /*
3302  * Preserve the fme's suspect list in its psuspects list, NULLing the
3303  * suspects list in the meantime.
3304  */
3305 static void
3306 save_suspects(struct fme *fmep)
3307 {
3308 	struct event *ep;
3309 	struct event *nextep;
3310 
3311 	/* zero out the previous suspect list */
3312 	for (ep = fmep->psuspects; ep; ep = nextep) {
3313 		nextep = ep->psuspects;
3314 		ep->psuspects = NULL;
3315 	}
3316 	fmep->psuspects = NULL;
3317 
3318 	/* zero out the suspect list, copying it to previous suspect list */
3319 	fmep->psuspects = fmep->suspects;
3320 	for (ep = fmep->suspects; ep; ep = nextep) {
3321 		nextep = ep->suspects;
3322 		ep->psuspects = ep->suspects;
3323 		ep->suspects = NULL;
3324 		ep->is_suspect = 0;
3325 	}
3326 	fmep->suspects = NULL;
3327 	fmep->nsuspects = 0;
3328 }
3329 
3330 /*
3331  * Retrieve the fme's suspect list from its psuspects list.
3332  */
3333 static void
3334 restore_suspects(struct fme *fmep)
3335 {
3336 	struct event *ep;
3337 	struct event *nextep;
3338 
3339 	fmep->nsuspects = 0;
3340 	fmep->suspects = fmep->psuspects;
3341 	for (ep = fmep->psuspects; ep; ep = nextep) {
3342 		fmep->nsuspects++;
3343 		nextep = ep->psuspects;
3344 		ep->suspects = ep->psuspects;
3345 	}
3346 }
3347 
3348 /*
3349  * this is what we use to call the Emrys prototype code instead of main()
3350  */
3351 static void
3352 fme_eval(struct fme *fmep, fmd_event_t *ffep)
3353 {
3354 	struct event *ep;
3355 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
3356 	struct rsl *srl = NULL;
3357 	struct rsl *srl2 = NULL;
3358 	int mess_zero_count;
3359 	int rpcnt;
3360 
3361 	save_suspects(fmep);
3362 
3363 	out(O_ALTFP, "Evaluate FME %d", fmep->id);
3364 	indent_set("  ");
3365 
3366 	lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
3367 	fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
3368 
3369 	out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
3370 	    fme_state2str(fmep->state));
3371 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
3372 		out(O_ALTFP|O_NONL, " ");
3373 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
3374 	}
3375 	out(O_ALTFP, NULL);
3376 
3377 	switch (fmep->state) {
3378 	case FME_CREDIBLE:
3379 		print_suspects(SLNEW, fmep);
3380 		(void) upsets_eval(fmep, ffep);
3381 
3382 		/*
3383 		 * we may have already posted suspects in upsets_eval() which
3384 		 * can recurse into fme_eval() again. If so then just return.
3385 		 */
3386 		if (fmep->posted_suspects)
3387 			return;
3388 
3389 		stats_counter_bump(fmep->diags);
3390 		rpcnt = fmep->nsuspects;
3391 		save_suspects(fmep);
3392 
3393 		/*
3394 		 * create two lists, one for "message=1" faults and one for
3395 		 * "message=0" faults. If we have a mixture we will generate
3396 		 * two separate suspect lists.
3397 		 */
3398 		srl = MALLOC(rpcnt * sizeof (struct rsl));
3399 		bzero(srl, rpcnt * sizeof (struct rsl));
3400 		srl2 = MALLOC(rpcnt * sizeof (struct rsl));
3401 		bzero(srl2, rpcnt * sizeof (struct rsl));
3402 		mess_zero_count = trim_suspects(fmep, srl, srl2, ffep);
3403 
3404 		/*
3405 		 * If the resulting suspect list has no members, we're
3406 		 * done so simply close the case. Otherwise sort and publish.
3407 		 */
3408 		if (fmep->nsuspects == 0 && mess_zero_count == 0) {
3409 			out(O_ALTFP,
3410 			    "[FME%d, case %s (all suspects are upsets)]",
3411 			    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
3412 			fmd_case_close(fmep->hdl, fmep->fmcase);
3413 		} else if (fmep->nsuspects != 0 && mess_zero_count == 0) {
3414 			publish_suspects(fmep, srl);
3415 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3416 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3417 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3418 		} else if (fmep->nsuspects == 0 && mess_zero_count != 0) {
3419 			fmep->nsuspects = mess_zero_count;
3420 			publish_suspects(fmep, srl2);
3421 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3422 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3423 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3424 		} else {
3425 			struct event *obsp;
3426 			struct fme *nfmep;
3427 
3428 			publish_suspects(fmep, srl);
3429 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3430 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3431 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3432 
3433 			/*
3434 			 * Got both message=0 and message=1 so create a
3435 			 * duplicate case. Also need a temporary duplicate fme
3436 			 * structure for use by publish_suspects().
3437 			 */
3438 			nfmep = alloc_fme();
3439 			nfmep->id =  Nextid++;
3440 			nfmep->hdl = fmep->hdl;
3441 			nfmep->nsuspects = mess_zero_count;
3442 			nfmep->fmcase = fmd_case_open(fmep->hdl, NULL);
3443 			out(O_ALTFP|O_STAMP,
3444 			    "[creating parallel FME%d, case %s]", nfmep->id,
3445 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3446 			Open_fme_count++;
3447 			if (ffep) {
3448 				fmd_case_setprincipal(nfmep->hdl,
3449 				    nfmep->fmcase, ffep);
3450 				fmd_case_add_ereport(nfmep->hdl,
3451 				    nfmep->fmcase, ffep);
3452 			}
3453 			for (obsp = fmep->observations; obsp;
3454 			    obsp = obsp->observations)
3455 				if (obsp->ffep && obsp->ffep != ffep)
3456 					fmd_case_add_ereport(nfmep->hdl,
3457 					    nfmep->fmcase, obsp->ffep);
3458 
3459 			publish_suspects(nfmep, srl2);
3460 			out(O_ALTFP, "[solving FME%d, case %s]", nfmep->id,
3461 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3462 			fmd_case_solve(nfmep->hdl, nfmep->fmcase);
3463 			FREE(nfmep);
3464 		}
3465 		FREE(srl);
3466 		FREE(srl2);
3467 		restore_suspects(fmep);
3468 
3469 		fmep->posted_suspects = 1;
3470 		fmd_buf_write(fmep->hdl, fmep->fmcase,
3471 		    WOBUF_POSTD,
3472 		    (void *)&fmep->posted_suspects,
3473 		    sizeof (fmep->posted_suspects));
3474 
3475 		/*
3476 		 * Now the suspects have been posted, we can clear up
3477 		 * the instance tree as we won't be looking at it again.
3478 		 * Also cancel the timer as the case is now solved.
3479 		 */
3480 		if (fmep->wull != 0) {
3481 			fmd_timer_remove(fmep->hdl, fmep->timer);
3482 			fmep->wull = 0;
3483 		}
3484 		break;
3485 
3486 	case FME_WAIT:
3487 		ASSERT(my_delay > fmep->ull);
3488 		(void) fme_set_timer(fmep, my_delay);
3489 		print_suspects(SLWAIT, fmep);
3490 		itree_prune(fmep->eventtree);
3491 		return;
3492 
3493 	case FME_DISPROVED:
3494 		print_suspects(SLDISPROVED, fmep);
3495 		Undiag_reason = UD_VAL_UNSOLVD;
3496 		fme_undiagnosable(fmep);
3497 		break;
3498 	}
3499 
3500 	itree_free(fmep->eventtree);
3501 	fmep->eventtree = NULL;
3502 	structconfig_free(fmep->config);
3503 	fmep->config = NULL;
3504 	destroy_fme_bufs(fmep);
3505 }
3506 
/*
 * Forward declarations of static helpers used by the inference routines
 * that follow.
 */
static void indent(void);
static int triggered(struct fme *fmep, struct event *ep, int mark);

static enum fme_state causes_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state effects_test(struct fme *fmep,
    struct event *fault_event, unsigned long long at_latest_by,
    unsigned long long *pdelay);
static enum fme_state requirements_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
3516 
3517 static int
3518 checkconstraints(struct fme *fmep, struct arrow *arrowp)
3519 {
3520 	struct constraintlist *ctp;
3521 	struct evalue value;
3522 	char *sep = "";
3523 
3524 	if (arrowp->forever_false) {
3525 		indent();
3526 		out(O_ALTFP|O_VERB|O_NONL, "  Forever false constraint: ");
3527 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3528 			out(O_ALTFP|O_VERB|O_NONL, sep);
3529 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3530 			sep = ", ";
3531 		}
3532 		out(O_ALTFP|O_VERB, NULL);
3533 		return (0);
3534 	}
3535 	if (arrowp->forever_true) {
3536 		indent();
3537 		out(O_ALTFP|O_VERB|O_NONL, "  Forever true constraint: ");
3538 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3539 			out(O_ALTFP|O_VERB|O_NONL, sep);
3540 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3541 			sep = ", ";
3542 		}
3543 		out(O_ALTFP|O_VERB, NULL);
3544 		return (1);
3545 	}
3546 
3547 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3548 		if (eval_expr(ctp->cnode, NULL, NULL,
3549 		    &fmep->globals, fmep->config,
3550 		    arrowp, 0, &value)) {
3551 			/* evaluation successful */
3552 			if (value.t == UNDEFINED || value.v == 0) {
3553 				/* known false */
3554 				arrowp->forever_false = 1;
3555 				indent();
3556 				out(O_ALTFP|O_VERB|O_NONL,
3557 				    "  False constraint: ");
3558 				ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3559 				out(O_ALTFP|O_VERB, NULL);
3560 				return (0);
3561 			}
3562 		} else {
3563 			/* evaluation unsuccessful -- unknown value */
3564 			indent();
3565 			out(O_ALTFP|O_VERB|O_NONL,
3566 			    "  Deferred constraint: ");
3567 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3568 			out(O_ALTFP|O_VERB, NULL);
3569 			return (1);
3570 		}
3571 	}
3572 	/* known true */
3573 	arrowp->forever_true = 1;
3574 	indent();
3575 	out(O_ALTFP|O_VERB|O_NONL, "  True constraint: ");
3576 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3577 		out(O_ALTFP|O_VERB|O_NONL, sep);
3578 		ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3579 		sep = ", ";
3580 	}
3581 	out(O_ALTFP|O_VERB, NULL);
3582 	return (1);
3583 }
3584 
3585 static int
3586 triggered(struct fme *fmep, struct event *ep, int mark)
3587 {
3588 	struct bubble *bp;
3589 	struct arrowlist *ap;
3590 	int count = 0;
3591 
3592 	stats_counter_bump(fmep->Tcallcount);
3593 	for (bp = itree_next_bubble(ep, NULL); bp;
3594 	    bp = itree_next_bubble(ep, bp)) {
3595 		if (bp->t != B_TO)
3596 			continue;
3597 		for (ap = itree_next_arrow(bp, NULL); ap;
3598 		    ap = itree_next_arrow(bp, ap)) {
3599 			/* check count of marks against K in the bubble */
3600 			if ((ap->arrowp->mark & mark) &&
3601 			    ++count >= bp->nork)
3602 				return (1);
3603 		}
3604 	}
3605 	return (0);
3606 }
3607 
3608 static int
3609 mark_arrows(struct fme *fmep, struct event *ep, int mark,
3610     unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
3611 {
3612 	struct bubble *bp;
3613 	struct arrowlist *ap;
3614 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3615 	unsigned long long my_delay;
3616 	enum fme_state result;
3617 	int retval = 0;
3618 
3619 	for (bp = itree_next_bubble(ep, NULL); bp;
3620 	    bp = itree_next_bubble(ep, bp)) {
3621 		if (bp->t != B_FROM)
3622 			continue;
3623 		stats_counter_bump(fmep->Marrowcount);
3624 		for (ap = itree_next_arrow(bp, NULL); ap;
3625 		    ap = itree_next_arrow(bp, ap)) {
3626 			struct event *ep2 = ap->arrowp->head->myevent;
3627 			/*
3628 			 * if we're clearing marks, we can avoid doing
3629 			 * all that work evaluating constraints.
3630 			 */
3631 			if (mark == 0) {
3632 				if (ap->arrowp->arrow_marked == 0)
3633 					continue;
3634 				ap->arrowp->arrow_marked = 0;
3635 				ap->arrowp->mark &= ~EFFECTS_COUNTER;
3636 				if (keep && (ep2->cached_state &
3637 				    (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
3638 					ep2->keep_in_tree = 1;
3639 				ep2->cached_state &=
3640 				    ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
3641 				(void) mark_arrows(fmep, ep2, mark, 0, NULL,
3642 				    keep);
3643 				continue;
3644 			}
3645 			ap->arrowp->arrow_marked = 1;
3646 			if (ep2->cached_state & REQMNTS_DISPROVED) {
3647 				indent();
3648 				out(O_ALTFP|O_VERB|O_NONL,
3649 				    "  ALREADY DISPROVED ");
3650 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3651 				out(O_ALTFP|O_VERB, NULL);
3652 				continue;
3653 			}
3654 			if (ep2->cached_state & WAIT_EFFECT) {
3655 				indent();
3656 				out(O_ALTFP|O_VERB|O_NONL,
3657 				    "  ALREADY EFFECTS WAIT ");
3658 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3659 				out(O_ALTFP|O_VERB, NULL);
3660 				continue;
3661 			}
3662 			if (ep2->cached_state & CREDIBLE_EFFECT) {
3663 				indent();
3664 				out(O_ALTFP|O_VERB|O_NONL,
3665 				    "  ALREADY EFFECTS CREDIBLE ");
3666 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3667 				out(O_ALTFP|O_VERB, NULL);
3668 				continue;
3669 			}
3670 			if ((ep2->cached_state & PARENT_WAIT) &&
3671 			    (mark & PARENT_WAIT)) {
3672 				indent();
3673 				out(O_ALTFP|O_VERB|O_NONL,
3674 				    "  ALREADY PARENT EFFECTS WAIT ");
3675 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3676 				out(O_ALTFP|O_VERB, NULL);
3677 				continue;
3678 			}
3679 			platform_set_payloadnvp(ep2->nvp);
3680 			if (checkconstraints(fmep, ap->arrowp) == 0) {
3681 				platform_set_payloadnvp(NULL);
3682 				indent();
3683 				out(O_ALTFP|O_VERB|O_NONL,
3684 				    "  CONSTRAINTS FAIL ");
3685 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3686 				out(O_ALTFP|O_VERB, NULL);
3687 				continue;
3688 			}
3689 			platform_set_payloadnvp(NULL);
3690 			ap->arrowp->mark |= EFFECTS_COUNTER;
3691 			if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
3692 				indent();
3693 				out(O_ALTFP|O_VERB|O_NONL,
3694 				    "  K-COUNT NOT YET MET ");
3695 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3696 				out(O_ALTFP|O_VERB, NULL);
3697 				continue;
3698 			}
3699 			ep2->cached_state &= ~PARENT_WAIT;
3700 			/*
3701 			 * if we've reached an ereport and no propagation time
3702 			 * is specified, use the Hesitate value
3703 			 */
3704 			if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
3705 			    ap->arrowp->maxdelay == 0ULL) {
3706 				out(O_ALTFP|O_VERB|O_NONL, "  default wait ");
3707 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3708 				out(O_ALTFP|O_VERB, NULL);
3709 				result = requirements_test(fmep, ep2, Hesitate,
3710 				    &my_delay);
3711 			} else {
3712 				result = requirements_test(fmep, ep2,
3713 				    at_latest_by + ap->arrowp->maxdelay,
3714 				    &my_delay);
3715 			}
3716 			if (result == FME_WAIT) {
3717 				retval = WAIT_EFFECT;
3718 				if (overall_delay > my_delay)
3719 					overall_delay = my_delay;
3720 				ep2->cached_state |= WAIT_EFFECT;
3721 				indent();
3722 				out(O_ALTFP|O_VERB|O_NONL, "  EFFECTS WAIT ");
3723 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3724 				out(O_ALTFP|O_VERB, NULL);
3725 				indent_push("  E");
3726 				if (mark_arrows(fmep, ep2, PARENT_WAIT,
3727 				    at_latest_by, &my_delay, 0) ==
3728 				    WAIT_EFFECT) {
3729 					retval = WAIT_EFFECT;
3730 					if (overall_delay > my_delay)
3731 						overall_delay = my_delay;
3732 				}
3733 				indent_pop();
3734 			} else if (result == FME_DISPROVED) {
3735 				indent();
3736 				out(O_ALTFP|O_VERB|O_NONL,
3737 				    "  EFFECTS DISPROVED ");
3738 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3739 				out(O_ALTFP|O_VERB, NULL);
3740 			} else {
3741 				ep2->cached_state |= mark;
3742 				indent();
3743 				if (mark == CREDIBLE_EFFECT)
3744 					out(O_ALTFP|O_VERB|O_NONL,
3745 					    "  EFFECTS CREDIBLE ");
3746 				else
3747 					out(O_ALTFP|O_VERB|O_NONL,
3748 					    "  PARENT EFFECTS WAIT ");
3749 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3750 				out(O_ALTFP|O_VERB, NULL);
3751 				indent_push("  E");
3752 				if (mark_arrows(fmep, ep2, mark, at_latest_by,
3753 				    &my_delay, 0) == WAIT_EFFECT) {
3754 					retval = WAIT_EFFECT;
3755 					if (overall_delay > my_delay)
3756 						overall_delay = my_delay;
3757 				}
3758 				indent_pop();
3759 			}
3760 		}
3761 	}
3762 	if (retval == WAIT_EFFECT)
3763 		*pdelay = overall_delay;
3764 	return (retval);
3765 }
3766 
3767 static enum fme_state
3768 effects_test(struct fme *fmep, struct event *fault_event,
3769     unsigned long long at_latest_by, unsigned long long *pdelay)
3770 {
3771 	struct event *error_event;
3772 	enum fme_state return_value = FME_CREDIBLE;
3773 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3774 	unsigned long long my_delay;
3775 
3776 	stats_counter_bump(fmep->Ecallcount);
3777 	indent_push("  E");
3778 	indent();
3779 	out(O_ALTFP|O_VERB|O_NONL, "->");
3780 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3781 	out(O_ALTFP|O_VERB, NULL);
3782 
3783 	if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
3784 	    &my_delay, 0) == WAIT_EFFECT) {
3785 		return_value = FME_WAIT;
3786 		if (overall_delay > my_delay)
3787 			overall_delay = my_delay;
3788 	}
3789 	for (error_event = fmep->observations;
3790 	    error_event; error_event = error_event->observations) {
3791 		indent();
3792 		out(O_ALTFP|O_VERB|O_NONL, " ");
3793 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
3794 		if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
3795 			if (error_event->cached_state &
3796 			    (PARENT_WAIT|WAIT_EFFECT)) {
3797 				out(O_ALTFP|O_VERB, " NOT YET triggered");
3798 				continue;
3799 			}
3800 			return_value = FME_DISPROVED;
3801 			out(O_ALTFP|O_VERB, " NOT triggered");
3802 			break;
3803 		} else {
3804 			out(O_ALTFP|O_VERB, " triggered");
3805 		}
3806 	}
3807 	if (return_value == FME_DISPROVED) {
3808 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
3809 	} else {
3810 		fault_event->keep_in_tree = 1;
3811 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
3812 	}
3813 
3814 	indent();
3815 	out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
3816 	    fme_state2str(return_value));
3817 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3818 	out(O_ALTFP|O_VERB, NULL);
3819 	indent_pop();
3820 	if (return_value == FME_WAIT)
3821 		*pdelay = overall_delay;
3822 	return (return_value);
3823 }
3824 
3825 static enum fme_state
3826 requirements_test(struct fme *fmep, struct event *ep,
3827     unsigned long long at_latest_by, unsigned long long *pdelay)
3828 {
3829 	int waiting_events;
3830 	int credible_events;
3831 	int deferred_events;
3832 	enum fme_state return_value = FME_CREDIBLE;
3833 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3834 	unsigned long long arrow_delay;
3835 	unsigned long long my_delay;
3836 	struct event *ep2;
3837 	struct bubble *bp;
3838 	struct arrowlist *ap;
3839 
3840 	if (ep->cached_state & REQMNTS_CREDIBLE) {
3841 		indent();
3842 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY CREDIBLE ");
3843 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3844 		out(O_ALTFP|O_VERB, NULL);
3845 		return (FME_CREDIBLE);
3846 	}
3847 	if (ep->cached_state & REQMNTS_DISPROVED) {
3848 		indent();
3849 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY DISPROVED ");
3850 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3851 		out(O_ALTFP|O_VERB, NULL);
3852 		return (FME_DISPROVED);
3853 	}
3854 	if (ep->cached_state & REQMNTS_WAIT) {
3855 		indent();
3856 		*pdelay = ep->cached_delay;
3857 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY WAIT ");
3858 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3859 		out(O_ALTFP|O_VERB|O_NONL, ", wait for: ");
3860 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3861 		out(O_ALTFP|O_VERB, NULL);
3862 		return (FME_WAIT);
3863 	}
3864 	stats_counter_bump(fmep->Rcallcount);
3865 	indent_push("  R");
3866 	indent();
3867 	out(O_ALTFP|O_VERB|O_NONL, "->");
3868 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3869 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
3870 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3871 	out(O_ALTFP|O_VERB, NULL);
3872 
3873 	if (ep->t == N_EREPORT) {
3874 		if (ep->count == 0) {
3875 			if (fmep->pull >= at_latest_by) {
3876 				return_value = FME_DISPROVED;
3877 			} else {
3878 				ep->cached_delay = *pdelay = at_latest_by;
3879 				return_value = FME_WAIT;
3880 			}
3881 		}
3882 
3883 		indent();
3884 		switch (return_value) {
3885 		case FME_CREDIBLE:
3886 			ep->cached_state |= REQMNTS_CREDIBLE;
3887 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE ");
3888 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3889 			break;
3890 		case FME_DISPROVED:
3891 			ep->cached_state |= REQMNTS_DISPROVED;
3892 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
3893 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3894 			break;
3895 		case FME_WAIT:
3896 			ep->cached_state |= REQMNTS_WAIT;
3897 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT ");
3898 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3899 			out(O_ALTFP|O_VERB|O_NONL, " to ");
3900 			ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3901 			break;
3902 		default:
3903 			out(O_DIE, "requirements_test: unexpected fme_state");
3904 			break;
3905 		}
3906 		out(O_ALTFP|O_VERB, NULL);
3907 		indent_pop();
3908 
3909 		return (return_value);
3910 	}
3911 
3912 	/* this event is not a report, descend the tree */
3913 	for (bp = itree_next_bubble(ep, NULL); bp;
3914 	    bp = itree_next_bubble(ep, bp)) {
3915 		int n;
3916 
3917 		if (bp->t != B_FROM)
3918 			continue;
3919 
3920 		n = bp->nork;
3921 
3922 		credible_events = 0;
3923 		waiting_events = 0;
3924 		deferred_events = 0;
3925 		arrow_delay = TIMEVAL_EVENTUALLY;
3926 		/*
3927 		 * n is -1 for 'A' so adjust it.
3928 		 * XXX just count up the arrows for now.
3929 		 */
3930 		if (n < 0) {
3931 			n = 0;
3932 			for (ap = itree_next_arrow(bp, NULL); ap;
3933 			    ap = itree_next_arrow(bp, ap))
3934 				n++;
3935 			indent();
3936 			out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n);
3937 		} else {
3938 			indent();
3939 			out(O_ALTFP|O_VERB, " Bubble N=%d", n);
3940 		}
3941 
3942 		if (n == 0)
3943 			continue;
3944 		if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) {
3945 			for (ap = itree_next_arrow(bp, NULL); ap;
3946 			    ap = itree_next_arrow(bp, ap)) {
3947 				ep2 = ap->arrowp->head->myevent;
3948 				platform_set_payloadnvp(ep2->nvp);
3949 				(void) checkconstraints(fmep, ap->arrowp);
3950 				if (!ap->arrowp->forever_false) {
3951 					/*
3952 					 * if all arrows are invalidated by the
3953 					 * constraints, then we should elide the
3954 					 * whole bubble to be consistant with
3955 					 * the tree creation time behaviour
3956 					 */
3957 					bp->mark |= BUBBLE_OK;
3958 					platform_set_payloadnvp(NULL);
3959 					break;
3960 				}
3961 				platform_set_payloadnvp(NULL);
3962 			}
3963 		}
3964 		for (ap = itree_next_arrow(bp, NULL); ap;
3965 		    ap = itree_next_arrow(bp, ap)) {
3966 			ep2 = ap->arrowp->head->myevent;
3967 			if (n <= credible_events)
3968 				break;
3969 
3970 			ap->arrowp->mark |= REQMNTS_COUNTER;
3971 			if (triggered(fmep, ep2, REQMNTS_COUNTER))
3972 				/* XXX adding max timevals! */
3973 				switch (requirements_test(fmep, ep2,
3974 				    at_latest_by + ap->arrowp->maxdelay,
3975 				    &my_delay)) {
3976 				case FME_DEFERRED:
3977 					deferred_events++;
3978 					break;
3979 				case FME_CREDIBLE:
3980 					credible_events++;
3981 					break;
3982 				case FME_DISPROVED:
3983 					break;
3984 				case FME_WAIT:
3985 					if (my_delay < arrow_delay)
3986 						arrow_delay = my_delay;
3987 					waiting_events++;
3988 					break;
3989 				default:
3990 					out(O_DIE,
3991 					"Bug in requirements_test.");
3992 				}
3993 			else
3994 				deferred_events++;
3995 		}
3996 		if (!(bp->mark & BUBBLE_OK) && waiting_events == 0) {
3997 			bp->mark |= BUBBLE_ELIDED;
3998 			continue;
3999 		}
4000 		indent();
4001 		out(O_ALTFP|O_VERB, " Credible: %d Waiting %d",
4002 		    credible_events + deferred_events, waiting_events);
4003 		if (credible_events + deferred_events + waiting_events < n) {
4004 			/* Can never meet requirements */
4005 			ep->cached_state |= REQMNTS_DISPROVED;
4006 			indent();
4007 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
4008 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4009 			out(O_ALTFP|O_VERB, NULL);
4010 			indent_pop();
4011 			return (FME_DISPROVED);
4012 		}
4013 		if (credible_events + deferred_events < n) {
4014 			/* will have to wait */
4015 			/* wait time is shortest known */
4016 			if (arrow_delay < overall_delay)
4017 				overall_delay = arrow_delay;
4018 			return_value = FME_WAIT;
4019 		} else if (credible_events < n) {
4020 			if (return_value != FME_WAIT)
4021 				return_value = FME_DEFERRED;
4022 		}
4023 	}
4024 
4025 	/*
4026 	 * don't mark as FME_DEFERRED. If this event isn't reached by another
4027 	 * path, then this will be considered FME_CREDIBLE. But if it is
4028 	 * reached by a different path so the K-count is met, then might
4029 	 * get overridden by FME_WAIT or FME_DISPROVED.
4030 	 */
4031 	if (return_value == FME_WAIT) {
4032 		ep->cached_state |= REQMNTS_WAIT;
4033 		ep->cached_delay = *pdelay = overall_delay;
4034 	} else if (return_value == FME_CREDIBLE) {
4035 		ep->cached_state |= REQMNTS_CREDIBLE;
4036 	}
4037 	indent();
4038 	out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ",
4039 	    fme_state2str(return_value));
4040 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4041 	out(O_ALTFP|O_VERB, NULL);
4042 	indent_pop();
4043 	return (return_value);
4044 }
4045 
4046 static enum fme_state
4047 causes_test(struct fme *fmep, struct event *ep,
4048     unsigned long long at_latest_by, unsigned long long *pdelay)
4049 {
4050 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4051 	unsigned long long my_delay;
4052 	int credible_results = 0;
4053 	int waiting_results = 0;
4054 	enum fme_state fstate;
4055 	struct event *tail_event;
4056 	struct bubble *bp;
4057 	struct arrowlist *ap;
4058 	int k = 1;
4059 
4060 	stats_counter_bump(fmep->Ccallcount);
4061 	indent_push("  C");
4062 	indent();
4063 	out(O_ALTFP|O_VERB|O_NONL, "->");
4064 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4065 	out(O_ALTFP|O_VERB, NULL);
4066 
4067 	for (bp = itree_next_bubble(ep, NULL); bp;
4068 	    bp = itree_next_bubble(ep, bp)) {
4069 		if (bp->t != B_TO)
4070 			continue;
4071 		k = bp->nork;	/* remember the K value */
4072 		for (ap = itree_next_arrow(bp, NULL); ap;
4073 		    ap = itree_next_arrow(bp, ap)) {
4074 			int do_not_follow = 0;
4075 
4076 			/*
4077 			 * if we get to the same event multiple times
4078 			 * only worry about the first one.
4079 			 */
4080 			if (ap->arrowp->tail->myevent->cached_state &
4081 			    CAUSES_TESTED) {
4082 				indent();
4083 				out(O_ALTFP|O_VERB|O_NONL,
4084 				    "  causes test already run for ");
4085 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4086 				    ap->arrowp->tail->myevent);
4087 				out(O_ALTFP|O_VERB, NULL);
4088 				continue;
4089 			}
4090 
4091 			/*
4092 			 * see if false constraint prevents us
4093 			 * from traversing this arrow
4094 			 */
4095 			platform_set_payloadnvp(ep->nvp);
4096 			if (checkconstraints(fmep, ap->arrowp) == 0)
4097 				do_not_follow = 1;
4098 			platform_set_payloadnvp(NULL);
4099 			if (do_not_follow) {
4100 				indent();
4101 				out(O_ALTFP|O_VERB|O_NONL,
4102 				    "  False arrow from ");
4103 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4104 				    ap->arrowp->tail->myevent);
4105 				out(O_ALTFP|O_VERB, NULL);
4106 				continue;
4107 			}
4108 
4109 			ap->arrowp->tail->myevent->cached_state |=
4110 			    CAUSES_TESTED;
4111 			tail_event = ap->arrowp->tail->myevent;
4112 			fstate = hypothesise(fmep, tail_event, at_latest_by,
4113 			    &my_delay);
4114 
4115 			switch (fstate) {
4116 			case FME_WAIT:
4117 				if (my_delay < overall_delay)
4118 					overall_delay = my_delay;
4119 				waiting_results++;
4120 				break;
4121 			case FME_CREDIBLE:
4122 				credible_results++;
4123 				break;
4124 			case FME_DISPROVED:
4125 				break;
4126 			default:
4127 				out(O_DIE, "Bug in causes_test");
4128 			}
4129 		}
4130 	}
4131 	/* compare against K */
4132 	if (credible_results + waiting_results < k) {
4133 		indent();
4134 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED ");
4135 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4136 		out(O_ALTFP|O_VERB, NULL);
4137 		indent_pop();
4138 		return (FME_DISPROVED);
4139 	}
4140 	if (waiting_results != 0) {
4141 		*pdelay = overall_delay;
4142 		indent();
4143 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT ");
4144 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4145 		out(O_ALTFP|O_VERB|O_NONL, " to ");
4146 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4147 		out(O_ALTFP|O_VERB, NULL);
4148 		indent_pop();
4149 		return (FME_WAIT);
4150 	}
4151 	indent();
4152 	out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE ");
4153 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4154 	out(O_ALTFP|O_VERB, NULL);
4155 	indent_pop();
4156 	return (FME_CREDIBLE);
4157 }
4158 
4159 static enum fme_state
4160 hypothesise(struct fme *fmep, struct event *ep,
4161 	unsigned long long at_latest_by, unsigned long long *pdelay)
4162 {
4163 	enum fme_state rtr, otr;
4164 	unsigned long long my_delay;
4165 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4166 
4167 	stats_counter_bump(fmep->Hcallcount);
4168 	indent_push("  H");
4169 	indent();
4170 	out(O_ALTFP|O_VERB|O_NONL, "->");
4171 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4172 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
4173 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4174 	out(O_ALTFP|O_VERB, NULL);
4175 
4176 	rtr = requirements_test(fmep, ep, at_latest_by, &my_delay);
4177 	if ((rtr == FME_WAIT) && (my_delay < overall_delay))
4178 		overall_delay = my_delay;
4179 	if (rtr != FME_DISPROVED) {
4180 		if (is_problem(ep->t)) {
4181 			otr = effects_test(fmep, ep, at_latest_by, &my_delay);
4182 			if (otr != FME_DISPROVED) {
4183 				if (fmep->peek == 0 && ep->is_suspect == 0) {
4184 					ep->suspects = fmep->suspects;
4185 					ep->is_suspect = 1;
4186 					fmep->suspects = ep;
4187 					fmep->nsuspects++;
4188 				}
4189 			}
4190 		} else
4191 			otr = causes_test(fmep, ep, at_latest_by, &my_delay);
4192 		if ((otr == FME_WAIT) && (my_delay < overall_delay))
4193 			overall_delay = my_delay;
4194 		if ((otr != FME_DISPROVED) &&
4195 		    ((rtr == FME_WAIT) || (otr == FME_WAIT)))
4196 			*pdelay = overall_delay;
4197 	}
4198 	if (rtr == FME_DISPROVED) {
4199 		indent();
4200 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4201 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4202 		out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
4203 		indent_pop();
4204 		return (FME_DISPROVED);
4205 	}
4206 	if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
4207 		indent();
4208 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4209 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4210 		out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
4211 		indent_pop();
4212 		return (FME_DISPROVED);
4213 	}
4214 	if (otr == FME_DISPROVED) {
4215 		indent();
4216 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4217 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4218 		out(O_ALTFP|O_VERB, " (causes are not credible)");
4219 		indent_pop();
4220 		return (FME_DISPROVED);
4221 	}
4222 	if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
4223 		indent();
4224 		out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
4225 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4226 		out(O_ALTFP|O_VERB|O_NONL, " to ");
4227 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
4228 		out(O_ALTFP|O_VERB, NULL);
4229 		indent_pop();
4230 		return (FME_WAIT);
4231 	}
4232 	indent();
4233 	out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
4234 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4235 	out(O_ALTFP|O_VERB, NULL);
4236 	indent_pop();
4237 	return (FME_CREDIBLE);
4238 }
4239 
4240 /*
4241  * fme_istat_load -- reconstitute any persistent istats
4242  */
4243 void
4244 fme_istat_load(fmd_hdl_t *hdl)
4245 {
4246 	int sz;
4247 	char *sbuf;
4248 	char *ptr;
4249 
4250 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) {
4251 		out(O_ALTFP, "fme_istat_load: No stats");
4252 		return;
4253 	}
4254 
4255 	sbuf = alloca(sz);
4256 
4257 	fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz);
4258 
4259 	/*
4260 	 * pick apart the serialized stats
4261 	 *
4262 	 * format is:
4263 	 *	<class-name>, '@', <path>, '\0', <value>, '\0'
4264 	 * for example:
4265 	 *	"stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
4266 	 *
4267 	 * since this is parsing our own serialized data, any parsing issues
4268 	 * are fatal, so we check for them all with ASSERT() below.
4269 	 */
4270 	ptr = sbuf;
4271 	while (ptr < &sbuf[sz]) {
4272 		char *sepptr;
4273 		struct node *np;
4274 		int val;
4275 
4276 		sepptr = strchr(ptr, '@');
4277 		ASSERT(sepptr != NULL);
4278 		*sepptr = '\0';
4279 
4280 		/* construct the event */
4281 		np = newnode(T_EVENT, NULL, 0);
4282 		np->u.event.ename = newnode(T_NAME, NULL, 0);
4283 		np->u.event.ename->u.name.t = N_STAT;
4284 		np->u.event.ename->u.name.s = stable(ptr);
4285 		np->u.event.ename->u.name.it = IT_ENAME;
4286 		np->u.event.ename->u.name.last = np->u.event.ename;
4287 
4288 		ptr = sepptr + 1;
4289 		ASSERT(ptr < &sbuf[sz]);
4290 		ptr += strlen(ptr);
4291 		ptr++;	/* move past the '\0' separating path from value */
4292 		ASSERT(ptr < &sbuf[sz]);
4293 		ASSERT(isdigit(*ptr));
4294 		val = atoi(ptr);
4295 		ASSERT(val > 0);
4296 		ptr += strlen(ptr);
4297 		ptr++;	/* move past the final '\0' for this entry */
4298 
4299 		np->u.event.epname = pathstring2epnamenp(sepptr + 1);
4300 		ASSERT(np->u.event.epname != NULL);
4301 
4302 		istat_bump(np, val);
4303 		tree_free(np);
4304 	}
4305 
4306 	istat_save();
4307 }
4308