xref: /illumos-gate/usr/src/cmd/fm/modules/common/eversholt/fme.c (revision a629ded1d7b2e67c2028ccbc5ba9099328cc4e1b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2012 Milan Jurik. All rights reserved.
25  * Copyright (c) 2018, Joyent, Inc.
26  *
27  * fme.c -- fault management exercise module
28  *
29  * this module provides the simulated fault management exercise.
30  */
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <strings.h>
36 #include <ctype.h>
37 #include <alloca.h>
38 #include <libnvpair.h>
39 #include <sys/fm/protocol.h>
40 #include <fm/fmd_api.h>
41 #include "alloc.h"
42 #include "out.h"
43 #include "stats.h"
44 #include "stable.h"
45 #include "literals.h"
46 #include "lut.h"
47 #include "tree.h"
48 #include "ptree.h"
49 #include "itree.h"
50 #include "ipath.h"
51 #include "fme.h"
52 #include "evnv.h"
53 #include "eval.h"
54 #include "config.h"
55 #include "platform.h"
56 #include "esclex.h"
57 
/* imported from eft.c... */
extern hrtime_t Hesitate;
extern char *Serd_Override;
/* nvlist allocator handle; passed to nvlist_xpack()/nvlist_xunpack() below */
extern nv_alloc_t Eft_nv_hdl;
extern int Max_fme;
extern fmd_hdl_t *Hdl;

/*
 * NOTE(review): presumably set when istat/SERD state has changed and
 * istat_save()/serd_save() need to be called -- confirm against the
 * code that uses them later in this file.
 */
static int Istat_need_save;
static int Serd_need_save;
void istat_save(void);
void serd_save(void);

/* fme under construction is global so we can free it on module abort */
static struct fme *Nfmep;

/*
 * Reason code (UD_VAL_*) recorded just before a case is published as
 * undiagnosable; fme_restart() resets it to UD_VAL_UNKNOWN once it has
 * been consumed.
 */
static int Undiag_reason = UD_VAL_UNKNOWN;

/*
 * Id to hand to the next FME.  newfme() post-increments it, and
 * fme_restart() bumps it past any id restored from a saved case.
 */
static int Nextid = 0;

static int Open_fme_count = 0;	/* Count of open FMEs */
78 
/*
 * list of fault management exercises underway
 *
 * One struct fme describes a single fault management exercise (FME).
 * Three list heads are declared along with the structure:
 *	FMElist		head of the list of exercises underway
 *	EFMElist	tail of that list (fme_ready() appends here)
 *	ClosedFMEs	closed exercises, destroyed by fme_fini()
 */
static struct fme {
	struct fme *next;		/* next exercise */
	unsigned long long ull;		/* time when fme was created */
	int id;				/* FME id */
	struct config *config;		/* cooked configuration data */
	struct lut *eventtree;		/* propagation tree for this FME */
	/*
	 * The initial error report that created this FME is kept in
	 * two forms.  e0 points to the instance tree node and is used
	 * by fme_eval() as the starting point for the inference
	 * algorithm.  e0r is the event handle FMD passed to us when
	 * the ereport first arrived and is used when setting timers,
	 * which are always relative to the time of this initial
	 * report.
	 */
	struct event *e0;
	fmd_event_t *e0r;

	id_t    timer;			/* for setting an fmd time-out */

	struct event *ecurrent;		/* ereport under consideration */
	struct event *suspects;		/* current suspect list */
	struct event *psuspects;	/* previous suspect list */
	int nsuspects;			/* count of suspects */
	int posted_suspects;		/* true if we've posted a diagnosis */
	int uniqobs;			/* number of unique events observed */
	int peek;			/* just peeking, don't track suspects */
	int overflow;			/* true if overflow FME */
	enum fme_state {
		FME_NOTHING = 5000,	/* not evaluated yet */
		FME_WAIT,		/* need to wait for more info */
		FME_CREDIBLE,		/* suspect list is credible */
		FME_DISPROVED,		/* no valid suspects found */
		FME_DEFERRED		/* don't know yet (k-count not met) */
	} state;

	unsigned long long pull;	/* time passed since created */
	unsigned long long wull;	/* wait until this time for re-eval */
	struct event *observations;	/* observation list */
	struct lut *globals;		/* values of global variables */
	/* fmd interfacing */
	fmd_hdl_t *hdl;			/* handle for talking with fmd */
	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
	/* stats */
	struct stats *Rcount;		/* ereports received */
	struct stats *Hcallcount;	/* calls to hypothesise() */
	struct stats *Rcallcount;	/* calls to requirements_test() */
	struct stats *Ccallcount;	/* calls to causes_test() */
	struct stats *Ecallcount;	/* calls to effects_test() */
	struct stats *Tcallcount;	/* calls to triggered() */
	struct stats *Marrowcount;	/* arrows marked by mark_arrows() */
	struct stats *diags;		/* suspect lists diagnosed */
} *FMElist, *EFMElist, *ClosedFMEs;
133 
/*
 * Singly-linked list of fmd cases that could not be restarted;
 * fme_restart() pushes entries on the front and fme_fini() frees them.
 */
static struct case_list {
	fmd_case_t *fmcase;		/* the case that could not be restarted */
	struct case_list *next;		/* next entry, NULL at end of list */
} *Undiagablecaselist;
138 
/*
 * Forward declarations of file-local functions that are referenced
 * before their definitions appear.
 */
static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
	unsigned long long at_latest_by, unsigned long long *pdelay);
static struct node *eventprop_lookup(struct event *ep, const char *propname);
static struct node *pathstring2epnamenp(char *path);
static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
	fmd_case_t *fmcase, nvlist_t *detector, char *arg);
static char *undiag_2reason_str(int ud, char *arg);
static const char *undiag_2defect_str(int ud);
static void restore_suspects(struct fme *fmep);
static void save_suspects(struct fme *fmep);
static void destroy_fme(struct fme *f);
static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
    const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
static void istat_counter_reset_cb(struct istat_entry *entp,
    struct stats *statp, const struct ipath *ipp);
static void istat_counter_topo_chg_cb(struct istat_entry *entp,
    struct stats *statp, void *unused);
static void serd_reset_cb(struct serd_entry *entp, void *unused,
    const struct ipath *ipp);
static void serd_topo_chg_cb(struct serd_entry *entp, void *unused,
    void *unused2);
static void destroy_fme_bufs(struct fme *fp);
162 
163 static struct fme *
164 alloc_fme(void)
165 {
166 	struct fme *fmep;
167 
168 	fmep = MALLOC(sizeof (*fmep));
169 	bzero(fmep, sizeof (*fmep));
170 	return (fmep);
171 }
172 
173 /*
174  * fme_ready -- called when all initialization of the FME (except for
175  *	stats) has completed successfully.  Adds the fme to global lists
176  *	and establishes its stats.
177  */
178 static struct fme *
179 fme_ready(struct fme *fmep)
180 {
181 	char nbuf[100];
182 
183 	Nfmep = NULL;	/* don't need to free this on module abort now */
184 
185 	if (EFMElist) {
186 		EFMElist->next = fmep;
187 		EFMElist = fmep;
188 	} else
189 		FMElist = EFMElist = fmep;
190 
191 	(void) sprintf(nbuf, "fme%d.Rcount", fmep->id);
192 	fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
193 	(void) sprintf(nbuf, "fme%d.Hcall", fmep->id);
194 	fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1);
195 	(void) sprintf(nbuf, "fme%d.Rcall", fmep->id);
196 	fmep->Rcallcount = stats_new_counter(nbuf,
197 	    "calls to requirements_test()", 1);
198 	(void) sprintf(nbuf, "fme%d.Ccall", fmep->id);
199 	fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1);
200 	(void) sprintf(nbuf, "fme%d.Ecall", fmep->id);
201 	fmep->Ecallcount =
202 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
203 	(void) sprintf(nbuf, "fme%d.Tcall", fmep->id);
204 	fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
205 	(void) sprintf(nbuf, "fme%d.Marrow", fmep->id);
206 	fmep->Marrowcount = stats_new_counter(nbuf,
207 	    "arrows marked by mark_arrows()", 1);
208 	(void) sprintf(nbuf, "fme%d.diags", fmep->id);
209 	fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
210 
211 	out(O_ALTFP|O_VERB2, "newfme: config snapshot contains...");
212 	config_print(O_ALTFP|O_VERB2, fmep->config);
213 
214 	return (fmep);
215 }
216 
217 extern void ipath_dummy_lut(struct arrow *);
218 extern struct lut *itree_create_dummy(const char *, const struct ipath *);
219 
220 /* ARGSUSED */
221 static void
222 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
223 {
224 	struct bubble *bp;
225 	struct arrowlist *ap;
226 
227 	for (bp = itree_next_bubble(ep, NULL); bp;
228 	    bp = itree_next_bubble(ep, bp)) {
229 		if (bp->t != B_FROM)
230 			continue;
231 		for (ap = itree_next_arrow(bp, NULL); ap;
232 		    ap = itree_next_arrow(bp, ap)) {
233 			ap->arrowp->pnode->u.arrow.needed = 1;
234 			ipath_dummy_lut(ap->arrowp);
235 		}
236 	}
237 }
238 
239 /* ARGSUSED */
240 static void
241 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
242 {
243 	struct bubble *bp;
244 	struct arrowlist *ap;
245 
246 	for (bp = itree_next_bubble(ep, NULL); bp;
247 	    bp = itree_next_bubble(ep, bp)) {
248 		if (bp->t != B_FROM)
249 			continue;
250 		for (ap = itree_next_arrow(bp, NULL); ap;
251 		    ap = itree_next_arrow(bp, ap))
252 			ap->arrowp->pnode->u.arrow.needed = 0;
253 	}
254 }
255 
256 static void globals_destructor(void *left, void *right, void *arg);
257 static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep);
258 
259 static boolean_t
260 prune_propagations(const char *e0class, const struct ipath *e0ipp)
261 {
262 	char nbuf[100];
263 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
264 	extern struct lut *Usednames;
265 
266 	Nfmep = alloc_fme();
267 	Nfmep->id = Nextid;
268 	Nfmep->state = FME_NOTHING;
269 	Nfmep->eventtree = itree_create_dummy(e0class, e0ipp);
270 	if ((Nfmep->e0 =
271 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
272 		itree_free(Nfmep->eventtree);
273 		FREE(Nfmep);
274 		Nfmep = NULL;
275 		return (B_FALSE);
276 	}
277 	Nfmep->ecurrent = Nfmep->observations = Nfmep->e0;
278 	Nfmep->e0->count++;
279 
280 	(void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id);
281 	Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
282 	(void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id);
283 	Nfmep->Hcallcount =
284 	    stats_new_counter(nbuf, "calls to hypothesise()", 1);
285 	(void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id);
286 	Nfmep->Rcallcount = stats_new_counter(nbuf,
287 	    "calls to requirements_test()", 1);
288 	(void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id);
289 	Nfmep->Ccallcount =
290 	    stats_new_counter(nbuf, "calls to causes_test()", 1);
291 	(void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id);
292 	Nfmep->Ecallcount =
293 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
294 	(void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id);
295 	Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
296 	(void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id);
297 	Nfmep->Marrowcount = stats_new_counter(nbuf,
298 	    "arrows marked by mark_arrows()", 1);
299 	(void) sprintf(nbuf, "fme%d.diags", Nfmep->id);
300 	Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
301 
302 	Nfmep->peek = 1;
303 	lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep);
304 	lut_free(Usednames, NULL, NULL);
305 	Usednames = NULL;
306 	lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep);
307 	(void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay);
308 	itree_prune(Nfmep->eventtree);
309 	lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep);
310 
311 	stats_delete(Nfmep->Rcount);
312 	stats_delete(Nfmep->Hcallcount);
313 	stats_delete(Nfmep->Rcallcount);
314 	stats_delete(Nfmep->Ccallcount);
315 	stats_delete(Nfmep->Ecallcount);
316 	stats_delete(Nfmep->Tcallcount);
317 	stats_delete(Nfmep->Marrowcount);
318 	stats_delete(Nfmep->diags);
319 	itree_free(Nfmep->eventtree);
320 	lut_free(Nfmep->globals, globals_destructor, NULL);
321 	FREE(Nfmep);
322 	return (B_TRUE);
323 }
324 
325 static struct fme *
326 newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl,
327     fmd_case_t *fmcase, fmd_event_t *ffep, nvlist_t *nvl)
328 {
329 	struct cfgdata *cfgdata;
330 	int init_size;
331 	extern int alloc_total();
332 	nvlist_t *detector = NULL;
333 	char *pathstr;
334 	char *arg;
335 
336 	/*
337 	 * First check if e0ipp is actually in the topology so we can give a
338 	 * more useful error message.
339 	 */
340 	ipathlastcomp(e0ipp);
341 	pathstr = ipath2str(NULL, e0ipp);
342 	cfgdata = config_snapshot();
343 	platform_units_translate(0, cfgdata->cooked, NULL, NULL,
344 	    &detector, pathstr);
345 	FREE(pathstr);
346 	structconfig_free(cfgdata->cooked);
347 	config_free(cfgdata);
348 	if (detector == NULL) {
349 		/* See if class permits silent discard on unknown component. */
350 		if (lut_lookup(Ereportenames_discard, (void *)e0class, NULL)) {
351 			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
352 			    "to component path, but silent discard allowed.",
353 			    e0class);
354 			fmd_case_close(hdl, fmcase);
355 		} else {
356 			Undiag_reason = UD_VAL_BADEVENTPATH;
357 			(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
358 			    &detector);
359 			arg = ipath2str(e0class, e0ipp);
360 			publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
361 			FREE(arg);
362 		}
363 		return (NULL);
364 	}
365 
366 	/*
367 	 * Next run a quick first pass of the rules with a dummy config. This
368 	 * allows us to prune those rules which can't possibly cause this
369 	 * ereport.
370 	 */
371 	if (!prune_propagations(e0class, e0ipp)) {
372 		/*
373 		 * The fault class must have been in the rules or we would
374 		 * not have registered for it (and got a "nosub"), and the
375 		 * pathname must be in the topology or we would have failed the
376 		 * previous test. So to get here means the combination of
377 		 * class and pathname in the ereport must be invalid.
378 		 */
379 		Undiag_reason = UD_VAL_BADEVENTCLASS;
380 		arg = ipath2str(e0class, e0ipp);
381 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
382 		nvlist_free(detector);
383 		FREE(arg);
384 		return (NULL);
385 	}
386 
387 	/*
388 	 * Now go ahead and create the real fme using the pruned rules.
389 	 */
390 	init_size = alloc_total();
391 	out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size);
392 	nvlist_free(detector);
393 	pathstr = ipath2str(NULL, e0ipp);
394 	cfgdata = config_snapshot();
395 	platform_units_translate(0, cfgdata->cooked, NULL, NULL,
396 	    &detector, pathstr);
397 	FREE(pathstr);
398 	platform_save_config(hdl, fmcase);
399 	out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes",
400 	    alloc_total() - init_size);
401 
402 	Nfmep = alloc_fme();
403 
404 	Nfmep->id = Nextid++;
405 	Nfmep->config = cfgdata->cooked;
406 	config_free(cfgdata);
407 	Nfmep->posted_suspects = 0;
408 	Nfmep->uniqobs = 0;
409 	Nfmep->state = FME_NOTHING;
410 	Nfmep->pull = 0ULL;
411 	Nfmep->overflow = 0;
412 
413 	Nfmep->fmcase = fmcase;
414 	Nfmep->hdl = hdl;
415 
416 	if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) {
417 		Undiag_reason = UD_VAL_INSTFAIL;
418 		arg = ipath2str(e0class, e0ipp);
419 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
420 		nvlist_free(detector);
421 		FREE(arg);
422 		structconfig_free(Nfmep->config);
423 		destroy_fme_bufs(Nfmep);
424 		FREE(Nfmep);
425 		Nfmep = NULL;
426 		return (NULL);
427 	}
428 
429 	itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);
430 
431 	if ((Nfmep->e0 =
432 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
433 		Undiag_reason = UD_VAL_BADEVENTI;
434 		arg = ipath2str(e0class, e0ipp);
435 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
436 		nvlist_free(detector);
437 		FREE(arg);
438 		itree_free(Nfmep->eventtree);
439 		structconfig_free(Nfmep->config);
440 		destroy_fme_bufs(Nfmep);
441 		FREE(Nfmep);
442 		Nfmep = NULL;
443 		return (NULL);
444 	}
445 
446 	nvlist_free(detector);
447 	return (fme_ready(Nfmep));
448 }
449 
450 void
451 fme_fini(void)
452 {
453 	struct fme *sfp, *fp;
454 	struct case_list *ucasep, *nextcasep;
455 
456 	ucasep = Undiagablecaselist;
457 	while (ucasep != NULL) {
458 		nextcasep = ucasep->next;
459 		FREE(ucasep);
460 		ucasep = nextcasep;
461 	}
462 	Undiagablecaselist = NULL;
463 
464 	/* clean up closed fmes */
465 	fp = ClosedFMEs;
466 	while (fp != NULL) {
467 		sfp = fp->next;
468 		destroy_fme(fp);
469 		fp = sfp;
470 	}
471 	ClosedFMEs = NULL;
472 
473 	fp = FMElist;
474 	while (fp != NULL) {
475 		sfp = fp->next;
476 		destroy_fme(fp);
477 		fp = sfp;
478 	}
479 	FMElist = EFMElist = NULL;
480 
481 	/* if we were in the middle of creating an fme, free it now */
482 	if (Nfmep) {
483 		destroy_fme(Nfmep);
484 		Nfmep = NULL;
485 	}
486 }
487 
/*
 * Size of the scratch buffer used to build observation buffer names
 * ("observed<N>" and "observed<N>.nvp").  20 bytes holds the longer
 * form with a 7-digit N plus the terminating NUL, which allows for
 * a ridiculous 9,999,999 unique observations.
 */
#define	OBBUFNMSZ 20
493 
494 /*
495  *  serialize_observation
496  *
497  *  Create a recoverable version of the current observation
498  *  (f->ecurrent).  We keep a serialized version of each unique
499  *  observation in order that we may resume correctly the fme in the
500  *  correct state if eft or fmd crashes and we're restarted.
501  */
502 static void
503 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
504 {
505 	size_t pkdlen;
506 	char tmpbuf[OBBUFNMSZ];
507 	char *pkd = NULL;
508 	char *estr;
509 
510 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
511 	estr = ipath2str(cls, ipp);
512 	fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
513 	fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
514 	    strlen(estr) + 1);
515 	FREE(estr);
516 
517 	if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
518 		(void) snprintf(tmpbuf,
519 		    OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
520 		if (nvlist_xpack(fp->ecurrent->nvp,
521 		    &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
522 			out(O_DIE|O_SYS, "pack of observed nvl failed");
523 		fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
524 		fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
525 		FREE(pkd);
526 	}
527 
528 	fp->uniqobs++;
529 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
530 	    sizeof (fp->uniqobs));
531 }
532 
533 /*
534  *  init_fme_bufs -- We keep several bits of state about an fme for
535  *	use if eft or fmd crashes and we're restarted.
536  */
537 static void
538 init_fme_bufs(struct fme *fp)
539 {
540 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
541 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
542 	    sizeof (fp->pull));
543 
544 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
545 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
546 	    sizeof (fp->id));
547 
548 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
549 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
550 	    sizeof (fp->uniqobs));
551 
552 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
553 	    sizeof (fp->posted_suspects));
554 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
555 	    (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
556 }
557 
558 static void
559 destroy_fme_bufs(struct fme *fp)
560 {
561 	char tmpbuf[OBBUFNMSZ];
562 	int o;
563 
564 	platform_restore_config(fp->hdl, fp->fmcase);
565 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
566 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
567 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
568 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
569 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
570 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);
571 
572 	for (o = 0; o < fp->uniqobs; o++) {
573 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
574 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
575 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
576 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
577 	}
578 }
579 
580 /*
581  * reconstitute_observations -- convert a case's serialized observations
582  *	back into struct events.  Returns zero if all observations are
583  *	successfully reconstituted.
584  */
585 static int
586 reconstitute_observations(struct fme *fmep)
587 {
588 	struct event *ep;
589 	struct node *epnamenp = NULL;
590 	size_t pkdlen;
591 	char *pkd = NULL;
592 	char *tmpbuf = alloca(OBBUFNMSZ);
593 	char *sepptr;
594 	char *estr;
595 	int ocnt;
596 	int elen;
597 
598 	for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
599 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
600 		elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
601 		if (elen == 0) {
602 			out(O_ALTFP,
603 			    "reconstitute_observation: no %s buffer found.",
604 			    tmpbuf);
605 			Undiag_reason = UD_VAL_MISSINGOBS;
606 			break;
607 		}
608 
609 		estr = MALLOC(elen);
610 		fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
611 		sepptr = strchr(estr, '@');
612 		if (sepptr == NULL) {
613 			out(O_ALTFP,
614 			    "reconstitute_observation: %s: "
615 			    "missing @ separator in %s.",
616 			    tmpbuf, estr);
617 			Undiag_reason = UD_VAL_MISSINGPATH;
618 			FREE(estr);
619 			break;
620 		}
621 
622 		*sepptr = '\0';
623 		if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
624 			out(O_ALTFP,
625 			    "reconstitute_observation: %s: "
626 			    "trouble converting path string \"%s\" "
627 			    "to internal representation.",
628 			    tmpbuf, sepptr + 1);
629 			Undiag_reason = UD_VAL_MISSINGPATH;
630 			FREE(estr);
631 			break;
632 		}
633 
634 		/* construct the event */
635 		ep = itree_lookup(fmep->eventtree,
636 		    stable(estr), ipath(epnamenp));
637 		if (ep == NULL) {
638 			out(O_ALTFP,
639 			    "reconstitute_observation: %s: "
640 			    "lookup of  \"%s\" in itree failed.",
641 			    tmpbuf, ipath2str(estr, ipath(epnamenp)));
642 			Undiag_reason = UD_VAL_BADOBS;
643 			tree_free(epnamenp);
644 			FREE(estr);
645 			break;
646 		}
647 		tree_free(epnamenp);
648 
649 		/*
650 		 * We may or may not have a saved nvlist for the observation
651 		 */
652 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
653 		pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
654 		if (pkdlen != 0) {
655 			pkd = MALLOC(pkdlen);
656 			fmd_buf_read(fmep->hdl,
657 			    fmep->fmcase, tmpbuf, pkd, pkdlen);
658 			ASSERT(ep->nvp == NULL);
659 			if (nvlist_xunpack(pkd,
660 			    pkdlen, &ep->nvp, &Eft_nv_hdl) != 0)
661 				out(O_DIE|O_SYS, "pack of observed nvl failed");
662 			FREE(pkd);
663 		}
664 
665 		if (ocnt == 0)
666 			fmep->e0 = ep;
667 
668 		FREE(estr);
669 		fmep->ecurrent = ep;
670 		ep->count++;
671 
672 		/* link it into list of observations seen */
673 		ep->observations = fmep->observations;
674 		fmep->observations = ep;
675 	}
676 
677 	if (ocnt == fmep->uniqobs) {
678 		(void) fme_ready(fmep);
679 		return (0);
680 	}
681 
682 	return (1);
683 }
684 
685 /*
686  * restart_fme -- called during eft initialization.  Reconstitutes
687  *	an in-progress fme.
688  */
689 void
690 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress)
691 {
692 	nvlist_t *defect;
693 	struct case_list *bad;
694 	struct fme *fmep;
695 	struct cfgdata *cfgdata;
696 	size_t rawsz;
697 	struct event *ep;
698 	char *tmpbuf = alloca(OBBUFNMSZ);
699 	char *sepptr;
700 	char *estr;
701 	int elen;
702 	struct node *epnamenp = NULL;
703 	int init_size;
704 	extern int alloc_total();
705 	char *reason;
706 
707 	/*
708 	 * ignore solved or closed cases
709 	 */
710 	if (fmd_case_solved(hdl, inprogress) ||
711 	    fmd_case_closed(hdl, inprogress))
712 		return;
713 
714 	fmep = alloc_fme();
715 	fmep->fmcase = inprogress;
716 	fmep->hdl = hdl;
717 
718 	if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) {
719 		out(O_ALTFP, "restart_fme: no saved posted status");
720 		Undiag_reason = UD_VAL_MISSINGINFO;
721 		goto badcase;
722 	} else {
723 		fmd_buf_read(hdl, inprogress, WOBUF_POSTD,
724 		    (void *)&fmep->posted_suspects,
725 		    sizeof (fmep->posted_suspects));
726 	}
727 
728 	if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) {
729 		out(O_ALTFP, "restart_fme: no saved id");
730 		Undiag_reason = UD_VAL_MISSINGINFO;
731 		goto badcase;
732 	} else {
733 		fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id,
734 		    sizeof (fmep->id));
735 	}
736 	if (Nextid <= fmep->id)
737 		Nextid = fmep->id + 1;
738 
739 	out(O_ALTFP, "Replay FME %d", fmep->id);
740 
741 	if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) {
742 		out(O_ALTFP, "restart_fme: No config data");
743 		Undiag_reason = UD_VAL_MISSINGINFO;
744 		goto badcase;
745 	}
746 	fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz,
747 	    sizeof (size_t));
748 
749 	if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) {
750 		out(O_ALTFP, "restart_fme: No event zero");
751 		Undiag_reason = UD_VAL_MISSINGZERO;
752 		goto badcase;
753 	}
754 
755 	if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
756 		out(O_ALTFP, "restart_fme: no saved wait time");
757 		Undiag_reason = UD_VAL_MISSINGINFO;
758 		goto badcase;
759 	} else {
760 		fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
761 		    sizeof (fmep->pull));
762 	}
763 
764 	if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
765 		out(O_ALTFP, "restart_fme: no count of observations");
766 		Undiag_reason = UD_VAL_MISSINGINFO;
767 		goto badcase;
768 	} else {
769 		fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
770 		    (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
771 	}
772 
773 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed0");
774 	elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
775 	if (elen == 0) {
776 		out(O_ALTFP, "reconstitute_observation: no %s buffer found.",
777 		    tmpbuf);
778 		Undiag_reason = UD_VAL_MISSINGOBS;
779 		goto badcase;
780 	}
781 	estr = MALLOC(elen);
782 	fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
783 	sepptr = strchr(estr, '@');
784 	if (sepptr == NULL) {
785 		out(O_ALTFP, "reconstitute_observation: %s: "
786 		    "missing @ separator in %s.",
787 		    tmpbuf, estr);
788 		Undiag_reason = UD_VAL_MISSINGPATH;
789 		FREE(estr);
790 		goto badcase;
791 	}
792 	*sepptr = '\0';
793 	if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
794 		out(O_ALTFP, "reconstitute_observation: %s: "
795 		    "trouble converting path string \"%s\" "
796 		    "to internal representation.", tmpbuf, sepptr + 1);
797 		Undiag_reason = UD_VAL_MISSINGPATH;
798 		FREE(estr);
799 		goto badcase;
800 	}
801 	(void) prune_propagations(stable(estr), ipath(epnamenp));
802 	tree_free(epnamenp);
803 	FREE(estr);
804 
805 	init_size = alloc_total();
806 	out(O_ALTFP|O_STAMP, "start config_restore using %d bytes", init_size);
807 	cfgdata = MALLOC(sizeof (struct cfgdata));
808 	cfgdata->cooked = NULL;
809 	cfgdata->devcache = NULL;
810 	cfgdata->devidcache = NULL;
811 	cfgdata->tpcache = NULL;
812 	cfgdata->cpucache = NULL;
813 	cfgdata->raw_refcnt = 1;
814 
815 	if (rawsz > 0) {
816 		if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) {
817 			out(O_ALTFP, "restart_fme: Config data size mismatch");
818 			Undiag_reason = UD_VAL_CFGMISMATCH;
819 			goto badcase;
820 		}
821 		cfgdata->begin = MALLOC(rawsz);
822 		cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz;
823 		fmd_buf_read(hdl,
824 		    inprogress, WOBUF_CFG, cfgdata->begin, rawsz);
825 	} else {
826 		cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL;
827 	}
828 
829 	config_cook(cfgdata);
830 	fmep->config = cfgdata->cooked;
831 	config_free(cfgdata);
832 	out(O_ALTFP|O_STAMP, "config_restore added %d bytes",
833 	    alloc_total() - init_size);
834 
835 	if ((fmep->eventtree = itree_create(fmep->config)) == NULL) {
836 		/* case not properly saved or irretrievable */
837 		out(O_ALTFP, "restart_fme: NULL instance tree");
838 		Undiag_reason = UD_VAL_INSTFAIL;
839 		goto badcase;
840 	}
841 
842 	itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree);
843 
844 	if (reconstitute_observations(fmep) != 0)
845 		goto badcase;
846 
847 	out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id);
848 	for (ep = fmep->observations; ep; ep = ep->observations) {
849 		out(O_ALTFP|O_NONL, " ");
850 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
851 	}
852 	out(O_ALTFP, NULL);
853 
854 	Open_fme_count++;
855 
856 	/* give the diagnosis algorithm a shot at the new FME state */
857 	fme_eval(fmep, fmep->e0r);
858 	return;
859 
860 badcase:
861 	if (fmep->eventtree != NULL)
862 		itree_free(fmep->eventtree);
863 	if (fmep->config)
864 		structconfig_free(fmep->config);
865 	destroy_fme_bufs(fmep);
866 	FREE(fmep);
867 
868 	/*
869 	 * Since we're unable to restart the case, add it to the undiagable
870 	 * list and solve and close it as appropriate.
871 	 */
872 	bad = MALLOC(sizeof (struct case_list));
873 	bad->next = NULL;
874 
875 	if (Undiagablecaselist != NULL)
876 		bad->next = Undiagablecaselist;
877 	Undiagablecaselist = bad;
878 	bad->fmcase = inprogress;
879 
880 	out(O_ALTFP|O_NONL, "[case %s (unable to restart), ",
881 	    fmd_case_uuid(hdl, bad->fmcase));
882 
883 	if (fmd_case_solved(hdl, bad->fmcase)) {
884 		out(O_ALTFP|O_NONL, "already solved, ");
885 	} else {
886 		out(O_ALTFP|O_NONL, "solving, ");
887 		defect = fmd_nvl_create_fault(hdl,
888 		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
889 		reason = undiag_2reason_str(Undiag_reason, NULL);
890 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
891 		FREE(reason);
892 		fmd_case_add_suspect(hdl, bad->fmcase, defect);
893 		fmd_case_solve(hdl, bad->fmcase);
894 		Undiag_reason = UD_VAL_UNKNOWN;
895 	}
896 
897 	if (fmd_case_closed(hdl, bad->fmcase)) {
898 		out(O_ALTFP, "already closed ]");
899 	} else {
900 		out(O_ALTFP, "closing ]");
901 		fmd_case_close(hdl, bad->fmcase);
902 	}
903 }
904 
905 /*ARGSUSED*/
906 static void
907 globals_destructor(void *left, void *right, void *arg)
908 {
909 	struct evalue *evp = (struct evalue *)right;
910 	if (evp->t == NODEPTR)
911 		tree_free((struct node *)(uintptr_t)evp->v);
912 	evp->v = (uintptr_t)NULL;
913 	FREE(evp);
914 }
915 
916 void
917 destroy_fme(struct fme *f)
918 {
919 	stats_delete(f->Rcount);
920 	stats_delete(f->Hcallcount);
921 	stats_delete(f->Rcallcount);
922 	stats_delete(f->Ccallcount);
923 	stats_delete(f->Ecallcount);
924 	stats_delete(f->Tcallcount);
925 	stats_delete(f->Marrowcount);
926 	stats_delete(f->diags);
927 
928 	if (f->eventtree != NULL)
929 		itree_free(f->eventtree);
930 	if (f->config)
931 		structconfig_free(f->config);
932 	lut_free(f->globals, globals_destructor, NULL);
933 	FREE(f);
934 }
935 
936 static const char *
937 fme_state2str(enum fme_state s)
938 {
939 	switch (s) {
940 	case FME_NOTHING:	return ("NOTHING");
941 	case FME_WAIT:		return ("WAIT");
942 	case FME_CREDIBLE:	return ("CREDIBLE");
943 	case FME_DISPROVED:	return ("DISPROVED");
944 	case FME_DEFERRED:	return ("DEFERRED");
945 	default:		return ("UNKNOWN");
946 	}
947 }
948 
949 static int
950 is_problem(enum nametype t)
951 {
952 	return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
953 }
954 
955 static int
956 is_defect(enum nametype t)
957 {
958 	return (t == N_DEFECT);
959 }
960 
961 static int
962 is_upset(enum nametype t)
963 {
964 	return (t == N_UPSET);
965 }
966 
967 static void
968 fme_print(int flags, struct fme *fmep)
969 {
970 	struct event *ep;
971 
972 	out(flags, "Fault Management Exercise %d", fmep->id);
973 	out(flags, "\t       State: %s", fme_state2str(fmep->state));
974 	out(flags|O_NONL, "\t  Start time: ");
975 	ptree_timeval(flags|O_NONL, &fmep->ull);
976 	out(flags, NULL);
977 	if (fmep->wull) {
978 		out(flags|O_NONL, "\t   Wait time: ");
979 		ptree_timeval(flags|O_NONL, &fmep->wull);
980 		out(flags, NULL);
981 	}
982 	out(flags|O_NONL, "\t          E0: ");
983 	if (fmep->e0)
984 		itree_pevent_brief(flags|O_NONL, fmep->e0);
985 	else
986 		out(flags|O_NONL, "NULL");
987 	out(flags, NULL);
988 	out(flags|O_NONL, "\tObservations:");
989 	for (ep = fmep->observations; ep; ep = ep->observations) {
990 		out(flags|O_NONL, " ");
991 		itree_pevent_brief(flags|O_NONL, ep);
992 	}
993 	out(flags, NULL);
994 	out(flags|O_NONL, "\tSuspect list:");
995 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
996 		out(flags|O_NONL, " ");
997 		itree_pevent_brief(flags|O_NONL, ep);
998 	}
999 	out(flags, NULL);
1000 	if (fmep->eventtree != NULL) {
1001 		out(flags|O_VERB2, "\t        Tree:");
1002 		itree_ptree(flags|O_VERB2, fmep->eventtree);
1003 	}
1004 }
1005 
1006 static struct node *
1007 pathstring2epnamenp(char *path)
1008 {
1009 	char *sep = "/";
1010 	struct node *ret;
1011 	char *ptr;
1012 
1013 	if ((ptr = strtok(path, sep)) == NULL)
1014 		out(O_DIE, "pathstring2epnamenp: invalid empty class");
1015 
1016 	ret = tree_iname(stable(ptr), NULL, 0);
1017 
1018 	while ((ptr = strtok(NULL, sep)) != NULL)
1019 		ret = tree_name_append(ret,
1020 		    tree_iname(stable(ptr), NULL, 0));
1021 
1022 	return (ret);
1023 }
1024 
1025 /*
1026  * for a given upset sp, increment the corresponding SERD engine.  if the
1027  * SERD engine trips, return the ename and ipp of the resulting ereport.
1028  * returns true if engine tripped and *enamep and *ippp were filled in.
1029  */
1030 static int
1031 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep,
1032     fmd_case_t *fmcase, struct event *sp, const char **enamep,
1033     const struct ipath **ippp)
1034 {
1035 	struct node *serdinst;
1036 	char *serdname;
1037 	char *serdresource;
1038 	char *serdclass;
1039 	struct node *nid;
1040 	struct serd_entry *newentp;
1041 	int i, serdn = -1, serdincrement = 1, len = 0;
1042 	char *serdsuffix = NULL, *serdt = NULL;
1043 	struct evalue *ep;
1044 
1045 	ASSERT(sp->t == N_UPSET);
1046 	ASSERT(ffep != NULL);
1047 
1048 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1049 	    (void *)"n", (lut_cmp)strcmp)) != NULL) {
1050 		ASSERT(ep->t == UINT64);
1051 		serdn = (int)ep->v;
1052 	}
1053 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1054 	    (void *)"t", (lut_cmp)strcmp)) != NULL) {
1055 		ASSERT(ep->t == STRING);
1056 		serdt = (char *)(uintptr_t)ep->v;
1057 	}
1058 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1059 	    (void *)"suffix", (lut_cmp)strcmp)) != NULL) {
1060 		ASSERT(ep->t == STRING);
1061 		serdsuffix = (char *)(uintptr_t)ep->v;
1062 	}
1063 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1064 	    (void *)"increment", (lut_cmp)strcmp)) != NULL) {
1065 		ASSERT(ep->t == UINT64);
1066 		serdincrement = (int)ep->v;
1067 	}
1068 
1069 	/*
1070 	 * obtain instanced SERD engine from the upset sp.  from this
1071 	 * derive serdname, the string used to identify the SERD engine.
1072 	 */
1073 	serdinst = eventprop_lookup(sp, L_engine);
1074 
1075 	if (serdinst == NULL)
1076 		return (-1);
1077 
1078 	len = strlen(serdinst->u.stmt.np->u.event.ename->u.name.s) + 1;
1079 	if (serdsuffix != NULL)
1080 		len += strlen(serdsuffix);
1081 	serdclass = MALLOC(len);
1082 	if (serdsuffix != NULL)
1083 		(void) snprintf(serdclass, len, "%s%s",
1084 		    serdinst->u.stmt.np->u.event.ename->u.name.s, serdsuffix);
1085 	else
1086 		(void) snprintf(serdclass, len, "%s",
1087 		    serdinst->u.stmt.np->u.event.ename->u.name.s);
1088 	serdresource = ipath2str(NULL,
1089 	    ipath(serdinst->u.stmt.np->u.event.epname));
1090 	len += strlen(serdresource) + 1;
1091 	serdname = MALLOC(len);
1092 	(void) snprintf(serdname, len, "%s@%s", serdclass, serdresource);
1093 	FREE(serdresource);
1094 
1095 	/* handle serd engine "id" property, if there is one */
1096 	if ((nid =
1097 	    lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) {
1098 		struct evalue *gval;
1099 		char suffixbuf[200];
1100 		char *suffix;
1101 		char *nserdname;
1102 		size_t nname;
1103 
1104 		out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname);
1105 		ptree_name_iter(O_ALTFP|O_NONL, nid);
1106 
1107 		ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t));
1108 
1109 		if ((gval = lut_lookup(fmep->globals,
1110 		    (void *)nid->u.globid.s, NULL)) == NULL) {
1111 			out(O_ALTFP, " undefined");
1112 		} else if (gval->t == UINT64) {
1113 			out(O_ALTFP, " %llu", gval->v);
1114 			(void) sprintf(suffixbuf, "%llu", gval->v);
1115 			suffix = suffixbuf;
1116 		} else {
1117 			out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v);
1118 			suffix = (char *)(uintptr_t)gval->v;
1119 		}
1120 
1121 		nname = strlen(serdname) + strlen(suffix) + 2;
1122 		nserdname = MALLOC(nname);
1123 		(void) snprintf(nserdname, nname, "%s:%s", serdname, suffix);
1124 		FREE(serdname);
1125 		serdname = nserdname;
1126 	}
1127 
1128 	/*
1129 	 * if the engine is empty, and we have an override for n/t then
1130 	 * destroy and recreate it.
1131 	 */
1132 	if ((serdn != -1 || serdt != NULL) && fmd_serd_exists(hdl, serdname) &&
1133 	    fmd_serd_empty(hdl, serdname))
1134 		fmd_serd_destroy(hdl, serdname);
1135 
1136 	if (!fmd_serd_exists(hdl, serdname)) {
1137 		struct node *nN, *nT;
1138 		const char *s;
1139 		struct node *nodep;
1140 		struct config *cp;
1141 		char *path;
1142 		uint_t nval;
1143 		hrtime_t tval;
1144 		int i;
1145 		char *ptr;
1146 		int got_n_override = 0, got_t_override = 0;
1147 
1148 		/* no SERD engine yet, so create it */
1149 		nodep = serdinst->u.stmt.np->u.event.epname;
1150 		path = ipath2str(NULL, ipath(nodep));
1151 		cp = config_lookup(fmep->config, path, 0);
1152 		FREE((void *)path);
1153 
1154 		/*
1155 		 * We allow serd paramaters to be overridden, either from
1156 		 * eft.conf file values (if Serd_Override is set) or from
1157 		 * driver properties (for "serd.io.device" engines).
1158 		 */
1159 		if (Serd_Override != NULL) {
1160 			char *save_ptr, *ptr1, *ptr2, *ptr3;
1161 			ptr3 = save_ptr = STRDUP(Serd_Override);
1162 			while (*ptr3 != '\0') {
1163 				ptr1 = strchr(ptr3, ',');
1164 				*ptr1 = '\0';
1165 				if (strcmp(ptr3, serdclass) == 0) {
1166 					ptr2 =  strchr(ptr1 + 1, ',');
1167 					*ptr2 = '\0';
1168 					nval = atoi(ptr1 + 1);
1169 					out(O_ALTFP, "serd override %s_n %d",
1170 					    serdclass, nval);
1171 					ptr3 =  strchr(ptr2 + 1, ' ');
1172 					if (ptr3)
1173 						*ptr3 = '\0';
1174 					ptr = STRDUP(ptr2 + 1);
1175 					out(O_ALTFP, "serd override %s_t %s",
1176 					    serdclass, ptr);
1177 					got_n_override = 1;
1178 					got_t_override = 1;
1179 					break;
1180 				} else {
1181 					ptr2 =  strchr(ptr1 + 1, ',');
1182 					ptr3 =  strchr(ptr2 + 1, ' ');
1183 					if (ptr3 == NULL)
1184 						break;
1185 				}
1186 				ptr3++;
1187 			}
1188 			FREE(save_ptr);
1189 		}
1190 
1191 		if (cp && got_n_override == 0) {
1192 			/*
1193 			 * convert serd engine class into property name
1194 			 */
1195 			char *prop_name = MALLOC(strlen(serdclass) + 3);
1196 			for (i = 0; i < strlen(serdclass); i++) {
1197 				if (serdclass[i] == '.')
1198 					prop_name[i] = '_';
1199 				else
1200 					prop_name[i] = serdclass[i];
1201 			}
1202 			prop_name[i++] = '_';
1203 			prop_name[i++] = 'n';
1204 			prop_name[i] = '\0';
1205 			if (s = config_getprop(cp, prop_name)) {
1206 				nval = atoi(s);
1207 				out(O_ALTFP, "serd override %s_n %s",
1208 				    serdclass, s);
1209 				got_n_override = 1;
1210 			}
1211 			prop_name[i - 1] = 't';
1212 			if (s = config_getprop(cp, prop_name)) {
1213 				ptr = STRDUP(s);
1214 				out(O_ALTFP, "serd override %s_t %s",
1215 				    serdclass, s);
1216 				got_t_override = 1;
1217 			}
1218 			FREE(prop_name);
1219 		}
1220 
1221 		if (serdn != -1 && got_n_override == 0) {
1222 			nval = serdn;
1223 			out(O_ALTFP, "serd override %s_n %d", serdclass, serdn);
1224 			got_n_override = 1;
1225 		}
1226 		if (serdt != NULL && got_t_override == 0) {
1227 			ptr = STRDUP(serdt);
1228 			out(O_ALTFP, "serd override %s_t %s", serdclass, serdt);
1229 			got_t_override = 1;
1230 		}
1231 
1232 		if (!got_n_override) {
1233 			nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N,
1234 			    NULL);
1235 			ASSERT(nN->t == T_NUM);
1236 			nval = (uint_t)nN->u.ull;
1237 		}
1238 		if (!got_t_override) {
1239 			nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T,
1240 			    NULL);
1241 			ASSERT(nT->t == T_TIMEVAL);
1242 			tval = (hrtime_t)nT->u.ull;
1243 		} else {
1244 			const unsigned long long *ullp;
1245 			const char *suffix;
1246 			int len;
1247 
1248 			len = strspn(ptr, "0123456789");
1249 			suffix = stable(&ptr[len]);
1250 			ullp = (unsigned long long *)lut_lookup(Timesuffixlut,
1251 			    (void *)suffix, NULL);
1252 			ptr[len] = '\0';
1253 			tval = strtoull(ptr, NULL, 0) * (ullp ? *ullp : 1ll);
1254 			FREE(ptr);
1255 		}
1256 		fmd_serd_create(hdl, serdname, nval, tval);
1257 	}
1258 
1259 	newentp = MALLOC(sizeof (*newentp));
1260 	newentp->ename = stable(serdclass);
1261 	FREE(serdclass);
1262 	newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname);
1263 	newentp->hdl = hdl;
1264 	if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) {
1265 		SerdEngines = lut_add(SerdEngines, (void *)newentp,
1266 		    (void *)newentp, (lut_cmp)serd_cmp);
1267 		Serd_need_save = 1;
1268 		serd_save();
1269 	} else {
1270 		FREE(newentp);
1271 	}
1272 
1273 
1274 	/*
1275 	 * increment SERD engine.  if engine fires, reset serd
1276 	 * engine and return trip_strcode if required.
1277 	 */
1278 	for (i = 0; i < serdincrement; i++) {
1279 		if (fmd_serd_record(hdl, serdname, ffep)) {
1280 			fmd_case_add_serd(hdl, fmcase, serdname);
1281 			fmd_serd_reset(hdl, serdname);
1282 
1283 			if (ippp) {
1284 				struct node *tripinst =
1285 				    lut_lookup(serdinst->u.stmt.lutp,
1286 				    (void *)L_trip, NULL);
1287 				ASSERT(tripinst != NULL);
1288 				*enamep = tripinst->u.event.ename->u.name.s;
1289 				*ippp = ipath(tripinst->u.event.epname);
1290 				out(O_ALTFP|O_NONL,
1291 				    "[engine fired: %s, sending: ", serdname);
1292 				ipath_print(O_ALTFP|O_NONL, *enamep, *ippp);
1293 				out(O_ALTFP, "]");
1294 			} else {
1295 				out(O_ALTFP, "[engine fired: %s, no trip]",
1296 				    serdname);
1297 			}
1298 			FREE(serdname);
1299 			return (1);
1300 		}
1301 	}
1302 
1303 	FREE(serdname);
1304 	return (0);
1305 }
1306 
1307 /*
1308  * search a suspect list for upsets.  feed each upset to serd_eval() and
1309  * build up tripped[], an array of ereports produced by the firing of
1310  * any SERD engines.  then feed each ereport back into
1311  * fme_receive_report().
1312  *
1313  * returns ntrip, the number of these ereports produced.
1314  */
1315 static int
1316 upsets_eval(struct fme *fmep, fmd_event_t *ffep)
1317 {
1318 	/* we build an array of tripped ereports that we send ourselves */
1319 	struct {
1320 		const char *ename;
1321 		const struct ipath *ipp;
1322 	} *tripped;
1323 	struct event *sp;
1324 	int ntrip, nupset, i;
1325 
1326 	/*
1327 	 * count the number of upsets to determine the upper limit on
1328 	 * expected trip ereport strings.  remember that one upset can
1329 	 * lead to at most one ereport.
1330 	 */
1331 	nupset = 0;
1332 	for (sp = fmep->suspects; sp; sp = sp->suspects) {
1333 		if (sp->t == N_UPSET)
1334 			nupset++;
1335 	}
1336 
1337 	if (nupset == 0)
1338 		return (0);
1339 
1340 	/*
1341 	 * get to this point if we have upsets and expect some trip
1342 	 * ereports
1343 	 */
1344 	tripped = alloca(sizeof (*tripped) * nupset);
1345 	bzero((void *)tripped, sizeof (*tripped) * nupset);
1346 
1347 	ntrip = 0;
1348 	for (sp = fmep->suspects; sp; sp = sp->suspects)
1349 		if (sp->t == N_UPSET &&
1350 		    serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp,
1351 		    &tripped[ntrip].ename, &tripped[ntrip].ipp) == 1)
1352 			ntrip++;
1353 
1354 	for (i = 0; i < ntrip; i++) {
1355 		struct event *ep, *nep;
1356 		struct fme *nfmep;
1357 		fmd_case_t *fmcase;
1358 		const struct ipath *ipp;
1359 		const char *eventstring;
1360 		int prev_verbose;
1361 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1362 		enum fme_state state;
1363 
1364 		/*
1365 		 * First try and evaluate a case with the trip ereport plus
1366 		 * all the other ereports that cause the trip. If that fails
1367 		 * to evaluate then try again with just this ereport on its own.
1368 		 */
1369 		out(O_ALTFP|O_NONL, "fme_receive_report_serd: ");
1370 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1371 		out(O_ALTFP|O_STAMP, NULL);
1372 		ep = fmep->e0;
1373 		eventstring = ep->enode->u.event.ename->u.name.s;
1374 		ipp = ep->ipp;
1375 
1376 		/*
1377 		 * create a duplicate fme and case
1378 		 */
1379 		fmcase = fmd_case_open(fmep->hdl, NULL);
1380 		out(O_ALTFP|O_NONL, "duplicate fme for event [");
1381 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1382 		out(O_ALTFP, " ]");
1383 
1384 		if ((nfmep = newfme(eventstring, ipp, fmep->hdl,
1385 		    fmcase, ffep, ep->nvp)) == NULL) {
1386 			out(O_ALTFP|O_NONL, "[");
1387 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1388 			out(O_ALTFP, " CANNOT DIAGNOSE]");
1389 			continue;
1390 		}
1391 
1392 		Open_fme_count++;
1393 		nfmep->pull = fmep->pull;
1394 		init_fme_bufs(nfmep);
1395 		out(O_ALTFP|O_NONL, "[");
1396 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1397 		out(O_ALTFP, " created FME%d, case %s]", nfmep->id,
1398 		    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
1399 		if (ffep) {
1400 			fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep);
1401 			fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep);
1402 			nfmep->e0r = ffep;
1403 		}
1404 
1405 		/*
1406 		 * add the original ereports
1407 		 */
1408 		for (ep = fmep->observations; ep; ep = ep->observations) {
1409 			eventstring = ep->enode->u.event.ename->u.name.s;
1410 			ipp = ep->ipp;
1411 			out(O_ALTFP|O_NONL, "adding event [");
1412 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1413 			out(O_ALTFP, " ]");
1414 			nep = itree_lookup(nfmep->eventtree, eventstring, ipp);
1415 			if (nep->count++ == 0) {
1416 				nep->observations = nfmep->observations;
1417 				nfmep->observations = nep;
1418 				serialize_observation(nfmep, eventstring, ipp);
1419 				nep->nvp = evnv_dupnvl(ep->nvp);
1420 			}
1421 			if (ep->ffep && ep->ffep != ffep)
1422 				fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase,
1423 				    ep->ffep);
1424 			stats_counter_bump(nfmep->Rcount);
1425 		}
1426 
1427 		/*
1428 		 * add the serd trigger ereport
1429 		 */
1430 		if ((ep = itree_lookup(nfmep->eventtree, tripped[i].ename,
1431 		    tripped[i].ipp)) == NULL) {
1432 			/*
1433 			 * The trigger ereport is not in the instance tree. It
1434 			 * was presumably removed by prune_propagations() as
1435 			 * this combination of events is not present in the
1436 			 * rules.
1437 			 */
1438 			out(O_ALTFP, "upsets_eval: e0 not in instance tree");
1439 			Undiag_reason = UD_VAL_BADEVENTI;
1440 			goto retry_lone_ereport;
1441 		}
1442 		out(O_ALTFP|O_NONL, "adding event [");
1443 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1444 		out(O_ALTFP, " ]");
1445 		nfmep->ecurrent = ep;
1446 		ep->nvp = NULL;
1447 		ep->count = 1;
1448 		ep->observations = nfmep->observations;
1449 		nfmep->observations = ep;
1450 
1451 		/*
1452 		 * just peek first.
1453 		 */
1454 		nfmep->peek = 1;
1455 		prev_verbose = Verbose;
1456 		if (Debug == 0)
1457 			Verbose = 0;
1458 		lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep);
1459 		state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay);
1460 		nfmep->peek = 0;
1461 		Verbose = prev_verbose;
1462 		if (state == FME_DISPROVED) {
1463 			out(O_ALTFP, "upsets_eval: hypothesis disproved");
1464 			Undiag_reason = UD_VAL_UNSOLVD;
1465 retry_lone_ereport:
1466 			/*
1467 			 * However the trigger ereport on its own might be
1468 			 * diagnosable, so check for that. Undo the new fme
1469 			 * and case we just created and call fme_receive_report.
1470 			 */
1471 			out(O_ALTFP|O_NONL, "[");
1472 			ipath_print(O_ALTFP|O_NONL, tripped[i].ename,
1473 			    tripped[i].ipp);
1474 			out(O_ALTFP, " retrying with just trigger ereport]");
1475 			itree_free(nfmep->eventtree);
1476 			nfmep->eventtree = NULL;
1477 			structconfig_free(nfmep->config);
1478 			nfmep->config = NULL;
1479 			destroy_fme_bufs(nfmep);
1480 			fmd_case_close(nfmep->hdl, nfmep->fmcase);
1481 			fme_receive_report(fmep->hdl, ffep,
1482 			    tripped[i].ename, tripped[i].ipp, NULL);
1483 			continue;
1484 		}
1485 
1486 		/*
1487 		 * and evaluate
1488 		 */
1489 		serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp);
1490 		fme_eval(nfmep, ffep);
1491 	}
1492 
1493 	return (ntrip);
1494 }
1495 
1496 /*
1497  * fme_receive_external_report -- call when an external ereport comes in
1498  *
1499  * this routine just converts the relevant information from the ereport
1500  * into a format used internally and passes it on to fme_receive_report().
1501  */
1502 void
1503 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1504     const char *class)
1505 {
1506 	struct node		*epnamenp;
1507 	fmd_case_t		*fmcase;
1508 	const struct ipath	*ipp;
1509 	nvlist_t		*detector = NULL;
1510 
1511 	class = stable(class);
1512 
1513 	/* Get the component path from the ereport */
1514 	epnamenp = platform_getpath(nvl);
1515 
1516 	/* See if we ended up without a path. */
1517 	if (epnamenp == NULL) {
1518 		/* See if class permits silent discard on unknown component. */
1519 		if (lut_lookup(Ereportenames_discard, (void *)class, NULL)) {
1520 			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
1521 			    "to component path, but silent discard allowed.",
1522 			    class);
1523 		} else {
1524 			/*
1525 			 * XFILE: Failure to find a component is bad unless
1526 			 * 'discard_if_config_unknown=1' was specified in the
1527 			 * ereport definition. Indicate undiagnosable.
1528 			 */
1529 			Undiag_reason = UD_VAL_NOPATH;
1530 			fmcase = fmd_case_open(hdl, NULL);
1531 
1532 			/*
1533 			 * We don't have a component path here (which means that
1534 			 * the detector was not in hc-scheme and couldn't be
1535 			 * converted to hc-scheme. Report the raw detector as
1536 			 * the suspect resource if there is one.
1537 			 */
1538 			(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
1539 			    &detector);
1540 			publish_undiagnosable(hdl, ffep, fmcase, detector,
1541 			    (char *)class);
1542 		}
1543 		return;
1544 	}
1545 
1546 	ipp = ipath(epnamenp);
1547 	tree_free(epnamenp);
1548 	fme_receive_report(hdl, ffep, class, ipp, nvl);
1549 }
1550 
1551 /*ARGSUSED*/
1552 void
1553 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1554     const char *eventstring)
1555 {
1556 	char *uuid;
1557 	nvlist_t **nva;
1558 	uint_t nvc;
1559 	const struct ipath *ipp;
1560 
1561 	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
1562 	    nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
1563 	    &nva, &nvc) != 0) {
1564 		out(O_ALTFP, "No uuid or fault list for list.repaired event");
1565 		return;
1566 	}
1567 
1568 	out(O_ALTFP, "Processing list.repaired from case %s", uuid);
1569 
1570 	while (nvc-- != 0) {
1571 		/*
1572 		 * Reset any istat or serd engine associated with this path.
1573 		 */
1574 		char *path;
1575 
1576 		if ((ipp = platform_fault2ipath(*nva++)) == NULL)
1577 			continue;
1578 
1579 		path = ipath2str(NULL, ipp);
1580 		out(O_ALTFP, "fme_receive_repair_list: resetting state for %s",
1581 		    path);
1582 		FREE(path);
1583 
1584 		lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp);
1585 		istat_save();
1586 
1587 		lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp);
1588 		serd_save();
1589 	}
1590 }
1591 
1592 /*ARGSUSED*/
1593 void
1594 fme_receive_topology_change(void)
1595 {
1596 	lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL);
1597 	istat_save();
1598 
1599 	lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL);
1600 	serd_save();
1601 }
1602 
1603 static int mark_arrows(struct fme *fmep, struct event *ep, int mark,
1604     unsigned long long at_latest_by, unsigned long long *pdelay, int keep);
1605 
1606 /* ARGSUSED */
1607 static void
1608 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
1609 {
1610 	struct bubble *bp;
1611 	struct arrowlist *ap;
1612 
1613 	ep->cached_state = 0;
1614 	ep->keep_in_tree = 0;
1615 	for (bp = itree_next_bubble(ep, NULL); bp;
1616 	    bp = itree_next_bubble(ep, bp)) {
1617 		if (bp->t != B_FROM)
1618 			continue;
1619 		bp->mark = 0;
1620 		for (ap = itree_next_arrow(bp, NULL); ap;
1621 		    ap = itree_next_arrow(bp, ap))
1622 			ap->arrowp->mark = 0;
1623 	}
1624 }
1625 
1626 static void
1627 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
1628     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
1629 {
1630 	struct event *ep;
1631 	struct fme *fmep = NULL;
1632 	struct fme *ofmep = NULL;
1633 	struct fme *cfmep, *svfmep;
1634 	int matched = 0;
1635 	nvlist_t *defect;
1636 	fmd_case_t *fmcase;
1637 	char *reason;
1638 
1639 	out(O_ALTFP|O_NONL, "fme_receive_report: ");
1640 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1641 	out(O_ALTFP|O_STAMP, NULL);
1642 
1643 	/* decide which FME it goes to */
1644 	for (fmep = FMElist; fmep; fmep = fmep->next) {
1645 		int prev_verbose;
1646 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1647 		enum fme_state state;
1648 		nvlist_t *pre_peek_nvp = NULL;
1649 
1650 		if (fmep->overflow) {
1651 			if (!(fmd_case_closed(fmep->hdl, fmep->fmcase)))
1652 				ofmep = fmep;
1653 
1654 			continue;
1655 		}
1656 
1657 		/*
1658 		 * ignore solved or closed cases
1659 		 */
1660 		if (fmep->posted_suspects ||
1661 		    fmd_case_solved(fmep->hdl, fmep->fmcase) ||
1662 		    fmd_case_closed(fmep->hdl, fmep->fmcase))
1663 			continue;
1664 
1665 		/* look up event in event tree for this FME */
1666 		if ((ep = itree_lookup(fmep->eventtree,
1667 		    eventstring, ipp)) == NULL)
1668 			continue;
1669 
1670 		/* note observation */
1671 		fmep->ecurrent = ep;
1672 		if (ep->count++ == 0) {
1673 			/* link it into list of observations seen */
1674 			ep->observations = fmep->observations;
1675 			fmep->observations = ep;
1676 			ep->nvp = evnv_dupnvl(nvl);
1677 		} else {
1678 			/* use new payload values for peek */
1679 			pre_peek_nvp = ep->nvp;
1680 			ep->nvp = evnv_dupnvl(nvl);
1681 		}
1682 
1683 		/* tell hypothesise() not to mess with suspect list */
1684 		fmep->peek = 1;
1685 
1686 		/* don't want this to be verbose (unless Debug is set) */
1687 		prev_verbose = Verbose;
1688 		if (Debug == 0)
1689 			Verbose = 0;
1690 
1691 		lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
1692 		state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
1693 
1694 		fmep->peek = 0;
1695 
1696 		/* put verbose flag back */
1697 		Verbose = prev_verbose;
1698 
1699 		if (state != FME_DISPROVED) {
1700 			/* found an FME that explains the ereport */
1701 			matched++;
1702 			out(O_ALTFP|O_NONL, "[");
1703 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1704 			out(O_ALTFP, " explained by FME%d]", fmep->id);
1705 
1706 			nvlist_free(pre_peek_nvp);
1707 
1708 			if (ep->count == 1)
1709 				serialize_observation(fmep, eventstring, ipp);
1710 
1711 			if (ffep) {
1712 				fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1713 				ep->ffep = ffep;
1714 			}
1715 
1716 			stats_counter_bump(fmep->Rcount);
1717 
1718 			/* re-eval FME */
1719 			fme_eval(fmep, ffep);
1720 		} else {
1721 
1722 			/* not a match, undo noting of observation */
1723 			fmep->ecurrent = NULL;
1724 			if (--ep->count == 0) {
1725 				/* unlink it from observations */
1726 				fmep->observations = ep->observations;
1727 				ep->observations = NULL;
1728 				nvlist_free(ep->nvp);
1729 				ep->nvp = NULL;
1730 			} else {
1731 				nvlist_free(ep->nvp);
1732 				ep->nvp = pre_peek_nvp;
1733 			}
1734 		}
1735 	}
1736 
1737 	if (matched)
1738 		return;	/* explained by at least one existing FME */
1739 
1740 	/* clean up closed fmes */
1741 	cfmep = ClosedFMEs;
1742 	while (cfmep != NULL) {
1743 		svfmep = cfmep->next;
1744 		destroy_fme(cfmep);
1745 		cfmep = svfmep;
1746 	}
1747 	ClosedFMEs = NULL;
1748 
1749 	if (ofmep) {
1750 		out(O_ALTFP|O_NONL, "[");
1751 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1752 		out(O_ALTFP, " ADDING TO OVERFLOW FME]");
1753 		if (ffep)
1754 			fmd_case_add_ereport(hdl, ofmep->fmcase, ffep);
1755 
1756 		return;
1757 
1758 	} else if (Max_fme && (Open_fme_count >= Max_fme)) {
1759 		out(O_ALTFP|O_NONL, "[");
1760 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1761 		out(O_ALTFP, " MAX OPEN FME REACHED]");
1762 
1763 		fmcase = fmd_case_open(hdl, NULL);
1764 
1765 		/* Create overflow fme */
1766 		if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep,
1767 		    nvl)) == NULL) {
1768 			out(O_ALTFP|O_NONL, "[");
1769 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1770 			out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]");
1771 			return;
1772 		}
1773 
1774 		Open_fme_count++;
1775 
1776 		init_fme_bufs(fmep);
1777 		fmep->overflow = B_TRUE;
1778 
1779 		if (ffep)
1780 			fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1781 
1782 		Undiag_reason = UD_VAL_MAXFME;
1783 		defect = fmd_nvl_create_fault(hdl,
1784 		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
1785 		reason = undiag_2reason_str(Undiag_reason, NULL);
1786 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
1787 		FREE(reason);
1788 		fmd_case_add_suspect(hdl, fmep->fmcase, defect);
1789 		fmd_case_solve(hdl, fmep->fmcase);
1790 		Undiag_reason = UD_VAL_UNKNOWN;
1791 		return;
1792 	}
1793 
1794 	/* open a case */
1795 	fmcase = fmd_case_open(hdl, NULL);
1796 
1797 	/* start a new FME */
1798 	if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep, nvl)) == NULL) {
1799 		out(O_ALTFP|O_NONL, "[");
1800 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1801 		out(O_ALTFP, " CANNOT DIAGNOSE]");
1802 		return;
1803 	}
1804 
1805 	Open_fme_count++;
1806 
1807 	init_fme_bufs(fmep);
1808 
1809 	out(O_ALTFP|O_NONL, "[");
1810 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1811 	out(O_ALTFP, " created FME%d, case %s]", fmep->id,
1812 	    fmd_case_uuid(hdl, fmep->fmcase));
1813 
1814 	ep = fmep->e0;
1815 	ASSERT(ep != NULL);
1816 
1817 	/* note observation */
1818 	fmep->ecurrent = ep;
1819 	if (ep->count++ == 0) {
1820 		/* link it into list of observations seen */
1821 		ep->observations = fmep->observations;
1822 		fmep->observations = ep;
1823 		ep->nvp = evnv_dupnvl(nvl);
1824 		serialize_observation(fmep, eventstring, ipp);
1825 	} else {
1826 		/* new payload overrides any previous */
1827 		nvlist_free(ep->nvp);
1828 		ep->nvp = evnv_dupnvl(nvl);
1829 	}
1830 
1831 	stats_counter_bump(fmep->Rcount);
1832 
1833 	if (ffep) {
1834 		fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1835 		fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
1836 		fmep->e0r = ffep;
1837 		ep->ffep = ffep;
1838 	}
1839 
1840 	/* give the diagnosis algorithm a shot at the new FME state */
1841 	fme_eval(fmep, ffep);
1842 }
1843 
1844 void
1845 fme_status(int flags)
1846 {
1847 	struct fme *fmep;
1848 
1849 	if (FMElist == NULL) {
1850 		out(flags, "No fault management exercises underway.");
1851 		return;
1852 	}
1853 
1854 	for (fmep = FMElist; fmep; fmep = fmep->next)
1855 		fme_print(flags, fmep);
1856 }
1857 
1858 /*
1859  * "indent" routines used mostly for nicely formatted debug output, but also
1860  * for sanity checking for infinite recursion bugs.
1861  */
1862 
1863 #define	MAX_INDENT 1024
1864 static const char *indent_s[MAX_INDENT];
1865 static int current_indent;
1866 
1867 static void
1868 indent_push(const char *s)
1869 {
1870 	if (current_indent < MAX_INDENT)
1871 		indent_s[current_indent++] = s;
1872 	else
1873 		out(O_DIE, "unexpected recursion depth (%d)", current_indent);
1874 }
1875 
1876 static void
1877 indent_set(const char *s)
1878 {
1879 	current_indent = 0;
1880 	indent_push(s);
1881 }
1882 
1883 static void
1884 indent_pop(void)
1885 {
1886 	if (current_indent > 0)
1887 		current_indent--;
1888 	else
1889 		out(O_DIE, "recursion underflow");
1890 }
1891 
1892 static void
1893 indent(void)
1894 {
1895 	int i;
1896 	if (!Verbose)
1897 		return;
1898 	for (i = 0; i < current_indent; i++)
1899 		out(O_ALTFP|O_VERB|O_NONL, indent_s[i]);
1900 }
1901 
1902 #define	SLNEW		1
1903 #define	SLCHANGED	2
1904 #define	SLWAIT		3
1905 #define	SLDISPROVED	4
1906 
1907 static void
1908 print_suspects(int circumstance, struct fme *fmep)
1909 {
1910 	struct event *ep;
1911 
1912 	out(O_ALTFP|O_NONL, "[");
1913 	if (circumstance == SLCHANGED) {
1914 		out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, "
1915 		    "suspect list:", fmep->id, fme_state2str(fmep->state));
1916 	} else if (circumstance == SLWAIT) {
1917 		out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id,
1918 		    fmep->timer);
1919 		ptree_timeval(O_ALTFP|O_NONL, &fmep->wull);
1920 	} else if (circumstance == SLDISPROVED) {
1921 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id);
1922 	} else {
1923 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id);
1924 	}
1925 
1926 	if (circumstance == SLWAIT || circumstance == SLDISPROVED) {
1927 		out(O_ALTFP, "]");
1928 		return;
1929 	}
1930 
1931 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
1932 		out(O_ALTFP|O_NONL, " ");
1933 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
1934 	}
1935 	out(O_ALTFP, "]");
1936 }
1937 
1938 static struct node *
1939 eventprop_lookup(struct event *ep, const char *propname)
1940 {
1941 	return (lut_lookup(ep->props, (void *)propname, NULL));
1942 }
1943 
1944 #define	MAXDIGITIDX	23
1945 static char numbuf[MAXDIGITIDX + 1];
1946 
1947 static int
1948 node2uint(struct node *n, uint_t *valp)
1949 {
1950 	struct evalue value;
1951 	struct lut *globals = NULL;
1952 
1953 	if (n == NULL)
1954 		return (1);
1955 
1956 	/*
1957 	 * check value.v since we are being asked to convert an unsigned
1958 	 * long long int to an unsigned int
1959 	 */
1960 	if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) ||
1961 	    value.t != UINT64 || value.v > (1ULL << 32))
1962 		return (1);
1963 
1964 	*valp = (uint_t)value.v;
1965 
1966 	return (0);
1967 }
1968 
1969 static nvlist_t *
1970 node2fmri(struct node *n)
1971 {
1972 	nvlist_t **pa, *f, *p;
1973 	struct node *nc;
1974 	uint_t depth = 0;
1975 	char *numstr, *nullbyte;
1976 	char *failure;
1977 	int err, i;
1978 
1979 	/* XXX do we need to be able to handle a non-T_NAME node? */
1980 	if (n == NULL || n->t != T_NAME)
1981 		return (NULL);
1982 
1983 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
1984 		if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM)
1985 			break;
1986 		depth++;
1987 	}
1988 
1989 	if (nc != NULL) {
1990 		/* We bailed early, something went wrong */
1991 		return (NULL);
1992 	}
1993 
1994 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
1995 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
1996 	pa = alloca(depth * sizeof (nvlist_t *));
1997 	for (i = 0; i < depth; i++)
1998 		pa[i] = NULL;
1999 
2000 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
2001 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
2002 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
2003 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
2004 	if (err != 0) {
2005 		failure = "basic construction of FMRI failed";
2006 		goto boom;
2007 	}
2008 
2009 	numbuf[MAXDIGITIDX] = '\0';
2010 	nullbyte = &numbuf[MAXDIGITIDX];
2011 	i = 0;
2012 
2013 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
2014 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2015 		if (err != 0) {
2016 			failure = "alloc of an hc-pair failed";
2017 			goto boom;
2018 		}
2019 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s);
2020 		numstr = ulltostr(nc->u.name.child->u.ull, nullbyte);
2021 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2022 		if (err != 0) {
2023 			failure = "construction of an hc-pair failed";
2024 			goto boom;
2025 		}
2026 		pa[i++] = p;
2027 	}
2028 
2029 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2030 	if (err == 0) {
2031 		for (i = 0; i < depth; i++)
2032 			nvlist_free(pa[i]);
2033 		return (f);
2034 	}
2035 	failure = "addition of hc-pair array to FMRI failed";
2036 
2037 boom:
2038 	for (i = 0; i < depth; i++)
2039 		nvlist_free(pa[i]);
2040 	nvlist_free(f);
2041 	out(O_DIE, "%s", failure);
2042 	/*NOTREACHED*/
2043 	return (NULL);
2044 }
2045 
/*
 * An ipath cache entry is an array of these elements, one per path
 * component.  The array is terminated by an element whose s is NULL.
 */
struct ipath {
	const char *s;	/* component name (in stable) */
	int i;		/* instance number */
};
2051 
2052 static nvlist_t *
2053 ipath2fmri(struct ipath *ipath)
2054 {
2055 	nvlist_t **pa, *f, *p;
2056 	uint_t depth = 0;
2057 	char *numstr, *nullbyte;
2058 	char *failure;
2059 	int err, i;
2060 	struct ipath *ipp;
2061 
2062 	for (ipp = ipath; ipp->s != NULL; ipp++)
2063 		depth++;
2064 
2065 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
2066 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
2067 	pa = alloca(depth * sizeof (nvlist_t *));
2068 	for (i = 0; i < depth; i++)
2069 		pa[i] = NULL;
2070 
2071 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
2072 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
2073 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
2074 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
2075 	if (err != 0) {
2076 		failure = "basic construction of FMRI failed";
2077 		goto boom;
2078 	}
2079 
2080 	numbuf[MAXDIGITIDX] = '\0';
2081 	nullbyte = &numbuf[MAXDIGITIDX];
2082 	i = 0;
2083 
2084 	for (ipp = ipath; ipp->s != NULL; ipp++) {
2085 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2086 		if (err != 0) {
2087 			failure = "alloc of an hc-pair failed";
2088 			goto boom;
2089 		}
2090 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s);
2091 		numstr = ulltostr(ipp->i, nullbyte);
2092 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2093 		if (err != 0) {
2094 			failure = "construction of an hc-pair failed";
2095 			goto boom;
2096 		}
2097 		pa[i++] = p;
2098 	}
2099 
2100 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2101 	if (err == 0) {
2102 		for (i = 0; i < depth; i++)
2103 			nvlist_free(pa[i]);
2104 		return (f);
2105 	}
2106 	failure = "addition of hc-pair array to FMRI failed";
2107 
2108 boom:
2109 	for (i = 0; i < depth; i++)
2110 		nvlist_free(pa[i]);
2111 	nvlist_free(f);
2112 	out(O_DIE, "%s", failure);
2113 	/*NOTREACHED*/
2114 	return (NULL);
2115 }
2116 
/*
 * percentof -- return part as a percentage of whole, rounded to the
 *	nearest integer (a tenths digit of 5 or more rounds up).
 *
 * The scaling is done in 64 bits so that large values of part cannot
 * wrap before the division.  A whole of zero yields 0 rather than a
 * division by zero.
 */
static uint8_t
percentof(uint_t part, uint_t whole)
{
	unsigned long long permille;

	if (whole == 0)
		return (0);

	permille = ((unsigned long long)part * 1000) / whole;

	return ((uint8_t)((permille / 10) + ((permille % 10 >= 5) ? 1 : 0)));
}
2124 
/*
 * rsl -- one entry of the suspect array: a suspect event together with
 * the nvlists that get_resources() stored for it.
 */
struct rsl {
	struct event *suspect;	/* the suspect event itself */
	nvlist_t *asru;		/* ASRU nvlist; freed by rslfree() */
	nvlist_t *fru;		/* FRU nvlist; freed by rslfree() */
	nvlist_t *rsrc;		/* resource nvlist; rslfree() allows == asru */
};
2131 
2132 static void publish_suspects(struct fme *fmep, struct rsl *srl);
2133 
2134 /*
2135  *  rslfree -- free internal members of struct rsl not expected to be
2136  *	freed elsewhere.
2137  */
2138 static void
2139 rslfree(struct rsl *freeme)
2140 {
2141 	nvlist_free(freeme->asru);
2142 	nvlist_free(freeme->fru);
2143 	if (freeme->rsrc != freeme->asru)
2144 		nvlist_free(freeme->rsrc);
2145 }
2146 
2147 /*
2148  *  rslcmp -- compare two rsl structures.  Use the following
2149  *	comparisons to establish cardinality:
2150  *
2151  *	1. Name of the suspect's class. (simple strcmp)
2152  *	2. Name of the suspect's ASRU. (trickier, since nvlist)
2153  *
2154  */
2155 static int
2156 rslcmp(const void *a, const void *b)
2157 {
2158 	struct rsl *r1 = (struct rsl *)a;
2159 	struct rsl *r2 = (struct rsl *)b;
2160 	int rv;
2161 
2162 	rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s,
2163 	    r2->suspect->enode->u.event.ename->u.name.s);
2164 	if (rv != 0)
2165 		return (rv);
2166 
2167 	if (r1->rsrc == NULL && r2->rsrc == NULL)
2168 		return (0);
2169 	if (r1->rsrc == NULL)
2170 		return (-1);
2171 	if (r2->rsrc == NULL)
2172 		return (1);
2173 	return (evnv_cmpnvl(r1->rsrc, r2->rsrc, 0));
2174 }
2175 
2176 /*
2177  * get_resources -- for a given suspect, determine what ASRU, FRU and
2178  *     RSRC nvlists should be advertised in the final suspect list.
2179  */
2180 void
2181 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot)
2182 {
2183 	struct node *asrudef, *frudef;
2184 	nvlist_t *asru, *fru;
2185 	nvlist_t *rsrc = NULL;
2186 	char *pathstr;
2187 
2188 	/*
2189 	 * First find any ASRU and/or FRU defined in the
2190 	 * initial fault tree.
2191 	 */
2192 	asrudef = eventprop_lookup(sp, L_ASRU);
2193 	frudef = eventprop_lookup(sp, L_FRU);
2194 
2195 	/*
2196 	 * Create FMRIs based on those definitions
2197 	 */
2198 	asru = node2fmri(asrudef);
2199 	fru = node2fmri(frudef);
2200 	pathstr = ipath2str(NULL, sp->ipp);
2201 
2202 	/*
2203 	 *  Allow for platform translations of the FMRIs
2204 	 */
2205 	platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc,
2206 	    pathstr);
2207 
2208 	FREE(pathstr);
2209 	rsrcs->suspect = sp;
2210 	rsrcs->asru = asru;
2211 	rsrcs->fru = fru;
2212 	rsrcs->rsrc = rsrc;
2213 }
2214 
2215 /*
2216  * trim_suspects -- prior to publishing, we may need to remove some
2217  *    suspects from the list.  If we're auto-closing upsets, we don't
2218  *    want any of those in the published list.  If the ASRUs for multiple
2219  *    defects resolve to the same ASRU (driver) we only want to publish
2220  *    that as a single suspect.
2221  */
2222 static int
2223 trim_suspects(struct fme *fmep, struct rsl *begin, struct rsl *begin2,
2224     fmd_event_t *ffep)
2225 {
2226 	struct event *ep;
2227 	struct rsl *rp = begin;
2228 	struct rsl *rp2 = begin2;
2229 	int mess_zero_count = 0;
2230 	int serd_rval;
2231 	uint_t messval;
2232 
2233 	/* remove any unwanted upsets and populate our array */
2234 	for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
2235 		if (is_upset(ep->t))
2236 			continue;
2237 		serd_rval = serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, ep,
2238 		    NULL, NULL);
2239 		if (serd_rval == 0)
2240 			continue;
2241 		if (node2uint(eventprop_lookup(ep, L_message),
2242 		    &messval) == 0 && messval == 0) {
2243 			get_resources(ep, rp2, fmep->config);
2244 			rp2++;
2245 			mess_zero_count++;
2246 		} else {
2247 			get_resources(ep, rp, fmep->config);
2248 			rp++;
2249 			fmep->nsuspects++;
2250 		}
2251 	}
2252 	return (mess_zero_count);
2253 }
2254 
2255 /*
2256  * addpayloadprop -- add a payload prop to a problem
2257  */
2258 static void
2259 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault)
2260 {
2261 	nvlist_t *rsrc, *hcs;
2262 
2263 	ASSERT(fault != NULL);
2264 	ASSERT(lhs != NULL);
2265 	ASSERT(rhs != NULL);
2266 
2267 	if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, &rsrc) != 0)
2268 		out(O_DIE, "cannot add payloadprop \"%s\" to fault", lhs);
2269 
2270 	if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0) {
2271 		out(O_ALTFP|O_VERB2, "addpayloadprop: create hc_specific");
2272 		if (nvlist_xalloc(&hcs, NV_UNIQUE_NAME, &Eft_nv_hdl) != 0)
2273 			out(O_DIE,
2274 			    "cannot add payloadprop \"%s\" to fault", lhs);
2275 		if (nvlist_add_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, hcs) != 0)
2276 			out(O_DIE,
2277 			    "cannot add payloadprop \"%s\" to fault", lhs);
2278 		nvlist_free(hcs);
2279 		if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0)
2280 			out(O_DIE,
2281 			    "cannot add payloadprop \"%s\" to fault", lhs);
2282 	} else
2283 		out(O_ALTFP|O_VERB2, "addpayloadprop: reuse hc_specific");
2284 
2285 	if (rhs->t == UINT64) {
2286 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v);
2287 
2288 		if (nvlist_add_uint64(hcs, lhs, rhs->v) != 0)
2289 			out(O_DIE,
2290 			    "cannot add payloadprop \"%s\" to fault", lhs);
2291 	} else {
2292 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"",
2293 		    lhs, (char *)(uintptr_t)rhs->v);
2294 
2295 		if (nvlist_add_string(hcs, lhs, (char *)(uintptr_t)rhs->v) != 0)
2296 			out(O_DIE,
2297 			    "cannot add payloadprop \"%s\" to fault", lhs);
2298 	}
2299 }
2300 
/*
 * Scratch state shared by istataddsize(), istat2str() and istat_save()
 * while the istats are being serialized.
 */
static char *Istatbuf;		/* start of the serialization buffer */
static char *Istatbufptr;	/* next byte to write within Istatbuf */
static int Istatsz;		/* size in bytes of the serialized istats */
2304 
2305 /*
2306  * istataddsize -- calculate size of istat and add it to Istatsz
2307  */
2308 /*ARGSUSED2*/
2309 static void
2310 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2311 {
2312 	int val;
2313 
2314 	ASSERT(lhs != NULL);
2315 	ASSERT(rhs != NULL);
2316 
2317 	if ((val = stats_counter_value(rhs)) == 0)
2318 		return;	/* skip zero-valued stats */
2319 
2320 	/* count up the size of the stat name */
2321 	Istatsz += ipath2strlen(lhs->ename, lhs->ipath);
2322 	Istatsz++;	/* for the trailing NULL byte */
2323 
2324 	/* count up the size of the stat value */
2325 	Istatsz += snprintf(NULL, 0, "%d", val);
2326 	Istatsz++;	/* for the trailing NULL byte */
2327 }
2328 
2329 /*
2330  * istat2str -- serialize an istat, writing result to *Istatbufptr
2331  */
2332 /*ARGSUSED2*/
2333 static void
2334 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2335 {
2336 	char *str;
2337 	int len;
2338 	int val;
2339 
2340 	ASSERT(lhs != NULL);
2341 	ASSERT(rhs != NULL);
2342 
2343 	if ((val = stats_counter_value(rhs)) == 0)
2344 		return;	/* skip zero-valued stats */
2345 
2346 	/* serialize the stat name */
2347 	str = ipath2str(lhs->ename, lhs->ipath);
2348 	len = strlen(str);
2349 
2350 	ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]);
2351 	(void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr);
2352 	Istatbufptr += len;
2353 	FREE(str);
2354 	*Istatbufptr++ = '\0';
2355 
2356 	/* serialize the stat value */
2357 	Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr,
2358 	    "%d", val);
2359 	*Istatbufptr++ = '\0';
2360 
2361 	ASSERT(Istatbufptr <= &Istatbuf[Istatsz]);
2362 }
2363 
2364 void
2365 istat_save()
2366 {
2367 	if (Istat_need_save == 0)
2368 		return;
2369 
2370 	/* figure out how big the serialzed info is */
2371 	Istatsz = 0;
2372 	lut_walk(Istats, (lut_cb)istataddsize, NULL);
2373 
2374 	if (Istatsz == 0) {
2375 		/* no stats to save */
2376 		fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2377 		return;
2378 	}
2379 
2380 	/* create the serialized buffer */
2381 	Istatbufptr = Istatbuf = MALLOC(Istatsz);
2382 	lut_walk(Istats, (lut_cb)istat2str, NULL);
2383 
2384 	/* clear out current saved stats */
2385 	fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2386 
2387 	/* write out the new version */
2388 	fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz);
2389 	FREE(Istatbuf);
2390 
2391 	Istat_need_save = 0;
2392 }
2393 
2394 int
2395 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2)
2396 {
2397 	if (ent1->ename != ent2->ename)
2398 		return (ent2->ename - ent1->ename);
2399 	if (ent1->ipath != ent2->ipath)
2400 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2401 
2402 	return (0);
2403 }
2404 
2405 /*
2406  * istat-verify -- verify the component associated with a stat still exists
2407  *
2408  * if the component no longer exists, this routine resets the stat and
2409  * returns 0.  if the component still exists, it returns 1.
2410  */
2411 static int
2412 istat_verify(struct node *snp, struct istat_entry *entp)
2413 {
2414 	struct stats *statp;
2415 	nvlist_t *fmri;
2416 
2417 	fmri = node2fmri(snp->u.event.epname);
2418 	if (platform_path_exists(fmri)) {
2419 		nvlist_free(fmri);
2420 		return (1);
2421 	}
2422 	nvlist_free(fmri);
2423 
2424 	/* component no longer in system.  zero out the associated stats */
2425 	if ((statp = (struct stats *)
2426 	    lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
2427 	    stats_counter_value(statp) == 0)
2428 		return (0);	/* stat is already reset */
2429 
2430 	Istat_need_save = 1;
2431 	stats_counter_reset(statp);
2432 	return (0);
2433 }
2434 
2435 static void
2436 istat_bump(struct node *snp, int n)
2437 {
2438 	struct stats *statp;
2439 	struct istat_entry ent;
2440 
2441 	ASSERT(snp != NULL);
2442 	ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
2443 	ASSERT(snp->u.event.epname != NULL);
2444 
2445 	/* class name should be hoisted into a single stable entry */
2446 	ASSERT(snp->u.event.ename->u.name.next == NULL);
2447 	ent.ename = snp->u.event.ename->u.name.s;
2448 	ent.ipath = ipath(snp->u.event.epname);
2449 
2450 	if (!istat_verify(snp, &ent)) {
2451 		/* component no longer exists in system, nothing to do */
2452 		return;
2453 	}
2454 
2455 	if ((statp = (struct stats *)
2456 	    lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
2457 		/* need to create the counter */
2458 		int cnt = 0;
2459 		struct node *np;
2460 		char *sname;
2461 		char *snamep;
2462 		struct istat_entry *newentp;
2463 
2464 		/* count up the size of the stat name */
2465 		np = snp->u.event.ename;
2466 		while (np != NULL) {
2467 			cnt += strlen(np->u.name.s);
2468 			cnt++;	/* for the '.' or '@' */
2469 			np = np->u.name.next;
2470 		}
2471 		np = snp->u.event.epname;
2472 		while (np != NULL) {
2473 			cnt += snprintf(NULL, 0, "%s%llu",
2474 			    np->u.name.s, np->u.name.child->u.ull);
2475 			cnt++;	/* for the '/' or trailing NULL byte */
2476 			np = np->u.name.next;
2477 		}
2478 
2479 		/* build the stat name */
2480 		snamep = sname = alloca(cnt);
2481 		np = snp->u.event.ename;
2482 		while (np != NULL) {
2483 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2484 			    "%s", np->u.name.s);
2485 			np = np->u.name.next;
2486 			if (np)
2487 				*snamep++ = '.';
2488 		}
2489 		*snamep++ = '@';
2490 		np = snp->u.event.epname;
2491 		while (np != NULL) {
2492 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2493 			    "%s%llu", np->u.name.s, np->u.name.child->u.ull);
2494 			np = np->u.name.next;
2495 			if (np)
2496 				*snamep++ = '/';
2497 		}
2498 		*snamep++ = '\0';
2499 
2500 		/* create the new stat & add it to our list */
2501 		newentp = MALLOC(sizeof (*newentp));
2502 		*newentp = ent;
2503 		statp = stats_new_counter(NULL, sname, 0);
2504 		Istats = lut_add(Istats, (void *)newentp, (void *)statp,
2505 		    (lut_cmp)istat_cmp);
2506 	}
2507 
2508 	/* if n is non-zero, set that value instead of bumping */
2509 	if (n) {
2510 		stats_counter_reset(statp);
2511 		stats_counter_add(statp, n);
2512 	} else
2513 		stats_counter_bump(statp);
2514 	Istat_need_save = 1;
2515 
2516 	ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
2517 	out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
2518 	    stats_counter_value(statp));
2519 }
2520 
/*
 * istat_destructor -- lut_free callback for Istats: frees the
 *	istat_entry key and deletes the stats value.
 */
/*ARGSUSED*/
static void
istat_destructor(void *left, void *right, void *arg)
{
	struct istat_entry *key = left;
	struct stats *counter = right;

	FREE(key);
	stats_delete(counter);
}
2530 
2531 /*
2532  * Callback used in a walk of the Istats to reset matching stat counters.
2533  */
2534 static void
2535 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
2536     const struct ipath *ipp)
2537 {
2538 	char *path;
2539 
2540 	if (entp->ipath == ipp) {
2541 		path = ipath2str(entp->ename, ipp);
2542 		out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
2543 		FREE(path);
2544 		stats_counter_reset(statp);
2545 		Istat_need_save = 1;
2546 	}
2547 }
2548 
2549 /*ARGSUSED*/
2550 static void
2551 istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp,
2552     void *unused)
2553 {
2554 	char *path;
2555 	nvlist_t *fmri;
2556 
2557 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2558 	if (!platform_path_exists(fmri)) {
2559 		path = ipath2str(entp->ename, entp->ipath);
2560 		out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path);
2561 		FREE(path);
2562 		stats_counter_reset(statp);
2563 		Istat_need_save = 1;
2564 	}
2565 	nvlist_free(fmri);
2566 }
2567 
2568 void
2569 istat_fini(void)
2570 {
2571 	lut_free(Istats, istat_destructor, NULL);
2572 }
2573 
/*
 * Scratch state shared by serdaddsize(), serd2str() and serd_save()
 * while the serd engine names are being serialized.
 */
static char *Serdbuf;		/* start of the serialization buffer */
static char *Serdbufptr;	/* next byte to write within Serdbuf */
static int Serdsz;		/* size in bytes of the serialized names */
2577 
2578 /*
2579  * serdaddsize -- calculate size of serd and add it to Serdsz
2580  */
2581 /*ARGSUSED*/
2582 static void
2583 serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2584 {
2585 	ASSERT(lhs != NULL);
2586 
2587 	/* count up the size of the stat name */
2588 	Serdsz += ipath2strlen(lhs->ename, lhs->ipath);
2589 	Serdsz++;	/* for the trailing NULL byte */
2590 }
2591 
2592 /*
2593  * serd2str -- serialize a serd engine, writing result to *Serdbufptr
2594  */
2595 /*ARGSUSED*/
2596 static void
2597 serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2598 {
2599 	char *str;
2600 	int len;
2601 
2602 	ASSERT(lhs != NULL);
2603 
2604 	/* serialize the serd engine name */
2605 	str = ipath2str(lhs->ename, lhs->ipath);
2606 	len = strlen(str);
2607 
2608 	ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]);
2609 	(void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr);
2610 	Serdbufptr += len;
2611 	FREE(str);
2612 	*Serdbufptr++ = '\0';
2613 	ASSERT(Serdbufptr <= &Serdbuf[Serdsz]);
2614 }
2615 
2616 void
2617 serd_save()
2618 {
2619 	if (Serd_need_save == 0)
2620 		return;
2621 
2622 	/* figure out how big the serialzed info is */
2623 	Serdsz = 0;
2624 	lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL);
2625 
2626 	if (Serdsz == 0) {
2627 		/* no serd engines to save */
2628 		fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2629 		return;
2630 	}
2631 
2632 	/* create the serialized buffer */
2633 	Serdbufptr = Serdbuf = MALLOC(Serdsz);
2634 	lut_walk(SerdEngines, (lut_cb)serd2str, NULL);
2635 
2636 	/* clear out current saved stats */
2637 	fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2638 
2639 	/* write out the new version */
2640 	fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz);
2641 	FREE(Serdbuf);
2642 	Serd_need_save = 0;
2643 }
2644 
2645 int
2646 serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2)
2647 {
2648 	if (ent1->ename != ent2->ename)
2649 		return (ent2->ename - ent1->ename);
2650 	if (ent1->ipath != ent2->ipath)
2651 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2652 
2653 	return (0);
2654 }
2655 
2656 void
2657 fme_serd_load(fmd_hdl_t *hdl)
2658 {
2659 	int sz;
2660 	char *sbuf;
2661 	char *sepptr;
2662 	char *ptr;
2663 	struct serd_entry *newentp;
2664 	struct node *epname;
2665 	nvlist_t *fmri;
2666 	char *namestring;
2667 
2668 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0)
2669 		return;
2670 	sbuf = alloca(sz);
2671 	fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz);
2672 	ptr = sbuf;
2673 	while (ptr < &sbuf[sz]) {
2674 		sepptr = strchr(ptr, '@');
2675 		*sepptr = '\0';
2676 		namestring = ptr;
2677 		sepptr++;
2678 		ptr = sepptr;
2679 		ptr += strlen(ptr);
2680 		ptr++;	/* move past the '\0' separating paths */
2681 		epname = pathstring2epnamenp(sepptr);
2682 		fmri = node2fmri(epname);
2683 		if (platform_path_exists(fmri)) {
2684 			newentp = MALLOC(sizeof (*newentp));
2685 			newentp->hdl = hdl;
2686 			newentp->ipath = ipath(epname);
2687 			newentp->ename = stable(namestring);
2688 			SerdEngines = lut_add(SerdEngines, (void *)newentp,
2689 			    (void *)newentp, (lut_cmp)serd_cmp);
2690 		} else
2691 			Serd_need_save = 1;
2692 		tree_free(epname);
2693 		nvlist_free(fmri);
2694 	}
2695 	/* save it back again in case some of the paths no longer exist */
2696 	serd_save();
2697 }
2698 
/*
 * serd_destructor -- lut_free callback for SerdEngines: frees the
 *	serd_entry passed as the left-hand side.
 */
/*ARGSUSED*/
static void
serd_destructor(void *left, void *right, void *arg)
{
	struct serd_entry *key = left;

	FREE(key);
}
2706 
2707 /*
2708  * Callback used in a walk of the SerdEngines to reset matching serd engines.
2709  */
2710 /*ARGSUSED*/
2711 static void
2712 serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp)
2713 {
2714 	char *path;
2715 
2716 	if (entp->ipath == ipp) {
2717 		path = ipath2str(entp->ename, ipp);
2718 		out(O_ALTFP, "serd_reset_cb: resetting %s", path);
2719 		fmd_serd_reset(entp->hdl, path);
2720 		FREE(path);
2721 		Serd_need_save = 1;
2722 	}
2723 }
2724 
2725 /*ARGSUSED*/
2726 static void
2727 serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2)
2728 {
2729 	char *path;
2730 	nvlist_t *fmri;
2731 
2732 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2733 	if (!platform_path_exists(fmri)) {
2734 		path = ipath2str(entp->ename, entp->ipath);
2735 		out(O_ALTFP, "serd_topo_chg_cb: not present %s", path);
2736 		fmd_serd_reset(entp->hdl, path);
2737 		FREE(path);
2738 		Serd_need_save = 1;
2739 	}
2740 	nvlist_free(fmri);
2741 }
2742 
2743 void
2744 serd_fini(void)
2745 {
2746 	lut_free(SerdEngines, serd_destructor, NULL);
2747 }
2748 
2749 static void
2750 publish_suspects(struct fme *fmep, struct rsl *srl)
2751 {
2752 	struct rsl *rp;
2753 	nvlist_t *fault;
2754 	uint8_t cert;
2755 	uint_t *frs;
2756 	uint_t frsum, fr;
2757 	uint_t messval;
2758 	uint_t retireval;
2759 	uint_t responseval;
2760 	struct node *snp;
2761 	int frcnt, fridx;
2762 	boolean_t allfaulty = B_TRUE;
2763 	struct rsl *erl = srl + fmep->nsuspects - 1;
2764 
2765 	/*
2766 	 * sort the array
2767 	 */
2768 	qsort(srl, fmep->nsuspects, sizeof (struct rsl), rslcmp);
2769 
2770 	/* sum the fitrates */
2771 	frs = alloca(fmep->nsuspects * sizeof (uint_t));
2772 	fridx = frcnt = frsum = 0;
2773 
2774 	for (rp = srl; rp <= erl; rp++) {
2775 		struct node *n;
2776 
2777 		n = eventprop_lookup(rp->suspect, L_FITrate);
2778 		if (node2uint(n, &fr) != 0) {
2779 			out(O_DEBUG|O_NONL, "event ");
2780 			ipath_print(O_DEBUG|O_NONL,
2781 			    rp->suspect->enode->u.event.ename->u.name.s,
2782 			    rp->suspect->ipp);
2783 			out(O_VERB, " has no FITrate (using 1)");
2784 			fr = 1;
2785 		} else if (fr == 0) {
2786 			out(O_DEBUG|O_NONL, "event ");
2787 			ipath_print(O_DEBUG|O_NONL,
2788 			    rp->suspect->enode->u.event.ename->u.name.s,
2789 			    rp->suspect->ipp);
2790 			out(O_VERB, " has zero FITrate (using 1)");
2791 			fr = 1;
2792 		}
2793 
2794 		frs[fridx++] = fr;
2795 		frsum += fr;
2796 		frcnt++;
2797 	}
2798 
2799 	/* Add them in reverse order of our sort, as fmd reverses order */
2800 	for (rp = erl; rp >= srl; rp--) {
2801 		cert = percentof(frs[--fridx], frsum);
2802 		fault = fmd_nvl_create_fault(fmep->hdl,
2803 		    rp->suspect->enode->u.event.ename->u.name.s,
2804 		    cert,
2805 		    rp->asru,
2806 		    rp->fru,
2807 		    rp->rsrc);
2808 		if (fault == NULL)
2809 			out(O_DIE, "fault creation failed");
2810 		/* if "message" property exists, add it to the fault */
2811 		if (node2uint(eventprop_lookup(rp->suspect, L_message),
2812 		    &messval) == 0) {
2813 
2814 			out(O_ALTFP,
2815 			    "[FME%d, %s adds message=%d to suspect list]",
2816 			    fmep->id,
2817 			    rp->suspect->enode->u.event.ename->u.name.s,
2818 			    messval);
2819 			if (nvlist_add_boolean_value(fault,
2820 			    FM_SUSPECT_MESSAGE,
2821 			    (messval) ? B_TRUE : B_FALSE) != 0) {
2822 				out(O_DIE, "cannot add no-message to fault");
2823 			}
2824 		}
2825 
2826 		/* if "retire" property exists, add it to the fault */
2827 		if (node2uint(eventprop_lookup(rp->suspect, L_retire),
2828 		    &retireval) == 0) {
2829 
2830 			out(O_ALTFP,
2831 			    "[FME%d, %s adds retire=%d to suspect list]",
2832 			    fmep->id,
2833 			    rp->suspect->enode->u.event.ename->u.name.s,
2834 			    retireval);
2835 			if (nvlist_add_boolean_value(fault,
2836 			    FM_SUSPECT_RETIRE,
2837 			    (retireval) ? B_TRUE : B_FALSE) != 0) {
2838 				out(O_DIE, "cannot add no-retire to fault");
2839 			}
2840 		}
2841 
2842 		/* if "response" property exists, add it to the fault */
2843 		if (node2uint(eventprop_lookup(rp->suspect, L_response),
2844 		    &responseval) == 0) {
2845 
2846 			out(O_ALTFP,
2847 			    "[FME%d, %s adds response=%d to suspect list]",
2848 			    fmep->id,
2849 			    rp->suspect->enode->u.event.ename->u.name.s,
2850 			    responseval);
2851 			if (nvlist_add_boolean_value(fault,
2852 			    FM_SUSPECT_RESPONSE,
2853 			    (responseval) ? B_TRUE : B_FALSE) != 0) {
2854 				out(O_DIE, "cannot add no-response to fault");
2855 			}
2856 		}
2857 
2858 		/* add any payload properties */
2859 		lut_walk(rp->suspect->payloadprops,
2860 		    (lut_cb)addpayloadprop, (void *)fault);
2861 		rslfree(rp);
2862 
2863 		/*
2864 		 * If "action" property exists, evaluate it;  this must be done
2865 		 * before the allfaulty check below since some actions may
2866 		 * modify the asru to be used in fmd_nvl_fmri_has_fault.  This
2867 		 * needs to be restructured if any new actions are introduced
2868 		 * that have effects that we do not want to be visible if
2869 		 * we decide not to publish in the dupclose check below.
2870 		 */
2871 		if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
2872 			struct evalue evalue;
2873 
2874 			out(O_ALTFP|O_NONL,
2875 			    "[FME%d, %s action ", fmep->id,
2876 			    rp->suspect->enode->u.event.ename->u.name.s);
2877 			ptree_name_iter(O_ALTFP|O_NONL, snp);
2878 			out(O_ALTFP, "]");
2879 			Action_nvl = fault;
2880 			(void) eval_expr(snp, NULL, NULL, NULL, NULL,
2881 			    NULL, 0, &evalue);
2882 		}
2883 
2884 		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);
2885 
2886 		/*
2887 		 * check if the asru is already marked as "faulty".
2888 		 */
2889 		if (allfaulty) {
2890 			nvlist_t *asru;
2891 
2892 			out(O_ALTFP|O_VERB, "FME%d dup check ", fmep->id);
2893 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
2894 			out(O_ALTFP|O_VERB|O_NONL, " ");
2895 			if (nvlist_lookup_nvlist(fault,
2896 			    FM_FAULT_ASRU, &asru) != 0) {
2897 				out(O_ALTFP|O_VERB, "NULL asru");
2898 				allfaulty = B_FALSE;
2899 			} else if (fmd_nvl_fmri_has_fault(fmep->hdl, asru,
2900 			    FMD_HAS_FAULT_ASRU, NULL)) {
2901 				out(O_ALTFP|O_VERB, "faulty");
2902 			} else {
2903 				out(O_ALTFP|O_VERB, "not faulty");
2904 				allfaulty = B_FALSE;
2905 			}
2906 		}
2907 
2908 	}
2909 
2910 	if (!allfaulty) {
2911 		/*
2912 		 * don't update the count stat if all asrus are already
2913 		 * present and unrepaired in the asru cache
2914 		 */
2915 		for (rp = erl; rp >= srl; rp--) {
2916 			struct event *suspect = rp->suspect;
2917 
2918 			if (suspect == NULL)
2919 				continue;
2920 
2921 			/* if "count" exists, increment the appropriate stat */
2922 			if ((snp = eventprop_lookup(suspect,
2923 			    L_count)) != NULL) {
2924 				out(O_ALTFP|O_NONL,
2925 				    "[FME%d, %s count ", fmep->id,
2926 				    suspect->enode->u.event.ename->u.name.s);
2927 				ptree_name_iter(O_ALTFP|O_NONL, snp);
2928 				out(O_ALTFP, "]");
2929 				istat_bump(snp, 0);
2930 
2931 			}
2932 		}
2933 		istat_save();	/* write out any istat changes */
2934 	}
2935 }
2936 
2937 static const char *
2938 undiag_2defect_str(int ud)
2939 {
2940 	switch (ud) {
2941 	case UD_VAL_MISSINGINFO:
2942 	case UD_VAL_MISSINGOBS:
2943 	case UD_VAL_MISSINGPATH:
2944 	case UD_VAL_MISSINGZERO:
2945 	case UD_VAL_BADOBS:
2946 	case UD_VAL_CFGMISMATCH:
2947 		return (UNDIAG_DEFECT_CHKPT);
2948 
2949 	case UD_VAL_BADEVENTI:
2950 	case UD_VAL_BADEVENTPATH:
2951 	case UD_VAL_BADEVENTCLASS:
2952 	case UD_VAL_INSTFAIL:
2953 	case UD_VAL_NOPATH:
2954 	case UD_VAL_UNSOLVD:
2955 		return (UNDIAG_DEFECT_FME);
2956 
2957 	case UD_VAL_MAXFME:
2958 		return (UNDIAG_DEFECT_LIMIT);
2959 
2960 	case UD_VAL_UNKNOWN:
2961 	default:
2962 		return (UNDIAG_DEFECT_UNKNOWN);
2963 	}
2964 }
2965 
2966 static const char *
2967 undiag_2fault_str(int ud)
2968 {
2969 	switch (ud) {
2970 	case UD_VAL_BADEVENTI:
2971 	case UD_VAL_BADEVENTPATH:
2972 	case UD_VAL_BADEVENTCLASS:
2973 	case UD_VAL_INSTFAIL:
2974 	case UD_VAL_NOPATH:
2975 	case UD_VAL_UNSOLVD:
2976 		return (UNDIAG_FAULT_FME);
2977 	default:
2978 		return (NULL);
2979 	}
2980 }
2981 
2982 static char *
2983 undiag_2reason_str(int ud, char *arg)
2984 {
2985 	const char *ptr;
2986 	char *buf;
2987 	int with_arg = 0;
2988 
2989 	switch (ud) {
2990 	case UD_VAL_BADEVENTPATH:
2991 		ptr = UD_STR_BADEVENTPATH;
2992 		with_arg = 1;
2993 		break;
2994 	case UD_VAL_BADEVENTCLASS:
2995 		ptr = UD_STR_BADEVENTCLASS;
2996 		with_arg = 1;
2997 		break;
2998 	case UD_VAL_BADEVENTI:
2999 		ptr = UD_STR_BADEVENTI;
3000 		with_arg = 1;
3001 		break;
3002 	case UD_VAL_BADOBS:
3003 		ptr = UD_STR_BADOBS;
3004 		break;
3005 	case UD_VAL_CFGMISMATCH:
3006 		ptr = UD_STR_CFGMISMATCH;
3007 		break;
3008 	case UD_VAL_INSTFAIL:
3009 		ptr = UD_STR_INSTFAIL;
3010 		with_arg = 1;
3011 		break;
3012 	case UD_VAL_MAXFME:
3013 		ptr = UD_STR_MAXFME;
3014 		break;
3015 	case UD_VAL_MISSINGINFO:
3016 		ptr = UD_STR_MISSINGINFO;
3017 		break;
3018 	case UD_VAL_MISSINGOBS:
3019 		ptr = UD_STR_MISSINGOBS;
3020 		break;
3021 	case UD_VAL_MISSINGPATH:
3022 		ptr = UD_STR_MISSINGPATH;
3023 		break;
3024 	case UD_VAL_MISSINGZERO:
3025 		ptr = UD_STR_MISSINGZERO;
3026 		break;
3027 	case UD_VAL_NOPATH:
3028 		ptr = UD_STR_NOPATH;
3029 		with_arg = 1;
3030 		break;
3031 	case UD_VAL_UNSOLVD:
3032 		ptr = UD_STR_UNSOLVD;
3033 		break;
3034 	case UD_VAL_UNKNOWN:
3035 	default:
3036 		ptr = UD_STR_UNKNOWN;
3037 		break;
3038 	}
3039 	if (with_arg) {
3040 		buf = MALLOC(strlen(ptr) + strlen(arg) - 1);
3041 		(void) sprintf(buf, ptr, arg);
3042 	} else {
3043 		buf = MALLOC(strlen(ptr) + 1);
3044 		(void) sprintf(buf, ptr);
3045 	}
3046 	return (buf);
3047 }
3048 
3049 static void
3050 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase,
3051     nvlist_t *detector, char *arg)
3052 {
3053 	struct case_list *newcase;
3054 	nvlist_t *defect, *fault;
3055 	const char *faultstr;
3056 	char *reason = undiag_2reason_str(Undiag_reason, arg);
3057 
3058 	out(O_ALTFP,
3059 	    "[undiagnosable ereport received, "
3060 	    "creating and closing a new case (%s)]", reason);
3061 
3062 	newcase = MALLOC(sizeof (struct case_list));
3063 	newcase->next = NULL;
3064 	newcase->fmcase = fmcase;
3065 	if (Undiagablecaselist != NULL)
3066 		newcase->next = Undiagablecaselist;
3067 	Undiagablecaselist = newcase;
3068 
3069 	if (ffep != NULL)
3070 		fmd_case_add_ereport(hdl, newcase->fmcase, ffep);
3071 
3072 	/* add defect */
3073 	defect = fmd_nvl_create_fault(hdl,
3074 	    undiag_2defect_str(Undiag_reason), 50, NULL, NULL, detector);
3075 	(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
3076 	(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE, B_FALSE);
3077 	(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE, B_FALSE);
3078 	fmd_case_add_suspect(hdl, newcase->fmcase, defect);
3079 
3080 	/* add fault if appropriate */
3081 	faultstr = undiag_2fault_str(Undiag_reason);
3082 	if (faultstr != NULL) {
3083 		fault = fmd_nvl_create_fault(hdl, faultstr, 50, NULL, NULL,
3084 		    detector);
3085 		(void) nvlist_add_string(fault, UNDIAG_REASON, reason);
3086 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
3087 		    B_FALSE);
3088 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
3089 		    B_FALSE);
3090 		fmd_case_add_suspect(hdl, newcase->fmcase, fault);
3091 	}
3092 	FREE(reason);
3093 
3094 	/* solve and close case */
3095 	fmd_case_solve(hdl, newcase->fmcase);
3096 	fmd_case_close(hdl, newcase->fmcase);
3097 	Undiag_reason = UD_VAL_UNKNOWN;
3098 }
3099 
3100 static void
3101 fme_undiagnosable(struct fme *f)
3102 {
3103 	nvlist_t *defect, *fault, *detector = NULL;
3104 	struct event *ep;
3105 	char *pathstr;
3106 	const char *faultstr;
3107 	char *reason = undiag_2reason_str(Undiag_reason, NULL);
3108 
3109 	out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
3110 	    f->id, fmd_case_uuid(f->hdl, f->fmcase), reason);
3111 
3112 	for (ep = f->observations; ep; ep = ep->observations) {
3113 
3114 		if (ep->ffep != f->e0r)
3115 			fmd_case_add_ereport(f->hdl, f->fmcase, ep->ffep);
3116 
3117 		pathstr = ipath2str(NULL, ipath(platform_getpath(ep->nvp)));
3118 		platform_units_translate(0, f->config, NULL, NULL, &detector,
3119 		    pathstr);
3120 		FREE(pathstr);
3121 
3122 		/* add defect */
3123 		defect = fmd_nvl_create_fault(f->hdl,
3124 		    undiag_2defect_str(Undiag_reason), 50 / f->uniqobs,
3125 		    NULL, NULL, detector);
3126 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
3127 		(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE,
3128 		    B_FALSE);
3129 		(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE,
3130 		    B_FALSE);
3131 		fmd_case_add_suspect(f->hdl, f->fmcase, defect);
3132 
3133 		/* add fault if appropriate */
3134 		faultstr = undiag_2fault_str(Undiag_reason);
3135 		if (faultstr == NULL)
3136 			continue;
3137 		fault = fmd_nvl_create_fault(f->hdl, faultstr, 50 / f->uniqobs,
3138 		    NULL, NULL, detector);
3139 		(void) nvlist_add_string(fault, UNDIAG_REASON, reason);
3140 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
3141 		    B_FALSE);
3142 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
3143 		    B_FALSE);
3144 		fmd_case_add_suspect(f->hdl, f->fmcase, fault);
3145 		nvlist_free(detector);
3146 	}
3147 	FREE(reason);
3148 	fmd_case_solve(f->hdl, f->fmcase);
3149 	fmd_case_close(f->hdl, f->fmcase);
3150 	Undiag_reason = UD_VAL_UNKNOWN;
3151 }
3152 
3153 /*
3154  * fme_close_case
3155  *
3156  *	Find the requested case amongst our fmes and close it.  Free up
3157  *	the related fme.
3158  */
3159 void
3160 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase)
3161 {
3162 	struct case_list *ucasep, *prevcasep = NULL;
3163 	struct fme *prev = NULL;
3164 	struct fme *fmep;
3165 
3166 	for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) {
3167 		if (fmcase != ucasep->fmcase) {
3168 			prevcasep = ucasep;
3169 			continue;
3170 		}
3171 
3172 		if (prevcasep == NULL)
3173 			Undiagablecaselist = Undiagablecaselist->next;
3174 		else
3175 			prevcasep->next = ucasep->next;
3176 
3177 		FREE(ucasep);
3178 		return;
3179 	}
3180 
3181 	for (fmep = FMElist; fmep; fmep = fmep->next) {
3182 		if (fmep->hdl == hdl && fmep->fmcase == fmcase)
3183 			break;
3184 		prev = fmep;
3185 	}
3186 
3187 	if (fmep == NULL) {
3188 		out(O_WARN, "Eft asked to close unrecognized case [%s].",
3189 		    fmd_case_uuid(hdl, fmcase));
3190 		return;
3191 	}
3192 
3193 	if (EFMElist == fmep)
3194 		EFMElist = prev;
3195 
3196 	if (prev == NULL)
3197 		FMElist = FMElist->next;
3198 	else
3199 		prev->next = fmep->next;
3200 
3201 	fmep->next = NULL;
3202 
3203 	/* Get rid of any timer this fme has set */
3204 	if (fmep->wull != 0)
3205 		fmd_timer_remove(fmep->hdl, fmep->timer);
3206 
3207 	if (ClosedFMEs == NULL) {
3208 		ClosedFMEs = fmep;
3209 	} else {
3210 		fmep->next = ClosedFMEs;
3211 		ClosedFMEs = fmep;
3212 	}
3213 
3214 	Open_fme_count--;
3215 
3216 	/* See if we can close the overflow FME */
3217 	if (Open_fme_count <= Max_fme) {
3218 		for (fmep = FMElist; fmep; fmep = fmep->next) {
3219 			if (fmep->overflow && !(fmd_case_closed(fmep->hdl,
3220 			    fmep->fmcase)))
3221 				break;
3222 		}
3223 
3224 		if (fmep != NULL)
3225 			fmd_case_close(fmep->hdl, fmep->fmcase);
3226 	}
3227 }
3228 
3229 /*
3230  * fme_set_timer()
3231  *	If the time we need to wait for the given FME is less than the
3232  *	current timer, kick that old timer out and establish a new one.
3233  */
3234 static int
3235 fme_set_timer(struct fme *fmep, unsigned long long wull)
3236 {
3237 	out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait ");
3238 	ptree_timeval(O_ALTFP|O_VERB, &wull);
3239 
3240 	if (wull <= fmep->pull) {
3241 		out(O_ALTFP|O_VERB|O_NONL, "already have waited at least ");
3242 		ptree_timeval(O_ALTFP|O_VERB, &fmep->pull);
3243 		out(O_ALTFP|O_VERB, NULL);
3244 		/* we've waited at least wull already, don't need timer */
3245 		return (0);
3246 	}
3247 
3248 	out(O_ALTFP|O_VERB|O_NONL, " currently ");
3249 	if (fmep->wull != 0) {
3250 		out(O_ALTFP|O_VERB|O_NONL, "waiting ");
3251 		ptree_timeval(O_ALTFP|O_VERB, &fmep->wull);
3252 		out(O_ALTFP|O_VERB, NULL);
3253 	} else {
3254 		out(O_ALTFP|O_VERB|O_NONL, "not waiting");
3255 		out(O_ALTFP|O_VERB, NULL);
3256 	}
3257 
3258 	if (fmep->wull != 0)
3259 		if (wull >= fmep->wull)
3260 			/* New timer would fire later than established timer */
3261 			return (0);
3262 
3263 	if (fmep->wull != 0) {
3264 		fmd_timer_remove(fmep->hdl, fmep->timer);
3265 	}
3266 
3267 	fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep,
3268 	    fmep->e0r, wull);
3269 	out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer);
3270 	fmep->wull = wull;
3271 	return (1);
3272 }
3273 
3274 void
3275 fme_timer_fired(struct fme *fmep, id_t tid)
3276 {
3277 	struct fme *ffmep = NULL;
3278 
3279 	for (ffmep = FMElist; ffmep; ffmep = ffmep->next)
3280 		if (ffmep == fmep)
3281 			break;
3282 
3283 	if (ffmep == NULL) {
3284 		out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.",
3285 		    (void *)fmep);
3286 		return;
3287 	}
3288 
3289 	out(O_ALTFP|O_VERB, "Timer fired %lx", tid);
3290 	fmep->pull = fmep->wull;
3291 	fmep->wull = 0;
3292 	fmd_buf_write(fmep->hdl, fmep->fmcase,
3293 	    WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull));
3294 
3295 	fme_eval(fmep, fmep->e0r);
3296 }
3297 
3298 /*
3299  * Preserve the fme's suspect list in its psuspects list, NULLing the
3300  * suspects list in the meantime.
3301  */
3302 static void
3303 save_suspects(struct fme *fmep)
3304 {
3305 	struct event *ep;
3306 	struct event *nextep;
3307 
3308 	/* zero out the previous suspect list */
3309 	for (ep = fmep->psuspects; ep; ep = nextep) {
3310 		nextep = ep->psuspects;
3311 		ep->psuspects = NULL;
3312 	}
3313 	fmep->psuspects = NULL;
3314 
3315 	/* zero out the suspect list, copying it to previous suspect list */
3316 	fmep->psuspects = fmep->suspects;
3317 	for (ep = fmep->suspects; ep; ep = nextep) {
3318 		nextep = ep->suspects;
3319 		ep->psuspects = ep->suspects;
3320 		ep->suspects = NULL;
3321 		ep->is_suspect = 0;
3322 	}
3323 	fmep->suspects = NULL;
3324 	fmep->nsuspects = 0;
3325 }
3326 
3327 /*
3328  * Retrieve the fme's suspect list from its psuspects list.
3329  */
3330 static void
3331 restore_suspects(struct fme *fmep)
3332 {
3333 	struct event *ep;
3334 	struct event *nextep;
3335 
3336 	fmep->nsuspects = 0;
3337 	fmep->suspects = fmep->psuspects;
3338 	for (ep = fmep->psuspects; ep; ep = nextep) {
3339 		fmep->nsuspects++;
3340 		nextep = ep->psuspects;
3341 		ep->suspects = ep->psuspects;
3342 	}
3343 }
3344 
3345 /*
3346  * this is what we use to call the Emrys prototype code instead of main()
3347  */
3348 static void
3349 fme_eval(struct fme *fmep, fmd_event_t *ffep)
3350 {
3351 	struct event *ep;
3352 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
3353 	struct rsl *srl = NULL;
3354 	struct rsl *srl2 = NULL;
3355 	int mess_zero_count;
3356 	int rpcnt;
3357 
3358 	save_suspects(fmep);
3359 
3360 	out(O_ALTFP, "Evaluate FME %d", fmep->id);
3361 	indent_set("  ");
3362 
3363 	lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
3364 	fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
3365 
3366 	out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
3367 	    fme_state2str(fmep->state));
3368 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
3369 		out(O_ALTFP|O_NONL, " ");
3370 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
3371 	}
3372 	out(O_ALTFP, NULL);
3373 
3374 	switch (fmep->state) {
3375 	case FME_CREDIBLE:
3376 		print_suspects(SLNEW, fmep);
3377 		(void) upsets_eval(fmep, ffep);
3378 
3379 		/*
3380 		 * we may have already posted suspects in upsets_eval() which
3381 		 * can recurse into fme_eval() again. If so then just return.
3382 		 */
3383 		if (fmep->posted_suspects)
3384 			return;
3385 
3386 		stats_counter_bump(fmep->diags);
3387 		rpcnt = fmep->nsuspects;
3388 		save_suspects(fmep);
3389 
3390 		/*
3391 		 * create two lists, one for "message=1" faults and one for
3392 		 * "message=0" faults. If we have a mixture we will generate
3393 		 * two separate suspect lists.
3394 		 */
3395 		srl = MALLOC(rpcnt * sizeof (struct rsl));
3396 		bzero(srl, rpcnt * sizeof (struct rsl));
3397 		srl2 = MALLOC(rpcnt * sizeof (struct rsl));
3398 		bzero(srl2, rpcnt * sizeof (struct rsl));
3399 		mess_zero_count = trim_suspects(fmep, srl, srl2, ffep);
3400 
3401 		/*
3402 		 * If the resulting suspect list has no members, we're
3403 		 * done so simply close the case. Otherwise sort and publish.
3404 		 */
3405 		if (fmep->nsuspects == 0 && mess_zero_count == 0) {
3406 			out(O_ALTFP,
3407 			    "[FME%d, case %s (all suspects are upsets)]",
3408 			    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
3409 			fmd_case_close(fmep->hdl, fmep->fmcase);
3410 		} else if (fmep->nsuspects != 0 && mess_zero_count == 0) {
3411 			publish_suspects(fmep, srl);
3412 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3413 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3414 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3415 		} else if (fmep->nsuspects == 0 && mess_zero_count != 0) {
3416 			fmep->nsuspects = mess_zero_count;
3417 			publish_suspects(fmep, srl2);
3418 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3419 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3420 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3421 		} else {
3422 			struct event *obsp;
3423 			struct fme *nfmep;
3424 
3425 			publish_suspects(fmep, srl);
3426 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3427 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3428 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3429 
3430 			/*
3431 			 * Got both message=0 and message=1 so create a
3432 			 * duplicate case. Also need a temporary duplicate fme
3433 			 * structure for use by publish_suspects().
3434 			 */
3435 			nfmep = alloc_fme();
3436 			nfmep->id =  Nextid++;
3437 			nfmep->hdl = fmep->hdl;
3438 			nfmep->nsuspects = mess_zero_count;
3439 			nfmep->fmcase = fmd_case_open(fmep->hdl, NULL);
3440 			out(O_ALTFP|O_STAMP,
3441 			    "[creating parallel FME%d, case %s]", nfmep->id,
3442 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3443 			Open_fme_count++;
3444 			if (ffep) {
3445 				fmd_case_setprincipal(nfmep->hdl,
3446 				    nfmep->fmcase, ffep);
3447 				fmd_case_add_ereport(nfmep->hdl,
3448 				    nfmep->fmcase, ffep);
3449 			}
3450 			for (obsp = fmep->observations; obsp;
3451 			    obsp = obsp->observations)
3452 				if (obsp->ffep && obsp->ffep != ffep)
3453 					fmd_case_add_ereport(nfmep->hdl,
3454 					    nfmep->fmcase, obsp->ffep);
3455 
3456 			publish_suspects(nfmep, srl2);
3457 			out(O_ALTFP, "[solving FME%d, case %s]", nfmep->id,
3458 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3459 			fmd_case_solve(nfmep->hdl, nfmep->fmcase);
3460 			FREE(nfmep);
3461 		}
3462 		FREE(srl);
3463 		FREE(srl2);
3464 		restore_suspects(fmep);
3465 
3466 		fmep->posted_suspects = 1;
3467 		fmd_buf_write(fmep->hdl, fmep->fmcase,
3468 		    WOBUF_POSTD,
3469 		    (void *)&fmep->posted_suspects,
3470 		    sizeof (fmep->posted_suspects));
3471 
3472 		/*
3473 		 * Now the suspects have been posted, we can clear up
3474 		 * the instance tree as we won't be looking at it again.
3475 		 * Also cancel the timer as the case is now solved.
3476 		 */
3477 		if (fmep->wull != 0) {
3478 			fmd_timer_remove(fmep->hdl, fmep->timer);
3479 			fmep->wull = 0;
3480 		}
3481 		break;
3482 
3483 	case FME_WAIT:
3484 		ASSERT(my_delay > fmep->ull);
3485 		(void) fme_set_timer(fmep, my_delay);
3486 		print_suspects(SLWAIT, fmep);
3487 		itree_prune(fmep->eventtree);
3488 		return;
3489 
3490 	case FME_DISPROVED:
3491 		print_suspects(SLDISPROVED, fmep);
3492 		Undiag_reason = UD_VAL_UNSOLVD;
3493 		fme_undiagnosable(fmep);
3494 		break;
3495 	}
3496 
3497 	itree_free(fmep->eventtree);
3498 	fmep->eventtree = NULL;
3499 	structconfig_free(fmep->config);
3500 	fmep->config = NULL;
3501 	destroy_fme_bufs(fmep);
3502 }
3503 
/*
 * Forward declarations of file-local helpers.
 */
static void indent(void);
static int triggered(struct fme *fmep, struct event *ep, int mark);
static enum fme_state effects_test(struct fme *fmep, struct event *fault_event,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state requirements_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state causes_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
3512     unsigned long long at_latest_by, unsigned long long *pdelay);
3513 
3514 static int
3515 checkconstraints(struct fme *fmep, struct arrow *arrowp)
3516 {
3517 	struct constraintlist *ctp;
3518 	struct evalue value;
3519 	char *sep = "";
3520 
3521 	if (arrowp->forever_false) {
3522 		indent();
3523 		out(O_ALTFP|O_VERB|O_NONL, "  Forever false constraint: ");
3524 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3525 			out(O_ALTFP|O_VERB|O_NONL, sep);
3526 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3527 			sep = ", ";
3528 		}
3529 		out(O_ALTFP|O_VERB, NULL);
3530 		return (0);
3531 	}
3532 	if (arrowp->forever_true) {
3533 		indent();
3534 		out(O_ALTFP|O_VERB|O_NONL, "  Forever true constraint: ");
3535 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3536 			out(O_ALTFP|O_VERB|O_NONL, sep);
3537 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3538 			sep = ", ";
3539 		}
3540 		out(O_ALTFP|O_VERB, NULL);
3541 		return (1);
3542 	}
3543 
3544 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3545 		if (eval_expr(ctp->cnode, NULL, NULL,
3546 		    &fmep->globals, fmep->config,
3547 		    arrowp, 0, &value)) {
3548 			/* evaluation successful */
3549 			if (value.t == UNDEFINED || value.v == 0) {
3550 				/* known false */
3551 				arrowp->forever_false = 1;
3552 				indent();
3553 				out(O_ALTFP|O_VERB|O_NONL,
3554 				    "  False constraint: ");
3555 				ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3556 				out(O_ALTFP|O_VERB, NULL);
3557 				return (0);
3558 			}
3559 		} else {
3560 			/* evaluation unsuccessful -- unknown value */
3561 			indent();
3562 			out(O_ALTFP|O_VERB|O_NONL,
3563 			    "  Deferred constraint: ");
3564 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3565 			out(O_ALTFP|O_VERB, NULL);
3566 			return (1);
3567 		}
3568 	}
3569 	/* known true */
3570 	arrowp->forever_true = 1;
3571 	indent();
3572 	out(O_ALTFP|O_VERB|O_NONL, "  True constraint: ");
3573 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3574 		out(O_ALTFP|O_VERB|O_NONL, sep);
3575 		ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3576 		sep = ", ";
3577 	}
3578 	out(O_ALTFP|O_VERB, NULL);
3579 	return (1);
3580 }
3581 
3582 static int
3583 triggered(struct fme *fmep, struct event *ep, int mark)
3584 {
3585 	struct bubble *bp;
3586 	struct arrowlist *ap;
3587 	int count = 0;
3588 
3589 	stats_counter_bump(fmep->Tcallcount);
3590 	for (bp = itree_next_bubble(ep, NULL); bp;
3591 	    bp = itree_next_bubble(ep, bp)) {
3592 		if (bp->t != B_TO)
3593 			continue;
3594 		for (ap = itree_next_arrow(bp, NULL); ap;
3595 		    ap = itree_next_arrow(bp, ap)) {
3596 			/* check count of marks against K in the bubble */
3597 			if ((ap->arrowp->mark & mark) &&
3598 			    ++count >= bp->nork)
3599 				return (1);
3600 		}
3601 	}
3602 	return (0);
3603 }
3604 
3605 static int
3606 mark_arrows(struct fme *fmep, struct event *ep, int mark,
3607     unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
3608 {
3609 	struct bubble *bp;
3610 	struct arrowlist *ap;
3611 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3612 	unsigned long long my_delay;
3613 	enum fme_state result;
3614 	int retval = 0;
3615 
3616 	for (bp = itree_next_bubble(ep, NULL); bp;
3617 	    bp = itree_next_bubble(ep, bp)) {
3618 		if (bp->t != B_FROM)
3619 			continue;
3620 		stats_counter_bump(fmep->Marrowcount);
3621 		for (ap = itree_next_arrow(bp, NULL); ap;
3622 		    ap = itree_next_arrow(bp, ap)) {
3623 			struct event *ep2 = ap->arrowp->head->myevent;
3624 			/*
3625 			 * if we're clearing marks, we can avoid doing
3626 			 * all that work evaluating constraints.
3627 			 */
3628 			if (mark == 0) {
3629 				if (ap->arrowp->arrow_marked == 0)
3630 					continue;
3631 				ap->arrowp->arrow_marked = 0;
3632 				ap->arrowp->mark &= ~EFFECTS_COUNTER;
3633 				if (keep && (ep2->cached_state &
3634 				    (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
3635 					ep2->keep_in_tree = 1;
3636 				ep2->cached_state &=
3637 				    ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
3638 				(void) mark_arrows(fmep, ep2, mark, 0, NULL,
3639 				    keep);
3640 				continue;
3641 			}
3642 			ap->arrowp->arrow_marked = 1;
3643 			if (ep2->cached_state & REQMNTS_DISPROVED) {
3644 				indent();
3645 				out(O_ALTFP|O_VERB|O_NONL,
3646 				    "  ALREADY DISPROVED ");
3647 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3648 				out(O_ALTFP|O_VERB, NULL);
3649 				continue;
3650 			}
3651 			if (ep2->cached_state & WAIT_EFFECT) {
3652 				indent();
3653 				out(O_ALTFP|O_VERB|O_NONL,
3654 				    "  ALREADY EFFECTS WAIT ");
3655 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3656 				out(O_ALTFP|O_VERB, NULL);
3657 				continue;
3658 			}
3659 			if (ep2->cached_state & CREDIBLE_EFFECT) {
3660 				indent();
3661 				out(O_ALTFP|O_VERB|O_NONL,
3662 				    "  ALREADY EFFECTS CREDIBLE ");
3663 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3664 				out(O_ALTFP|O_VERB, NULL);
3665 				continue;
3666 			}
3667 			if ((ep2->cached_state & PARENT_WAIT) &&
3668 			    (mark & PARENT_WAIT)) {
3669 				indent();
3670 				out(O_ALTFP|O_VERB|O_NONL,
3671 				    "  ALREADY PARENT EFFECTS WAIT ");
3672 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3673 				out(O_ALTFP|O_VERB, NULL);
3674 				continue;
3675 			}
3676 			platform_set_payloadnvp(ep2->nvp);
3677 			if (checkconstraints(fmep, ap->arrowp) == 0) {
3678 				platform_set_payloadnvp(NULL);
3679 				indent();
3680 				out(O_ALTFP|O_VERB|O_NONL,
3681 				    "  CONSTRAINTS FAIL ");
3682 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3683 				out(O_ALTFP|O_VERB, NULL);
3684 				continue;
3685 			}
3686 			platform_set_payloadnvp(NULL);
3687 			ap->arrowp->mark |= EFFECTS_COUNTER;
3688 			if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
3689 				indent();
3690 				out(O_ALTFP|O_VERB|O_NONL,
3691 				    "  K-COUNT NOT YET MET ");
3692 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3693 				out(O_ALTFP|O_VERB, NULL);
3694 				continue;
3695 			}
3696 			ep2->cached_state &= ~PARENT_WAIT;
3697 			/*
3698 			 * if we've reached an ereport and no propagation time
3699 			 * is specified, use the Hesitate value
3700 			 */
3701 			if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
3702 			    ap->arrowp->maxdelay == 0ULL) {
3703 				out(O_ALTFP|O_VERB|O_NONL, "  default wait ");
3704 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3705 				out(O_ALTFP|O_VERB, NULL);
3706 				result = requirements_test(fmep, ep2, Hesitate,
3707 				    &my_delay);
3708 			} else {
3709 				result = requirements_test(fmep, ep2,
3710 				    at_latest_by + ap->arrowp->maxdelay,
3711 				    &my_delay);
3712 			}
3713 			if (result == FME_WAIT) {
3714 				retval = WAIT_EFFECT;
3715 				if (overall_delay > my_delay)
3716 					overall_delay = my_delay;
3717 				ep2->cached_state |= WAIT_EFFECT;
3718 				indent();
3719 				out(O_ALTFP|O_VERB|O_NONL, "  EFFECTS WAIT ");
3720 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3721 				out(O_ALTFP|O_VERB, NULL);
3722 				indent_push("  E");
3723 				if (mark_arrows(fmep, ep2, PARENT_WAIT,
3724 				    at_latest_by, &my_delay, 0) ==
3725 				    WAIT_EFFECT) {
3726 					retval = WAIT_EFFECT;
3727 					if (overall_delay > my_delay)
3728 						overall_delay = my_delay;
3729 				}
3730 				indent_pop();
3731 			} else if (result == FME_DISPROVED) {
3732 				indent();
3733 				out(O_ALTFP|O_VERB|O_NONL,
3734 				    "  EFFECTS DISPROVED ");
3735 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3736 				out(O_ALTFP|O_VERB, NULL);
3737 			} else {
3738 				ep2->cached_state |= mark;
3739 				indent();
3740 				if (mark == CREDIBLE_EFFECT)
3741 					out(O_ALTFP|O_VERB|O_NONL,
3742 					    "  EFFECTS CREDIBLE ");
3743 				else
3744 					out(O_ALTFP|O_VERB|O_NONL,
3745 					    "  PARENT EFFECTS WAIT ");
3746 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3747 				out(O_ALTFP|O_VERB, NULL);
3748 				indent_push("  E");
3749 				if (mark_arrows(fmep, ep2, mark, at_latest_by,
3750 				    &my_delay, 0) == WAIT_EFFECT) {
3751 					retval = WAIT_EFFECT;
3752 					if (overall_delay > my_delay)
3753 						overall_delay = my_delay;
3754 				}
3755 				indent_pop();
3756 			}
3757 		}
3758 	}
3759 	if (retval == WAIT_EFFECT)
3760 		*pdelay = overall_delay;
3761 	return (retval);
3762 }
3763 
3764 static enum fme_state
3765 effects_test(struct fme *fmep, struct event *fault_event,
3766     unsigned long long at_latest_by, unsigned long long *pdelay)
3767 {
3768 	struct event *error_event;
3769 	enum fme_state return_value = FME_CREDIBLE;
3770 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3771 	unsigned long long my_delay;
3772 
3773 	stats_counter_bump(fmep->Ecallcount);
3774 	indent_push("  E");
3775 	indent();
3776 	out(O_ALTFP|O_VERB|O_NONL, "->");
3777 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3778 	out(O_ALTFP|O_VERB, NULL);
3779 
3780 	if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
3781 	    &my_delay, 0) == WAIT_EFFECT) {
3782 		return_value = FME_WAIT;
3783 		if (overall_delay > my_delay)
3784 			overall_delay = my_delay;
3785 	}
3786 	for (error_event = fmep->observations;
3787 	    error_event; error_event = error_event->observations) {
3788 		indent();
3789 		out(O_ALTFP|O_VERB|O_NONL, " ");
3790 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
3791 		if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
3792 			if (error_event->cached_state &
3793 			    (PARENT_WAIT|WAIT_EFFECT)) {
3794 				out(O_ALTFP|O_VERB, " NOT YET triggered");
3795 				continue;
3796 			}
3797 			return_value = FME_DISPROVED;
3798 			out(O_ALTFP|O_VERB, " NOT triggered");
3799 			break;
3800 		} else {
3801 			out(O_ALTFP|O_VERB, " triggered");
3802 		}
3803 	}
3804 	if (return_value == FME_DISPROVED) {
3805 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
3806 	} else {
3807 		fault_event->keep_in_tree = 1;
3808 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
3809 	}
3810 
3811 	indent();
3812 	out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
3813 	    fme_state2str(return_value));
3814 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3815 	out(O_ALTFP|O_VERB, NULL);
3816 	indent_pop();
3817 	if (return_value == FME_WAIT)
3818 		*pdelay = overall_delay;
3819 	return (return_value);
3820 }
3821 
3822 static enum fme_state
3823 requirements_test(struct fme *fmep, struct event *ep,
3824     unsigned long long at_latest_by, unsigned long long *pdelay)
3825 {
3826 	int waiting_events;
3827 	int credible_events;
3828 	int deferred_events;
3829 	enum fme_state return_value = FME_CREDIBLE;
3830 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3831 	unsigned long long arrow_delay;
3832 	unsigned long long my_delay;
3833 	struct event *ep2;
3834 	struct bubble *bp;
3835 	struct arrowlist *ap;
3836 
3837 	if (ep->cached_state & REQMNTS_CREDIBLE) {
3838 		indent();
3839 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY CREDIBLE ");
3840 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3841 		out(O_ALTFP|O_VERB, NULL);
3842 		return (FME_CREDIBLE);
3843 	}
3844 	if (ep->cached_state & REQMNTS_DISPROVED) {
3845 		indent();
3846 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY DISPROVED ");
3847 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3848 		out(O_ALTFP|O_VERB, NULL);
3849 		return (FME_DISPROVED);
3850 	}
3851 	if (ep->cached_state & REQMNTS_WAIT) {
3852 		indent();
3853 		*pdelay = ep->cached_delay;
3854 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY WAIT ");
3855 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3856 		out(O_ALTFP|O_VERB|O_NONL, ", wait for: ");
3857 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3858 		out(O_ALTFP|O_VERB, NULL);
3859 		return (FME_WAIT);
3860 	}
3861 	stats_counter_bump(fmep->Rcallcount);
3862 	indent_push("  R");
3863 	indent();
3864 	out(O_ALTFP|O_VERB|O_NONL, "->");
3865 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3866 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
3867 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3868 	out(O_ALTFP|O_VERB, NULL);
3869 
3870 	if (ep->t == N_EREPORT) {
3871 		if (ep->count == 0) {
3872 			if (fmep->pull >= at_latest_by) {
3873 				return_value = FME_DISPROVED;
3874 			} else {
3875 				ep->cached_delay = *pdelay = at_latest_by;
3876 				return_value = FME_WAIT;
3877 			}
3878 		}
3879 
3880 		indent();
3881 		switch (return_value) {
3882 		case FME_CREDIBLE:
3883 			ep->cached_state |= REQMNTS_CREDIBLE;
3884 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE ");
3885 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3886 			break;
3887 		case FME_DISPROVED:
3888 			ep->cached_state |= REQMNTS_DISPROVED;
3889 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
3890 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3891 			break;
3892 		case FME_WAIT:
3893 			ep->cached_state |= REQMNTS_WAIT;
3894 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT ");
3895 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3896 			out(O_ALTFP|O_VERB|O_NONL, " to ");
3897 			ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3898 			break;
3899 		default:
3900 			out(O_DIE, "requirements_test: unexpected fme_state");
3901 			break;
3902 		}
3903 		out(O_ALTFP|O_VERB, NULL);
3904 		indent_pop();
3905 
3906 		return (return_value);
3907 	}
3908 
3909 	/* this event is not a report, descend the tree */
3910 	for (bp = itree_next_bubble(ep, NULL); bp;
3911 	    bp = itree_next_bubble(ep, bp)) {
3912 		int n;
3913 
3914 		if (bp->t != B_FROM)
3915 			continue;
3916 
3917 		n = bp->nork;
3918 
3919 		credible_events = 0;
3920 		waiting_events = 0;
3921 		deferred_events = 0;
3922 		arrow_delay = TIMEVAL_EVENTUALLY;
3923 		/*
3924 		 * n is -1 for 'A' so adjust it.
3925 		 * XXX just count up the arrows for now.
3926 		 */
3927 		if (n < 0) {
3928 			n = 0;
3929 			for (ap = itree_next_arrow(bp, NULL); ap;
3930 			    ap = itree_next_arrow(bp, ap))
3931 				n++;
3932 			indent();
3933 			out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n);
3934 		} else {
3935 			indent();
3936 			out(O_ALTFP|O_VERB, " Bubble N=%d", n);
3937 		}
3938 
3939 		if (n == 0)
3940 			continue;
3941 		if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) {
3942 			for (ap = itree_next_arrow(bp, NULL); ap;
3943 			    ap = itree_next_arrow(bp, ap)) {
3944 				ep2 = ap->arrowp->head->myevent;
3945 				platform_set_payloadnvp(ep2->nvp);
3946 				(void) checkconstraints(fmep, ap->arrowp);
3947 				if (!ap->arrowp->forever_false) {
3948 					/*
3949 					 * if all arrows are invalidated by the
3950 					 * constraints, then we should elide the
3951 					 * whole bubble to be consistant with
3952 					 * the tree creation time behaviour
3953 					 */
3954 					bp->mark |= BUBBLE_OK;
3955 					platform_set_payloadnvp(NULL);
3956 					break;
3957 				}
3958 				platform_set_payloadnvp(NULL);
3959 			}
3960 		}
3961 		for (ap = itree_next_arrow(bp, NULL); ap;
3962 		    ap = itree_next_arrow(bp, ap)) {
3963 			ep2 = ap->arrowp->head->myevent;
3964 			if (n <= credible_events)
3965 				break;
3966 
3967 			ap->arrowp->mark |= REQMNTS_COUNTER;
3968 			if (triggered(fmep, ep2, REQMNTS_COUNTER))
3969 				/* XXX adding max timevals! */
3970 				switch (requirements_test(fmep, ep2,
3971 				    at_latest_by + ap->arrowp->maxdelay,
3972 				    &my_delay)) {
3973 				case FME_DEFERRED:
3974 					deferred_events++;
3975 					break;
3976 				case FME_CREDIBLE:
3977 					credible_events++;
3978 					break;
3979 				case FME_DISPROVED:
3980 					break;
3981 				case FME_WAIT:
3982 					if (my_delay < arrow_delay)
3983 						arrow_delay = my_delay;
3984 					waiting_events++;
3985 					break;
3986 				default:
3987 					out(O_DIE,
3988 					"Bug in requirements_test.");
3989 				}
3990 			else
3991 				deferred_events++;
3992 		}
3993 		if (!(bp->mark & BUBBLE_OK) && waiting_events == 0) {
3994 			bp->mark |= BUBBLE_ELIDED;
3995 			continue;
3996 		}
3997 		indent();
3998 		out(O_ALTFP|O_VERB, " Credible: %d Waiting %d",
3999 		    credible_events + deferred_events, waiting_events);
4000 		if (credible_events + deferred_events + waiting_events < n) {
4001 			/* Can never meet requirements */
4002 			ep->cached_state |= REQMNTS_DISPROVED;
4003 			indent();
4004 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
4005 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4006 			out(O_ALTFP|O_VERB, NULL);
4007 			indent_pop();
4008 			return (FME_DISPROVED);
4009 		}
4010 		if (credible_events + deferred_events < n) {
4011 			/* will have to wait */
4012 			/* wait time is shortest known */
4013 			if (arrow_delay < overall_delay)
4014 				overall_delay = arrow_delay;
4015 			return_value = FME_WAIT;
4016 		} else if (credible_events < n) {
4017 			if (return_value != FME_WAIT)
4018 				return_value = FME_DEFERRED;
4019 		}
4020 	}
4021 
4022 	/*
4023 	 * don't mark as FME_DEFERRED. If this event isn't reached by another
4024 	 * path, then this will be considered FME_CREDIBLE. But if it is
4025 	 * reached by a different path so the K-count is met, then might
4026 	 * get overridden by FME_WAIT or FME_DISPROVED.
4027 	 */
4028 	if (return_value == FME_WAIT) {
4029 		ep->cached_state |= REQMNTS_WAIT;
4030 		ep->cached_delay = *pdelay = overall_delay;
4031 	} else if (return_value == FME_CREDIBLE) {
4032 		ep->cached_state |= REQMNTS_CREDIBLE;
4033 	}
4034 	indent();
4035 	out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ",
4036 	    fme_state2str(return_value));
4037 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4038 	out(O_ALTFP|O_VERB, NULL);
4039 	indent_pop();
4040 	return (return_value);
4041 }
4042 
4043 static enum fme_state
4044 causes_test(struct fme *fmep, struct event *ep,
4045     unsigned long long at_latest_by, unsigned long long *pdelay)
4046 {
4047 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4048 	unsigned long long my_delay;
4049 	int credible_results = 0;
4050 	int waiting_results = 0;
4051 	enum fme_state fstate;
4052 	struct event *tail_event;
4053 	struct bubble *bp;
4054 	struct arrowlist *ap;
4055 	int k = 1;
4056 
4057 	stats_counter_bump(fmep->Ccallcount);
4058 	indent_push("  C");
4059 	indent();
4060 	out(O_ALTFP|O_VERB|O_NONL, "->");
4061 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4062 	out(O_ALTFP|O_VERB, NULL);
4063 
4064 	for (bp = itree_next_bubble(ep, NULL); bp;
4065 	    bp = itree_next_bubble(ep, bp)) {
4066 		if (bp->t != B_TO)
4067 			continue;
4068 		k = bp->nork;	/* remember the K value */
4069 		for (ap = itree_next_arrow(bp, NULL); ap;
4070 		    ap = itree_next_arrow(bp, ap)) {
4071 			int do_not_follow = 0;
4072 
4073 			/*
4074 			 * if we get to the same event multiple times
4075 			 * only worry about the first one.
4076 			 */
4077 			if (ap->arrowp->tail->myevent->cached_state &
4078 			    CAUSES_TESTED) {
4079 				indent();
4080 				out(O_ALTFP|O_VERB|O_NONL,
4081 				    "  causes test already run for ");
4082 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4083 				    ap->arrowp->tail->myevent);
4084 				out(O_ALTFP|O_VERB, NULL);
4085 				continue;
4086 			}
4087 
4088 			/*
4089 			 * see if false constraint prevents us
4090 			 * from traversing this arrow
4091 			 */
4092 			platform_set_payloadnvp(ep->nvp);
4093 			if (checkconstraints(fmep, ap->arrowp) == 0)
4094 				do_not_follow = 1;
4095 			platform_set_payloadnvp(NULL);
4096 			if (do_not_follow) {
4097 				indent();
4098 				out(O_ALTFP|O_VERB|O_NONL,
4099 				    "  False arrow from ");
4100 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4101 				    ap->arrowp->tail->myevent);
4102 				out(O_ALTFP|O_VERB, NULL);
4103 				continue;
4104 			}
4105 
4106 			ap->arrowp->tail->myevent->cached_state |=
4107 			    CAUSES_TESTED;
4108 			tail_event = ap->arrowp->tail->myevent;
4109 			fstate = hypothesise(fmep, tail_event, at_latest_by,
4110 			    &my_delay);
4111 
4112 			switch (fstate) {
4113 			case FME_WAIT:
4114 				if (my_delay < overall_delay)
4115 					overall_delay = my_delay;
4116 				waiting_results++;
4117 				break;
4118 			case FME_CREDIBLE:
4119 				credible_results++;
4120 				break;
4121 			case FME_DISPROVED:
4122 				break;
4123 			default:
4124 				out(O_DIE, "Bug in causes_test");
4125 			}
4126 		}
4127 	}
4128 	/* compare against K */
4129 	if (credible_results + waiting_results < k) {
4130 		indent();
4131 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED ");
4132 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4133 		out(O_ALTFP|O_VERB, NULL);
4134 		indent_pop();
4135 		return (FME_DISPROVED);
4136 	}
4137 	if (waiting_results != 0) {
4138 		*pdelay = overall_delay;
4139 		indent();
4140 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT ");
4141 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4142 		out(O_ALTFP|O_VERB|O_NONL, " to ");
4143 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4144 		out(O_ALTFP|O_VERB, NULL);
4145 		indent_pop();
4146 		return (FME_WAIT);
4147 	}
4148 	indent();
4149 	out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE ");
4150 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4151 	out(O_ALTFP|O_VERB, NULL);
4152 	indent_pop();
4153 	return (FME_CREDIBLE);
4154 }
4155 
4156 static enum fme_state
4157 hypothesise(struct fme *fmep, struct event *ep,
4158     unsigned long long at_latest_by, unsigned long long *pdelay)
4159 {
4160 	enum fme_state rtr, otr;
4161 	unsigned long long my_delay;
4162 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4163 
4164 	stats_counter_bump(fmep->Hcallcount);
4165 	indent_push("  H");
4166 	indent();
4167 	out(O_ALTFP|O_VERB|O_NONL, "->");
4168 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4169 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
4170 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4171 	out(O_ALTFP|O_VERB, NULL);
4172 
4173 	rtr = requirements_test(fmep, ep, at_latest_by, &my_delay);
4174 	if ((rtr == FME_WAIT) && (my_delay < overall_delay))
4175 		overall_delay = my_delay;
4176 	if (rtr != FME_DISPROVED) {
4177 		if (is_problem(ep->t)) {
4178 			otr = effects_test(fmep, ep, at_latest_by, &my_delay);
4179 			if (otr != FME_DISPROVED) {
4180 				if (fmep->peek == 0 && ep->is_suspect == 0) {
4181 					ep->suspects = fmep->suspects;
4182 					ep->is_suspect = 1;
4183 					fmep->suspects = ep;
4184 					fmep->nsuspects++;
4185 				}
4186 			}
4187 		} else
4188 			otr = causes_test(fmep, ep, at_latest_by, &my_delay);
4189 		if ((otr == FME_WAIT) && (my_delay < overall_delay))
4190 			overall_delay = my_delay;
4191 		if ((otr != FME_DISPROVED) &&
4192 		    ((rtr == FME_WAIT) || (otr == FME_WAIT)))
4193 			*pdelay = overall_delay;
4194 	}
4195 	if (rtr == FME_DISPROVED) {
4196 		indent();
4197 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4198 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4199 		out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
4200 		indent_pop();
4201 		return (FME_DISPROVED);
4202 	}
4203 	if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
4204 		indent();
4205 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4206 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4207 		out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
4208 		indent_pop();
4209 		return (FME_DISPROVED);
4210 	}
4211 	if (otr == FME_DISPROVED) {
4212 		indent();
4213 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4214 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4215 		out(O_ALTFP|O_VERB, " (causes are not credible)");
4216 		indent_pop();
4217 		return (FME_DISPROVED);
4218 	}
4219 	if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
4220 		indent();
4221 		out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
4222 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4223 		out(O_ALTFP|O_VERB|O_NONL, " to ");
4224 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
4225 		out(O_ALTFP|O_VERB, NULL);
4226 		indent_pop();
4227 		return (FME_WAIT);
4228 	}
4229 	indent();
4230 	out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
4231 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4232 	out(O_ALTFP|O_VERB, NULL);
4233 	indent_pop();
4234 	return (FME_CREDIBLE);
4235 }
4236 
4237 /*
4238  * fme_istat_load -- reconstitute any persistent istats
4239  */
4240 void
4241 fme_istat_load(fmd_hdl_t *hdl)
4242 {
4243 	int sz;
4244 	char *sbuf;
4245 	char *ptr;
4246 
4247 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) {
4248 		out(O_ALTFP, "fme_istat_load: No stats");
4249 		return;
4250 	}
4251 
4252 	sbuf = alloca(sz);
4253 
4254 	fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz);
4255 
4256 	/*
4257 	 * pick apart the serialized stats
4258 	 *
4259 	 * format is:
4260 	 *	<class-name>, '@', <path>, '\0', <value>, '\0'
4261 	 * for example:
4262 	 *	"stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
4263 	 *
4264 	 * since this is parsing our own serialized data, any parsing issues
4265 	 * are fatal, so we check for them all with ASSERT() below.
4266 	 */
4267 	ptr = sbuf;
4268 	while (ptr < &sbuf[sz]) {
4269 		char *sepptr;
4270 		struct node *np;
4271 		int val;
4272 
4273 		sepptr = strchr(ptr, '@');
4274 		ASSERT(sepptr != NULL);
4275 		*sepptr = '\0';
4276 
4277 		/* construct the event */
4278 		np = newnode(T_EVENT, NULL, 0);
4279 		np->u.event.ename = newnode(T_NAME, NULL, 0);
4280 		np->u.event.ename->u.name.t = N_STAT;
4281 		np->u.event.ename->u.name.s = stable(ptr);
4282 		np->u.event.ename->u.name.it = IT_ENAME;
4283 		np->u.event.ename->u.name.last = np->u.event.ename;
4284 
4285 		ptr = sepptr + 1;
4286 		ASSERT(ptr < &sbuf[sz]);
4287 		ptr += strlen(ptr);
4288 		ptr++;	/* move past the '\0' separating path from value */
4289 		ASSERT(ptr < &sbuf[sz]);
4290 		ASSERT(isdigit(*ptr));
4291 		val = atoi(ptr);
4292 		ASSERT(val > 0);
4293 		ptr += strlen(ptr);
4294 		ptr++;	/* move past the final '\0' for this entry */
4295 
4296 		np->u.event.epname = pathstring2epnamenp(sepptr + 1);
4297 		ASSERT(np->u.event.epname != NULL);
4298 
4299 		istat_bump(np, val);
4300 		tree_free(np);
4301 	}
4302 
4303 	istat_save();
4304 }
4305