xref: /titanic_44/usr/src/cmd/fm/modules/common/eversholt/fme.c (revision 72612f86fafbe2510a166b48e158c9031e0dd63b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * fme.c -- fault management exercise module
27  *
28  * this module provides the simulated fault management exercise.
29  */
30 
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <strings.h>
35 #include <ctype.h>
36 #include <alloca.h>
37 #include <libnvpair.h>
38 #include <sys/fm/protocol.h>
39 #include <fm/fmd_api.h>
40 #include "alloc.h"
41 #include "out.h"
42 #include "stats.h"
43 #include "stable.h"
44 #include "literals.h"
45 #include "lut.h"
46 #include "tree.h"
47 #include "ptree.h"
48 #include "itree.h"
49 #include "ipath.h"
50 #include "fme.h"
51 #include "evnv.h"
52 #include "eval.h"
53 #include "config.h"
54 #include "platform.h"
55 #include "esclex.h"
56 
57 /* imported from eft.c... */
58 extern hrtime_t Hesitate;
59 extern char *Serd_Override;
60 extern nv_alloc_t Eft_nv_hdl;
61 extern int Max_fme;
62 extern fmd_hdl_t *Hdl;
63 
64 static int Istat_need_save;
65 static int Serd_need_save;
66 void istat_save(void);
67 void serd_save(void);
68 
69 /* fme under construction is global so we can free it on module abort */
70 static struct fme *Nfmep;
71 
72 static const char *Undiag_reason;
73 
74 static int Nextid = 0;
75 
76 static int Open_fme_count = 0;	/* Count of open FMEs */
77 
78 /* list of fault management exercises underway */
79 static struct fme {
80 	struct fme *next;		/* next exercise */
81 	unsigned long long ull;		/* time when fme was created */
82 	int id;				/* FME id */
83 	struct config *config;		/* cooked configuration data */
84 	struct lut *eventtree;		/* propagation tree for this FME */
85 	/*
86 	 * The initial error report that created this FME is kept in
87 	 * two forms.  e0 points to the instance tree node and is used
88 	 * by fme_eval() as the starting point for the inference
89 	 * algorithm.  e0r is the event handle FMD passed to us when
90 	 * the ereport first arrived and is used when setting timers,
91 	 * which are always relative to the time of this initial
92 	 * report.
93 	 */
94 	struct event *e0;
95 	fmd_event_t *e0r;
96 
97 	id_t    timer;			/* for setting an fmd time-out */
98 
99 	struct event *ecurrent;		/* ereport under consideration */
100 	struct event *suspects;		/* current suspect list */
101 	struct event *psuspects;	/* previous suspect list */
102 	int nsuspects;			/* count of suspects */
103 	int nonfault;			/* zero if all suspects T_FAULT */
104 	int posted_suspects;		/* true if we've posted a diagnosis */
105 	int uniqobs;			/* number of unique events observed */
106 	int peek;			/* just peeking, don't track suspects */
107 	int overflow;			/* true if overflow FME */
108 	enum fme_state {
109 		FME_NOTHING = 5000,	/* not evaluated yet */
110 		FME_WAIT,		/* need to wait for more info */
111 		FME_CREDIBLE,		/* suspect list is credible */
112 		FME_DISPROVED,		/* no valid suspects found */
113 		FME_DEFERRED		/* don't know yet (k-count not met) */
114 	} state;
115 
116 	unsigned long long pull;	/* time passed since created */
117 	unsigned long long wull;	/* wait until this time for re-eval */
118 	struct event *observations;	/* observation list */
119 	struct lut *globals;		/* values of global variables */
120 	/* fmd interfacing */
121 	fmd_hdl_t *hdl;			/* handle for talking with fmd */
122 	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
123 	/* stats */
124 	struct stats *Rcount;
125 	struct stats *Hcallcount;
126 	struct stats *Rcallcount;
127 	struct stats *Ccallcount;
128 	struct stats *Ecallcount;
129 	struct stats *Tcallcount;
130 	struct stats *Marrowcount;
131 	struct stats *diags;
132 } *FMElist, *EFMElist, *ClosedFMEs;
133 
134 static struct case_list {
135 	fmd_case_t *fmcase;
136 	struct case_list *next;
137 } *Undiagablecaselist;
138 
139 static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
140 static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
141 	unsigned long long at_latest_by, unsigned long long *pdelay);
142 static struct node *eventprop_lookup(struct event *ep, const char *propname);
143 static struct node *pathstring2epnamenp(char *path);
144 static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
145 	fmd_case_t *fmcase);
146 static void restore_suspects(struct fme *fmep);
147 static void save_suspects(struct fme *fmep);
148 static void destroy_fme(struct fme *f);
149 static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
150     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
151 static void istat_counter_reset_cb(struct istat_entry *entp,
152     struct stats *statp, const struct ipath *ipp);
153 static void istat_counter_topo_chg_cb(struct istat_entry *entp,
154     struct stats *statp, void *unused);
155 static void serd_reset_cb(struct serd_entry *entp, void *unused,
156     const struct ipath *ipp);
157 static void serd_topo_chg_cb(struct serd_entry *entp, void *unused,
158     void *unused2);
159 static void destroy_fme_bufs(struct fme *fp);
160 
161 static struct fme *
162 alloc_fme(void)
163 {
164 	struct fme *fmep;
165 
166 	fmep = MALLOC(sizeof (*fmep));
167 	bzero(fmep, sizeof (*fmep));
168 	return (fmep);
169 }
170 
171 /*
172  * fme_ready -- called when all initialization of the FME (except for
173  *	stats) has completed successfully.  Adds the fme to global lists
174  *	and establishes its stats.
175  */
176 static struct fme *
177 fme_ready(struct fme *fmep)
178 {
179 	char nbuf[100];
180 
181 	Nfmep = NULL;	/* don't need to free this on module abort now */
182 
183 	if (EFMElist) {
184 		EFMElist->next = fmep;
185 		EFMElist = fmep;
186 	} else
187 		FMElist = EFMElist = fmep;
188 
189 	(void) sprintf(nbuf, "fme%d.Rcount", fmep->id);
190 	fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
191 	(void) sprintf(nbuf, "fme%d.Hcall", fmep->id);
192 	fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1);
193 	(void) sprintf(nbuf, "fme%d.Rcall", fmep->id);
194 	fmep->Rcallcount = stats_new_counter(nbuf,
195 	    "calls to requirements_test()", 1);
196 	(void) sprintf(nbuf, "fme%d.Ccall", fmep->id);
197 	fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1);
198 	(void) sprintf(nbuf, "fme%d.Ecall", fmep->id);
199 	fmep->Ecallcount =
200 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
201 	(void) sprintf(nbuf, "fme%d.Tcall", fmep->id);
202 	fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
203 	(void) sprintf(nbuf, "fme%d.Marrow", fmep->id);
204 	fmep->Marrowcount = stats_new_counter(nbuf,
205 	    "arrows marked by mark_arrows()", 1);
206 	(void) sprintf(nbuf, "fme%d.diags", fmep->id);
207 	fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
208 
209 	out(O_ALTFP|O_VERB2, "newfme: config snapshot contains...");
210 	config_print(O_ALTFP|O_VERB2, fmep->config);
211 
212 	return (fmep);
213 }
214 
215 extern void ipath_dummy_lut(struct arrow *);
216 extern struct lut *itree_create_dummy(const char *, const struct ipath *);
217 
218 /* ARGSUSED */
219 static void
220 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
221 {
222 	struct bubble *bp;
223 	struct arrowlist *ap;
224 
225 	for (bp = itree_next_bubble(ep, NULL); bp;
226 	    bp = itree_next_bubble(ep, bp)) {
227 		if (bp->t != B_FROM)
228 			continue;
229 		for (ap = itree_next_arrow(bp, NULL); ap;
230 		    ap = itree_next_arrow(bp, ap)) {
231 			ap->arrowp->pnode->u.arrow.needed = 1;
232 			ipath_dummy_lut(ap->arrowp);
233 		}
234 	}
235 }
236 
237 /* ARGSUSED */
238 static void
239 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
240 {
241 	struct bubble *bp;
242 	struct arrowlist *ap;
243 
244 	for (bp = itree_next_bubble(ep, NULL); bp;
245 	    bp = itree_next_bubble(ep, bp)) {
246 		if (bp->t != B_FROM)
247 			continue;
248 		for (ap = itree_next_arrow(bp, NULL); ap;
249 		    ap = itree_next_arrow(bp, ap))
250 			ap->arrowp->pnode->u.arrow.needed = 0;
251 	}
252 }
253 
254 static void globals_destructor(void *left, void *right, void *arg);
255 static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep);
256 
257 static void
258 prune_propagations(const char *e0class, const struct ipath *e0ipp)
259 {
260 	char nbuf[100];
261 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
262 	extern struct lut *Usednames;
263 
264 	Nfmep = alloc_fme();
265 	Nfmep->id = Nextid;
266 	Nfmep->state = FME_NOTHING;
267 	Nfmep->eventtree = itree_create_dummy(e0class, e0ipp);
268 	if ((Nfmep->e0 =
269 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
270 		out(O_ALTFP, "prune_propagations: e0 not in instance tree");
271 		itree_free(Nfmep->eventtree);
272 		FREE(Nfmep);
273 		Nfmep = NULL;
274 		return;
275 	}
276 	Nfmep->ecurrent = Nfmep->observations = Nfmep->e0;
277 	Nfmep->e0->count++;
278 
279 	(void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id);
280 	Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
281 	(void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id);
282 	Nfmep->Hcallcount =
283 	    stats_new_counter(nbuf, "calls to hypothesise()", 1);
284 	(void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id);
285 	Nfmep->Rcallcount = stats_new_counter(nbuf,
286 	    "calls to requirements_test()", 1);
287 	(void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id);
288 	Nfmep->Ccallcount =
289 	    stats_new_counter(nbuf, "calls to causes_test()", 1);
290 	(void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id);
291 	Nfmep->Ecallcount =
292 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
293 	(void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id);
294 	Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
295 	(void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id);
296 	Nfmep->Marrowcount = stats_new_counter(nbuf,
297 	    "arrows marked by mark_arrows()", 1);
298 	(void) sprintf(nbuf, "fme%d.diags", Nfmep->id);
299 	Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
300 
301 	Nfmep->peek = 1;
302 	lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep);
303 	lut_free(Usednames, NULL, NULL);
304 	Usednames = NULL;
305 	lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep);
306 	(void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay);
307 	itree_prune(Nfmep->eventtree);
308 	lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep);
309 
310 	stats_delete(Nfmep->Rcount);
311 	stats_delete(Nfmep->Hcallcount);
312 	stats_delete(Nfmep->Rcallcount);
313 	stats_delete(Nfmep->Ccallcount);
314 	stats_delete(Nfmep->Ecallcount);
315 	stats_delete(Nfmep->Tcallcount);
316 	stats_delete(Nfmep->Marrowcount);
317 	stats_delete(Nfmep->diags);
318 	itree_free(Nfmep->eventtree);
319 	lut_free(Nfmep->globals, globals_destructor, NULL);
320 	FREE(Nfmep);
321 }
322 
323 static struct fme *
324 newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl,
325 	fmd_case_t *fmcase)
326 {
327 	struct cfgdata *cfgdata;
328 	int init_size;
329 	extern int alloc_total();
330 
331 	init_size = alloc_total();
332 	out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size);
333 	if ((cfgdata = config_snapshot()) == NULL) {
334 		out(O_ALTFP, "newfme: NULL configuration");
335 		Undiag_reason = UD_NOCONF;
336 		return (NULL);
337 	}
338 	platform_save_config(hdl, fmcase);
339 	out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes",
340 	    alloc_total() - init_size);
341 
342 	Nfmep = alloc_fme();
343 
344 	Nfmep->id = Nextid++;
345 	Nfmep->config = cfgdata->cooked;
346 	config_free(cfgdata);
347 	Nfmep->posted_suspects = 0;
348 	Nfmep->uniqobs = 0;
349 	Nfmep->state = FME_NOTHING;
350 	Nfmep->pull = 0ULL;
351 	Nfmep->overflow = 0;
352 
353 	Nfmep->fmcase = fmcase;
354 	Nfmep->hdl = hdl;
355 
356 	if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) {
357 		out(O_ALTFP, "newfme: NULL instance tree");
358 		Undiag_reason = UD_INSTFAIL;
359 		structconfig_free(Nfmep->config);
360 		destroy_fme_bufs(Nfmep);
361 		FREE(Nfmep);
362 		Nfmep = NULL;
363 		return (NULL);
364 	}
365 
366 	itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);
367 
368 	if ((Nfmep->e0 =
369 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
370 		out(O_ALTFP, "newfme: e0 not in instance tree");
371 		Undiag_reason = UD_BADEVENTI;
372 		itree_free(Nfmep->eventtree);
373 		structconfig_free(Nfmep->config);
374 		destroy_fme_bufs(Nfmep);
375 		FREE(Nfmep);
376 		Nfmep = NULL;
377 		return (NULL);
378 	}
379 
380 	return (fme_ready(Nfmep));
381 }
382 
383 void
384 fme_fini(void)
385 {
386 	struct fme *sfp, *fp;
387 	struct case_list *ucasep, *nextcasep;
388 
389 	ucasep = Undiagablecaselist;
390 	while (ucasep != NULL) {
391 		nextcasep = ucasep->next;
392 		FREE(ucasep);
393 		ucasep = nextcasep;
394 	}
395 	Undiagablecaselist = NULL;
396 
397 	/* clean up closed fmes */
398 	fp = ClosedFMEs;
399 	while (fp != NULL) {
400 		sfp = fp->next;
401 		destroy_fme(fp);
402 		fp = sfp;
403 	}
404 	ClosedFMEs = NULL;
405 
406 	fp = FMElist;
407 	while (fp != NULL) {
408 		sfp = fp->next;
409 		destroy_fme(fp);
410 		fp = sfp;
411 	}
412 	FMElist = EFMElist = NULL;
413 
414 	/* if we were in the middle of creating an fme, free it now */
415 	if (Nfmep) {
416 		destroy_fme(Nfmep);
417 		Nfmep = NULL;
418 	}
419 }
420 
421 /*
422  * Allocated space for a buffer name.  20 bytes allows for
423  * a ridiculous 9,999,999 unique observations.
424  */
425 #define	OBBUFNMSZ 20
426 
427 /*
428  *  serialize_observation
429  *
430  *  Create a recoverable version of the current observation
431  *  (f->ecurrent).  We keep a serialized version of each unique
432  *  observation in order that we may resume correctly the fme in the
433  *  correct state if eft or fmd crashes and we're restarted.
434  */
435 static void
436 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
437 {
438 	size_t pkdlen;
439 	char tmpbuf[OBBUFNMSZ];
440 	char *pkd = NULL;
441 	char *estr;
442 
443 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
444 	estr = ipath2str(cls, ipp);
445 	fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
446 	fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
447 	    strlen(estr) + 1);
448 	FREE(estr);
449 
450 	if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
451 		(void) snprintf(tmpbuf,
452 		    OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
453 		if (nvlist_xpack(fp->ecurrent->nvp,
454 		    &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
455 			out(O_DIE|O_SYS, "pack of observed nvl failed");
456 		fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
457 		fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
458 		FREE(pkd);
459 	}
460 
461 	fp->uniqobs++;
462 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
463 	    sizeof (fp->uniqobs));
464 }
465 
466 /*
467  *  init_fme_bufs -- We keep several bits of state about an fme for
468  *	use if eft or fmd crashes and we're restarted.
469  */
470 static void
471 init_fme_bufs(struct fme *fp)
472 {
473 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
474 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
475 	    sizeof (fp->pull));
476 
477 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
478 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
479 	    sizeof (fp->id));
480 
481 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
482 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
483 	    sizeof (fp->uniqobs));
484 
485 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
486 	    sizeof (fp->posted_suspects));
487 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
488 	    (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
489 }
490 
491 static void
492 destroy_fme_bufs(struct fme *fp)
493 {
494 	char tmpbuf[OBBUFNMSZ];
495 	int o;
496 
497 	platform_restore_config(fp->hdl, fp->fmcase);
498 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
499 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
500 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
501 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
502 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
503 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);
504 
505 	for (o = 0; o < fp->uniqobs; o++) {
506 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
507 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
508 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
509 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
510 	}
511 }
512 
513 /*
514  * reconstitute_observations -- convert a case's serialized observations
515  *	back into struct events.  Returns zero if all observations are
516  *	successfully reconstituted.
517  */
518 static int
519 reconstitute_observations(struct fme *fmep)
520 {
521 	struct event *ep;
522 	struct node *epnamenp = NULL;
523 	size_t pkdlen;
524 	char *pkd = NULL;
525 	char *tmpbuf = alloca(OBBUFNMSZ);
526 	char *sepptr;
527 	char *estr;
528 	int ocnt;
529 	int elen;
530 
531 	for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
532 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
533 		elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
534 		if (elen == 0) {
535 			out(O_ALTFP,
536 			    "reconstitute_observation: no %s buffer found.",
537 			    tmpbuf);
538 			Undiag_reason = UD_MISSINGOBS;
539 			break;
540 		}
541 
542 		estr = MALLOC(elen);
543 		fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
544 		sepptr = strchr(estr, '@');
545 		if (sepptr == NULL) {
546 			out(O_ALTFP,
547 			    "reconstitute_observation: %s: "
548 			    "missing @ separator in %s.",
549 			    tmpbuf, estr);
550 			Undiag_reason = UD_MISSINGPATH;
551 			FREE(estr);
552 			break;
553 		}
554 
555 		*sepptr = '\0';
556 		if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
557 			out(O_ALTFP,
558 			    "reconstitute_observation: %s: "
559 			    "trouble converting path string \"%s\" "
560 			    "to internal representation.",
561 			    tmpbuf, sepptr + 1);
562 			Undiag_reason = UD_MISSINGPATH;
563 			FREE(estr);
564 			break;
565 		}
566 
567 		/* construct the event */
568 		ep = itree_lookup(fmep->eventtree,
569 		    stable(estr), ipath(epnamenp));
570 		if (ep == NULL) {
571 			out(O_ALTFP,
572 			    "reconstitute_observation: %s: "
573 			    "lookup of  \"%s\" in itree failed.",
574 			    tmpbuf, ipath2str(estr, ipath(epnamenp)));
575 			Undiag_reason = UD_BADOBS;
576 			tree_free(epnamenp);
577 			FREE(estr);
578 			break;
579 		}
580 		tree_free(epnamenp);
581 
582 		/*
583 		 * We may or may not have a saved nvlist for the observation
584 		 */
585 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
586 		pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
587 		if (pkdlen != 0) {
588 			pkd = MALLOC(pkdlen);
589 			fmd_buf_read(fmep->hdl,
590 			    fmep->fmcase, tmpbuf, pkd, pkdlen);
591 			ASSERT(ep->nvp == NULL);
592 			if (nvlist_xunpack(pkd,
593 			    pkdlen, &ep->nvp, &Eft_nv_hdl) != 0)
594 				out(O_DIE|O_SYS, "pack of observed nvl failed");
595 			FREE(pkd);
596 		}
597 
598 		if (ocnt == 0)
599 			fmep->e0 = ep;
600 
601 		FREE(estr);
602 		fmep->ecurrent = ep;
603 		ep->count++;
604 
605 		/* link it into list of observations seen */
606 		ep->observations = fmep->observations;
607 		fmep->observations = ep;
608 	}
609 
610 	if (ocnt == fmep->uniqobs) {
611 		(void) fme_ready(fmep);
612 		return (0);
613 	}
614 
615 	return (1);
616 }
617 
618 /*
619  * restart_fme -- called during eft initialization.  Reconstitutes
620  *	an in-progress fme.
621  */
622 void
623 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress)
624 {
625 	nvlist_t *defect;
626 	struct case_list *bad;
627 	struct fme *fmep;
628 	struct cfgdata *cfgdata;
629 	size_t rawsz;
630 	struct event *ep;
631 	char *tmpbuf = alloca(OBBUFNMSZ);
632 	char *sepptr;
633 	char *estr;
634 	int elen;
635 	struct node *epnamenp = NULL;
636 	int init_size;
637 	extern int alloc_total();
638 
639 	/*
640 	 * ignore solved or closed cases
641 	 */
642 	if (fmd_case_solved(hdl, inprogress) ||
643 	    fmd_case_closed(hdl, inprogress))
644 		return;
645 
646 	fmep = alloc_fme();
647 	fmep->fmcase = inprogress;
648 	fmep->hdl = hdl;
649 
650 	if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) {
651 		out(O_ALTFP, "restart_fme: no saved posted status");
652 		Undiag_reason = UD_MISSINGINFO;
653 		goto badcase;
654 	} else {
655 		fmd_buf_read(hdl, inprogress, WOBUF_POSTD,
656 		    (void *)&fmep->posted_suspects,
657 		    sizeof (fmep->posted_suspects));
658 	}
659 
660 	if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) {
661 		out(O_ALTFP, "restart_fme: no saved id");
662 		Undiag_reason = UD_MISSINGINFO;
663 		goto badcase;
664 	} else {
665 		fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id,
666 		    sizeof (fmep->id));
667 	}
668 	if (Nextid <= fmep->id)
669 		Nextid = fmep->id + 1;
670 
671 	out(O_ALTFP, "Replay FME %d", fmep->id);
672 
673 	if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) {
674 		out(O_ALTFP, "restart_fme: No config data");
675 		Undiag_reason = UD_MISSINGINFO;
676 		goto badcase;
677 	}
678 	fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz,
679 	    sizeof (size_t));
680 
681 	if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) {
682 		out(O_ALTFP, "restart_fme: No event zero");
683 		Undiag_reason = UD_MISSINGZERO;
684 		goto badcase;
685 	}
686 
687 	if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
688 		out(O_ALTFP, "restart_fme: no saved wait time");
689 		Undiag_reason = UD_MISSINGINFO;
690 		goto badcase;
691 	} else {
692 		fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
693 		    sizeof (fmep->pull));
694 	}
695 
696 	if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
697 		out(O_ALTFP, "restart_fme: no count of observations");
698 		Undiag_reason = UD_MISSINGINFO;
699 		goto badcase;
700 	} else {
701 		fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
702 		    (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
703 	}
704 
705 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed0");
706 	elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
707 	if (elen == 0) {
708 		out(O_ALTFP, "reconstitute_observation: no %s buffer found.",
709 		    tmpbuf);
710 		Undiag_reason = UD_MISSINGOBS;
711 		goto badcase;
712 	}
713 	estr = MALLOC(elen);
714 	fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
715 	sepptr = strchr(estr, '@');
716 	if (sepptr == NULL) {
717 		out(O_ALTFP, "reconstitute_observation: %s: "
718 		    "missing @ separator in %s.",
719 		    tmpbuf, estr);
720 		Undiag_reason = UD_MISSINGPATH;
721 		FREE(estr);
722 		goto badcase;
723 	}
724 	*sepptr = '\0';
725 	if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
726 		out(O_ALTFP, "reconstitute_observation: %s: "
727 		    "trouble converting path string \"%s\" "
728 		    "to internal representation.", tmpbuf, sepptr + 1);
729 		Undiag_reason = UD_MISSINGPATH;
730 		FREE(estr);
731 		goto badcase;
732 	}
733 	prune_propagations(stable(estr), ipath(epnamenp));
734 	tree_free(epnamenp);
735 	FREE(estr);
736 
737 	init_size = alloc_total();
738 	out(O_ALTFP|O_STAMP, "start config_restore using %d bytes", init_size);
739 	cfgdata = MALLOC(sizeof (struct cfgdata));
740 	cfgdata->cooked = NULL;
741 	cfgdata->devcache = NULL;
742 	cfgdata->devidcache = NULL;
743 	cfgdata->cpucache = NULL;
744 	cfgdata->raw_refcnt = 1;
745 
746 	if (rawsz > 0) {
747 		if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) {
748 			out(O_ALTFP, "restart_fme: Config data size mismatch");
749 			Undiag_reason = UD_CFGMISMATCH;
750 			goto badcase;
751 		}
752 		cfgdata->begin = MALLOC(rawsz);
753 		cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz;
754 		fmd_buf_read(hdl,
755 		    inprogress, WOBUF_CFG, cfgdata->begin, rawsz);
756 	} else {
757 		cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL;
758 	}
759 
760 	config_cook(cfgdata);
761 	fmep->config = cfgdata->cooked;
762 	config_free(cfgdata);
763 	out(O_ALTFP|O_STAMP, "config_restore added %d bytes",
764 	    alloc_total() - init_size);
765 
766 	if ((fmep->eventtree = itree_create(fmep->config)) == NULL) {
767 		/* case not properly saved or irretrievable */
768 		out(O_ALTFP, "restart_fme: NULL instance tree");
769 		Undiag_reason = UD_INSTFAIL;
770 		goto badcase;
771 	}
772 
773 	itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree);
774 
775 	if (reconstitute_observations(fmep) != 0)
776 		goto badcase;
777 
778 	out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id);
779 	for (ep = fmep->observations; ep; ep = ep->observations) {
780 		out(O_ALTFP|O_NONL, " ");
781 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
782 	}
783 	out(O_ALTFP, NULL);
784 
785 	Open_fme_count++;
786 
787 	/* give the diagnosis algorithm a shot at the new FME state */
788 	fme_eval(fmep, fmep->e0r);
789 	return;
790 
791 badcase:
792 	if (fmep->eventtree != NULL)
793 		itree_free(fmep->eventtree);
794 	if (fmep->config)
795 		structconfig_free(fmep->config);
796 	destroy_fme_bufs(fmep);
797 	FREE(fmep);
798 
799 	/*
800 	 * Since we're unable to restart the case, add it to the undiagable
801 	 * list and solve and close it as appropriate.
802 	 */
803 	bad = MALLOC(sizeof (struct case_list));
804 	bad->next = NULL;
805 
806 	if (Undiagablecaselist != NULL)
807 		bad->next = Undiagablecaselist;
808 	Undiagablecaselist = bad;
809 	bad->fmcase = inprogress;
810 
811 	out(O_ALTFP|O_NONL, "[case %s (unable to restart), ",
812 	    fmd_case_uuid(hdl, bad->fmcase));
813 
814 	if (fmd_case_solved(hdl, bad->fmcase)) {
815 		out(O_ALTFP|O_NONL, "already solved, ");
816 	} else {
817 		out(O_ALTFP|O_NONL, "solving, ");
818 		defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
819 		    NULL, NULL, NULL);
820 		if (Undiag_reason != NULL)
821 			(void) nvlist_add_string(defect,
822 			    UNDIAG_REASON, Undiag_reason);
823 		fmd_case_add_suspect(hdl, bad->fmcase, defect);
824 		fmd_case_solve(hdl, bad->fmcase);
825 	}
826 
827 	if (fmd_case_closed(hdl, bad->fmcase)) {
828 		out(O_ALTFP, "already closed ]");
829 	} else {
830 		out(O_ALTFP, "closing ]");
831 		fmd_case_close(hdl, bad->fmcase);
832 	}
833 }
834 
835 /*ARGSUSED*/
836 static void
837 globals_destructor(void *left, void *right, void *arg)
838 {
839 	struct evalue *evp = (struct evalue *)right;
840 	if (evp->t == NODEPTR)
841 		tree_free((struct node *)(uintptr_t)evp->v);
842 	evp->v = (uintptr_t)NULL;
843 	FREE(evp);
844 }
845 
846 void
847 destroy_fme(struct fme *f)
848 {
849 	stats_delete(f->Rcount);
850 	stats_delete(f->Hcallcount);
851 	stats_delete(f->Rcallcount);
852 	stats_delete(f->Ccallcount);
853 	stats_delete(f->Ecallcount);
854 	stats_delete(f->Tcallcount);
855 	stats_delete(f->Marrowcount);
856 	stats_delete(f->diags);
857 
858 	if (f->eventtree != NULL)
859 		itree_free(f->eventtree);
860 	if (f->config)
861 		structconfig_free(f->config);
862 	lut_free(f->globals, globals_destructor, NULL);
863 	FREE(f);
864 }
865 
866 static const char *
867 fme_state2str(enum fme_state s)
868 {
869 	switch (s) {
870 	case FME_NOTHING:	return ("NOTHING");
871 	case FME_WAIT:		return ("WAIT");
872 	case FME_CREDIBLE:	return ("CREDIBLE");
873 	case FME_DISPROVED:	return ("DISPROVED");
874 	case FME_DEFERRED:	return ("DEFERRED");
875 	default:		return ("UNKNOWN");
876 	}
877 }
878 
879 static int
880 is_problem(enum nametype t)
881 {
882 	return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
883 }
884 
885 static int
886 is_fault(enum nametype t)
887 {
888 	return (t == N_FAULT);
889 }
890 
891 static int
892 is_defect(enum nametype t)
893 {
894 	return (t == N_DEFECT);
895 }
896 
897 static int
898 is_upset(enum nametype t)
899 {
900 	return (t == N_UPSET);
901 }
902 
903 static void
904 fme_print(int flags, struct fme *fmep)
905 {
906 	struct event *ep;
907 
908 	out(flags, "Fault Management Exercise %d", fmep->id);
909 	out(flags, "\t       State: %s", fme_state2str(fmep->state));
910 	out(flags|O_NONL, "\t  Start time: ");
911 	ptree_timeval(flags|O_NONL, &fmep->ull);
912 	out(flags, NULL);
913 	if (fmep->wull) {
914 		out(flags|O_NONL, "\t   Wait time: ");
915 		ptree_timeval(flags|O_NONL, &fmep->wull);
916 		out(flags, NULL);
917 	}
918 	out(flags|O_NONL, "\t          E0: ");
919 	if (fmep->e0)
920 		itree_pevent_brief(flags|O_NONL, fmep->e0);
921 	else
922 		out(flags|O_NONL, "NULL");
923 	out(flags, NULL);
924 	out(flags|O_NONL, "\tObservations:");
925 	for (ep = fmep->observations; ep; ep = ep->observations) {
926 		out(flags|O_NONL, " ");
927 		itree_pevent_brief(flags|O_NONL, ep);
928 	}
929 	out(flags, NULL);
930 	out(flags|O_NONL, "\tSuspect list:");
931 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
932 		out(flags|O_NONL, " ");
933 		itree_pevent_brief(flags|O_NONL, ep);
934 	}
935 	out(flags, NULL);
936 	if (fmep->eventtree != NULL) {
937 		out(flags|O_VERB2, "\t        Tree:");
938 		itree_ptree(flags|O_VERB2, fmep->eventtree);
939 	}
940 }
941 
942 static struct node *
943 pathstring2epnamenp(char *path)
944 {
945 	char *sep = "/";
946 	struct node *ret;
947 	char *ptr;
948 
949 	if ((ptr = strtok(path, sep)) == NULL)
950 		out(O_DIE, "pathstring2epnamenp: invalid empty class");
951 
952 	ret = tree_iname(stable(ptr), NULL, 0);
953 
954 	while ((ptr = strtok(NULL, sep)) != NULL)
955 		ret = tree_name_append(ret,
956 		    tree_iname(stable(ptr), NULL, 0));
957 
958 	return (ret);
959 }
960 
961 /*
962  * for a given upset sp, increment the corresponding SERD engine.  if the
963  * SERD engine trips, return the ename and ipp of the resulting ereport.
964  * returns true if engine tripped and *enamep and *ippp were filled in.
965  */
966 static int
967 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep,
968     fmd_case_t *fmcase, struct event *sp, const char **enamep,
969     const struct ipath **ippp)
970 {
971 	struct node *serdinst;
972 	char *serdname;
973 	char *serdresource;
974 	struct node *nid;
975 	struct serd_entry *newentp;
976 	int i, serdn = -1, serdincrement = 1, len = 0;
977 	char *serdsuffix = NULL, *serdt = NULL, *ptr;
978 	struct evalue *ep;
979 
980 	ASSERT(sp->t == N_UPSET);
981 	ASSERT(ffep != NULL);
982 
983 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
984 	    (void *)"n", (lut_cmp)strcmp)) != NULL) {
985 		ASSERT(ep->t == UINT64);
986 		serdn = (int)ep->v;
987 	}
988 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
989 	    (void *)"t", (lut_cmp)strcmp)) != NULL) {
990 		ASSERT(ep->t == STRING);
991 		serdt = (char *)(uintptr_t)ep->v;
992 	}
993 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
994 	    (void *)"suffix", (lut_cmp)strcmp)) != NULL) {
995 		ASSERT(ep->t == STRING);
996 		serdsuffix = (char *)(uintptr_t)ep->v;
997 	}
998 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
999 	    (void *)"increment", (lut_cmp)strcmp)) != NULL) {
1000 		ASSERT(ep->t == UINT64);
1001 		serdincrement = (int)ep->v;
1002 	}
1003 
1004 	/*
1005 	 * obtain instanced SERD engine from the upset sp.  from this
1006 	 * derive serdname, the string used to identify the SERD engine.
1007 	 */
1008 	serdinst = eventprop_lookup(sp, L_engine);
1009 
1010 	if (serdinst == NULL)
1011 		return (-1);
1012 
1013 	serdname = ipath2str(serdinst->u.stmt.np->u.event.ename->u.name.s,
1014 	    NULL);
1015 	serdresource = ipath2str(NULL,
1016 	    ipath(serdinst->u.stmt.np->u.event.epname));
1017 
1018 	len = strlen(serdname) + strlen(serdresource) + 2;
1019 	if (serdsuffix != NULL)
1020 		len += strlen(serdsuffix);
1021 
1022 	ptr = MALLOC(len);
1023 	if (serdsuffix != NULL) {
1024 		(void) snprintf(ptr, len, "%s%s@%s", serdname, serdsuffix,
1025 		    serdresource);
1026 	} else {
1027 		(void) snprintf(ptr, len, "%s@%s", serdname, serdresource);
1028 	}
1029 	FREE(serdname);
1030 	FREE(serdresource);
1031 	serdname = ptr;
1032 
1033 	/* handle serd engine "id" property, if there is one */
1034 	if ((nid =
1035 	    lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) {
1036 		struct evalue *gval;
1037 		char suffixbuf[200];
1038 		char *suffix;
1039 		char *nserdname;
1040 		size_t nname;
1041 
1042 		out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname);
1043 		ptree_name_iter(O_ALTFP|O_NONL, nid);
1044 
1045 		ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t));
1046 
1047 		if ((gval = lut_lookup(fmep->globals,
1048 		    (void *)nid->u.globid.s, NULL)) == NULL) {
1049 			out(O_ALTFP, " undefined");
1050 		} else if (gval->t == UINT64) {
1051 			out(O_ALTFP, " %llu", gval->v);
1052 			(void) sprintf(suffixbuf, "%llu", gval->v);
1053 			suffix = suffixbuf;
1054 		} else {
1055 			out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v);
1056 			suffix = (char *)(uintptr_t)gval->v;
1057 		}
1058 
1059 		nname = strlen(serdname) + strlen(suffix) + 2;
1060 		nserdname = MALLOC(nname);
1061 		(void) snprintf(nserdname, nname, "%s:%s", serdname, suffix);
1062 		FREE(serdname);
1063 		serdname = nserdname;
1064 	}
1065 
1066 	/*
1067 	 * if the engine is empty, and we have an override for n/t then
1068 	 * destroy and recreate it.
1069 	 */
1070 	if ((serdn != -1 || serdt != NULL) && fmd_serd_exists(hdl, serdname) &&
1071 	    fmd_serd_empty(hdl, serdname))
1072 		fmd_serd_destroy(hdl, serdname);
1073 
1074 	if (!fmd_serd_exists(hdl, serdname)) {
1075 		struct node *nN, *nT;
1076 		const char *s;
1077 		struct node *nodep;
1078 		struct config *cp;
1079 		char *path;
1080 		uint_t nval;
1081 		hrtime_t tval;
1082 		const char *name;
1083 		char *serd_name;
1084 		int i;
1085 		char *ptr;
1086 		int got_n_override = 0, got_t_override = 0;
1087 
1088 		/* no SERD engine yet, so create it */
1089 		nodep = serdinst->u.stmt.np->u.event.epname;
1090 		name = serdinst->u.stmt.np->u.event.ename->u.name.s;
1091 		path = ipath2str(NULL, ipath(nodep));
1092 		cp = config_lookup(fmep->config, path, 0);
1093 		FREE((void *)path);
1094 
1095 		/*
1096 		 * We allow serd paramaters to be overridden, either from
1097 		 * eft.conf file values (if Serd_Override is set) or from
1098 		 * driver properties (for "serd.io.device" engines).
1099 		 */
1100 		if (Serd_Override != NULL) {
1101 			char *save_ptr, *ptr1, *ptr2, *ptr3;
1102 			ptr3 = save_ptr = STRDUP(Serd_Override);
1103 			while (*ptr3 != '\0') {
1104 				ptr1 = strchr(ptr3, ',');
1105 				*ptr1 = '\0';
1106 				if (strcmp(ptr3, name) == 0) {
1107 					ptr2 =  strchr(ptr1 + 1, ',');
1108 					*ptr2 = '\0';
1109 					nval = atoi(ptr1 + 1);
1110 					out(O_ALTFP, "serd override %s_n %d",
1111 					    name, nval);
1112 					ptr3 =  strchr(ptr2 + 1, ' ');
1113 					if (ptr3)
1114 						*ptr3 = '\0';
1115 					ptr = STRDUP(ptr2 + 1);
1116 					out(O_ALTFP, "serd override %s_t %s",
1117 					    name, ptr);
1118 					got_n_override = 1;
1119 					got_t_override = 1;
1120 					break;
1121 				} else {
1122 					ptr2 =  strchr(ptr1 + 1, ',');
1123 					ptr3 =  strchr(ptr2 + 1, ' ');
1124 					if (ptr3 == NULL)
1125 						break;
1126 				}
1127 				ptr3++;
1128 			}
1129 			FREE(save_ptr);
1130 		}
1131 
1132 		if (cp && got_n_override == 0) {
1133 			/*
1134 			 * convert serd engine name into property name
1135 			 */
1136 			serd_name = MALLOC(strlen(name) + 3);
1137 			for (i = 0; i < strlen(name); i++) {
1138 				if (name[i] == '.')
1139 					serd_name[i] = '_';
1140 				else
1141 					serd_name[i] = name[i];
1142 			}
1143 			serd_name[i++] = '_';
1144 			serd_name[i++] = 'n';
1145 			serd_name[i] = '\0';
1146 			if (s = config_getprop(cp, serd_name)) {
1147 				nval = atoi(s);
1148 				out(O_ALTFP, "serd override %s_n %s", name, s);
1149 				got_n_override = 1;
1150 			}
1151 			serd_name[i - 1] = 't';
1152 			if (s = config_getprop(cp, serd_name)) {
1153 				ptr = STRDUP(s);
1154 				out(O_ALTFP, "serd override %s_t %s", name, s);
1155 				got_t_override = 1;
1156 			}
1157 			FREE(serd_name);
1158 		}
1159 
1160 		if (serdn != -1 && got_n_override == 0) {
1161 			nval = serdn;
1162 			out(O_ALTFP, "serd override %s_n %d", name, serdn);
1163 			got_n_override = 1;
1164 		}
1165 		if (serdt != NULL && got_t_override == 0) {
1166 			ptr = STRDUP(serdt);
1167 			out(O_ALTFP, "serd override %s_t %s", name, serdt);
1168 			got_t_override = 1;
1169 		}
1170 
1171 		if (!got_n_override) {
1172 			nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N,
1173 			    NULL);
1174 			ASSERT(nN->t == T_NUM);
1175 			nval = (uint_t)nN->u.ull;
1176 		}
1177 		if (!got_t_override) {
1178 			nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T,
1179 			    NULL);
1180 			ASSERT(nT->t == T_TIMEVAL);
1181 			tval = (hrtime_t)nT->u.ull;
1182 		} else {
1183 			const unsigned long long *ullp;
1184 			const char *suffix;
1185 			int len;
1186 
1187 			len = strspn(ptr, "0123456789");
1188 			suffix = stable(&ptr[len]);
1189 			ullp = (unsigned long long *)lut_lookup(Timesuffixlut,
1190 			    (void *)suffix, NULL);
1191 			ptr[len] = '\0';
1192 			tval = strtoull(ptr, NULL, 0) * (ullp ? *ullp : 1ll);
1193 			FREE(ptr);
1194 		}
1195 		fmd_serd_create(hdl, serdname, nval, tval);
1196 	}
1197 
1198 	newentp = MALLOC(sizeof (*newentp));
1199 	newentp->ename = stable(serdinst->u.stmt.np->u.event.ename->u.name.s);
1200 	newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname);
1201 	newentp->hdl = hdl;
1202 	if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) {
1203 		SerdEngines = lut_add(SerdEngines, (void *)newentp,
1204 		    (void *)newentp, (lut_cmp)serd_cmp);
1205 		Serd_need_save = 1;
1206 		serd_save();
1207 	} else {
1208 		FREE(newentp);
1209 	}
1210 
1211 
1212 	/*
1213 	 * increment SERD engine.  if engine fires, reset serd
1214 	 * engine and return trip_strcode if required.
1215 	 */
1216 	for (i = 0; i < serdincrement; i++) {
1217 		if (fmd_serd_record(hdl, serdname, ffep)) {
1218 			fmd_case_add_serd(hdl, fmcase, serdname);
1219 			fmd_serd_reset(hdl, serdname);
1220 
1221 			if (ippp) {
1222 				struct node *tripinst =
1223 				    lut_lookup(serdinst->u.stmt.lutp,
1224 				    (void *)L_trip, NULL);
1225 				ASSERT(tripinst != NULL);
1226 				*enamep = tripinst->u.event.ename->u.name.s;
1227 				*ippp = ipath(tripinst->u.event.epname);
1228 				out(O_ALTFP|O_NONL,
1229 				    "[engine fired: %s, sending: ", serdname);
1230 				ipath_print(O_ALTFP|O_NONL, *enamep, *ippp);
1231 				out(O_ALTFP, "]");
1232 			} else {
1233 				out(O_ALTFP, "[engine fired: %s, no trip]",
1234 				    serdname);
1235 			}
1236 			FREE(serdname);
1237 			return (1);
1238 		}
1239 	}
1240 
1241 	FREE(serdname);
1242 	return (0);
1243 }
1244 
1245 /*
1246  * search a suspect list for upsets.  feed each upset to serd_eval() and
1247  * build up tripped[], an array of ereports produced by the firing of
1248  * any SERD engines.  then feed each ereport back into
1249  * fme_receive_report().
1250  *
1251  * returns ntrip, the number of these ereports produced.
1252  */
1253 static int
1254 upsets_eval(struct fme *fmep, fmd_event_t *ffep)
1255 {
1256 	/* we build an array of tripped ereports that we send ourselves */
1257 	struct {
1258 		const char *ename;
1259 		const struct ipath *ipp;
1260 	} *tripped;
1261 	struct event *sp;
1262 	int ntrip, nupset, i;
1263 
1264 	/*
1265 	 * count the number of upsets to determine the upper limit on
1266 	 * expected trip ereport strings.  remember that one upset can
1267 	 * lead to at most one ereport.
1268 	 */
1269 	nupset = 0;
1270 	for (sp = fmep->suspects; sp; sp = sp->suspects) {
1271 		if (sp->t == N_UPSET)
1272 			nupset++;
1273 	}
1274 
1275 	if (nupset == 0)
1276 		return (0);
1277 
1278 	/*
1279 	 * get to this point if we have upsets and expect some trip
1280 	 * ereports
1281 	 */
1282 	tripped = alloca(sizeof (*tripped) * nupset);
1283 	bzero((void *)tripped, sizeof (*tripped) * nupset);
1284 
1285 	ntrip = 0;
1286 	for (sp = fmep->suspects; sp; sp = sp->suspects)
1287 		if (sp->t == N_UPSET &&
1288 		    serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp,
1289 		    &tripped[ntrip].ename, &tripped[ntrip].ipp) == 1)
1290 			ntrip++;
1291 
1292 	for (i = 0; i < ntrip; i++) {
1293 		struct event *ep, *nep;
1294 		struct fme *nfmep;
1295 		fmd_case_t *fmcase;
1296 		const struct ipath *ipp;
1297 		const char *eventstring;
1298 		int prev_verbose;
1299 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1300 		enum fme_state state;
1301 
1302 		/*
1303 		 * First try and evaluate a case with the trip ereport plus
1304 		 * all the other ereports that cause the trip. If that fails
1305 		 * to evaluate then try again with just this ereport on its own.
1306 		 */
1307 		out(O_ALTFP|O_NONL, "fme_receive_report_serd: ");
1308 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1309 		out(O_ALTFP|O_STAMP, NULL);
1310 		ep = fmep->e0;
1311 		eventstring = ep->enode->u.event.ename->u.name.s;
1312 		ipp = ep->ipp;
1313 		prune_propagations(eventstring, ipp);
1314 
1315 		/*
1316 		 * create a duplicate fme and case
1317 		 */
1318 		fmcase = fmd_case_open(fmep->hdl, NULL);
1319 		out(O_ALTFP|O_NONL, "duplicate fme for event [");
1320 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1321 		out(O_ALTFP, " ]");
1322 		if ((nfmep = newfme(eventstring, ipp, fmep->hdl,
1323 		    fmcase)) == NULL) {
1324 			out(O_ALTFP|O_NONL, "[");
1325 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1326 			out(O_ALTFP, " CANNOT DIAGNOSE]");
1327 			publish_undiagnosable(fmep->hdl, ffep, fmcase);
1328 			continue;
1329 		}
1330 		Open_fme_count++;
1331 		nfmep->pull = fmep->pull;
1332 		init_fme_bufs(nfmep);
1333 		out(O_ALTFP|O_NONL, "[");
1334 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1335 		out(O_ALTFP, " created FME%d, case %s]", nfmep->id,
1336 		    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
1337 		if (ffep) {
1338 			fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep);
1339 			fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep);
1340 			nfmep->e0r = ffep;
1341 		}
1342 
1343 		/*
1344 		 * add the original ereports
1345 		 */
1346 		for (ep = fmep->observations; ep; ep = ep->observations) {
1347 			eventstring = ep->enode->u.event.ename->u.name.s;
1348 			ipp = ep->ipp;
1349 			out(O_ALTFP|O_NONL, "adding event [");
1350 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1351 			out(O_ALTFP, " ]");
1352 			nep = itree_lookup(nfmep->eventtree, eventstring, ipp);
1353 			if (nep->count++ == 0) {
1354 				nep->observations = nfmep->observations;
1355 				nfmep->observations = nep;
1356 				serialize_observation(nfmep, eventstring, ipp);
1357 				nep->nvp = evnv_dupnvl(ep->nvp);
1358 			}
1359 			if (ep->ffep && ep->ffep != ffep)
1360 				fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase,
1361 				    ep->ffep);
1362 			stats_counter_bump(nfmep->Rcount);
1363 		}
1364 
1365 		/*
1366 		 * add the serd trigger ereport
1367 		 */
1368 		if ((ep = itree_lookup(nfmep->eventtree, tripped[i].ename,
1369 		    tripped[i].ipp)) == NULL) {
1370 			/*
1371 			 * The trigger ereport is not in the instance tree. It
1372 			 * was presumably removed by prune_propagations() as
1373 			 * this combination of events is not present in the
1374 			 * rules.
1375 			 */
1376 			out(O_ALTFP, "upsets_eval: e0 not in instance tree");
1377 			Undiag_reason = UD_BADEVENTI;
1378 			goto retry_lone_ereport;
1379 		}
1380 		out(O_ALTFP|O_NONL, "adding event [");
1381 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1382 		out(O_ALTFP, " ]");
1383 		nfmep->ecurrent = ep;
1384 		ep->nvp = NULL;
1385 		ep->count = 1;
1386 		ep->observations = nfmep->observations;
1387 		nfmep->observations = ep;
1388 
1389 		/*
1390 		 * just peek first.
1391 		 */
1392 		nfmep->peek = 1;
1393 		prev_verbose = Verbose;
1394 		if (Debug == 0)
1395 			Verbose = 0;
1396 		lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep);
1397 		state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay);
1398 		nfmep->peek = 0;
1399 		Verbose = prev_verbose;
1400 		if (state == FME_DISPROVED) {
1401 			out(O_ALTFP, "upsets_eval: hypothesis disproved");
1402 			Undiag_reason = UD_UNSOLVD;
1403 retry_lone_ereport:
1404 			/*
1405 			 * However the trigger ereport on its own might be
1406 			 * diagnosable, so check for that. Undo the new fme
1407 			 * and case we just created and call fme_receive_report.
1408 			 */
1409 			out(O_ALTFP|O_NONL, "[");
1410 			ipath_print(O_ALTFP|O_NONL, tripped[i].ename,
1411 			    tripped[i].ipp);
1412 			out(O_ALTFP, " retrying with just trigger ereport]");
1413 			itree_free(nfmep->eventtree);
1414 			nfmep->eventtree = NULL;
1415 			structconfig_free(nfmep->config);
1416 			nfmep->config = NULL;
1417 			destroy_fme_bufs(nfmep);
1418 			fmd_case_close(nfmep->hdl, nfmep->fmcase);
1419 			fme_receive_report(fmep->hdl, ffep,
1420 			    tripped[i].ename, tripped[i].ipp, NULL);
1421 			continue;
1422 		}
1423 
1424 		/*
1425 		 * and evaluate
1426 		 */
1427 		serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp);
1428 		fme_eval(nfmep, ffep);
1429 	}
1430 
1431 	return (ntrip);
1432 }
1433 
1434 /*
1435  * fme_receive_external_report -- call when an external ereport comes in
1436  *
1437  * this routine just converts the relevant information from the ereport
1438  * into a format used internally and passes it on to fme_receive_report().
1439  */
1440 void
1441 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1442     const char *class)
1443 {
1444 	struct node		*epnamenp;
1445 	fmd_case_t		*fmcase;
1446 	const struct ipath	*ipp;
1447 
1448 	class = stable(class);
1449 
1450 	/* Get the component path from the ereport */
1451 	epnamenp = platform_getpath(nvl);
1452 
1453 	/* See if we ended up without a path. */
1454 	if (epnamenp == NULL) {
1455 		/* See if class permits silent discard on unknown component. */
1456 		if (lut_lookup(Ereportenames_discard, (void *)class, NULL)) {
1457 			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
1458 			    "to component path, but silent discard allowed.",
1459 			    class);
1460 		} else {
1461 			/*
1462 			 * XFILE: Failure to find a component is bad unless
1463 			 * 'discard_if_config_unknown=1' was specified in the
1464 			 * ereport definition. Indicate undiagnosable.
1465 			 */
1466 			out(O_ALTFP, "XFILE: Unable to map \"%s\" ereport "
1467 			    "to component path.", class);
1468 			Undiag_reason = UD_NOPATH;
1469 			fmcase = fmd_case_open(hdl, NULL);
1470 			publish_undiagnosable(hdl, ffep, fmcase);
1471 		}
1472 		return;
1473 	}
1474 
1475 	ipp = ipath(epnamenp);
1476 	tree_free(epnamenp);
1477 	fme_receive_report(hdl, ffep, class, ipp, nvl);
1478 }
1479 
1480 /*ARGSUSED*/
1481 void
1482 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1483     const char *eventstring)
1484 {
1485 	char *uuid;
1486 	nvlist_t **nva;
1487 	uint_t nvc;
1488 	const struct ipath *ipp;
1489 
1490 	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
1491 	    nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
1492 	    &nva, &nvc) != 0) {
1493 		out(O_ALTFP, "No uuid or fault list for list.repaired event");
1494 		return;
1495 	}
1496 
1497 	out(O_ALTFP, "Processing list.repaired from case %s", uuid);
1498 
1499 	while (nvc-- != 0) {
1500 		/*
1501 		 * Reset any istat or serd engine associated with this path.
1502 		 */
1503 		char *path;
1504 
1505 		if ((ipp = platform_fault2ipath(*nva++)) == NULL)
1506 			continue;
1507 
1508 		path = ipath2str(NULL, ipp);
1509 		out(O_ALTFP, "fme_receive_repair_list: resetting state for %s",
1510 		    path);
1511 		FREE(path);
1512 
1513 		lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp);
1514 		istat_save();
1515 
1516 		lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp);
1517 		serd_save();
1518 	}
1519 }
1520 
1521 /*ARGSUSED*/
1522 void
1523 fme_receive_topology_change(void)
1524 {
1525 	lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL);
1526 	istat_save();
1527 
1528 	lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL);
1529 	serd_save();
1530 }
1531 
1532 static int mark_arrows(struct fme *fmep, struct event *ep, int mark,
1533     unsigned long long at_latest_by, unsigned long long *pdelay, int keep);
1534 
1535 /* ARGSUSED */
1536 static void
1537 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
1538 {
1539 	struct bubble *bp;
1540 	struct arrowlist *ap;
1541 
1542 	ep->cached_state = 0;
1543 	ep->keep_in_tree = 0;
1544 	for (bp = itree_next_bubble(ep, NULL); bp;
1545 	    bp = itree_next_bubble(ep, bp)) {
1546 		if (bp->t != B_FROM)
1547 			continue;
1548 		bp->mark = 0;
1549 		for (ap = itree_next_arrow(bp, NULL); ap;
1550 		    ap = itree_next_arrow(bp, ap))
1551 			ap->arrowp->mark = 0;
1552 	}
1553 }
1554 
1555 static void
1556 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
1557     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
1558 {
1559 	struct event *ep;
1560 	struct fme *fmep = NULL;
1561 	struct fme *ofmep = NULL;
1562 	struct fme *cfmep, *svfmep;
1563 	int matched = 0;
1564 	nvlist_t *defect;
1565 	fmd_case_t *fmcase;
1566 
1567 	out(O_ALTFP|O_NONL, "fme_receive_report: ");
1568 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1569 	out(O_ALTFP|O_STAMP, NULL);
1570 
1571 	/* decide which FME it goes to */
1572 	for (fmep = FMElist; fmep; fmep = fmep->next) {
1573 		int prev_verbose;
1574 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1575 		enum fme_state state;
1576 		nvlist_t *pre_peek_nvp = NULL;
1577 
1578 		if (fmep->overflow) {
1579 			if (!(fmd_case_closed(fmep->hdl, fmep->fmcase)))
1580 				ofmep = fmep;
1581 
1582 			continue;
1583 		}
1584 
1585 		/*
1586 		 * ignore solved or closed cases
1587 		 */
1588 		if (fmep->posted_suspects ||
1589 		    fmd_case_solved(fmep->hdl, fmep->fmcase) ||
1590 		    fmd_case_closed(fmep->hdl, fmep->fmcase))
1591 			continue;
1592 
1593 		/* look up event in event tree for this FME */
1594 		if ((ep = itree_lookup(fmep->eventtree,
1595 		    eventstring, ipp)) == NULL)
1596 			continue;
1597 
1598 		/* note observation */
1599 		fmep->ecurrent = ep;
1600 		if (ep->count++ == 0) {
1601 			/* link it into list of observations seen */
1602 			ep->observations = fmep->observations;
1603 			fmep->observations = ep;
1604 			ep->nvp = evnv_dupnvl(nvl);
1605 		} else {
1606 			/* use new payload values for peek */
1607 			pre_peek_nvp = ep->nvp;
1608 			ep->nvp = evnv_dupnvl(nvl);
1609 		}
1610 
1611 		/* tell hypothesise() not to mess with suspect list */
1612 		fmep->peek = 1;
1613 
1614 		/* don't want this to be verbose (unless Debug is set) */
1615 		prev_verbose = Verbose;
1616 		if (Debug == 0)
1617 			Verbose = 0;
1618 
1619 		lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
1620 		state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
1621 
1622 		fmep->peek = 0;
1623 
1624 		/* put verbose flag back */
1625 		Verbose = prev_verbose;
1626 
1627 		if (state != FME_DISPROVED) {
1628 			/* found an FME that explains the ereport */
1629 			matched++;
1630 			out(O_ALTFP|O_NONL, "[");
1631 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1632 			out(O_ALTFP, " explained by FME%d]", fmep->id);
1633 
1634 			if (pre_peek_nvp)
1635 				nvlist_free(pre_peek_nvp);
1636 
1637 			if (ep->count == 1)
1638 				serialize_observation(fmep, eventstring, ipp);
1639 
1640 			if (ffep) {
1641 				fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1642 				ep->ffep = ffep;
1643 			}
1644 
1645 			stats_counter_bump(fmep->Rcount);
1646 
1647 			/* re-eval FME */
1648 			fme_eval(fmep, ffep);
1649 		} else {
1650 
1651 			/* not a match, undo noting of observation */
1652 			fmep->ecurrent = NULL;
1653 			if (--ep->count == 0) {
1654 				/* unlink it from observations */
1655 				fmep->observations = ep->observations;
1656 				ep->observations = NULL;
1657 				nvlist_free(ep->nvp);
1658 				ep->nvp = NULL;
1659 			} else {
1660 				nvlist_free(ep->nvp);
1661 				ep->nvp = pre_peek_nvp;
1662 			}
1663 		}
1664 	}
1665 
1666 	if (matched)
1667 		return;	/* explained by at least one existing FME */
1668 
1669 	/* clean up closed fmes */
1670 	cfmep = ClosedFMEs;
1671 	while (cfmep != NULL) {
1672 		svfmep = cfmep->next;
1673 		destroy_fme(cfmep);
1674 		cfmep = svfmep;
1675 	}
1676 	ClosedFMEs = NULL;
1677 	prune_propagations(eventstring, ipp);
1678 
1679 	if (ofmep) {
1680 		out(O_ALTFP|O_NONL, "[");
1681 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1682 		out(O_ALTFP, " ADDING TO OVERFLOW FME]");
1683 		if (ffep)
1684 			fmd_case_add_ereport(hdl, ofmep->fmcase, ffep);
1685 
1686 		return;
1687 
1688 	} else if (Max_fme && (Open_fme_count >= Max_fme)) {
1689 		out(O_ALTFP|O_NONL, "[");
1690 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1691 		out(O_ALTFP, " MAX OPEN FME REACHED]");
1692 
1693 		fmcase = fmd_case_open(hdl, NULL);
1694 
1695 		/* Create overflow fme */
1696 		if ((fmep = newfme(eventstring, ipp, hdl, fmcase)) == NULL) {
1697 			out(O_ALTFP|O_NONL, "[");
1698 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1699 			out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]");
1700 			publish_undiagnosable(hdl, ffep, fmcase);
1701 			return;
1702 		}
1703 
1704 		Open_fme_count++;
1705 
1706 		init_fme_bufs(fmep);
1707 		fmep->overflow = B_TRUE;
1708 
1709 		if (ffep)
1710 			fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1711 
1712 		defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
1713 		    NULL, NULL, NULL);
1714 		(void) nvlist_add_string(defect, UNDIAG_REASON, UD_MAXFME);
1715 		fmd_case_add_suspect(hdl, fmep->fmcase, defect);
1716 		fmd_case_solve(hdl, fmep->fmcase);
1717 		return;
1718 	}
1719 
1720 	/* open a case */
1721 	fmcase = fmd_case_open(hdl, NULL);
1722 
1723 	/* start a new FME */
1724 	if ((fmep = newfme(eventstring, ipp, hdl, fmcase)) == NULL) {
1725 		out(O_ALTFP|O_NONL, "[");
1726 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1727 		out(O_ALTFP, " CANNOT DIAGNOSE]");
1728 		publish_undiagnosable(hdl, ffep, fmcase);
1729 		return;
1730 	}
1731 
1732 	Open_fme_count++;
1733 
1734 	init_fme_bufs(fmep);
1735 
1736 	out(O_ALTFP|O_NONL, "[");
1737 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1738 	out(O_ALTFP, " created FME%d, case %s]", fmep->id,
1739 	    fmd_case_uuid(hdl, fmep->fmcase));
1740 
1741 	ep = fmep->e0;
1742 	ASSERT(ep != NULL);
1743 
1744 	/* note observation */
1745 	fmep->ecurrent = ep;
1746 	if (ep->count++ == 0) {
1747 		/* link it into list of observations seen */
1748 		ep->observations = fmep->observations;
1749 		fmep->observations = ep;
1750 		ep->nvp = evnv_dupnvl(nvl);
1751 		serialize_observation(fmep, eventstring, ipp);
1752 	} else {
1753 		/* new payload overrides any previous */
1754 		nvlist_free(ep->nvp);
1755 		ep->nvp = evnv_dupnvl(nvl);
1756 	}
1757 
1758 	stats_counter_bump(fmep->Rcount);
1759 
1760 	if (ffep) {
1761 		fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1762 		fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
1763 		fmep->e0r = ffep;
1764 		ep->ffep = ffep;
1765 	}
1766 
1767 	/* give the diagnosis algorithm a shot at the new FME state */
1768 	fme_eval(fmep, ffep);
1769 }
1770 
1771 void
1772 fme_status(int flags)
1773 {
1774 	struct fme *fmep;
1775 
1776 	if (FMElist == NULL) {
1777 		out(flags, "No fault management exercises underway.");
1778 		return;
1779 	}
1780 
1781 	for (fmep = FMElist; fmep; fmep = fmep->next)
1782 		fme_print(flags, fmep);
1783 }
1784 
1785 /*
1786  * "indent" routines used mostly for nicely formatted debug output, but also
1787  * for sanity checking for infinite recursion bugs.
1788  */
1789 
1790 #define	MAX_INDENT 1024
1791 static const char *indent_s[MAX_INDENT];
1792 static int current_indent;
1793 
1794 static void
1795 indent_push(const char *s)
1796 {
1797 	if (current_indent < MAX_INDENT)
1798 		indent_s[current_indent++] = s;
1799 	else
1800 		out(O_DIE, "unexpected recursion depth (%d)", current_indent);
1801 }
1802 
1803 static void
1804 indent_set(const char *s)
1805 {
1806 	current_indent = 0;
1807 	indent_push(s);
1808 }
1809 
1810 static void
1811 indent_pop(void)
1812 {
1813 	if (current_indent > 0)
1814 		current_indent--;
1815 	else
1816 		out(O_DIE, "recursion underflow");
1817 }
1818 
1819 static void
1820 indent(void)
1821 {
1822 	int i;
1823 	if (!Verbose)
1824 		return;
1825 	for (i = 0; i < current_indent; i++)
1826 		out(O_ALTFP|O_VERB|O_NONL, indent_s[i]);
1827 }
1828 
1829 #define	SLNEW		1
1830 #define	SLCHANGED	2
1831 #define	SLWAIT		3
1832 #define	SLDISPROVED	4
1833 
1834 static void
1835 print_suspects(int circumstance, struct fme *fmep)
1836 {
1837 	struct event *ep;
1838 
1839 	out(O_ALTFP|O_NONL, "[");
1840 	if (circumstance == SLCHANGED) {
1841 		out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, "
1842 		    "suspect list:", fmep->id, fme_state2str(fmep->state));
1843 	} else if (circumstance == SLWAIT) {
1844 		out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id,
1845 		    fmep->timer);
1846 		ptree_timeval(O_ALTFP|O_NONL, &fmep->wull);
1847 	} else if (circumstance == SLDISPROVED) {
1848 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id);
1849 	} else {
1850 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id);
1851 	}
1852 
1853 	if (circumstance == SLWAIT || circumstance == SLDISPROVED) {
1854 		out(O_ALTFP, "]");
1855 		return;
1856 	}
1857 
1858 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
1859 		out(O_ALTFP|O_NONL, " ");
1860 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
1861 	}
1862 	out(O_ALTFP, "]");
1863 }
1864 
1865 static struct node *
1866 eventprop_lookup(struct event *ep, const char *propname)
1867 {
1868 	return (lut_lookup(ep->props, (void *)propname, NULL));
1869 }
1870 
1871 #define	MAXDIGITIDX	23
1872 static char numbuf[MAXDIGITIDX + 1];
1873 
1874 static int
1875 node2uint(struct node *n, uint_t *valp)
1876 {
1877 	struct evalue value;
1878 	struct lut *globals = NULL;
1879 
1880 	if (n == NULL)
1881 		return (1);
1882 
1883 	/*
1884 	 * check value.v since we are being asked to convert an unsigned
1885 	 * long long int to an unsigned int
1886 	 */
1887 	if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) ||
1888 	    value.t != UINT64 || value.v > (1ULL << 32))
1889 		return (1);
1890 
1891 	*valp = (uint_t)value.v;
1892 
1893 	return (0);
1894 }
1895 
1896 static nvlist_t *
1897 node2fmri(struct node *n)
1898 {
1899 	nvlist_t **pa, *f, *p;
1900 	struct node *nc;
1901 	uint_t depth = 0;
1902 	char *numstr, *nullbyte;
1903 	char *failure;
1904 	int err, i;
1905 
1906 	/* XXX do we need to be able to handle a non-T_NAME node? */
1907 	if (n == NULL || n->t != T_NAME)
1908 		return (NULL);
1909 
1910 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
1911 		if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM)
1912 			break;
1913 		depth++;
1914 	}
1915 
1916 	if (nc != NULL) {
1917 		/* We bailed early, something went wrong */
1918 		return (NULL);
1919 	}
1920 
1921 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
1922 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
1923 	pa = alloca(depth * sizeof (nvlist_t *));
1924 	for (i = 0; i < depth; i++)
1925 		pa[i] = NULL;
1926 
1927 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
1928 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
1929 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
1930 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
1931 	if (err != 0) {
1932 		failure = "basic construction of FMRI failed";
1933 		goto boom;
1934 	}
1935 
1936 	numbuf[MAXDIGITIDX] = '\0';
1937 	nullbyte = &numbuf[MAXDIGITIDX];
1938 	i = 0;
1939 
1940 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
1941 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
1942 		if (err != 0) {
1943 			failure = "alloc of an hc-pair failed";
1944 			goto boom;
1945 		}
1946 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s);
1947 		numstr = ulltostr(nc->u.name.child->u.ull, nullbyte);
1948 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
1949 		if (err != 0) {
1950 			failure = "construction of an hc-pair failed";
1951 			goto boom;
1952 		}
1953 		pa[i++] = p;
1954 	}
1955 
1956 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
1957 	if (err == 0) {
1958 		for (i = 0; i < depth; i++)
1959 			if (pa[i] != NULL)
1960 				nvlist_free(pa[i]);
1961 		return (f);
1962 	}
1963 	failure = "addition of hc-pair array to FMRI failed";
1964 
1965 boom:
1966 	for (i = 0; i < depth; i++)
1967 		if (pa[i] != NULL)
1968 			nvlist_free(pa[i]);
1969 	nvlist_free(f);
1970 	out(O_DIE, "%s", failure);
1971 	/*NOTREACHED*/
1972 	return (NULL);
1973 }
1974 
1975 /* an ipath cache entry is an array of these, with s==NULL at the end */
1976 struct ipath {
1977 	const char *s;	/* component name (in stable) */
1978 	int i;		/* instance number */
1979 };
1980 
1981 static nvlist_t *
1982 ipath2fmri(struct ipath *ipath)
1983 {
1984 	nvlist_t **pa, *f, *p;
1985 	uint_t depth = 0;
1986 	char *numstr, *nullbyte;
1987 	char *failure;
1988 	int err, i;
1989 	struct ipath *ipp;
1990 
1991 	for (ipp = ipath; ipp->s != NULL; ipp++)
1992 		depth++;
1993 
1994 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
1995 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
1996 	pa = alloca(depth * sizeof (nvlist_t *));
1997 	for (i = 0; i < depth; i++)
1998 		pa[i] = NULL;
1999 
2000 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
2001 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
2002 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
2003 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
2004 	if (err != 0) {
2005 		failure = "basic construction of FMRI failed";
2006 		goto boom;
2007 	}
2008 
2009 	numbuf[MAXDIGITIDX] = '\0';
2010 	nullbyte = &numbuf[MAXDIGITIDX];
2011 	i = 0;
2012 
2013 	for (ipp = ipath; ipp->s != NULL; ipp++) {
2014 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2015 		if (err != 0) {
2016 			failure = "alloc of an hc-pair failed";
2017 			goto boom;
2018 		}
2019 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s);
2020 		numstr = ulltostr(ipp->i, nullbyte);
2021 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2022 		if (err != 0) {
2023 			failure = "construction of an hc-pair failed";
2024 			goto boom;
2025 		}
2026 		pa[i++] = p;
2027 	}
2028 
2029 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2030 	if (err == 0) {
2031 		for (i = 0; i < depth; i++)
2032 			if (pa[i] != NULL)
2033 				nvlist_free(pa[i]);
2034 		return (f);
2035 	}
2036 	failure = "addition of hc-pair array to FMRI failed";
2037 
2038 boom:
2039 	for (i = 0; i < depth; i++)
2040 		if (pa[i] != NULL)
2041 			nvlist_free(pa[i]);
2042 	nvlist_free(f);
2043 	out(O_DIE, "%s", failure);
2044 	/*NOTREACHED*/
2045 	return (NULL);
2046 }
2047 
/*
 * avg -- return sum / cnt rounded to the nearest integer, halves rounding
 * up.  returns 0 if cnt is 0.
 */
static uint_t
avg(uint_t sum, uint_t cnt)
{
	unsigned long long tenths;

	if (cnt == 0)
		return (0);

	/*
	 * work in tenths so the first decimal digit decides the rounding.
	 * widen before multiplying, otherwise the product wraps at 32 bits.
	 */
	tenths = (unsigned long long)sum * 10 / cnt;

	return ((uint_t)(tenths / 10 + ((tenths % 10 >= 5) ? 1 : 0)));
}
2055 
/*
 * percentof -- return part as a percentage of whole, rounded to the
 * nearest integer with halves rounding up.  returns 0 if whole is 0.
 */
static uint8_t
percentof(uint_t part, uint_t whole)
{
	unsigned long long permille;

	if (whole == 0)
		return (0);

	/*
	 * work in tenths of a percent so the last digit decides the
	 * rounding.  widen before multiplying, otherwise the product
	 * wraps at 32 bits.
	 */
	permille = (unsigned long long)part * 1000 / whole;

	return ((uint8_t)(permille / 10 + ((permille % 10 >= 5) ? 1 : 0)));
}
2063 
/*
 * struct rsl -- one suspect together with the nvlists that describe it.
 * Entries are filled in by get_resources(); the nvlists are released by
 * rslfree().
 */
struct rsl {
	struct event *suspect;	/* the suspect; rsluniq() NULLs it for a dup */
	nvlist_t *asru;		/* ASRU FMRI, after platform translation */
	nvlist_t *fru;		/* FRU FMRI, after platform translation */
	nvlist_t *rsrc;		/* resource FMRI; rslfree() allows == asru */
};

/* forward declaration; the definition appears later in this file */
static void publish_suspects(struct fme *fmep, struct rsl *srl);
2072 
2073 /*
2074  *  rslfree -- free internal members of struct rsl not expected to be
2075  *	freed elsewhere.
2076  */
2077 static void
2078 rslfree(struct rsl *freeme)
2079 {
2080 	if (freeme->asru != NULL)
2081 		nvlist_free(freeme->asru);
2082 	if (freeme->fru != NULL)
2083 		nvlist_free(freeme->fru);
2084 	if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru)
2085 		nvlist_free(freeme->rsrc);
2086 }
2087 
2088 /*
2089  *  rslcmp -- compare two rsl structures.  Use the following
2090  *	comparisons to establish cardinality:
2091  *
2092  *	1. Name of the suspect's class. (simple strcmp)
2093  *	2. Name of the suspect's ASRU. (trickier, since nvlist)
2094  *
2095  */
2096 static int
2097 rslcmp(const void *a, const void *b)
2098 {
2099 	struct rsl *r1 = (struct rsl *)a;
2100 	struct rsl *r2 = (struct rsl *)b;
2101 	int rv;
2102 
2103 	rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s,
2104 	    r2->suspect->enode->u.event.ename->u.name.s);
2105 	if (rv != 0)
2106 		return (rv);
2107 
2108 	if (r1->rsrc == NULL && r2->rsrc == NULL)
2109 		return (0);
2110 	if (r1->rsrc == NULL)
2111 		return (-1);
2112 	if (r2->rsrc == NULL)
2113 		return (1);
2114 	return (evnv_cmpnvl(r1->rsrc, r2->rsrc, 0));
2115 }
2116 
2117 /*
2118  *  rsluniq -- given an array of rsl structures, seek out and "remove"
2119  *	any duplicates.  Dups are "remove"d by NULLing the suspect pointer
2120  *	of the array element.  Removal also means updating the number of
2121  *	problems and the number of problems which are not faults.  User
2122  *	provides the first and last element pointers.
2123  */
2124 static void
2125 rsluniq(struct rsl *first, struct rsl *last, int *nprobs, int *nnonf)
2126 {
2127 	struct rsl *cr;
2128 
2129 	if (*nprobs == 1)
2130 		return;
2131 
2132 	/*
2133 	 *  At this point, we only expect duplicate defects.
2134 	 *  Eversholt's diagnosis algorithm prevents duplicate
2135 	 *  suspects, but we rewrite defects in the platform code after
2136 	 *  the diagnosis is made, and that can introduce new
2137 	 *  duplicates.
2138 	 */
2139 	while (first <= last) {
2140 		if (first->suspect == NULL || !is_defect(first->suspect->t)) {
2141 			first++;
2142 			continue;
2143 		}
2144 		cr = first + 1;
2145 		while (cr <= last) {
2146 			if (is_defect(first->suspect->t)) {
2147 				if (rslcmp(first, cr) == 0) {
2148 					cr->suspect = NULL;
2149 					rslfree(cr);
2150 					(*nprobs)--;
2151 					(*nnonf)--;
2152 				}
2153 			}
2154 			/*
2155 			 * assume all defects are in order after our
2156 			 * sort and short circuit here with "else break" ?
2157 			 */
2158 			cr++;
2159 		}
2160 		first++;
2161 	}
2162 }
2163 
2164 /*
2165  * get_resources -- for a given suspect, determine what ASRU, FRU and
2166  *     RSRC nvlists should be advertised in the final suspect list.
2167  */
2168 void
2169 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot)
2170 {
2171 	struct node *asrudef, *frudef;
2172 	nvlist_t *asru, *fru;
2173 	nvlist_t *rsrc = NULL;
2174 	char *pathstr;
2175 
2176 	/*
2177 	 * First find any ASRU and/or FRU defined in the
2178 	 * initial fault tree.
2179 	 */
2180 	asrudef = eventprop_lookup(sp, L_ASRU);
2181 	frudef = eventprop_lookup(sp, L_FRU);
2182 
2183 	/*
2184 	 * Create FMRIs based on those definitions
2185 	 */
2186 	asru = node2fmri(asrudef);
2187 	fru = node2fmri(frudef);
2188 	pathstr = ipath2str(NULL, sp->ipp);
2189 
2190 	/*
2191 	 *  Allow for platform translations of the FMRIs
2192 	 */
2193 	platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc,
2194 	    pathstr);
2195 
2196 	FREE(pathstr);
2197 	rsrcs->suspect = sp;
2198 	rsrcs->asru = asru;
2199 	rsrcs->fru = fru;
2200 	rsrcs->rsrc = rsrc;
2201 }
2202 
2203 /*
2204  * trim_suspects -- prior to publishing, we may need to remove some
2205  *    suspects from the list.  If we're auto-closing upsets, we don't
2206  *    want any of those in the published list.  If the ASRUs for multiple
2207  *    defects resolve to the same ASRU (driver) we only want to publish
2208  *    that as a single suspect.
2209  */
2210 static int
2211 trim_suspects(struct fme *fmep, struct rsl *begin, struct rsl *begin2,
2212     fmd_event_t *ffep, int *mess_zero_nonfaultp)
2213 {
2214 	struct event *ep;
2215 	struct rsl *rp = begin;
2216 	struct rsl *rp2 = begin2;
2217 	int mess_zero_count = 0;
2218 	int serd_rval;
2219 	uint_t messval;
2220 
2221 	/* remove any unwanted upsets and populate our array */
2222 	for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
2223 		if (is_upset(ep->t))
2224 			continue;
2225 		serd_rval = serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, ep,
2226 		    NULL, NULL);
2227 		if (serd_rval == 0)
2228 			continue;
2229 		if (node2uint(eventprop_lookup(ep, L_message),
2230 		    &messval) == 0 && messval == 0) {
2231 			get_resources(ep, rp2, fmep->config);
2232 			rp2++;
2233 			mess_zero_count++;
2234 			if (!is_fault(ep->t))
2235 				(*mess_zero_nonfaultp)++;
2236 		} else {
2237 			get_resources(ep, rp, fmep->config);
2238 			rp++;
2239 			fmep->nsuspects++;
2240 			if (!is_fault(ep->t))
2241 				fmep->nonfault++;
2242 		}
2243 	}
2244 	return (mess_zero_count);
2245 }
2246 
2247 /*
2248  * addpayloadprop -- add a payload prop to a problem
2249  */
2250 static void
2251 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault)
2252 {
2253 	nvlist_t *rsrc, *hcs;
2254 
2255 	ASSERT(fault != NULL);
2256 	ASSERT(lhs != NULL);
2257 	ASSERT(rhs != NULL);
2258 
2259 	if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, &rsrc) != 0)
2260 		out(O_DIE, "cannot add payloadprop \"%s\" to fault", lhs);
2261 
2262 	if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0) {
2263 		out(O_ALTFP|O_VERB2, "addpayloadprop: create hc_specific");
2264 		if (nvlist_xalloc(&hcs, NV_UNIQUE_NAME, &Eft_nv_hdl) != 0)
2265 			out(O_DIE,
2266 			    "cannot add payloadprop \"%s\" to fault", lhs);
2267 		if (nvlist_add_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, hcs) != 0)
2268 			out(O_DIE,
2269 			    "cannot add payloadprop \"%s\" to fault", lhs);
2270 		nvlist_free(hcs);
2271 		if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0)
2272 			out(O_DIE,
2273 			    "cannot add payloadprop \"%s\" to fault", lhs);
2274 	} else
2275 		out(O_ALTFP|O_VERB2, "addpayloadprop: reuse hc_specific");
2276 
2277 	if (rhs->t == UINT64) {
2278 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v);
2279 
2280 		if (nvlist_add_uint64(hcs, lhs, rhs->v) != 0)
2281 			out(O_DIE,
2282 			    "cannot add payloadprop \"%s\" to fault", lhs);
2283 	} else {
2284 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"",
2285 		    lhs, (char *)(uintptr_t)rhs->v);
2286 
2287 		if (nvlist_add_string(hcs, lhs, (char *)(uintptr_t)rhs->v) != 0)
2288 			out(O_DIE,
2289 			    "cannot add payloadprop \"%s\" to fault", lhs);
2290 	}
2291 }
2292 
/* scratch state shared by istat_save() and its lut_walk callbacks */
static char *Istatbuf;		/* buffer allocated in istat_save() */
static char *Istatbufptr;	/* next free byte; advanced by istat2str() */
static int Istatsz;		/* byte count summed by istataddsize() */
2296 
2297 /*
2298  * istataddsize -- calculate size of istat and add it to Istatsz
2299  */
2300 /*ARGSUSED2*/
2301 static void
2302 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2303 {
2304 	int val;
2305 
2306 	ASSERT(lhs != NULL);
2307 	ASSERT(rhs != NULL);
2308 
2309 	if ((val = stats_counter_value(rhs)) == 0)
2310 		return;	/* skip zero-valued stats */
2311 
2312 	/* count up the size of the stat name */
2313 	Istatsz += ipath2strlen(lhs->ename, lhs->ipath);
2314 	Istatsz++;	/* for the trailing NULL byte */
2315 
2316 	/* count up the size of the stat value */
2317 	Istatsz += snprintf(NULL, 0, "%d", val);
2318 	Istatsz++;	/* for the trailing NULL byte */
2319 }
2320 
2321 /*
2322  * istat2str -- serialize an istat, writing result to *Istatbufptr
2323  */
2324 /*ARGSUSED2*/
2325 static void
2326 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2327 {
2328 	char *str;
2329 	int len;
2330 	int val;
2331 
2332 	ASSERT(lhs != NULL);
2333 	ASSERT(rhs != NULL);
2334 
2335 	if ((val = stats_counter_value(rhs)) == 0)
2336 		return;	/* skip zero-valued stats */
2337 
2338 	/* serialize the stat name */
2339 	str = ipath2str(lhs->ename, lhs->ipath);
2340 	len = strlen(str);
2341 
2342 	ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]);
2343 	(void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr);
2344 	Istatbufptr += len;
2345 	FREE(str);
2346 	*Istatbufptr++ = '\0';
2347 
2348 	/* serialize the stat value */
2349 	Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr,
2350 	    "%d", val);
2351 	*Istatbufptr++ = '\0';
2352 
2353 	ASSERT(Istatbufptr <= &Istatbuf[Istatsz]);
2354 }
2355 
2356 void
2357 istat_save()
2358 {
2359 	if (Istat_need_save == 0)
2360 		return;
2361 
2362 	/* figure out how big the serialzed info is */
2363 	Istatsz = 0;
2364 	lut_walk(Istats, (lut_cb)istataddsize, NULL);
2365 
2366 	if (Istatsz == 0) {
2367 		/* no stats to save */
2368 		fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2369 		return;
2370 	}
2371 
2372 	/* create the serialized buffer */
2373 	Istatbufptr = Istatbuf = MALLOC(Istatsz);
2374 	lut_walk(Istats, (lut_cb)istat2str, NULL);
2375 
2376 	/* clear out current saved stats */
2377 	fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2378 
2379 	/* write out the new version */
2380 	fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz);
2381 	FREE(Istatbuf);
2382 
2383 	Istat_need_save = 0;
2384 }
2385 
2386 int
2387 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2)
2388 {
2389 	if (ent1->ename != ent2->ename)
2390 		return (ent2->ename - ent1->ename);
2391 	if (ent1->ipath != ent2->ipath)
2392 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2393 
2394 	return (0);
2395 }
2396 
2397 /*
2398  * istat-verify -- verify the component associated with a stat still exists
2399  *
2400  * if the component no longer exists, this routine resets the stat and
2401  * returns 0.  if the component still exists, it returns 1.
2402  */
2403 static int
2404 istat_verify(struct node *snp, struct istat_entry *entp)
2405 {
2406 	struct stats *statp;
2407 	nvlist_t *fmri;
2408 
2409 	fmri = node2fmri(snp->u.event.epname);
2410 	if (platform_path_exists(fmri)) {
2411 		nvlist_free(fmri);
2412 		return (1);
2413 	}
2414 	nvlist_free(fmri);
2415 
2416 	/* component no longer in system.  zero out the associated stats */
2417 	if ((statp = (struct stats *)
2418 	    lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
2419 	    stats_counter_value(statp) == 0)
2420 		return (0);	/* stat is already reset */
2421 
2422 	Istat_need_save = 1;
2423 	stats_counter_reset(statp);
2424 	return (0);
2425 }
2426 
2427 static void
2428 istat_bump(struct node *snp, int n)
2429 {
2430 	struct stats *statp;
2431 	struct istat_entry ent;
2432 
2433 	ASSERT(snp != NULL);
2434 	ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
2435 	ASSERT(snp->u.event.epname != NULL);
2436 
2437 	/* class name should be hoisted into a single stable entry */
2438 	ASSERT(snp->u.event.ename->u.name.next == NULL);
2439 	ent.ename = snp->u.event.ename->u.name.s;
2440 	ent.ipath = ipath(snp->u.event.epname);
2441 
2442 	if (!istat_verify(snp, &ent)) {
2443 		/* component no longer exists in system, nothing to do */
2444 		return;
2445 	}
2446 
2447 	if ((statp = (struct stats *)
2448 	    lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
2449 		/* need to create the counter */
2450 		int cnt = 0;
2451 		struct node *np;
2452 		char *sname;
2453 		char *snamep;
2454 		struct istat_entry *newentp;
2455 
2456 		/* count up the size of the stat name */
2457 		np = snp->u.event.ename;
2458 		while (np != NULL) {
2459 			cnt += strlen(np->u.name.s);
2460 			cnt++;	/* for the '.' or '@' */
2461 			np = np->u.name.next;
2462 		}
2463 		np = snp->u.event.epname;
2464 		while (np != NULL) {
2465 			cnt += snprintf(NULL, 0, "%s%llu",
2466 			    np->u.name.s, np->u.name.child->u.ull);
2467 			cnt++;	/* for the '/' or trailing NULL byte */
2468 			np = np->u.name.next;
2469 		}
2470 
2471 		/* build the stat name */
2472 		snamep = sname = alloca(cnt);
2473 		np = snp->u.event.ename;
2474 		while (np != NULL) {
2475 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2476 			    "%s", np->u.name.s);
2477 			np = np->u.name.next;
2478 			if (np)
2479 				*snamep++ = '.';
2480 		}
2481 		*snamep++ = '@';
2482 		np = snp->u.event.epname;
2483 		while (np != NULL) {
2484 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2485 			    "%s%llu", np->u.name.s, np->u.name.child->u.ull);
2486 			np = np->u.name.next;
2487 			if (np)
2488 				*snamep++ = '/';
2489 		}
2490 		*snamep++ = '\0';
2491 
2492 		/* create the new stat & add it to our list */
2493 		newentp = MALLOC(sizeof (*newentp));
2494 		*newentp = ent;
2495 		statp = stats_new_counter(NULL, sname, 0);
2496 		Istats = lut_add(Istats, (void *)newentp, (void *)statp,
2497 		    (lut_cmp)istat_cmp);
2498 	}
2499 
2500 	/* if n is non-zero, set that value instead of bumping */
2501 	if (n) {
2502 		stats_counter_reset(statp);
2503 		stats_counter_add(statp, n);
2504 	} else
2505 		stats_counter_bump(statp);
2506 	Istat_need_save = 1;
2507 
2508 	ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
2509 	out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
2510 	    stats_counter_value(statp));
2511 }
2512 
/*
 * istat_destructor -- lut_free callback for Istats: free the istat_entry
 * key and delete the counter stored as its value.
 */
/*ARGSUSED*/
static void
istat_destructor(void *left, void *right, void *arg)
{
	FREE(left);
	stats_delete((struct stats *)right);
}
2522 
2523 /*
2524  * Callback used in a walk of the Istats to reset matching stat counters.
2525  */
2526 static void
2527 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
2528     const struct ipath *ipp)
2529 {
2530 	char *path;
2531 
2532 	if (entp->ipath == ipp) {
2533 		path = ipath2str(entp->ename, ipp);
2534 		out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
2535 		FREE(path);
2536 		stats_counter_reset(statp);
2537 		Istat_need_save = 1;
2538 	}
2539 }
2540 
2541 /*ARGSUSED*/
2542 static void
2543 istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp,
2544     void *unused)
2545 {
2546 	char *path;
2547 	nvlist_t *fmri;
2548 
2549 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2550 	if (!platform_path_exists(fmri)) {
2551 		path = ipath2str(entp->ename, entp->ipath);
2552 		out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path);
2553 		FREE(path);
2554 		stats_counter_reset(statp);
2555 		Istat_need_save = 1;
2556 	}
2557 	nvlist_free(fmri);
2558 }
2559 
2560 void
2561 istat_fini(void)
2562 {
2563 	lut_free(Istats, istat_destructor, NULL);
2564 }
2565 
/* scratch state shared by serd_save() and its lut_walk callbacks */
static char *Serdbuf;		/* buffer allocated in serd_save() */
static char *Serdbufptr;	/* next free byte; advanced by serd2str() */
static int Serdsz;		/* byte count summed by serdaddsize() */
2569 
2570 /*
2571  * serdaddsize -- calculate size of serd and add it to Serdsz
2572  */
2573 /*ARGSUSED*/
2574 static void
2575 serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2576 {
2577 	ASSERT(lhs != NULL);
2578 
2579 	/* count up the size of the stat name */
2580 	Serdsz += ipath2strlen(lhs->ename, lhs->ipath);
2581 	Serdsz++;	/* for the trailing NULL byte */
2582 }
2583 
2584 /*
2585  * serd2str -- serialize a serd engine, writing result to *Serdbufptr
2586  */
2587 /*ARGSUSED*/
2588 static void
2589 serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2590 {
2591 	char *str;
2592 	int len;
2593 
2594 	ASSERT(lhs != NULL);
2595 
2596 	/* serialize the serd engine name */
2597 	str = ipath2str(lhs->ename, lhs->ipath);
2598 	len = strlen(str);
2599 
2600 	ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]);
2601 	(void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr);
2602 	Serdbufptr += len;
2603 	FREE(str);
2604 	*Serdbufptr++ = '\0';
2605 	ASSERT(Serdbufptr <= &Serdbuf[Serdsz]);
2606 }
2607 
2608 void
2609 serd_save()
2610 {
2611 	if (Serd_need_save == 0)
2612 		return;
2613 
2614 	/* figure out how big the serialzed info is */
2615 	Serdsz = 0;
2616 	lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL);
2617 
2618 	if (Serdsz == 0) {
2619 		/* no serd engines to save */
2620 		fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2621 		return;
2622 	}
2623 
2624 	/* create the serialized buffer */
2625 	Serdbufptr = Serdbuf = MALLOC(Serdsz);
2626 	lut_walk(SerdEngines, (lut_cb)serd2str, NULL);
2627 
2628 	/* clear out current saved stats */
2629 	fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2630 
2631 	/* write out the new version */
2632 	fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz);
2633 	FREE(Serdbuf);
2634 	Serd_need_save = 0;
2635 }
2636 
2637 int
2638 serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2)
2639 {
2640 	if (ent1->ename != ent2->ename)
2641 		return (ent2->ename - ent1->ename);
2642 	if (ent1->ipath != ent2->ipath)
2643 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2644 
2645 	return (0);
2646 }
2647 
2648 void
2649 fme_serd_load(fmd_hdl_t *hdl)
2650 {
2651 	int sz;
2652 	char *sbuf;
2653 	char *sepptr;
2654 	char *ptr;
2655 	struct serd_entry *newentp;
2656 	struct node *epname;
2657 	nvlist_t *fmri;
2658 	char *namestring;
2659 
2660 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0)
2661 		return;
2662 	sbuf = alloca(sz);
2663 	fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz);
2664 	ptr = sbuf;
2665 	while (ptr < &sbuf[sz]) {
2666 		sepptr = strchr(ptr, '@');
2667 		*sepptr = '\0';
2668 		namestring = ptr;
2669 		sepptr++;
2670 		ptr = sepptr;
2671 		ptr += strlen(ptr);
2672 		ptr++;	/* move past the '\0' separating paths */
2673 		epname = pathstring2epnamenp(sepptr);
2674 		fmri = node2fmri(epname);
2675 		if (platform_path_exists(fmri)) {
2676 			newentp = MALLOC(sizeof (*newentp));
2677 			newentp->hdl = hdl;
2678 			newentp->ipath = ipath(epname);
2679 			newentp->ename = stable(namestring);
2680 			SerdEngines = lut_add(SerdEngines, (void *)newentp,
2681 			    (void *)newentp, (lut_cmp)serd_cmp);
2682 		} else
2683 			Serd_need_save = 1;
2684 		tree_free(epname);
2685 		nvlist_free(fmri);
2686 	}
2687 	/* save it back again in case some of the paths no longer exist */
2688 	serd_save();
2689 }
2690 
/*
 * serd_destructor -- lut_free callback for SerdEngines: free the
 * serd_entry stored as the key.
 */
/*ARGSUSED*/
static void
serd_destructor(void *left, void *right, void *arg)
{
	FREE(left);
}
2698 
2699 /*
2700  * Callback used in a walk of the SerdEngines to reset matching serd engines.
2701  */
2702 /*ARGSUSED*/
2703 static void
2704 serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp)
2705 {
2706 	char *path;
2707 
2708 	if (entp->ipath == ipp) {
2709 		path = ipath2str(entp->ename, ipp);
2710 		out(O_ALTFP, "serd_reset_cb: resetting %s", path);
2711 		fmd_serd_reset(entp->hdl, path);
2712 		FREE(path);
2713 		Serd_need_save = 1;
2714 	}
2715 }
2716 
2717 /*ARGSUSED*/
2718 static void
2719 serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2)
2720 {
2721 	char *path;
2722 	nvlist_t *fmri;
2723 
2724 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2725 	if (!platform_path_exists(fmri)) {
2726 		path = ipath2str(entp->ename, entp->ipath);
2727 		out(O_ALTFP, "serd_topo_chg_cb: not present %s", path);
2728 		fmd_serd_reset(entp->hdl, path);
2729 		FREE(path);
2730 		Serd_need_save = 1;
2731 	}
2732 	nvlist_free(fmri);
2733 }
2734 
2735 void
2736 serd_fini(void)
2737 {
2738 	lut_free(SerdEngines, serd_destructor, NULL);
2739 }
2740 
/*
 * publish_suspects -- turn the suspect array srl (fmep->nsuspects
 * entries) into faults on the fme's case.
 *
 * The array is sorted and de-duplicated, a certainty is worked out for
 * each remaining suspect, and a fault nvlist is created for each one,
 * decorated with any message/retire/response and payload properties, and
 * added to the case.  Each entry's nvlists are released with rslfree()
 * as it is processed.  Finally, unless every suspect is a fault whose
 * ASRU fmd already reports as faulty, the "count" istat of each suspect
 * is bumped and the istats are saved.
 */
static void
publish_suspects(struct fme *fmep, struct rsl *srl)
{
	struct rsl *rp;
	nvlist_t *fault;
	uint8_t cert;
	uint_t *frs;
	uint_t fravg, frsum, fr;
	uint_t messval;
	uint_t retireval;
	uint_t responseval;
	struct node *snp;
	int frcnt, fridx;
	boolean_t allfaulty = B_TRUE;
	/* last array slot; fixed here, before rsluniq() lowers nsuspects */
	struct rsl *erl = srl + fmep->nsuspects - 1;

	/*
	 * sort the array
	 */
	qsort(srl, fmep->nsuspects, sizeof (struct rsl), rslcmp);
	rsluniq(srl, erl, &fmep->nsuspects, &fmep->nonfault);

	/*
	 * If the suspect list is all faults, then for a given fault,
	 * say X of N, X's certainty is computed via:
	 *
	 * fitrate(X) / (fitrate(1) + ... + fitrate(N)) * 100
	 *
	 * If none of the suspects are faults, and there are N suspects,
	 * the certainty of a given suspect is 100/N.
	 *
	 * If there are a mixture of faults and other problems in
	 * the suspect list, we take an average of the faults'
	 * FITrates and treat this average as the FITrate for any
	 * non-faults.  The fitrate of any given suspect is then
	 * computed per the first formula above.
	 */
	if (fmep->nonfault == fmep->nsuspects) {
		/* NO faults in the suspect list */
		/*
		 * NOTE(review): with nsuspects == 0 this passes a zero
		 * divisor to percentof(); presumably callers never
		 * publish an empty list -- confirm.
		 */
		cert = percentof(1, fmep->nsuspects);
	} else {
		/* sum the fitrates */
		frs = alloca(fmep->nsuspects * sizeof (uint_t));
		fridx = frcnt = frsum = 0;

		/* one frs[] slot per surviving suspect, in array order */
		for (rp = srl; rp <= erl; rp++) {
			struct node *n;

			if (rp->suspect == NULL)
				continue;
			if (!is_fault(rp->suspect->t)) {
				/* placeholder; replaced by the average below */
				frs[fridx++] = 0;
				continue;
			}
			n = eventprop_lookup(rp->suspect, L_FITrate);
			if (node2uint(n, &fr) != 0) {
				out(O_DEBUG|O_NONL, "event ");
				ipath_print(O_DEBUG|O_NONL,
				    rp->suspect->enode->u.event.ename->u.name.s,
				    rp->suspect->ipp);
				out(O_DEBUG, " has no FITrate (using 1)");
				fr = 1;
			} else if (fr == 0) {
				out(O_DEBUG|O_NONL, "event ");
				ipath_print(O_DEBUG|O_NONL,
				    rp->suspect->enode->u.event.ename->u.name.s,
				    rp->suspect->ipp);
				out(O_DEBUG, " has zero FITrate (using 1)");
				fr = 1;
			}

			frs[fridx++] = fr;
			frsum += fr;
			frcnt++;
		}
		fravg = avg(frsum, frcnt);
		/*
		 * give the non-faults the average rate; this loop also
		 * leaves fridx == nsuspects, which the reverse walk below
		 * relies on
		 */
		for (fridx = 0; fridx < fmep->nsuspects; fridx++)
			if (frs[fridx] == 0) {
				frs[fridx] = fravg;
				frsum += fravg;
			}
	}

	/* Add them in reverse order of our sort, as fmd reverses order */
	for (rp = erl; rp >= srl; rp--) {
		if (rp->suspect == NULL)
			continue;
		if (!is_fault(rp->suspect->t))
			allfaulty = B_FALSE;
		/* frs[] is consumed back to front, matching this walk */
		if (fmep->nonfault != fmep->nsuspects)
			cert = percentof(frs[--fridx], frsum);
		fault = fmd_nvl_create_fault(fmep->hdl,
		    rp->suspect->enode->u.event.ename->u.name.s,
		    cert,
		    rp->asru,
		    rp->fru,
		    rp->rsrc);
		if (fault == NULL)
			out(O_DIE, "fault creation failed");
		/* if "message" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_message),
		    &messval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds message=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    messval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_MESSAGE,
			    (messval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-message to fault");
			}
		}

		/* if "retire" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_retire),
		    &retireval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds retire=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    retireval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_RETIRE,
			    (retireval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-retire to fault");
			}
		}

		/* if "response" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_response),
		    &responseval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds response=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    responseval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_RESPONSE,
			    (responseval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-response to fault");
			}
		}

		/* add any payload properties */
		lut_walk(rp->suspect->payloadprops,
		    (lut_cb)addpayloadprop, (void *)fault);
		/* frees the entry's nvlists only; rp->suspect stays usable */
		rslfree(rp);

		/*
		 * If "action" property exists, evaluate it;  this must be done
		 * before the allfaulty check below since some actions may
		 * modify the asru to be used in fmd_nvl_fmri_has_fault.  This
		 * needs to be restructured if any new actions are introduced
		 * that have effects that we do not want to be visible if
		 * we decide not to publish in the dupclose check below.
		 */
		if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
			struct evalue evalue;

			out(O_ALTFP|O_NONL,
			    "[FME%d, %s action ", fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s);
			ptree_name_iter(O_ALTFP|O_NONL, snp);
			out(O_ALTFP, "]");
			Action_nvl = fault;
			(void) eval_expr(snp, NULL, NULL, NULL, NULL,
			    NULL, 0, &evalue);
		}

		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);

		/*
		 * check if the asru is already marked as "faulty".
		 */
		if (allfaulty) {
			nvlist_t *asru;

			out(O_ALTFP|O_VERB, "FME%d dup check ", fmep->id);
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
			out(O_ALTFP|O_VERB|O_NONL, " ");
			if (nvlist_lookup_nvlist(fault,
			    FM_FAULT_ASRU, &asru) != 0) {
				out(O_ALTFP|O_VERB, "NULL asru");
				allfaulty = B_FALSE;
			} else if (fmd_nvl_fmri_has_fault(fmep->hdl, asru,
			    FMD_HAS_FAULT_ASRU, NULL)) {
				out(O_ALTFP|O_VERB, "faulty");
			} else {
				out(O_ALTFP|O_VERB, "not faulty");
				allfaulty = B_FALSE;
			}
		}

	}

	if (!allfaulty) {
		/*
		 * don't update the count stat if all asrus are already
		 * present and unrepaired in the asru cache
		 */
		for (rp = erl; rp >= srl; rp--) {
			struct event *suspect = rp->suspect;

			if (suspect == NULL)
				continue;

			/* if "count" exists, increment the appropriate stat */
			if ((snp = eventprop_lookup(suspect,
			    L_count)) != NULL) {
				out(O_ALTFP|O_NONL,
				    "[FME%d, %s count ", fmep->id,
				    suspect->enode->u.event.ename->u.name.s);
				ptree_name_iter(O_ALTFP|O_NONL, snp);
				out(O_ALTFP, "]");
				istat_bump(snp, 0);

			}
		}
		istat_save();	/* write out any istat changes */
	}
}
2966 
2967 static void
2968 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase)
2969 {
2970 	struct case_list *newcase;
2971 	nvlist_t *defect;
2972 
2973 	out(O_ALTFP,
2974 	    "[undiagnosable ereport received, "
2975 	    "creating and closing a new case (%s)]",
2976 	    Undiag_reason ? Undiag_reason : "reason not provided");
2977 
2978 	newcase = MALLOC(sizeof (struct case_list));
2979 	newcase->next = NULL;
2980 	newcase->fmcase = fmcase;
2981 	if (Undiagablecaselist != NULL)
2982 		newcase->next = Undiagablecaselist;
2983 	Undiagablecaselist = newcase;
2984 
2985 	if (ffep != NULL)
2986 		fmd_case_add_ereport(hdl, newcase->fmcase, ffep);
2987 
2988 	defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
2989 	    NULL, NULL, NULL);
2990 	if (Undiag_reason != NULL)
2991 		(void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason);
2992 	fmd_case_add_suspect(hdl, newcase->fmcase, defect);
2993 
2994 	fmd_case_solve(hdl, newcase->fmcase);
2995 	fmd_case_close(hdl, newcase->fmcase);
2996 }
2997 
2998 static void
2999 fme_undiagnosable(struct fme *f)
3000 {
3001 	nvlist_t *defect;
3002 
3003 	out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
3004 	    f->id, fmd_case_uuid(f->hdl, f->fmcase),
3005 	    Undiag_reason ? Undiag_reason : "undiagnosable");
3006 
3007 	defect = fmd_nvl_create_fault(f->hdl, UNDIAGNOSABLE_DEFECT, 100,
3008 	    NULL, NULL, NULL);
3009 	if (Undiag_reason != NULL)
3010 		(void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason);
3011 	fmd_case_add_suspect(f->hdl, f->fmcase, defect);
3012 	fmd_case_solve(f->hdl, f->fmcase);
3013 	fmd_case_close(f->hdl, f->fmcase);
3014 }
3015 
3016 /*
3017  * fme_close_case
3018  *
3019  *	Find the requested case amongst our fmes and close it.  Free up
3020  *	the related fme.
3021  */
3022 void
3023 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase)
3024 {
3025 	struct case_list *ucasep, *prevcasep = NULL;
3026 	struct fme *prev = NULL;
3027 	struct fme *fmep;
3028 
3029 	for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) {
3030 		if (fmcase != ucasep->fmcase) {
3031 			prevcasep = ucasep;
3032 			continue;
3033 		}
3034 
3035 		if (prevcasep == NULL)
3036 			Undiagablecaselist = Undiagablecaselist->next;
3037 		else
3038 			prevcasep->next = ucasep->next;
3039 
3040 		FREE(ucasep);
3041 		return;
3042 	}
3043 
3044 	for (fmep = FMElist; fmep; fmep = fmep->next) {
3045 		if (fmep->hdl == hdl && fmep->fmcase == fmcase)
3046 			break;
3047 		prev = fmep;
3048 	}
3049 
3050 	if (fmep == NULL) {
3051 		out(O_WARN, "Eft asked to close unrecognized case [%s].",
3052 		    fmd_case_uuid(hdl, fmcase));
3053 		return;
3054 	}
3055 
3056 	if (EFMElist == fmep)
3057 		EFMElist = prev;
3058 
3059 	if (prev == NULL)
3060 		FMElist = FMElist->next;
3061 	else
3062 		prev->next = fmep->next;
3063 
3064 	fmep->next = NULL;
3065 
3066 	/* Get rid of any timer this fme has set */
3067 	if (fmep->wull != 0)
3068 		fmd_timer_remove(fmep->hdl, fmep->timer);
3069 
3070 	if (ClosedFMEs == NULL) {
3071 		ClosedFMEs = fmep;
3072 	} else {
3073 		fmep->next = ClosedFMEs;
3074 		ClosedFMEs = fmep;
3075 	}
3076 
3077 	Open_fme_count--;
3078 
3079 	/* See if we can close the overflow FME */
3080 	if (Open_fme_count <= Max_fme) {
3081 		for (fmep = FMElist; fmep; fmep = fmep->next) {
3082 			if (fmep->overflow && !(fmd_case_closed(fmep->hdl,
3083 			    fmep->fmcase)))
3084 				break;
3085 		}
3086 
3087 		if (fmep != NULL)
3088 			fmd_case_close(fmep->hdl, fmep->fmcase);
3089 	}
3090 }
3091 
3092 /*
3093  * fme_set_timer()
3094  *	If the time we need to wait for the given FME is less than the
3095  *	current timer, kick that old timer out and establish a new one.
3096  */
3097 static int
3098 fme_set_timer(struct fme *fmep, unsigned long long wull)
3099 {
3100 	out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait ");
3101 	ptree_timeval(O_ALTFP|O_VERB, &wull);
3102 
3103 	if (wull <= fmep->pull) {
3104 		out(O_ALTFP|O_VERB|O_NONL, "already have waited at least ");
3105 		ptree_timeval(O_ALTFP|O_VERB, &fmep->pull);
3106 		out(O_ALTFP|O_VERB, NULL);
3107 		/* we've waited at least wull already, don't need timer */
3108 		return (0);
3109 	}
3110 
3111 	out(O_ALTFP|O_VERB|O_NONL, " currently ");
3112 	if (fmep->wull != 0) {
3113 		out(O_ALTFP|O_VERB|O_NONL, "waiting ");
3114 		ptree_timeval(O_ALTFP|O_VERB, &fmep->wull);
3115 		out(O_ALTFP|O_VERB, NULL);
3116 	} else {
3117 		out(O_ALTFP|O_VERB|O_NONL, "not waiting");
3118 		out(O_ALTFP|O_VERB, NULL);
3119 	}
3120 
3121 	if (fmep->wull != 0)
3122 		if (wull >= fmep->wull)
3123 			/* New timer would fire later than established timer */
3124 			return (0);
3125 
3126 	if (fmep->wull != 0) {
3127 		fmd_timer_remove(fmep->hdl, fmep->timer);
3128 	}
3129 
3130 	fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep,
3131 	    fmep->e0r, wull);
3132 	out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer);
3133 	fmep->wull = wull;
3134 	return (1);
3135 }
3136 
3137 void
3138 fme_timer_fired(struct fme *fmep, id_t tid)
3139 {
3140 	struct fme *ffmep = NULL;
3141 
3142 	for (ffmep = FMElist; ffmep; ffmep = ffmep->next)
3143 		if (ffmep == fmep)
3144 			break;
3145 
3146 	if (ffmep == NULL) {
3147 		out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.",
3148 		    (void *)fmep);
3149 		return;
3150 	}
3151 
3152 	out(O_ALTFP|O_VERB, "Timer fired %lx", tid);
3153 	fmep->pull = fmep->wull;
3154 	fmep->wull = 0;
3155 	fmd_buf_write(fmep->hdl, fmep->fmcase,
3156 	    WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull));
3157 
3158 	fme_eval(fmep, fmep->e0r);
3159 }
3160 
3161 /*
3162  * Preserve the fme's suspect list in its psuspects list, NULLing the
3163  * suspects list in the meantime.
3164  */
3165 static void
3166 save_suspects(struct fme *fmep)
3167 {
3168 	struct event *ep;
3169 	struct event *nextep;
3170 
3171 	/* zero out the previous suspect list */
3172 	for (ep = fmep->psuspects; ep; ep = nextep) {
3173 		nextep = ep->psuspects;
3174 		ep->psuspects = NULL;
3175 	}
3176 	fmep->psuspects = NULL;
3177 
3178 	/* zero out the suspect list, copying it to previous suspect list */
3179 	fmep->psuspects = fmep->suspects;
3180 	for (ep = fmep->suspects; ep; ep = nextep) {
3181 		nextep = ep->suspects;
3182 		ep->psuspects = ep->suspects;
3183 		ep->suspects = NULL;
3184 		ep->is_suspect = 0;
3185 	}
3186 	fmep->suspects = NULL;
3187 	fmep->nsuspects = 0;
3188 	fmep->nonfault = 0;
3189 }
3190 
3191 /*
3192  * Retrieve the fme's suspect list from its psuspects list.
3193  */
3194 static void
3195 restore_suspects(struct fme *fmep)
3196 {
3197 	struct event *ep;
3198 	struct event *nextep;
3199 
3200 	fmep->nsuspects = fmep->nonfault = 0;
3201 	fmep->suspects = fmep->psuspects;
3202 	for (ep = fmep->psuspects; ep; ep = nextep) {
3203 		fmep->nsuspects++;
3204 		if (!is_fault(ep->t))
3205 			fmep->nonfault++;
3206 		nextep = ep->psuspects;
3207 		ep->suspects = ep->psuspects;
3208 	}
3209 }
3210 
3211 /*
3212  * this is what we use to call the Emrys prototype code instead of main()
3213  */
3214 static void
3215 fme_eval(struct fme *fmep, fmd_event_t *ffep)
3216 {
3217 	struct event *ep;
3218 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
3219 	struct rsl *srl = NULL;
3220 	struct rsl *srl2 = NULL;
3221 	int mess_zero_count;
3222 	int mess_zero_nonfault = 0;
3223 	int rpcnt;
3224 
3225 	save_suspects(fmep);
3226 
3227 	out(O_ALTFP, "Evaluate FME %d", fmep->id);
3228 	indent_set("  ");
3229 
3230 	lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
3231 	fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
3232 
3233 	out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
3234 	    fme_state2str(fmep->state));
3235 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
3236 		out(O_ALTFP|O_NONL, " ");
3237 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
3238 	}
3239 	out(O_ALTFP, NULL);
3240 
3241 	switch (fmep->state) {
3242 	case FME_CREDIBLE:
3243 		print_suspects(SLNEW, fmep);
3244 		(void) upsets_eval(fmep, ffep);
3245 
3246 		/*
3247 		 * we may have already posted suspects in upsets_eval() which
3248 		 * can recurse into fme_eval() again. If so then just return.
3249 		 */
3250 		if (fmep->posted_suspects)
3251 			return;
3252 
3253 		stats_counter_bump(fmep->diags);
3254 		rpcnt = fmep->nsuspects;
3255 		save_suspects(fmep);
3256 
3257 		/*
3258 		 * create two lists, one for "message=1" faults and one for
3259 		 * "message=0" faults. If we have a mixture we will generate
3260 		 * two separate suspect lists.
3261 		 */
3262 		srl = MALLOC(rpcnt * sizeof (struct rsl));
3263 		bzero(srl, rpcnt * sizeof (struct rsl));
3264 		srl2 = MALLOC(rpcnt * sizeof (struct rsl));
3265 		bzero(srl2, rpcnt * sizeof (struct rsl));
3266 		mess_zero_count = trim_suspects(fmep, srl, srl2, ffep,
3267 		    &mess_zero_nonfault);
3268 
3269 		/*
3270 		 * If the resulting suspect list has no members, we're
3271 		 * done so simply close the case. Otherwise sort and publish.
3272 		 */
3273 		if (fmep->nsuspects == 0 && mess_zero_count == 0) {
3274 			out(O_ALTFP,
3275 			    "[FME%d, case %s (all suspects are upsets)]",
3276 			    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
3277 			fmd_case_close(fmep->hdl, fmep->fmcase);
3278 		} else if (fmep->nsuspects != 0 && mess_zero_count == 0) {
3279 			publish_suspects(fmep, srl);
3280 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3281 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3282 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3283 		} else if (fmep->nsuspects == 0 && mess_zero_count != 0) {
3284 			fmep->nsuspects = mess_zero_count;
3285 			fmep->nonfault = mess_zero_nonfault;
3286 			publish_suspects(fmep, srl2);
3287 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3288 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3289 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3290 		} else {
3291 			struct event *obsp;
3292 			struct fme *nfmep;
3293 
3294 			publish_suspects(fmep, srl);
3295 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3296 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3297 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3298 
3299 			/*
3300 			 * Got both message=0 and message=1 so create a
3301 			 * duplicate case. Also need a temporary duplicate fme
3302 			 * structure for use by publish_suspects().
3303 			 */
3304 			nfmep = alloc_fme();
3305 			nfmep->id =  Nextid++;
3306 			nfmep->hdl = fmep->hdl;
3307 			nfmep->nsuspects = mess_zero_count;
3308 			nfmep->nonfault = mess_zero_nonfault;
3309 			nfmep->fmcase = fmd_case_open(fmep->hdl, NULL);
3310 			out(O_ALTFP|O_STAMP,
3311 			    "[creating parallel FME%d, case %s]", nfmep->id,
3312 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3313 			Open_fme_count++;
3314 			if (ffep) {
3315 				fmd_case_setprincipal(nfmep->hdl,
3316 				    nfmep->fmcase, ffep);
3317 				fmd_case_add_ereport(nfmep->hdl,
3318 				    nfmep->fmcase, ffep);
3319 			}
3320 			for (obsp = fmep->observations; obsp;
3321 			    obsp = obsp->observations)
3322 				if (obsp->ffep && obsp->ffep != ffep)
3323 					fmd_case_add_ereport(nfmep->hdl,
3324 					    nfmep->fmcase, obsp->ffep);
3325 
3326 			publish_suspects(nfmep, srl2);
3327 			out(O_ALTFP, "[solving FME%d, case %s]", nfmep->id,
3328 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3329 			fmd_case_solve(nfmep->hdl, nfmep->fmcase);
3330 			FREE(nfmep);
3331 		}
3332 		FREE(srl);
3333 		FREE(srl2);
3334 		restore_suspects(fmep);
3335 
3336 		fmep->posted_suspects = 1;
3337 		fmd_buf_write(fmep->hdl, fmep->fmcase,
3338 		    WOBUF_POSTD,
3339 		    (void *)&fmep->posted_suspects,
3340 		    sizeof (fmep->posted_suspects));
3341 
3342 		/*
3343 		 * Now the suspects have been posted, we can clear up
3344 		 * the instance tree as we won't be looking at it again.
3345 		 * Also cancel the timer as the case is now solved.
3346 		 */
3347 		if (fmep->wull != 0) {
3348 			fmd_timer_remove(fmep->hdl, fmep->timer);
3349 			fmep->wull = 0;
3350 		}
3351 		break;
3352 
3353 	case FME_WAIT:
3354 		ASSERT(my_delay > fmep->ull);
3355 		(void) fme_set_timer(fmep, my_delay);
3356 		print_suspects(SLWAIT, fmep);
3357 		itree_prune(fmep->eventtree);
3358 		return;
3359 
3360 	case FME_DISPROVED:
3361 		print_suspects(SLDISPROVED, fmep);
3362 		Undiag_reason = UD_UNSOLVD;
3363 		fme_undiagnosable(fmep);
3364 		break;
3365 	}
3366 
3367 	itree_free(fmep->eventtree);
3368 	fmep->eventtree = NULL;
3369 	structconfig_free(fmep->config);
3370 	fmep->config = NULL;
3371 	destroy_fme_bufs(fmep);
3372 }
3373 
/*
 * Forward declarations for the inference routines below, which call one
 * another before their definitions appear.
 */
static void indent(void);
static int triggered(struct fme *fmep, struct event *ep, int mark);

static enum fme_state requirements_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state effects_test(struct fme *fmep,
    struct event *fault_event, unsigned long long at_latest_by,
    unsigned long long *pdelay);
static enum fme_state causes_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
3383 
3384 static int
3385 checkconstraints(struct fme *fmep, struct arrow *arrowp)
3386 {
3387 	struct constraintlist *ctp;
3388 	struct evalue value;
3389 	char *sep = "";
3390 
3391 	if (arrowp->forever_false) {
3392 		indent();
3393 		out(O_ALTFP|O_VERB|O_NONL, "  Forever false constraint: ");
3394 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3395 			out(O_ALTFP|O_VERB|O_NONL, sep);
3396 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3397 			sep = ", ";
3398 		}
3399 		out(O_ALTFP|O_VERB, NULL);
3400 		return (0);
3401 	}
3402 	if (arrowp->forever_true) {
3403 		indent();
3404 		out(O_ALTFP|O_VERB|O_NONL, "  Forever true constraint: ");
3405 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3406 			out(O_ALTFP|O_VERB|O_NONL, sep);
3407 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3408 			sep = ", ";
3409 		}
3410 		out(O_ALTFP|O_VERB, NULL);
3411 		return (1);
3412 	}
3413 
3414 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3415 		if (eval_expr(ctp->cnode, NULL, NULL,
3416 		    &fmep->globals, fmep->config,
3417 		    arrowp, 0, &value)) {
3418 			/* evaluation successful */
3419 			if (value.t == UNDEFINED || value.v == 0) {
3420 				/* known false */
3421 				arrowp->forever_false = 1;
3422 				indent();
3423 				out(O_ALTFP|O_VERB|O_NONL,
3424 				    "  False constraint: ");
3425 				ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3426 				out(O_ALTFP|O_VERB, NULL);
3427 				return (0);
3428 			}
3429 		} else {
3430 			/* evaluation unsuccessful -- unknown value */
3431 			indent();
3432 			out(O_ALTFP|O_VERB|O_NONL,
3433 			    "  Deferred constraint: ");
3434 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3435 			out(O_ALTFP|O_VERB, NULL);
3436 			return (1);
3437 		}
3438 	}
3439 	/* known true */
3440 	arrowp->forever_true = 1;
3441 	indent();
3442 	out(O_ALTFP|O_VERB|O_NONL, "  True constraint: ");
3443 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3444 		out(O_ALTFP|O_VERB|O_NONL, sep);
3445 		ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3446 		sep = ", ";
3447 	}
3448 	out(O_ALTFP|O_VERB, NULL);
3449 	return (1);
3450 }
3451 
3452 static int
3453 triggered(struct fme *fmep, struct event *ep, int mark)
3454 {
3455 	struct bubble *bp;
3456 	struct arrowlist *ap;
3457 	int count = 0;
3458 
3459 	stats_counter_bump(fmep->Tcallcount);
3460 	for (bp = itree_next_bubble(ep, NULL); bp;
3461 	    bp = itree_next_bubble(ep, bp)) {
3462 		if (bp->t != B_TO)
3463 			continue;
3464 		for (ap = itree_next_arrow(bp, NULL); ap;
3465 		    ap = itree_next_arrow(bp, ap)) {
3466 			/* check count of marks against K in the bubble */
3467 			if ((ap->arrowp->mark & mark) &&
3468 			    ++count >= bp->nork)
3469 				return (1);
3470 		}
3471 	}
3472 	return (0);
3473 }
3474 
3475 static int
3476 mark_arrows(struct fme *fmep, struct event *ep, int mark,
3477     unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
3478 {
3479 	struct bubble *bp;
3480 	struct arrowlist *ap;
3481 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3482 	unsigned long long my_delay;
3483 	enum fme_state result;
3484 	int retval = 0;
3485 
3486 	for (bp = itree_next_bubble(ep, NULL); bp;
3487 	    bp = itree_next_bubble(ep, bp)) {
3488 		if (bp->t != B_FROM)
3489 			continue;
3490 		stats_counter_bump(fmep->Marrowcount);
3491 		for (ap = itree_next_arrow(bp, NULL); ap;
3492 		    ap = itree_next_arrow(bp, ap)) {
3493 			struct event *ep2 = ap->arrowp->head->myevent;
3494 			/*
3495 			 * if we're clearing marks, we can avoid doing
3496 			 * all that work evaluating constraints.
3497 			 */
3498 			if (mark == 0) {
3499 				if (ap->arrowp->arrow_marked == 0)
3500 					continue;
3501 				ap->arrowp->arrow_marked = 0;
3502 				ap->arrowp->mark &= ~EFFECTS_COUNTER;
3503 				if (keep && (ep2->cached_state &
3504 				    (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
3505 					ep2->keep_in_tree = 1;
3506 				ep2->cached_state &=
3507 				    ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
3508 				(void) mark_arrows(fmep, ep2, mark, 0, NULL,
3509 				    keep);
3510 				continue;
3511 			}
3512 			ap->arrowp->arrow_marked = 1;
3513 			if (ep2->cached_state & REQMNTS_DISPROVED) {
3514 				indent();
3515 				out(O_ALTFP|O_VERB|O_NONL,
3516 				    "  ALREADY DISPROVED ");
3517 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3518 				out(O_ALTFP|O_VERB, NULL);
3519 				continue;
3520 			}
3521 			if (ep2->cached_state & WAIT_EFFECT) {
3522 				indent();
3523 				out(O_ALTFP|O_VERB|O_NONL,
3524 				    "  ALREADY EFFECTS WAIT ");
3525 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3526 				out(O_ALTFP|O_VERB, NULL);
3527 				continue;
3528 			}
3529 			if (ep2->cached_state & CREDIBLE_EFFECT) {
3530 				indent();
3531 				out(O_ALTFP|O_VERB|O_NONL,
3532 				    "  ALREADY EFFECTS CREDIBLE ");
3533 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3534 				out(O_ALTFP|O_VERB, NULL);
3535 				continue;
3536 			}
3537 			if ((ep2->cached_state & PARENT_WAIT) &&
3538 			    (mark & PARENT_WAIT)) {
3539 				indent();
3540 				out(O_ALTFP|O_VERB|O_NONL,
3541 				    "  ALREADY PARENT EFFECTS WAIT ");
3542 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3543 				out(O_ALTFP|O_VERB, NULL);
3544 				continue;
3545 			}
3546 			platform_set_payloadnvp(ep2->nvp);
3547 			if (checkconstraints(fmep, ap->arrowp) == 0) {
3548 				platform_set_payloadnvp(NULL);
3549 				indent();
3550 				out(O_ALTFP|O_VERB|O_NONL,
3551 				    "  CONSTRAINTS FAIL ");
3552 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3553 				out(O_ALTFP|O_VERB, NULL);
3554 				continue;
3555 			}
3556 			platform_set_payloadnvp(NULL);
3557 			ap->arrowp->mark |= EFFECTS_COUNTER;
3558 			if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
3559 				indent();
3560 				out(O_ALTFP|O_VERB|O_NONL,
3561 				    "  K-COUNT NOT YET MET ");
3562 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3563 				out(O_ALTFP|O_VERB, NULL);
3564 				continue;
3565 			}
3566 			ep2->cached_state &= ~PARENT_WAIT;
3567 			/*
3568 			 * if we've reached an ereport and no propagation time
3569 			 * is specified, use the Hesitate value
3570 			 */
3571 			if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
3572 			    ap->arrowp->maxdelay == 0ULL) {
3573 				out(O_ALTFP|O_VERB|O_NONL, "  default wait ");
3574 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3575 				out(O_ALTFP|O_VERB, NULL);
3576 				result = requirements_test(fmep, ep2, Hesitate,
3577 				    &my_delay);
3578 			} else {
3579 				result = requirements_test(fmep, ep2,
3580 				    at_latest_by + ap->arrowp->maxdelay,
3581 				    &my_delay);
3582 			}
3583 			if (result == FME_WAIT) {
3584 				retval = WAIT_EFFECT;
3585 				if (overall_delay > my_delay)
3586 					overall_delay = my_delay;
3587 				ep2->cached_state |= WAIT_EFFECT;
3588 				indent();
3589 				out(O_ALTFP|O_VERB|O_NONL, "  EFFECTS WAIT ");
3590 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3591 				out(O_ALTFP|O_VERB, NULL);
3592 				indent_push("  E");
3593 				if (mark_arrows(fmep, ep2, PARENT_WAIT,
3594 				    at_latest_by, &my_delay, 0) ==
3595 				    WAIT_EFFECT) {
3596 					retval = WAIT_EFFECT;
3597 					if (overall_delay > my_delay)
3598 						overall_delay = my_delay;
3599 				}
3600 				indent_pop();
3601 			} else if (result == FME_DISPROVED) {
3602 				indent();
3603 				out(O_ALTFP|O_VERB|O_NONL,
3604 				    "  EFFECTS DISPROVED ");
3605 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3606 				out(O_ALTFP|O_VERB, NULL);
3607 			} else {
3608 				ep2->cached_state |= mark;
3609 				indent();
3610 				if (mark == CREDIBLE_EFFECT)
3611 					out(O_ALTFP|O_VERB|O_NONL,
3612 					    "  EFFECTS CREDIBLE ");
3613 				else
3614 					out(O_ALTFP|O_VERB|O_NONL,
3615 					    "  PARENT EFFECTS WAIT ");
3616 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3617 				out(O_ALTFP|O_VERB, NULL);
3618 				indent_push("  E");
3619 				if (mark_arrows(fmep, ep2, mark, at_latest_by,
3620 				    &my_delay, 0) == WAIT_EFFECT) {
3621 					retval = WAIT_EFFECT;
3622 					if (overall_delay > my_delay)
3623 						overall_delay = my_delay;
3624 				}
3625 				indent_pop();
3626 			}
3627 		}
3628 	}
3629 	if (retval == WAIT_EFFECT)
3630 		*pdelay = overall_delay;
3631 	return (retval);
3632 }
3633 
3634 static enum fme_state
3635 effects_test(struct fme *fmep, struct event *fault_event,
3636     unsigned long long at_latest_by, unsigned long long *pdelay)
3637 {
3638 	struct event *error_event;
3639 	enum fme_state return_value = FME_CREDIBLE;
3640 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3641 	unsigned long long my_delay;
3642 
3643 	stats_counter_bump(fmep->Ecallcount);
3644 	indent_push("  E");
3645 	indent();
3646 	out(O_ALTFP|O_VERB|O_NONL, "->");
3647 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3648 	out(O_ALTFP|O_VERB, NULL);
3649 
3650 	if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
3651 	    &my_delay, 0) == WAIT_EFFECT) {
3652 		return_value = FME_WAIT;
3653 		if (overall_delay > my_delay)
3654 			overall_delay = my_delay;
3655 	}
3656 	for (error_event = fmep->observations;
3657 	    error_event; error_event = error_event->observations) {
3658 		indent();
3659 		out(O_ALTFP|O_VERB|O_NONL, " ");
3660 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
3661 		if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
3662 			if (error_event->cached_state &
3663 			    (PARENT_WAIT|WAIT_EFFECT)) {
3664 				out(O_ALTFP|O_VERB, " NOT YET triggered");
3665 				continue;
3666 			}
3667 			return_value = FME_DISPROVED;
3668 			out(O_ALTFP|O_VERB, " NOT triggered");
3669 			break;
3670 		} else {
3671 			out(O_ALTFP|O_VERB, " triggered");
3672 		}
3673 	}
3674 	if (return_value == FME_DISPROVED) {
3675 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
3676 	} else {
3677 		fault_event->keep_in_tree = 1;
3678 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
3679 	}
3680 
3681 	indent();
3682 	out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
3683 	    fme_state2str(return_value));
3684 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3685 	out(O_ALTFP|O_VERB, NULL);
3686 	indent_pop();
3687 	if (return_value == FME_WAIT)
3688 		*pdelay = overall_delay;
3689 	return (return_value);
3690 }
3691 
3692 static enum fme_state
3693 requirements_test(struct fme *fmep, struct event *ep,
3694     unsigned long long at_latest_by, unsigned long long *pdelay)
3695 {
3696 	int waiting_events;
3697 	int credible_events;
3698 	int deferred_events;
3699 	enum fme_state return_value = FME_CREDIBLE;
3700 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3701 	unsigned long long arrow_delay;
3702 	unsigned long long my_delay;
3703 	struct event *ep2;
3704 	struct bubble *bp;
3705 	struct arrowlist *ap;
3706 
3707 	if (ep->cached_state & REQMNTS_CREDIBLE) {
3708 		indent();
3709 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY CREDIBLE ");
3710 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3711 		out(O_ALTFP|O_VERB, NULL);
3712 		return (FME_CREDIBLE);
3713 	}
3714 	if (ep->cached_state & REQMNTS_DISPROVED) {
3715 		indent();
3716 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY DISPROVED ");
3717 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3718 		out(O_ALTFP|O_VERB, NULL);
3719 		return (FME_DISPROVED);
3720 	}
3721 	if (ep->cached_state & REQMNTS_WAIT) {
3722 		indent();
3723 		*pdelay = ep->cached_delay;
3724 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY WAIT ");
3725 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3726 		out(O_ALTFP|O_VERB|O_NONL, ", wait for: ");
3727 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3728 		out(O_ALTFP|O_VERB, NULL);
3729 		return (FME_WAIT);
3730 	}
3731 	stats_counter_bump(fmep->Rcallcount);
3732 	indent_push("  R");
3733 	indent();
3734 	out(O_ALTFP|O_VERB|O_NONL, "->");
3735 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3736 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
3737 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3738 	out(O_ALTFP|O_VERB, NULL);
3739 
3740 	if (ep->t == N_EREPORT) {
3741 		if (ep->count == 0) {
3742 			if (fmep->pull >= at_latest_by) {
3743 				return_value = FME_DISPROVED;
3744 			} else {
3745 				ep->cached_delay = *pdelay = at_latest_by;
3746 				return_value = FME_WAIT;
3747 			}
3748 		}
3749 
3750 		indent();
3751 		switch (return_value) {
3752 		case FME_CREDIBLE:
3753 			ep->cached_state |= REQMNTS_CREDIBLE;
3754 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE ");
3755 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3756 			break;
3757 		case FME_DISPROVED:
3758 			ep->cached_state |= REQMNTS_DISPROVED;
3759 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
3760 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3761 			break;
3762 		case FME_WAIT:
3763 			ep->cached_state |= REQMNTS_WAIT;
3764 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT ");
3765 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3766 			out(O_ALTFP|O_VERB|O_NONL, " to ");
3767 			ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3768 			break;
3769 		default:
3770 			out(O_DIE, "requirements_test: unexpected fme_state");
3771 			break;
3772 		}
3773 		out(O_ALTFP|O_VERB, NULL);
3774 		indent_pop();
3775 
3776 		return (return_value);
3777 	}
3778 
3779 	/* this event is not a report, descend the tree */
3780 	for (bp = itree_next_bubble(ep, NULL); bp;
3781 	    bp = itree_next_bubble(ep, bp)) {
3782 		int n;
3783 
3784 		if (bp->t != B_FROM)
3785 			continue;
3786 
3787 		n = bp->nork;
3788 
3789 		credible_events = 0;
3790 		waiting_events = 0;
3791 		deferred_events = 0;
3792 		arrow_delay = TIMEVAL_EVENTUALLY;
3793 		/*
3794 		 * n is -1 for 'A' so adjust it.
3795 		 * XXX just count up the arrows for now.
3796 		 */
3797 		if (n < 0) {
3798 			n = 0;
3799 			for (ap = itree_next_arrow(bp, NULL); ap;
3800 			    ap = itree_next_arrow(bp, ap))
3801 				n++;
3802 			indent();
3803 			out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n);
3804 		} else {
3805 			indent();
3806 			out(O_ALTFP|O_VERB, " Bubble N=%d", n);
3807 		}
3808 
3809 		if (n == 0)
3810 			continue;
3811 		if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) {
3812 			for (ap = itree_next_arrow(bp, NULL); ap;
3813 			    ap = itree_next_arrow(bp, ap)) {
3814 				ep2 = ap->arrowp->head->myevent;
3815 				platform_set_payloadnvp(ep2->nvp);
3816 				(void) checkconstraints(fmep, ap->arrowp);
3817 				if (ap->arrowp->forever_true) {
3818 					/*
3819 					 * if all arrows are invalidated by the
3820 					 * constraints, then we should elide the
3821 					 * whole bubble to be consistant with
3822 					 * the tree creation time behaviour
3823 					 */
3824 					bp->mark |= BUBBLE_OK;
3825 					platform_set_payloadnvp(NULL);
3826 					break;
3827 				}
3828 				platform_set_payloadnvp(NULL);
3829 			}
3830 		}
3831 		for (ap = itree_next_arrow(bp, NULL); ap;
3832 		    ap = itree_next_arrow(bp, ap)) {
3833 			ep2 = ap->arrowp->head->myevent;
3834 			if (n <= credible_events)
3835 				break;
3836 
3837 			ap->arrowp->mark |= REQMNTS_COUNTER;
3838 			if (triggered(fmep, ep2, REQMNTS_COUNTER))
3839 				/* XXX adding max timevals! */
3840 				switch (requirements_test(fmep, ep2,
3841 				    at_latest_by + ap->arrowp->maxdelay,
3842 				    &my_delay)) {
3843 				case FME_DEFERRED:
3844 					deferred_events++;
3845 					break;
3846 				case FME_CREDIBLE:
3847 					credible_events++;
3848 					break;
3849 				case FME_DISPROVED:
3850 					break;
3851 				case FME_WAIT:
3852 					if (my_delay < arrow_delay)
3853 						arrow_delay = my_delay;
3854 					waiting_events++;
3855 					break;
3856 				default:
3857 					out(O_DIE,
3858 					"Bug in requirements_test.");
3859 				}
3860 			else
3861 				deferred_events++;
3862 		}
3863 		if (!(bp->mark & BUBBLE_OK) && waiting_events == 0) {
3864 			bp->mark |= BUBBLE_ELIDED;
3865 			continue;
3866 		}
3867 		indent();
3868 		out(O_ALTFP|O_VERB, " Credible: %d Waiting %d",
3869 		    credible_events + deferred_events, waiting_events);
3870 		if (credible_events + deferred_events + waiting_events < n) {
3871 			/* Can never meet requirements */
3872 			ep->cached_state |= REQMNTS_DISPROVED;
3873 			indent();
3874 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
3875 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3876 			out(O_ALTFP|O_VERB, NULL);
3877 			indent_pop();
3878 			return (FME_DISPROVED);
3879 		}
3880 		if (credible_events + deferred_events < n) {
3881 			/* will have to wait */
3882 			/* wait time is shortest known */
3883 			if (arrow_delay < overall_delay)
3884 				overall_delay = arrow_delay;
3885 			return_value = FME_WAIT;
3886 		} else if (credible_events < n) {
3887 			if (return_value != FME_WAIT)
3888 				return_value = FME_DEFERRED;
3889 		}
3890 	}
3891 
3892 	/*
3893 	 * don't mark as FME_DEFERRED. If this event isn't reached by another
3894 	 * path, then this will be considered FME_CREDIBLE. But if it is
3895 	 * reached by a different path so the K-count is met, then might
3896 	 * get overridden by FME_WAIT or FME_DISPROVED.
3897 	 */
3898 	if (return_value == FME_WAIT) {
3899 		ep->cached_state |= REQMNTS_WAIT;
3900 		ep->cached_delay = *pdelay = overall_delay;
3901 	} else if (return_value == FME_CREDIBLE) {
3902 		ep->cached_state |= REQMNTS_CREDIBLE;
3903 	}
3904 	indent();
3905 	out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ",
3906 	    fme_state2str(return_value));
3907 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3908 	out(O_ALTFP|O_VERB, NULL);
3909 	indent_pop();
3910 	return (return_value);
3911 }
3912 
3913 static enum fme_state
3914 causes_test(struct fme *fmep, struct event *ep,
3915     unsigned long long at_latest_by, unsigned long long *pdelay)
3916 {
3917 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3918 	unsigned long long my_delay;
3919 	int credible_results = 0;
3920 	int waiting_results = 0;
3921 	enum fme_state fstate;
3922 	struct event *tail_event;
3923 	struct bubble *bp;
3924 	struct arrowlist *ap;
3925 	int k = 1;
3926 
3927 	stats_counter_bump(fmep->Ccallcount);
3928 	indent_push("  C");
3929 	indent();
3930 	out(O_ALTFP|O_VERB|O_NONL, "->");
3931 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3932 	out(O_ALTFP|O_VERB, NULL);
3933 
3934 	for (bp = itree_next_bubble(ep, NULL); bp;
3935 	    bp = itree_next_bubble(ep, bp)) {
3936 		if (bp->t != B_TO)
3937 			continue;
3938 		k = bp->nork;	/* remember the K value */
3939 		for (ap = itree_next_arrow(bp, NULL); ap;
3940 		    ap = itree_next_arrow(bp, ap)) {
3941 			int do_not_follow = 0;
3942 
3943 			/*
3944 			 * if we get to the same event multiple times
3945 			 * only worry about the first one.
3946 			 */
3947 			if (ap->arrowp->tail->myevent->cached_state &
3948 			    CAUSES_TESTED) {
3949 				indent();
3950 				out(O_ALTFP|O_VERB|O_NONL,
3951 				    "  causes test already run for ");
3952 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
3953 				    ap->arrowp->tail->myevent);
3954 				out(O_ALTFP|O_VERB, NULL);
3955 				continue;
3956 			}
3957 
3958 			/*
3959 			 * see if false constraint prevents us
3960 			 * from traversing this arrow
3961 			 */
3962 			platform_set_payloadnvp(ep->nvp);
3963 			if (checkconstraints(fmep, ap->arrowp) == 0)
3964 				do_not_follow = 1;
3965 			platform_set_payloadnvp(NULL);
3966 			if (do_not_follow) {
3967 				indent();
3968 				out(O_ALTFP|O_VERB|O_NONL,
3969 				    "  False arrow from ");
3970 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
3971 				    ap->arrowp->tail->myevent);
3972 				out(O_ALTFP|O_VERB, NULL);
3973 				continue;
3974 			}
3975 
3976 			ap->arrowp->tail->myevent->cached_state |=
3977 			    CAUSES_TESTED;
3978 			tail_event = ap->arrowp->tail->myevent;
3979 			fstate = hypothesise(fmep, tail_event, at_latest_by,
3980 			    &my_delay);
3981 
3982 			switch (fstate) {
3983 			case FME_WAIT:
3984 				if (my_delay < overall_delay)
3985 					overall_delay = my_delay;
3986 				waiting_results++;
3987 				break;
3988 			case FME_CREDIBLE:
3989 				credible_results++;
3990 				break;
3991 			case FME_DISPROVED:
3992 				break;
3993 			default:
3994 				out(O_DIE, "Bug in causes_test");
3995 			}
3996 		}
3997 	}
3998 	/* compare against K */
3999 	if (credible_results + waiting_results < k) {
4000 		indent();
4001 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED ");
4002 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4003 		out(O_ALTFP|O_VERB, NULL);
4004 		indent_pop();
4005 		return (FME_DISPROVED);
4006 	}
4007 	if (waiting_results != 0) {
4008 		*pdelay = overall_delay;
4009 		indent();
4010 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT ");
4011 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4012 		out(O_ALTFP|O_VERB|O_NONL, " to ");
4013 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4014 		out(O_ALTFP|O_VERB, NULL);
4015 		indent_pop();
4016 		return (FME_WAIT);
4017 	}
4018 	indent();
4019 	out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE ");
4020 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4021 	out(O_ALTFP|O_VERB, NULL);
4022 	indent_pop();
4023 	return (FME_CREDIBLE);
4024 }
4025 
4026 static enum fme_state
4027 hypothesise(struct fme *fmep, struct event *ep,
4028 	unsigned long long at_latest_by, unsigned long long *pdelay)
4029 {
4030 	enum fme_state rtr, otr;
4031 	unsigned long long my_delay;
4032 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4033 
4034 	stats_counter_bump(fmep->Hcallcount);
4035 	indent_push("  H");
4036 	indent();
4037 	out(O_ALTFP|O_VERB|O_NONL, "->");
4038 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4039 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
4040 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4041 	out(O_ALTFP|O_VERB, NULL);
4042 
4043 	rtr = requirements_test(fmep, ep, at_latest_by, &my_delay);
4044 	if ((rtr == FME_WAIT) && (my_delay < overall_delay))
4045 		overall_delay = my_delay;
4046 	if (rtr != FME_DISPROVED) {
4047 		if (is_problem(ep->t)) {
4048 			otr = effects_test(fmep, ep, at_latest_by, &my_delay);
4049 			if (otr != FME_DISPROVED) {
4050 				if (fmep->peek == 0 && ep->is_suspect == 0) {
4051 					ep->suspects = fmep->suspects;
4052 					ep->is_suspect = 1;
4053 					fmep->suspects = ep;
4054 					fmep->nsuspects++;
4055 					if (!is_fault(ep->t))
4056 						fmep->nonfault++;
4057 				}
4058 			}
4059 		} else
4060 			otr = causes_test(fmep, ep, at_latest_by, &my_delay);
4061 		if ((otr == FME_WAIT) && (my_delay < overall_delay))
4062 			overall_delay = my_delay;
4063 		if ((otr != FME_DISPROVED) &&
4064 		    ((rtr == FME_WAIT) || (otr == FME_WAIT)))
4065 			*pdelay = overall_delay;
4066 	}
4067 	if (rtr == FME_DISPROVED) {
4068 		indent();
4069 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4070 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4071 		out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
4072 		indent_pop();
4073 		return (FME_DISPROVED);
4074 	}
4075 	if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
4076 		indent();
4077 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4078 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4079 		out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
4080 		indent_pop();
4081 		return (FME_DISPROVED);
4082 	}
4083 	if (otr == FME_DISPROVED) {
4084 		indent();
4085 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4086 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4087 		out(O_ALTFP|O_VERB, " (causes are not credible)");
4088 		indent_pop();
4089 		return (FME_DISPROVED);
4090 	}
4091 	if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
4092 		indent();
4093 		out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
4094 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4095 		out(O_ALTFP|O_VERB|O_NONL, " to ");
4096 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
4097 		out(O_ALTFP|O_VERB, NULL);
4098 		indent_pop();
4099 		return (FME_WAIT);
4100 	}
4101 	indent();
4102 	out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
4103 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4104 	out(O_ALTFP|O_VERB, NULL);
4105 	indent_pop();
4106 	return (FME_CREDIBLE);
4107 }
4108 
4109 /*
4110  * fme_istat_load -- reconstitute any persistent istats
4111  */
4112 void
4113 fme_istat_load(fmd_hdl_t *hdl)
4114 {
4115 	int sz;
4116 	char *sbuf;
4117 	char *ptr;
4118 
4119 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) {
4120 		out(O_ALTFP, "fme_istat_load: No stats");
4121 		return;
4122 	}
4123 
4124 	sbuf = alloca(sz);
4125 
4126 	fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz);
4127 
4128 	/*
4129 	 * pick apart the serialized stats
4130 	 *
4131 	 * format is:
4132 	 *	<class-name>, '@', <path>, '\0', <value>, '\0'
4133 	 * for example:
4134 	 *	"stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
4135 	 *
4136 	 * since this is parsing our own serialized data, any parsing issues
4137 	 * are fatal, so we check for them all with ASSERT() below.
4138 	 */
4139 	ptr = sbuf;
4140 	while (ptr < &sbuf[sz]) {
4141 		char *sepptr;
4142 		struct node *np;
4143 		int val;
4144 
4145 		sepptr = strchr(ptr, '@');
4146 		ASSERT(sepptr != NULL);
4147 		*sepptr = '\0';
4148 
4149 		/* construct the event */
4150 		np = newnode(T_EVENT, NULL, 0);
4151 		np->u.event.ename = newnode(T_NAME, NULL, 0);
4152 		np->u.event.ename->u.name.t = N_STAT;
4153 		np->u.event.ename->u.name.s = stable(ptr);
4154 		np->u.event.ename->u.name.it = IT_ENAME;
4155 		np->u.event.ename->u.name.last = np->u.event.ename;
4156 
4157 		ptr = sepptr + 1;
4158 		ASSERT(ptr < &sbuf[sz]);
4159 		ptr += strlen(ptr);
4160 		ptr++;	/* move past the '\0' separating path from value */
4161 		ASSERT(ptr < &sbuf[sz]);
4162 		ASSERT(isdigit(*ptr));
4163 		val = atoi(ptr);
4164 		ASSERT(val > 0);
4165 		ptr += strlen(ptr);
4166 		ptr++;	/* move past the final '\0' for this entry */
4167 
4168 		np->u.event.epname = pathstring2epnamenp(sepptr + 1);
4169 		ASSERT(np->u.event.epname != NULL);
4170 
4171 		istat_bump(np, val);
4172 		tree_free(np);
4173 	}
4174 
4175 	istat_save();
4176 }
4177