xref: /titanic_50/usr/src/cmd/fm/modules/common/eversholt/fme.c (revision d3a1459128b677cee1a84512ca49eef4bffd392d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * fme.c -- fault management exercise module
27  *
28  * this module provides the simulated fault management exercise.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <strings.h>
37 #include <ctype.h>
38 #include <alloca.h>
39 #include <libnvpair.h>
40 #include <sys/fm/protocol.h>
41 #include <fm/fmd_api.h>
42 #include "alloc.h"
43 #include "out.h"
44 #include "stats.h"
45 #include "stable.h"
46 #include "literals.h"
47 #include "lut.h"
48 #include "tree.h"
49 #include "ptree.h"
50 #include "itree.h"
51 #include "ipath.h"
52 #include "fme.h"
53 #include "evnv.h"
54 #include "eval.h"
55 #include "config.h"
56 #include "platform.h"
57 #include "esclex.h"
58 
/* imported from eft.c... */
extern hrtime_t Hesitate;
extern char *Serd_Override;
/* nv allocator handle passed to nvlist_xpack()/nvlist_xunpack() below */
extern nv_alloc_t Eft_nv_hdl;
extern int Max_fme;
extern fmd_hdl_t *Hdl;

/* NOTE(review): presumably "dirty" flags for istat_save()/serd_save() */
static int Istat_need_save;
static int Serd_need_save;
void istat_save(void);
void serd_save(void);

/* fme under construction is global so we can free it on module abort */
static struct fme *Nfmep;

/*
 * One of the UD_* reason strings, recorded just before a failure path
 * is taken.  fme_restart() attaches it to the undiagnosable defect.
 */
static const char *Undiag_reason;

/*
 * Id handed to the next FME created by newfme().  fme_restart() bumps
 * it past the id of any FME it replays.
 */
static int Nextid = 0;

static int Open_fme_count = 0;	/* Count of open FMEs */
79 
/*
 * list of fault management exercises underway
 *
 * FMElist is the head and EFMElist the tail of the list fme_ready()
 * appends to.  ClosedFMEs is a second list of FMEs; both lists are
 * torn down by fme_fini().
 */
static struct fme {
	struct fme *next;		/* next exercise */
	unsigned long long ull;		/* time when fme was created */
	int id;				/* FME id */
	struct config *config;		/* cooked configuration data */
	struct lut *eventtree;		/* propagation tree for this FME */
	/*
	 * The initial error report that created this FME is kept in
	 * two forms.  e0 points to the instance tree node and is used
	 * by fme_eval() as the starting point for the inference
	 * algorithm.  e0r is the event handle FMD passed to us when
	 * the ereport first arrived and is used when setting timers,
	 * which are always relative to the time of this initial
	 * report.
	 */
	struct event *e0;
	fmd_event_t *e0r;

	id_t    timer;			/* for setting an fmd time-out */

	struct event *ecurrent;		/* ereport under consideration */
	struct event *suspects;		/* current suspect list */
	struct event *psuspects;	/* previous suspect list */
	int nsuspects;			/* count of suspects */
	int nonfault;			/* zero if all suspects T_FAULT */
	int posted_suspects;		/* true if we've posted a diagnosis */
	int uniqobs;			/* number of unique events observed */
	int peek;			/* just peeking, don't track suspects */
	int overflow;			/* true if overflow FME */
	/* values start at 5000; fme_state2str() maps them to names */
	enum fme_state {
		FME_NOTHING = 5000,	/* not evaluated yet */
		FME_WAIT,		/* need to wait for more info */
		FME_CREDIBLE,		/* suspect list is credible */
		FME_DISPROVED,		/* no valid suspects found */
		FME_DEFERRED		/* don't know yet (k-count not met) */
	} state;

	unsigned long long pull;	/* time passed since created */
	unsigned long long wull;	/* wait until this time for re-eval */
	struct event *observations;	/* observation list */
	struct lut *globals;		/* values of global variables */
	/* fmd interfacing */
	fmd_hdl_t *hdl;			/* handle for talking with fmd */
	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
	/* stats (descriptions are those registered by fme_ready()) */
	struct stats *Rcount;		/* ereports received */
	struct stats *Hcallcount;	/* calls to hypothesise() */
	struct stats *Rcallcount;	/* calls to requirements_test() */
	struct stats *Ccallcount;	/* calls to causes_test() */
	struct stats *Ecallcount;	/* calls to effects_test() */
	struct stats *Tcallcount;	/* calls to triggered() */
	struct stats *Marrowcount;	/* arrows marked by mark_arrows() */
	struct stats *diags;		/* suspect lists diagnosed */
} *FMElist, *EFMElist, *ClosedFMEs;
135 
/*
 * Singly-linked list of fmd cases that could not be restarted.
 * fme_restart() pushes onto the head; fme_fini() frees the nodes.
 */
static struct case_list {
	fmd_case_t *fmcase;		/* the case that could not be replayed */
	struct case_list *next;		/* next entry, or NULL */
} *Undiagablecaselist;
140 
/* forward declarations for file-local functions */
static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
	unsigned long long at_latest_by, unsigned long long *pdelay);
static struct node *eventprop_lookup(struct event *ep, const char *propname);
static struct node *pathstring2epnamenp(char *path);
static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
	fmd_case_t *fmcase);
static void restore_suspects(struct fme *fmep);
static void save_suspects(struct fme *fmep);
static void destroy_fme(struct fme *f);
static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
    const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
/* istat and SERD callbacks */
static void istat_counter_reset_cb(struct istat_entry *entp,
    struct stats *statp, const struct ipath *ipp);
static void istat_counter_topo_chg_cb(struct istat_entry *entp,
    struct stats *statp, void *unused);
static void serd_reset_cb(struct serd_entry *entp, void *unused,
    const struct ipath *ipp);
static void serd_topo_chg_cb(struct serd_entry *entp, void *unused,
    void *unused2);
static void destroy_fme_bufs(struct fme *fp);
162 
163 static struct fme *
164 alloc_fme(void)
165 {
166 	struct fme *fmep;
167 
168 	fmep = MALLOC(sizeof (*fmep));
169 	bzero(fmep, sizeof (*fmep));
170 	return (fmep);
171 }
172 
173 /*
174  * fme_ready -- called when all initialization of the FME (except for
175  *	stats) has completed successfully.  Adds the fme to global lists
176  *	and establishes its stats.
177  */
178 static struct fme *
179 fme_ready(struct fme *fmep)
180 {
181 	char nbuf[100];
182 
183 	Nfmep = NULL;	/* don't need to free this on module abort now */
184 
185 	if (EFMElist) {
186 		EFMElist->next = fmep;
187 		EFMElist = fmep;
188 	} else
189 		FMElist = EFMElist = fmep;
190 
191 	(void) sprintf(nbuf, "fme%d.Rcount", fmep->id);
192 	fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
193 	(void) sprintf(nbuf, "fme%d.Hcall", fmep->id);
194 	fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1);
195 	(void) sprintf(nbuf, "fme%d.Rcall", fmep->id);
196 	fmep->Rcallcount = stats_new_counter(nbuf,
197 	    "calls to requirements_test()", 1);
198 	(void) sprintf(nbuf, "fme%d.Ccall", fmep->id);
199 	fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1);
200 	(void) sprintf(nbuf, "fme%d.Ecall", fmep->id);
201 	fmep->Ecallcount =
202 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
203 	(void) sprintf(nbuf, "fme%d.Tcall", fmep->id);
204 	fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
205 	(void) sprintf(nbuf, "fme%d.Marrow", fmep->id);
206 	fmep->Marrowcount = stats_new_counter(nbuf,
207 	    "arrows marked by mark_arrows()", 1);
208 	(void) sprintf(nbuf, "fme%d.diags", fmep->id);
209 	fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
210 
211 	out(O_ALTFP|O_VERB2, "newfme: config snapshot contains...");
212 	config_print(O_ALTFP|O_VERB2, fmep->config);
213 
214 	return (fmep);
215 }
216 
217 extern void ipath_dummy_lut(struct arrow *);
218 extern struct lut *itree_create_dummy(const char *, const struct ipath *);
219 
220 /* ARGSUSED */
221 static void
222 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
223 {
224 	struct bubble *bp;
225 	struct arrowlist *ap;
226 
227 	for (bp = itree_next_bubble(ep, NULL); bp;
228 	    bp = itree_next_bubble(ep, bp)) {
229 		if (bp->t != B_FROM)
230 			continue;
231 		for (ap = itree_next_arrow(bp, NULL); ap;
232 		    ap = itree_next_arrow(bp, ap)) {
233 			ap->arrowp->pnode->u.arrow.needed = 1;
234 			ipath_dummy_lut(ap->arrowp);
235 		}
236 	}
237 }
238 
239 /* ARGSUSED */
240 static void
241 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
242 {
243 	struct bubble *bp;
244 	struct arrowlist *ap;
245 
246 	for (bp = itree_next_bubble(ep, NULL); bp;
247 	    bp = itree_next_bubble(ep, bp)) {
248 		if (bp->t != B_FROM)
249 			continue;
250 		for (ap = itree_next_arrow(bp, NULL); ap;
251 		    ap = itree_next_arrow(bp, ap))
252 			ap->arrowp->pnode->u.arrow.needed = 0;
253 	}
254 }
255 
256 static void globals_destructor(void *left, void *right, void *arg);
257 static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep);
258 
259 static void
260 prune_propagations(const char *e0class, const struct ipath *e0ipp)
261 {
262 	char nbuf[100];
263 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
264 	extern struct lut *Usednames;
265 
266 	Nfmep = alloc_fme();
267 	Nfmep->id = Nextid;
268 	Nfmep->state = FME_NOTHING;
269 	Nfmep->eventtree = itree_create_dummy(e0class, e0ipp);
270 	if ((Nfmep->e0 =
271 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
272 		out(O_ALTFP, "prune_propagations: e0 not in instance tree");
273 		itree_free(Nfmep->eventtree);
274 		FREE(Nfmep);
275 		Nfmep = NULL;
276 		return;
277 	}
278 	Nfmep->ecurrent = Nfmep->observations = Nfmep->e0;
279 	Nfmep->e0->count++;
280 
281 	(void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id);
282 	Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
283 	(void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id);
284 	Nfmep->Hcallcount =
285 	    stats_new_counter(nbuf, "calls to hypothesise()", 1);
286 	(void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id);
287 	Nfmep->Rcallcount = stats_new_counter(nbuf,
288 	    "calls to requirements_test()", 1);
289 	(void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id);
290 	Nfmep->Ccallcount =
291 	    stats_new_counter(nbuf, "calls to causes_test()", 1);
292 	(void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id);
293 	Nfmep->Ecallcount =
294 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
295 	(void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id);
296 	Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
297 	(void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id);
298 	Nfmep->Marrowcount = stats_new_counter(nbuf,
299 	    "arrows marked by mark_arrows()", 1);
300 	(void) sprintf(nbuf, "fme%d.diags", Nfmep->id);
301 	Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
302 
303 	Nfmep->peek = 1;
304 	lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep);
305 	lut_free(Usednames, NULL, NULL);
306 	Usednames = NULL;
307 	lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep);
308 	(void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay);
309 	itree_prune(Nfmep->eventtree);
310 	lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep);
311 
312 	stats_delete(Nfmep->Rcount);
313 	stats_delete(Nfmep->Hcallcount);
314 	stats_delete(Nfmep->Rcallcount);
315 	stats_delete(Nfmep->Ccallcount);
316 	stats_delete(Nfmep->Ecallcount);
317 	stats_delete(Nfmep->Tcallcount);
318 	stats_delete(Nfmep->Marrowcount);
319 	stats_delete(Nfmep->diags);
320 	itree_free(Nfmep->eventtree);
321 	lut_free(Nfmep->globals, globals_destructor, NULL);
322 	FREE(Nfmep);
323 }
324 
325 static struct fme *
326 newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl,
327 	fmd_case_t *fmcase)
328 {
329 	struct cfgdata *cfgdata;
330 	int init_size;
331 	extern int alloc_total();
332 
333 	init_size = alloc_total();
334 	out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size);
335 	if ((cfgdata = config_snapshot()) == NULL) {
336 		out(O_ALTFP, "newfme: NULL configuration");
337 		Undiag_reason = UD_NOCONF;
338 		return (NULL);
339 	}
340 	platform_save_config(hdl, fmcase);
341 	out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes",
342 	    alloc_total() - init_size);
343 
344 	Nfmep = alloc_fme();
345 
346 	Nfmep->id = Nextid++;
347 	Nfmep->config = cfgdata->cooked;
348 	config_free(cfgdata);
349 	Nfmep->posted_suspects = 0;
350 	Nfmep->uniqobs = 0;
351 	Nfmep->state = FME_NOTHING;
352 	Nfmep->pull = 0ULL;
353 	Nfmep->overflow = 0;
354 
355 	Nfmep->fmcase = fmcase;
356 	Nfmep->hdl = hdl;
357 
358 	if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) {
359 		out(O_ALTFP, "newfme: NULL instance tree");
360 		Undiag_reason = UD_INSTFAIL;
361 		structconfig_free(Nfmep->config);
362 		destroy_fme_bufs(Nfmep);
363 		FREE(Nfmep);
364 		Nfmep = NULL;
365 		return (NULL);
366 	}
367 
368 	itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);
369 
370 	if ((Nfmep->e0 =
371 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
372 		out(O_ALTFP, "newfme: e0 not in instance tree");
373 		Undiag_reason = UD_BADEVENTI;
374 		itree_free(Nfmep->eventtree);
375 		structconfig_free(Nfmep->config);
376 		destroy_fme_bufs(Nfmep);
377 		FREE(Nfmep);
378 		Nfmep = NULL;
379 		return (NULL);
380 	}
381 
382 	return (fme_ready(Nfmep));
383 }
384 
385 void
386 fme_fini(void)
387 {
388 	struct fme *sfp, *fp;
389 	struct case_list *ucasep, *nextcasep;
390 
391 	ucasep = Undiagablecaselist;
392 	while (ucasep != NULL) {
393 		nextcasep = ucasep->next;
394 		FREE(ucasep);
395 		ucasep = nextcasep;
396 	}
397 	Undiagablecaselist = NULL;
398 
399 	/* clean up closed fmes */
400 	fp = ClosedFMEs;
401 	while (fp != NULL) {
402 		sfp = fp->next;
403 		destroy_fme(fp);
404 		fp = sfp;
405 	}
406 	ClosedFMEs = NULL;
407 
408 	fp = FMElist;
409 	while (fp != NULL) {
410 		sfp = fp->next;
411 		destroy_fme(fp);
412 		fp = sfp;
413 	}
414 	FMElist = EFMElist = NULL;
415 
416 	/* if we were in the middle of creating an fme, free it now */
417 	if (Nfmep) {
418 		destroy_fme(Nfmep);
419 		Nfmep = NULL;
420 	}
421 }
422 
/*
 * Space allocated for an observation buffer name.  The longest name
 * built in this file is "observed%d.nvp": 12 fixed characters plus the
 * terminating NUL leave room for 7 digits, so 20 bytes allows for
 * a ridiculous 9,999,999 unique observations.
 */
#define	OBBUFNMSZ 20
428 
429 /*
430  *  serialize_observation
431  *
432  *  Create a recoverable version of the current observation
433  *  (f->ecurrent).  We keep a serialized version of each unique
434  *  observation in order that we may resume correctly the fme in the
435  *  correct state if eft or fmd crashes and we're restarted.
436  */
437 static void
438 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
439 {
440 	size_t pkdlen;
441 	char tmpbuf[OBBUFNMSZ];
442 	char *pkd = NULL;
443 	char *estr;
444 
445 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
446 	estr = ipath2str(cls, ipp);
447 	fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
448 	fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
449 	    strlen(estr) + 1);
450 	FREE(estr);
451 
452 	if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
453 		(void) snprintf(tmpbuf,
454 		    OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
455 		if (nvlist_xpack(fp->ecurrent->nvp,
456 		    &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
457 			out(O_DIE|O_SYS, "pack of observed nvl failed");
458 		fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
459 		fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
460 		FREE(pkd);
461 	}
462 
463 	fp->uniqobs++;
464 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
465 	    sizeof (fp->uniqobs));
466 }
467 
468 /*
469  *  init_fme_bufs -- We keep several bits of state about an fme for
470  *	use if eft or fmd crashes and we're restarted.
471  */
472 static void
473 init_fme_bufs(struct fme *fp)
474 {
475 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
476 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
477 	    sizeof (fp->pull));
478 
479 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
480 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
481 	    sizeof (fp->id));
482 
483 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
484 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
485 	    sizeof (fp->uniqobs));
486 
487 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
488 	    sizeof (fp->posted_suspects));
489 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
490 	    (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
491 }
492 
493 static void
494 destroy_fme_bufs(struct fme *fp)
495 {
496 	char tmpbuf[OBBUFNMSZ];
497 	int o;
498 
499 	platform_restore_config(fp->hdl, fp->fmcase);
500 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
501 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
502 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
503 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
504 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
505 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);
506 
507 	for (o = 0; o < fp->uniqobs; o++) {
508 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
509 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
510 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
511 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
512 	}
513 }
514 
515 /*
516  * reconstitute_observations -- convert a case's serialized observations
517  *	back into struct events.  Returns zero if all observations are
518  *	successfully reconstituted.
519  */
520 static int
521 reconstitute_observations(struct fme *fmep)
522 {
523 	struct event *ep;
524 	struct node *epnamenp = NULL;
525 	size_t pkdlen;
526 	char *pkd = NULL;
527 	char *tmpbuf = alloca(OBBUFNMSZ);
528 	char *sepptr;
529 	char *estr;
530 	int ocnt;
531 	int elen;
532 
533 	for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
534 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
535 		elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
536 		if (elen == 0) {
537 			out(O_ALTFP,
538 			    "reconstitute_observation: no %s buffer found.",
539 			    tmpbuf);
540 			Undiag_reason = UD_MISSINGOBS;
541 			break;
542 		}
543 
544 		estr = MALLOC(elen);
545 		fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
546 		sepptr = strchr(estr, '@');
547 		if (sepptr == NULL) {
548 			out(O_ALTFP,
549 			    "reconstitute_observation: %s: "
550 			    "missing @ separator in %s.",
551 			    tmpbuf, estr);
552 			Undiag_reason = UD_MISSINGPATH;
553 			FREE(estr);
554 			break;
555 		}
556 
557 		*sepptr = '\0';
558 		if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
559 			out(O_ALTFP,
560 			    "reconstitute_observation: %s: "
561 			    "trouble converting path string \"%s\" "
562 			    "to internal representation.",
563 			    tmpbuf, sepptr + 1);
564 			Undiag_reason = UD_MISSINGPATH;
565 			FREE(estr);
566 			break;
567 		}
568 
569 		/* construct the event */
570 		ep = itree_lookup(fmep->eventtree,
571 		    stable(estr), ipath(epnamenp));
572 		if (ep == NULL) {
573 			out(O_ALTFP,
574 			    "reconstitute_observation: %s: "
575 			    "lookup of  \"%s\" in itree failed.",
576 			    tmpbuf, ipath2str(estr, ipath(epnamenp)));
577 			Undiag_reason = UD_BADOBS;
578 			tree_free(epnamenp);
579 			FREE(estr);
580 			break;
581 		}
582 		tree_free(epnamenp);
583 
584 		/*
585 		 * We may or may not have a saved nvlist for the observation
586 		 */
587 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
588 		pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
589 		if (pkdlen != 0) {
590 			pkd = MALLOC(pkdlen);
591 			fmd_buf_read(fmep->hdl,
592 			    fmep->fmcase, tmpbuf, pkd, pkdlen);
593 			ASSERT(ep->nvp == NULL);
594 			if (nvlist_xunpack(pkd,
595 			    pkdlen, &ep->nvp, &Eft_nv_hdl) != 0)
596 				out(O_DIE|O_SYS, "pack of observed nvl failed");
597 			FREE(pkd);
598 		}
599 
600 		if (ocnt == 0)
601 			fmep->e0 = ep;
602 
603 		FREE(estr);
604 		fmep->ecurrent = ep;
605 		ep->count++;
606 
607 		/* link it into list of observations seen */
608 		ep->observations = fmep->observations;
609 		fmep->observations = ep;
610 	}
611 
612 	if (ocnt == fmep->uniqobs) {
613 		(void) fme_ready(fmep);
614 		return (0);
615 	}
616 
617 	return (1);
618 }
619 
620 /*
621  * restart_fme -- called during eft initialization.  Reconstitutes
622  *	an in-progress fme.
623  */
624 void
625 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress)
626 {
627 	nvlist_t *defect;
628 	struct case_list *bad;
629 	struct fme *fmep;
630 	struct cfgdata *cfgdata;
631 	size_t rawsz;
632 	struct event *ep;
633 	char *tmpbuf = alloca(OBBUFNMSZ);
634 	char *sepptr;
635 	char *estr;
636 	int elen;
637 	struct node *epnamenp = NULL;
638 	int init_size;
639 	extern int alloc_total();
640 
641 	/*
642 	 * ignore solved or closed cases
643 	 */
644 	if (fmd_case_solved(hdl, inprogress) ||
645 	    fmd_case_closed(hdl, inprogress))
646 		return;
647 
648 	fmep = alloc_fme();
649 	fmep->fmcase = inprogress;
650 	fmep->hdl = hdl;
651 
652 	if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) {
653 		out(O_ALTFP, "restart_fme: no saved posted status");
654 		Undiag_reason = UD_MISSINGINFO;
655 		goto badcase;
656 	} else {
657 		fmd_buf_read(hdl, inprogress, WOBUF_POSTD,
658 		    (void *)&fmep->posted_suspects,
659 		    sizeof (fmep->posted_suspects));
660 	}
661 
662 	if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) {
663 		out(O_ALTFP, "restart_fme: no saved id");
664 		Undiag_reason = UD_MISSINGINFO;
665 		goto badcase;
666 	} else {
667 		fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id,
668 		    sizeof (fmep->id));
669 	}
670 	if (Nextid <= fmep->id)
671 		Nextid = fmep->id + 1;
672 
673 	out(O_ALTFP, "Replay FME %d", fmep->id);
674 
675 	if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) {
676 		out(O_ALTFP, "restart_fme: No config data");
677 		Undiag_reason = UD_MISSINGINFO;
678 		goto badcase;
679 	}
680 	fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz,
681 	    sizeof (size_t));
682 
683 	if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) {
684 		out(O_ALTFP, "restart_fme: No event zero");
685 		Undiag_reason = UD_MISSINGZERO;
686 		goto badcase;
687 	}
688 
689 	if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
690 		out(O_ALTFP, "restart_fme: no saved wait time");
691 		Undiag_reason = UD_MISSINGINFO;
692 		goto badcase;
693 	} else {
694 		fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
695 		    sizeof (fmep->pull));
696 	}
697 
698 	if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
699 		out(O_ALTFP, "restart_fme: no count of observations");
700 		Undiag_reason = UD_MISSINGINFO;
701 		goto badcase;
702 	} else {
703 		fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
704 		    (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
705 	}
706 
707 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed0");
708 	elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
709 	if (elen == 0) {
710 		out(O_ALTFP, "reconstitute_observation: no %s buffer found.",
711 		    tmpbuf);
712 		Undiag_reason = UD_MISSINGOBS;
713 		goto badcase;
714 	}
715 	estr = MALLOC(elen);
716 	fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
717 	sepptr = strchr(estr, '@');
718 	if (sepptr == NULL) {
719 		out(O_ALTFP, "reconstitute_observation: %s: "
720 		    "missing @ separator in %s.",
721 		    tmpbuf, estr);
722 		Undiag_reason = UD_MISSINGPATH;
723 		FREE(estr);
724 		goto badcase;
725 	}
726 	*sepptr = '\0';
727 	if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
728 		out(O_ALTFP, "reconstitute_observation: %s: "
729 		    "trouble converting path string \"%s\" "
730 		    "to internal representation.", tmpbuf, sepptr + 1);
731 		Undiag_reason = UD_MISSINGPATH;
732 		FREE(estr);
733 		goto badcase;
734 	}
735 	prune_propagations(stable(estr), ipath(epnamenp));
736 	tree_free(epnamenp);
737 	FREE(estr);
738 
739 	init_size = alloc_total();
740 	out(O_ALTFP|O_STAMP, "start config_restore using %d bytes", init_size);
741 	cfgdata = MALLOC(sizeof (struct cfgdata));
742 	cfgdata->cooked = NULL;
743 	cfgdata->devcache = NULL;
744 	cfgdata->devidcache = NULL;
745 	cfgdata->cpucache = NULL;
746 	cfgdata->raw_refcnt = 1;
747 
748 	if (rawsz > 0) {
749 		if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) {
750 			out(O_ALTFP, "restart_fme: Config data size mismatch");
751 			Undiag_reason = UD_CFGMISMATCH;
752 			goto badcase;
753 		}
754 		cfgdata->begin = MALLOC(rawsz);
755 		cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz;
756 		fmd_buf_read(hdl,
757 		    inprogress, WOBUF_CFG, cfgdata->begin, rawsz);
758 	} else {
759 		cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL;
760 	}
761 
762 	config_cook(cfgdata);
763 	fmep->config = cfgdata->cooked;
764 	config_free(cfgdata);
765 	out(O_ALTFP|O_STAMP, "config_restore added %d bytes",
766 	    alloc_total() - init_size);
767 
768 	if ((fmep->eventtree = itree_create(fmep->config)) == NULL) {
769 		/* case not properly saved or irretrievable */
770 		out(O_ALTFP, "restart_fme: NULL instance tree");
771 		Undiag_reason = UD_INSTFAIL;
772 		goto badcase;
773 	}
774 
775 	itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree);
776 
777 	if (reconstitute_observations(fmep) != 0)
778 		goto badcase;
779 
780 	out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id);
781 	for (ep = fmep->observations; ep; ep = ep->observations) {
782 		out(O_ALTFP|O_NONL, " ");
783 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
784 	}
785 	out(O_ALTFP, NULL);
786 
787 	Open_fme_count++;
788 
789 	/* give the diagnosis algorithm a shot at the new FME state */
790 	fme_eval(fmep, fmep->e0r);
791 	return;
792 
793 badcase:
794 	if (fmep->eventtree != NULL)
795 		itree_free(fmep->eventtree);
796 	if (fmep->config)
797 		structconfig_free(fmep->config);
798 	destroy_fme_bufs(fmep);
799 	FREE(fmep);
800 
801 	/*
802 	 * Since we're unable to restart the case, add it to the undiagable
803 	 * list and solve and close it as appropriate.
804 	 */
805 	bad = MALLOC(sizeof (struct case_list));
806 	bad->next = NULL;
807 
808 	if (Undiagablecaselist != NULL)
809 		bad->next = Undiagablecaselist;
810 	Undiagablecaselist = bad;
811 	bad->fmcase = inprogress;
812 
813 	out(O_ALTFP|O_NONL, "[case %s (unable to restart), ",
814 	    fmd_case_uuid(hdl, bad->fmcase));
815 
816 	if (fmd_case_solved(hdl, bad->fmcase)) {
817 		out(O_ALTFP|O_NONL, "already solved, ");
818 	} else {
819 		out(O_ALTFP|O_NONL, "solving, ");
820 		defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
821 		    NULL, NULL, NULL);
822 		if (Undiag_reason != NULL)
823 			(void) nvlist_add_string(defect,
824 			    UNDIAG_REASON, Undiag_reason);
825 		fmd_case_add_suspect(hdl, bad->fmcase, defect);
826 		fmd_case_solve(hdl, bad->fmcase);
827 	}
828 
829 	if (fmd_case_closed(hdl, bad->fmcase)) {
830 		out(O_ALTFP, "already closed ]");
831 	} else {
832 		out(O_ALTFP, "closing ]");
833 		fmd_case_close(hdl, bad->fmcase);
834 	}
835 }
836 
837 /*ARGSUSED*/
838 static void
839 globals_destructor(void *left, void *right, void *arg)
840 {
841 	struct evalue *evp = (struct evalue *)right;
842 	if (evp->t == NODEPTR)
843 		tree_free((struct node *)(uintptr_t)evp->v);
844 	evp->v = (uintptr_t)NULL;
845 	FREE(evp);
846 }
847 
848 void
849 destroy_fme(struct fme *f)
850 {
851 	stats_delete(f->Rcount);
852 	stats_delete(f->Hcallcount);
853 	stats_delete(f->Rcallcount);
854 	stats_delete(f->Ccallcount);
855 	stats_delete(f->Ecallcount);
856 	stats_delete(f->Tcallcount);
857 	stats_delete(f->Marrowcount);
858 	stats_delete(f->diags);
859 
860 	if (f->eventtree != NULL)
861 		itree_free(f->eventtree);
862 	if (f->config)
863 		structconfig_free(f->config);
864 	lut_free(f->globals, globals_destructor, NULL);
865 	FREE(f);
866 }
867 
868 static const char *
869 fme_state2str(enum fme_state s)
870 {
871 	switch (s) {
872 	case FME_NOTHING:	return ("NOTHING");
873 	case FME_WAIT:		return ("WAIT");
874 	case FME_CREDIBLE:	return ("CREDIBLE");
875 	case FME_DISPROVED:	return ("DISPROVED");
876 	case FME_DEFERRED:	return ("DEFERRED");
877 	default:		return ("UNKNOWN");
878 	}
879 }
880 
881 static int
882 is_problem(enum nametype t)
883 {
884 	return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
885 }
886 
887 static int
888 is_fault(enum nametype t)
889 {
890 	return (t == N_FAULT);
891 }
892 
893 static int
894 is_defect(enum nametype t)
895 {
896 	return (t == N_DEFECT);
897 }
898 
899 static int
900 is_upset(enum nametype t)
901 {
902 	return (t == N_UPSET);
903 }
904 
905 static void
906 fme_print(int flags, struct fme *fmep)
907 {
908 	struct event *ep;
909 
910 	out(flags, "Fault Management Exercise %d", fmep->id);
911 	out(flags, "\t       State: %s", fme_state2str(fmep->state));
912 	out(flags|O_NONL, "\t  Start time: ");
913 	ptree_timeval(flags|O_NONL, &fmep->ull);
914 	out(flags, NULL);
915 	if (fmep->wull) {
916 		out(flags|O_NONL, "\t   Wait time: ");
917 		ptree_timeval(flags|O_NONL, &fmep->wull);
918 		out(flags, NULL);
919 	}
920 	out(flags|O_NONL, "\t          E0: ");
921 	if (fmep->e0)
922 		itree_pevent_brief(flags|O_NONL, fmep->e0);
923 	else
924 		out(flags|O_NONL, "NULL");
925 	out(flags, NULL);
926 	out(flags|O_NONL, "\tObservations:");
927 	for (ep = fmep->observations; ep; ep = ep->observations) {
928 		out(flags|O_NONL, " ");
929 		itree_pevent_brief(flags|O_NONL, ep);
930 	}
931 	out(flags, NULL);
932 	out(flags|O_NONL, "\tSuspect list:");
933 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
934 		out(flags|O_NONL, " ");
935 		itree_pevent_brief(flags|O_NONL, ep);
936 	}
937 	out(flags, NULL);
938 	if (fmep->eventtree != NULL) {
939 		out(flags|O_VERB2, "\t        Tree:");
940 		itree_ptree(flags|O_VERB2, fmep->eventtree);
941 	}
942 }
943 
944 static struct node *
945 pathstring2epnamenp(char *path)
946 {
947 	char *sep = "/";
948 	struct node *ret;
949 	char *ptr;
950 
951 	if ((ptr = strtok(path, sep)) == NULL)
952 		out(O_DIE, "pathstring2epnamenp: invalid empty class");
953 
954 	ret = tree_iname(stable(ptr), NULL, 0);
955 
956 	while ((ptr = strtok(NULL, sep)) != NULL)
957 		ret = tree_name_append(ret,
958 		    tree_iname(stable(ptr), NULL, 0));
959 
960 	return (ret);
961 }
962 
963 /*
964  * for a given upset sp, increment the corresponding SERD engine.  if the
965  * SERD engine trips, return the ename and ipp of the resulting ereport.
966  * returns true if engine tripped and *enamep and *ippp were filled in.
967  */
968 static int
969 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep,
970     fmd_case_t *fmcase, struct event *sp, const char **enamep,
971     const struct ipath **ippp)
972 {
973 	struct node *serdinst;
974 	char *serdname;
975 	struct node *nid;
976 	struct serd_entry *newentp;
977 	int i, serdn = -1, serdincrement = 1;
978 	char *serdsuffix = NULL, *serdt = NULL;
979 	struct evalue *ep;
980 
981 	ASSERT(sp->t == N_UPSET);
982 	ASSERT(ffep != NULL);
983 
984 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
985 	    (void *)"n", (lut_cmp)strcmp)) != NULL) {
986 		ASSERT(ep->t == UINT64);
987 		serdn = (int)ep->v;
988 	}
989 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
990 	    (void *)"t", (lut_cmp)strcmp)) != NULL) {
991 		ASSERT(ep->t == STRING);
992 		serdt = (char *)(uintptr_t)ep->v;
993 	}
994 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
995 	    (void *)"suffix", (lut_cmp)strcmp)) != NULL) {
996 		ASSERT(ep->t == STRING);
997 		serdsuffix = (char *)(uintptr_t)ep->v;
998 	}
999 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1000 	    (void *)"increment", (lut_cmp)strcmp)) != NULL) {
1001 		ASSERT(ep->t == UINT64);
1002 		serdincrement = (int)ep->v;
1003 	}
1004 
1005 	/*
1006 	 * obtain instanced SERD engine from the upset sp.  from this
1007 	 * derive serdname, the string used to identify the SERD engine.
1008 	 */
1009 	serdinst = eventprop_lookup(sp, L_engine);
1010 
1011 	if (serdinst == NULL)
1012 		return (-1);
1013 
1014 	serdname = ipath2str(serdinst->u.stmt.np->u.event.ename->u.name.s,
1015 	    ipath(serdinst->u.stmt.np->u.event.epname));
1016 
1017 	if (serdsuffix != NULL) {
1018 		int len = strlen(serdname) + strlen(serdsuffix) + 1;
1019 		char *ptr = MALLOC(len);
1020 		(void) snprintf(ptr, len, "%s%s", serdname, serdsuffix);
1021 		FREE(serdname);
1022 		serdname = ptr;
1023 	}
1024 
1025 	/* handle serd engine "id" property, if there is one */
1026 	if ((nid =
1027 	    lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) {
1028 		struct evalue *gval;
1029 		char suffixbuf[200];
1030 		char *suffix;
1031 		char *nserdname;
1032 		size_t nname;
1033 
1034 		out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname);
1035 		ptree_name_iter(O_ALTFP|O_NONL, nid);
1036 
1037 		ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t));
1038 
1039 		if ((gval = lut_lookup(fmep->globals,
1040 		    (void *)nid->u.globid.s, NULL)) == NULL) {
1041 			out(O_ALTFP, " undefined");
1042 		} else if (gval->t == UINT64) {
1043 			out(O_ALTFP, " %llu", gval->v);
1044 			(void) sprintf(suffixbuf, "%llu", gval->v);
1045 			suffix = suffixbuf;
1046 		} else {
1047 			out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v);
1048 			suffix = (char *)(uintptr_t)gval->v;
1049 		}
1050 
1051 		nname = strlen(serdname) + strlen(suffix) + 2;
1052 		nserdname = MALLOC(nname);
1053 		(void) snprintf(nserdname, nname, "%s:%s", serdname, suffix);
1054 		FREE(serdname);
1055 		serdname = nserdname;
1056 	}
1057 
1058 	/*
1059 	 * if the engine is empty, and we have an override for n/t then
1060 	 * destroy and recreate it.
1061 	 */
1062 	if ((serdn != -1 || serdt != NULL) && fmd_serd_exists(hdl, serdname) &&
1063 	    fmd_serd_empty(hdl, serdname))
1064 		fmd_serd_destroy(hdl, serdname);
1065 
1066 	if (!fmd_serd_exists(hdl, serdname)) {
1067 		struct node *nN, *nT;
1068 		const char *s;
1069 		struct node *nodep;
1070 		struct config *cp;
1071 		char *path;
1072 		uint_t nval;
1073 		hrtime_t tval;
1074 		const char *name;
1075 		char *serd_name;
1076 		int i;
1077 		char *ptr;
1078 		int got_n_override = 0, got_t_override = 0;
1079 
1080 		/* no SERD engine yet, so create it */
1081 		nodep = serdinst->u.stmt.np->u.event.epname;
1082 		name = serdinst->u.stmt.np->u.event.ename->u.name.s;
1083 		path = ipath2str(NULL, ipath(nodep));
1084 		cp = config_lookup(fmep->config, path, 0);
1085 		FREE((void *)path);
1086 
1087 		/*
1088 		 * We allow serd paramaters to be overridden, either from
1089 		 * eft.conf file values (if Serd_Override is set) or from
1090 		 * driver properties (for "serd.io.device" engines).
1091 		 */
1092 		if (Serd_Override != NULL) {
1093 			char *save_ptr, *ptr1, *ptr2, *ptr3;
1094 			ptr3 = save_ptr = STRDUP(Serd_Override);
1095 			while (*ptr3 != '\0') {
1096 				ptr1 = strchr(ptr3, ',');
1097 				*ptr1 = '\0';
1098 				if (strcmp(ptr3, name) == 0) {
1099 					ptr2 =  strchr(ptr1 + 1, ',');
1100 					*ptr2 = '\0';
1101 					nval = atoi(ptr1 + 1);
1102 					out(O_ALTFP, "serd override %s_n %d",
1103 					    name, nval);
1104 					ptr3 =  strchr(ptr2 + 1, ' ');
1105 					if (ptr3)
1106 						*ptr3 = '\0';
1107 					ptr = STRDUP(ptr2 + 1);
1108 					out(O_ALTFP, "serd override %s_t %s",
1109 					    name, ptr);
1110 					got_n_override = 1;
1111 					got_t_override = 1;
1112 					break;
1113 				} else {
1114 					ptr2 =  strchr(ptr1 + 1, ',');
1115 					ptr3 =  strchr(ptr2 + 1, ' ');
1116 					if (ptr3 == NULL)
1117 						break;
1118 				}
1119 				ptr3++;
1120 			}
1121 			FREE(save_ptr);
1122 		}
1123 
1124 		if (cp && got_n_override == 0) {
1125 			/*
1126 			 * convert serd engine name into property name
1127 			 */
1128 			serd_name = MALLOC(strlen(name) + 3);
1129 			for (i = 0; i < strlen(name); i++) {
1130 				if (name[i] == '.')
1131 					serd_name[i] = '_';
1132 				else
1133 					serd_name[i] = name[i];
1134 			}
1135 			serd_name[i++] = '_';
1136 			serd_name[i++] = 'n';
1137 			serd_name[i] = '\0';
1138 			if (s = config_getprop(cp, serd_name)) {
1139 				nval = atoi(s);
1140 				out(O_ALTFP, "serd override %s_n %s", name, s);
1141 				got_n_override = 1;
1142 			}
1143 			serd_name[i - 1] = 't';
1144 			if (s = config_getprop(cp, serd_name)) {
1145 				ptr = STRDUP(s);
1146 				out(O_ALTFP, "serd override %s_t %s", name, s);
1147 				got_t_override = 1;
1148 			}
1149 			FREE(serd_name);
1150 		}
1151 
1152 		if (serdn != -1 && got_n_override == 0) {
1153 			nval = serdn;
1154 			out(O_ALTFP, "serd override %s_n %d", name, serdn);
1155 			got_n_override = 1;
1156 		}
1157 		if (serdt != NULL && got_t_override == 0) {
1158 			ptr = STRDUP(serdt);
1159 			out(O_ALTFP, "serd override %s_t %s", name, serdt);
1160 			got_t_override = 1;
1161 		}
1162 
1163 		if (!got_n_override) {
1164 			nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N,
1165 			    NULL);
1166 			ASSERT(nN->t == T_NUM);
1167 			nval = (uint_t)nN->u.ull;
1168 		}
1169 		if (!got_t_override) {
1170 			nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T,
1171 			    NULL);
1172 			ASSERT(nT->t == T_TIMEVAL);
1173 			tval = (hrtime_t)nT->u.ull;
1174 		} else {
1175 			const unsigned long long *ullp;
1176 			const char *suffix;
1177 			int len;
1178 
1179 			len = strspn(ptr, "0123456789");
1180 			suffix = stable(&ptr[len]);
1181 			ullp = (unsigned long long *)lut_lookup(Timesuffixlut,
1182 			    (void *)suffix, NULL);
1183 			ptr[len] = '\0';
1184 			tval = strtoull(ptr, NULL, 0) * (ullp ? *ullp : 1ll);
1185 			FREE(ptr);
1186 		}
1187 		fmd_serd_create(hdl, serdname, nval, tval);
1188 	}
1189 
1190 	newentp = MALLOC(sizeof (*newentp));
1191 	newentp->ename = stable(serdinst->u.stmt.np->u.event.ename->u.name.s);
1192 	newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname);
1193 	newentp->hdl = hdl;
1194 	if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) {
1195 		SerdEngines = lut_add(SerdEngines, (void *)newentp,
1196 		    (void *)newentp, (lut_cmp)serd_cmp);
1197 		Serd_need_save = 1;
1198 		serd_save();
1199 	} else {
1200 		FREE(newentp);
1201 	}
1202 
1203 
1204 	/*
1205 	 * increment SERD engine.  if engine fires, reset serd
1206 	 * engine and return trip_strcode if required.
1207 	 */
1208 	for (i = 0; i < serdincrement; i++) {
1209 		if (fmd_serd_record(hdl, serdname, ffep)) {
1210 			fmd_case_add_serd(hdl, fmcase, serdname);
1211 			fmd_serd_reset(hdl, serdname);
1212 
1213 			if (ippp) {
1214 				struct node *tripinst =
1215 				    lut_lookup(serdinst->u.stmt.lutp,
1216 				    (void *)L_trip, NULL);
1217 				ASSERT(tripinst != NULL);
1218 				*enamep = tripinst->u.event.ename->u.name.s;
1219 				*ippp = ipath(tripinst->u.event.epname);
1220 				out(O_ALTFP|O_NONL,
1221 				    "[engine fired: %s, sending: ", serdname);
1222 				ipath_print(O_ALTFP|O_NONL, *enamep, *ippp);
1223 				out(O_ALTFP, "]");
1224 			} else {
1225 				out(O_ALTFP, "[engine fired: %s, no trip]",
1226 				    serdname);
1227 			}
1228 			FREE(serdname);
1229 			return (1);
1230 		}
1231 	}
1232 
1233 	FREE(serdname);
1234 	return (0);
1235 }
1236 
1237 /*
1238  * search a suspect list for upsets.  feed each upset to serd_eval() and
1239  * build up tripped[], an array of ereports produced by the firing of
1240  * any SERD engines.  then feed each ereport back into
1241  * fme_receive_report().
1242  *
1243  * returns ntrip, the number of these ereports produced.
1244  */
1245 static int
1246 upsets_eval(struct fme *fmep, fmd_event_t *ffep)
1247 {
1248 	/* we build an array of tripped ereports that we send ourselves */
1249 	struct {
1250 		const char *ename;
1251 		const struct ipath *ipp;
1252 	} *tripped;
1253 	struct event *sp;
1254 	int ntrip, nupset, i;
1255 
1256 	/*
1257 	 * count the number of upsets to determine the upper limit on
1258 	 * expected trip ereport strings.  remember that one upset can
1259 	 * lead to at most one ereport.
1260 	 */
1261 	nupset = 0;
1262 	for (sp = fmep->suspects; sp; sp = sp->suspects) {
1263 		if (sp->t == N_UPSET)
1264 			nupset++;
1265 	}
1266 
1267 	if (nupset == 0)
1268 		return (0);
1269 
1270 	/*
1271 	 * get to this point if we have upsets and expect some trip
1272 	 * ereports
1273 	 */
1274 	tripped = alloca(sizeof (*tripped) * nupset);
1275 	bzero((void *)tripped, sizeof (*tripped) * nupset);
1276 
1277 	ntrip = 0;
1278 	for (sp = fmep->suspects; sp; sp = sp->suspects)
1279 		if (sp->t == N_UPSET &&
1280 		    serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp,
1281 		    &tripped[ntrip].ename, &tripped[ntrip].ipp) == 1)
1282 			ntrip++;
1283 
1284 	for (i = 0; i < ntrip; i++) {
1285 		struct event *ep, *nep;
1286 		struct fme *nfmep;
1287 		fmd_case_t *fmcase;
1288 		const struct ipath *ipp;
1289 		const char *eventstring;
1290 		int prev_verbose;
1291 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1292 		enum fme_state state;
1293 
1294 		/*
1295 		 * First try and evaluate a case with the trip ereport plus
1296 		 * all the other ereports that cause the trip. If that fails
1297 		 * to evaluate then try again with just this ereport on its own.
1298 		 */
1299 		out(O_ALTFP|O_NONL, "fme_receive_report_serd: ");
1300 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1301 		out(O_ALTFP|O_STAMP, NULL);
1302 		ep = fmep->e0;
1303 		eventstring = ep->enode->u.event.ename->u.name.s;
1304 		ipp = ep->ipp;
1305 		prune_propagations(eventstring, ipp);
1306 
1307 		/*
1308 		 * create a duplicate fme and case
1309 		 */
1310 		fmcase = fmd_case_open(fmep->hdl, NULL);
1311 		out(O_ALTFP|O_NONL, "duplicate fme for event [");
1312 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1313 		out(O_ALTFP, " ]");
1314 		if ((nfmep = newfme(eventstring, ipp, fmep->hdl,
1315 		    fmcase)) == NULL) {
1316 			out(O_ALTFP|O_NONL, "[");
1317 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1318 			out(O_ALTFP, " CANNOT DIAGNOSE]");
1319 			publish_undiagnosable(fmep->hdl, ffep, fmcase);
1320 			continue;
1321 		}
1322 		Open_fme_count++;
1323 		nfmep->pull = fmep->pull;
1324 		init_fme_bufs(nfmep);
1325 		out(O_ALTFP|O_NONL, "[");
1326 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1327 		out(O_ALTFP, " created FME%d, case %s]", nfmep->id,
1328 		    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
1329 		if (ffep) {
1330 			fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep);
1331 			fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep);
1332 			nfmep->e0r = ffep;
1333 		}
1334 
1335 		/*
1336 		 * add the original ereports
1337 		 */
1338 		for (ep = fmep->observations; ep; ep = ep->observations) {
1339 			eventstring = ep->enode->u.event.ename->u.name.s;
1340 			ipp = ep->ipp;
1341 			out(O_ALTFP|O_NONL, "adding event [");
1342 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1343 			out(O_ALTFP, " ]");
1344 			nep = itree_lookup(nfmep->eventtree, eventstring, ipp);
1345 			if (nep->count++ == 0) {
1346 				nep->observations = nfmep->observations;
1347 				nfmep->observations = nep;
1348 				serialize_observation(nfmep, eventstring, ipp);
1349 				nep->nvp = evnv_dupnvl(ep->nvp);
1350 			}
1351 			if (ep->ffep && ep->ffep != ffep)
1352 				fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase,
1353 				    ep->ffep);
1354 			stats_counter_bump(nfmep->Rcount);
1355 		}
1356 
1357 		/*
1358 		 * add the serd trigger ereport
1359 		 */
1360 		if ((ep = itree_lookup(nfmep->eventtree, tripped[i].ename,
1361 		    tripped[i].ipp)) == NULL) {
1362 			/*
1363 			 * The trigger ereport is not in the instance tree. It
1364 			 * was presumably removed by prune_propagations() as
1365 			 * this combination of events is not present in the
1366 			 * rules.
1367 			 */
1368 			out(O_ALTFP, "upsets_eval: e0 not in instance tree");
1369 			Undiag_reason = UD_BADEVENTI;
1370 			goto retry_lone_ereport;
1371 		}
1372 		out(O_ALTFP|O_NONL, "adding event [");
1373 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1374 		out(O_ALTFP, " ]");
1375 		nfmep->ecurrent = ep;
1376 		ep->nvp = NULL;
1377 		ep->count = 1;
1378 		ep->observations = nfmep->observations;
1379 		nfmep->observations = ep;
1380 
1381 		/*
1382 		 * just peek first.
1383 		 */
1384 		nfmep->peek = 1;
1385 		prev_verbose = Verbose;
1386 		if (Debug == 0)
1387 			Verbose = 0;
1388 		lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep);
1389 		state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay);
1390 		nfmep->peek = 0;
1391 		Verbose = prev_verbose;
1392 		if (state == FME_DISPROVED) {
1393 			out(O_ALTFP, "upsets_eval: hypothesis disproved");
1394 			Undiag_reason = UD_UNSOLVD;
1395 retry_lone_ereport:
1396 			/*
1397 			 * However the trigger ereport on its own might be
1398 			 * diagnosable, so check for that. Undo the new fme
1399 			 * and case we just created and call fme_receive_report.
1400 			 */
1401 			out(O_ALTFP|O_NONL, "[");
1402 			ipath_print(O_ALTFP|O_NONL, tripped[i].ename,
1403 			    tripped[i].ipp);
1404 			out(O_ALTFP, " retrying with just trigger ereport]");
1405 			itree_free(nfmep->eventtree);
1406 			nfmep->eventtree = NULL;
1407 			structconfig_free(nfmep->config);
1408 			nfmep->config = NULL;
1409 			destroy_fme_bufs(nfmep);
1410 			fmd_case_close(nfmep->hdl, nfmep->fmcase);
1411 			fme_receive_report(fmep->hdl, ffep,
1412 			    tripped[i].ename, tripped[i].ipp, NULL);
1413 			continue;
1414 		}
1415 
1416 		/*
1417 		 * and evaluate
1418 		 */
1419 		serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp);
1420 		fme_eval(nfmep, ffep);
1421 	}
1422 
1423 	return (ntrip);
1424 }
1425 
1426 /*
1427  * fme_receive_external_report -- call when an external ereport comes in
1428  *
1429  * this routine just converts the relevant information from the ereport
1430  * into a format used internally and passes it on to fme_receive_report().
1431  */
1432 void
1433 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1434     const char *class)
1435 {
1436 	struct node		*epnamenp;
1437 	fmd_case_t		*fmcase;
1438 	const struct ipath	*ipp;
1439 
1440 	class = stable(class);
1441 
1442 	/* Get the component path from the ereport */
1443 	epnamenp = platform_getpath(nvl);
1444 
1445 	/* See if we ended up without a path. */
1446 	if (epnamenp == NULL) {
1447 		/* See if class permits silent discard on unknown component. */
1448 		if (lut_lookup(Ereportenames_discard, (void *)class, NULL)) {
1449 			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
1450 			    "to component path, but silent discard allowed.",
1451 			    class);
1452 		} else {
1453 			/*
1454 			 * XFILE: Failure to find a component is bad unless
1455 			 * 'discard_if_config_unknown=1' was specified in the
1456 			 * ereport definition. Indicate undiagnosable.
1457 			 */
1458 			out(O_ALTFP, "XFILE: Unable to map \"%s\" ereport "
1459 			    "to component path.", class);
1460 			Undiag_reason = UD_NOPATH;
1461 			fmcase = fmd_case_open(hdl, NULL);
1462 			publish_undiagnosable(hdl, ffep, fmcase);
1463 		}
1464 		return;
1465 	}
1466 
1467 	ipp = ipath(epnamenp);
1468 	tree_free(epnamenp);
1469 	fme_receive_report(hdl, ffep, class, ipp, nvl);
1470 }
1471 
1472 /*ARGSUSED*/
1473 void
1474 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1475     const char *eventstring)
1476 {
1477 	char *uuid;
1478 	nvlist_t **nva;
1479 	uint_t nvc;
1480 	const struct ipath *ipp;
1481 
1482 	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
1483 	    nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
1484 	    &nva, &nvc) != 0) {
1485 		out(O_ALTFP, "No uuid or fault list for list.repaired event");
1486 		return;
1487 	}
1488 
1489 	out(O_ALTFP, "Processing list.repaired from case %s", uuid);
1490 
1491 	while (nvc-- != 0) {
1492 		/*
1493 		 * Reset any istat or serd engine associated with this path.
1494 		 */
1495 		char *path;
1496 
1497 		if ((ipp = platform_fault2ipath(*nva++)) == NULL)
1498 			continue;
1499 
1500 		path = ipath2str(NULL, ipp);
1501 		out(O_ALTFP, "fme_receive_repair_list: resetting state for %s",
1502 		    path);
1503 		FREE(path);
1504 
1505 		lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp);
1506 		istat_save();
1507 
1508 		lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp);
1509 		serd_save();
1510 	}
1511 }
1512 
1513 /*ARGSUSED*/
1514 void
1515 fme_receive_topology_change(void)
1516 {
1517 	lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL);
1518 	istat_save();
1519 
1520 	lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL);
1521 	serd_save();
1522 }
1523 
1524 static int mark_arrows(struct fme *fmep, struct event *ep, int mark,
1525     unsigned long long at_latest_by, unsigned long long *pdelay, int keep);
1526 
1527 /* ARGSUSED */
1528 static void
1529 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
1530 {
1531 	struct bubble *bp;
1532 	struct arrowlist *ap;
1533 
1534 	ep->cached_state = 0;
1535 	ep->keep_in_tree = 0;
1536 	for (bp = itree_next_bubble(ep, NULL); bp;
1537 	    bp = itree_next_bubble(ep, bp)) {
1538 		if (bp->t != B_FROM)
1539 			continue;
1540 		bp->mark = 0;
1541 		for (ap = itree_next_arrow(bp, NULL); ap;
1542 		    ap = itree_next_arrow(bp, ap))
1543 			ap->arrowp->mark = 0;
1544 	}
1545 }
1546 
1547 static void
1548 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
1549     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
1550 {
1551 	struct event *ep;
1552 	struct fme *fmep = NULL;
1553 	struct fme *ofmep = NULL;
1554 	struct fme *cfmep, *svfmep;
1555 	int matched = 0;
1556 	nvlist_t *defect;
1557 	fmd_case_t *fmcase;
1558 
1559 	out(O_ALTFP|O_NONL, "fme_receive_report: ");
1560 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1561 	out(O_ALTFP|O_STAMP, NULL);
1562 
1563 	/* decide which FME it goes to */
1564 	for (fmep = FMElist; fmep; fmep = fmep->next) {
1565 		int prev_verbose;
1566 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1567 		enum fme_state state;
1568 		nvlist_t *pre_peek_nvp = NULL;
1569 
1570 		if (fmep->overflow) {
1571 			if (!(fmd_case_closed(fmep->hdl, fmep->fmcase)))
1572 				ofmep = fmep;
1573 
1574 			continue;
1575 		}
1576 
1577 		/*
1578 		 * ignore solved or closed cases
1579 		 */
1580 		if (fmep->posted_suspects ||
1581 		    fmd_case_solved(fmep->hdl, fmep->fmcase) ||
1582 		    fmd_case_closed(fmep->hdl, fmep->fmcase))
1583 			continue;
1584 
1585 		/* look up event in event tree for this FME */
1586 		if ((ep = itree_lookup(fmep->eventtree,
1587 		    eventstring, ipp)) == NULL)
1588 			continue;
1589 
1590 		/* note observation */
1591 		fmep->ecurrent = ep;
1592 		if (ep->count++ == 0) {
1593 			/* link it into list of observations seen */
1594 			ep->observations = fmep->observations;
1595 			fmep->observations = ep;
1596 			ep->nvp = evnv_dupnvl(nvl);
1597 		} else {
1598 			/* use new payload values for peek */
1599 			pre_peek_nvp = ep->nvp;
1600 			ep->nvp = evnv_dupnvl(nvl);
1601 		}
1602 
1603 		/* tell hypothesise() not to mess with suspect list */
1604 		fmep->peek = 1;
1605 
1606 		/* don't want this to be verbose (unless Debug is set) */
1607 		prev_verbose = Verbose;
1608 		if (Debug == 0)
1609 			Verbose = 0;
1610 
1611 		lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
1612 		state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
1613 
1614 		fmep->peek = 0;
1615 
1616 		/* put verbose flag back */
1617 		Verbose = prev_verbose;
1618 
1619 		if (state != FME_DISPROVED) {
1620 			/* found an FME that explains the ereport */
1621 			matched++;
1622 			out(O_ALTFP|O_NONL, "[");
1623 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1624 			out(O_ALTFP, " explained by FME%d]", fmep->id);
1625 
1626 			if (pre_peek_nvp)
1627 				nvlist_free(pre_peek_nvp);
1628 
1629 			if (ep->count == 1)
1630 				serialize_observation(fmep, eventstring, ipp);
1631 
1632 			if (ffep) {
1633 				fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1634 				ep->ffep = ffep;
1635 			}
1636 
1637 			stats_counter_bump(fmep->Rcount);
1638 
1639 			/* re-eval FME */
1640 			fme_eval(fmep, ffep);
1641 		} else {
1642 
1643 			/* not a match, undo noting of observation */
1644 			fmep->ecurrent = NULL;
1645 			if (--ep->count == 0) {
1646 				/* unlink it from observations */
1647 				fmep->observations = ep->observations;
1648 				ep->observations = NULL;
1649 				nvlist_free(ep->nvp);
1650 				ep->nvp = NULL;
1651 			} else {
1652 				nvlist_free(ep->nvp);
1653 				ep->nvp = pre_peek_nvp;
1654 			}
1655 		}
1656 	}
1657 
1658 	if (matched)
1659 		return;	/* explained by at least one existing FME */
1660 
1661 	/* clean up closed fmes */
1662 	cfmep = ClosedFMEs;
1663 	while (cfmep != NULL) {
1664 		svfmep = cfmep->next;
1665 		destroy_fme(cfmep);
1666 		cfmep = svfmep;
1667 	}
1668 	ClosedFMEs = NULL;
1669 	prune_propagations(eventstring, ipp);
1670 
1671 	if (ofmep) {
1672 		out(O_ALTFP|O_NONL, "[");
1673 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1674 		out(O_ALTFP, " ADDING TO OVERFLOW FME]");
1675 		if (ffep)
1676 			fmd_case_add_ereport(hdl, ofmep->fmcase, ffep);
1677 
1678 		return;
1679 
1680 	} else if (Max_fme && (Open_fme_count >= Max_fme)) {
1681 		out(O_ALTFP|O_NONL, "[");
1682 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1683 		out(O_ALTFP, " MAX OPEN FME REACHED]");
1684 
1685 		fmcase = fmd_case_open(hdl, NULL);
1686 
1687 		/* Create overflow fme */
1688 		if ((fmep = newfme(eventstring, ipp, hdl, fmcase)) == NULL) {
1689 			out(O_ALTFP|O_NONL, "[");
1690 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1691 			out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]");
1692 			publish_undiagnosable(hdl, ffep, fmcase);
1693 			return;
1694 		}
1695 
1696 		Open_fme_count++;
1697 
1698 		init_fme_bufs(fmep);
1699 		fmep->overflow = B_TRUE;
1700 
1701 		if (ffep)
1702 			fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1703 
1704 		defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
1705 		    NULL, NULL, NULL);
1706 		(void) nvlist_add_string(defect, UNDIAG_REASON, UD_MAXFME);
1707 		fmd_case_add_suspect(hdl, fmep->fmcase, defect);
1708 		fmd_case_solve(hdl, fmep->fmcase);
1709 		return;
1710 	}
1711 
1712 	/* open a case */
1713 	fmcase = fmd_case_open(hdl, NULL);
1714 
1715 	/* start a new FME */
1716 	if ((fmep = newfme(eventstring, ipp, hdl, fmcase)) == NULL) {
1717 		out(O_ALTFP|O_NONL, "[");
1718 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1719 		out(O_ALTFP, " CANNOT DIAGNOSE]");
1720 		publish_undiagnosable(hdl, ffep, fmcase);
1721 		return;
1722 	}
1723 
1724 	Open_fme_count++;
1725 
1726 	init_fme_bufs(fmep);
1727 
1728 	out(O_ALTFP|O_NONL, "[");
1729 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1730 	out(O_ALTFP, " created FME%d, case %s]", fmep->id,
1731 	    fmd_case_uuid(hdl, fmep->fmcase));
1732 
1733 	ep = fmep->e0;
1734 	ASSERT(ep != NULL);
1735 
1736 	/* note observation */
1737 	fmep->ecurrent = ep;
1738 	if (ep->count++ == 0) {
1739 		/* link it into list of observations seen */
1740 		ep->observations = fmep->observations;
1741 		fmep->observations = ep;
1742 		ep->nvp = evnv_dupnvl(nvl);
1743 		serialize_observation(fmep, eventstring, ipp);
1744 	} else {
1745 		/* new payload overrides any previous */
1746 		nvlist_free(ep->nvp);
1747 		ep->nvp = evnv_dupnvl(nvl);
1748 	}
1749 
1750 	stats_counter_bump(fmep->Rcount);
1751 
1752 	if (ffep) {
1753 		fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1754 		fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
1755 		fmep->e0r = ffep;
1756 		ep->ffep = ffep;
1757 	}
1758 
1759 	/* give the diagnosis algorithm a shot at the new FME state */
1760 	fme_eval(fmep, ffep);
1761 }
1762 
1763 void
1764 fme_status(int flags)
1765 {
1766 	struct fme *fmep;
1767 
1768 	if (FMElist == NULL) {
1769 		out(flags, "No fault management exercises underway.");
1770 		return;
1771 	}
1772 
1773 	for (fmep = FMElist; fmep; fmep = fmep->next)
1774 		fme_print(flags, fmep);
1775 }
1776 
1777 /*
1778  * "indent" routines used mostly for nicely formatted debug output, but also
1779  * for sanity checking for infinite recursion bugs.
1780  */
1781 
1782 #define	MAX_INDENT 1024
1783 static const char *indent_s[MAX_INDENT];
1784 static int current_indent;
1785 
1786 static void
1787 indent_push(const char *s)
1788 {
1789 	if (current_indent < MAX_INDENT)
1790 		indent_s[current_indent++] = s;
1791 	else
1792 		out(O_DIE, "unexpected recursion depth (%d)", current_indent);
1793 }
1794 
1795 static void
1796 indent_set(const char *s)
1797 {
1798 	current_indent = 0;
1799 	indent_push(s);
1800 }
1801 
1802 static void
1803 indent_pop(void)
1804 {
1805 	if (current_indent > 0)
1806 		current_indent--;
1807 	else
1808 		out(O_DIE, "recursion underflow");
1809 }
1810 
1811 static void
1812 indent(void)
1813 {
1814 	int i;
1815 	if (!Verbose)
1816 		return;
1817 	for (i = 0; i < current_indent; i++)
1818 		out(O_ALTFP|O_VERB|O_NONL, indent_s[i]);
1819 }
1820 
1821 #define	SLNEW		1
1822 #define	SLCHANGED	2
1823 #define	SLWAIT		3
1824 #define	SLDISPROVED	4
1825 
1826 static void
1827 print_suspects(int circumstance, struct fme *fmep)
1828 {
1829 	struct event *ep;
1830 
1831 	out(O_ALTFP|O_NONL, "[");
1832 	if (circumstance == SLCHANGED) {
1833 		out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, "
1834 		    "suspect list:", fmep->id, fme_state2str(fmep->state));
1835 	} else if (circumstance == SLWAIT) {
1836 		out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id,
1837 		    fmep->timer);
1838 		ptree_timeval(O_ALTFP|O_NONL, &fmep->wull);
1839 	} else if (circumstance == SLDISPROVED) {
1840 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id);
1841 	} else {
1842 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id);
1843 	}
1844 
1845 	if (circumstance == SLWAIT || circumstance == SLDISPROVED) {
1846 		out(O_ALTFP, "]");
1847 		return;
1848 	}
1849 
1850 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
1851 		out(O_ALTFP|O_NONL, " ");
1852 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
1853 	}
1854 	out(O_ALTFP, "]");
1855 }
1856 
1857 static struct node *
1858 eventprop_lookup(struct event *ep, const char *propname)
1859 {
1860 	return (lut_lookup(ep->props, (void *)propname, NULL));
1861 }
1862 
1863 #define	MAXDIGITIDX	23
1864 static char numbuf[MAXDIGITIDX + 1];
1865 
1866 static int
1867 node2uint(struct node *n, uint_t *valp)
1868 {
1869 	struct evalue value;
1870 	struct lut *globals = NULL;
1871 
1872 	if (n == NULL)
1873 		return (1);
1874 
1875 	/*
1876 	 * check value.v since we are being asked to convert an unsigned
1877 	 * long long int to an unsigned int
1878 	 */
1879 	if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) ||
1880 	    value.t != UINT64 || value.v > (1ULL << 32))
1881 		return (1);
1882 
1883 	*valp = (uint_t)value.v;
1884 
1885 	return (0);
1886 }
1887 
1888 static nvlist_t *
1889 node2fmri(struct node *n)
1890 {
1891 	nvlist_t **pa, *f, *p;
1892 	struct node *nc;
1893 	uint_t depth = 0;
1894 	char *numstr, *nullbyte;
1895 	char *failure;
1896 	int err, i;
1897 
1898 	/* XXX do we need to be able to handle a non-T_NAME node? */
1899 	if (n == NULL || n->t != T_NAME)
1900 		return (NULL);
1901 
1902 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
1903 		if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM)
1904 			break;
1905 		depth++;
1906 	}
1907 
1908 	if (nc != NULL) {
1909 		/* We bailed early, something went wrong */
1910 		return (NULL);
1911 	}
1912 
1913 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
1914 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
1915 	pa = alloca(depth * sizeof (nvlist_t *));
1916 	for (i = 0; i < depth; i++)
1917 		pa[i] = NULL;
1918 
1919 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
1920 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
1921 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
1922 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
1923 	if (err != 0) {
1924 		failure = "basic construction of FMRI failed";
1925 		goto boom;
1926 	}
1927 
1928 	numbuf[MAXDIGITIDX] = '\0';
1929 	nullbyte = &numbuf[MAXDIGITIDX];
1930 	i = 0;
1931 
1932 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
1933 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
1934 		if (err != 0) {
1935 			failure = "alloc of an hc-pair failed";
1936 			goto boom;
1937 		}
1938 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s);
1939 		numstr = ulltostr(nc->u.name.child->u.ull, nullbyte);
1940 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
1941 		if (err != 0) {
1942 			failure = "construction of an hc-pair failed";
1943 			goto boom;
1944 		}
1945 		pa[i++] = p;
1946 	}
1947 
1948 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
1949 	if (err == 0) {
1950 		for (i = 0; i < depth; i++)
1951 			if (pa[i] != NULL)
1952 				nvlist_free(pa[i]);
1953 		return (f);
1954 	}
1955 	failure = "addition of hc-pair array to FMRI failed";
1956 
1957 boom:
1958 	for (i = 0; i < depth; i++)
1959 		if (pa[i] != NULL)
1960 			nvlist_free(pa[i]);
1961 	nvlist_free(f);
1962 	out(O_DIE, "%s", failure);
1963 	/*NOTREACHED*/
1964 	return (NULL);
1965 }
1966 
1967 /* an ipath cache entry is an array of these, with s==NULL at the end */
1968 struct ipath {
1969 	const char *s;	/* component name (in stable) */
1970 	int i;		/* instance number */
1971 };
1972 
1973 static nvlist_t *
1974 ipath2fmri(struct ipath *ipath)
1975 {
1976 	nvlist_t **pa, *f, *p;
1977 	uint_t depth = 0;
1978 	char *numstr, *nullbyte;
1979 	char *failure;
1980 	int err, i;
1981 	struct ipath *ipp;
1982 
1983 	for (ipp = ipath; ipp->s != NULL; ipp++)
1984 		depth++;
1985 
1986 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
1987 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
1988 	pa = alloca(depth * sizeof (nvlist_t *));
1989 	for (i = 0; i < depth; i++)
1990 		pa[i] = NULL;
1991 
1992 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
1993 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
1994 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
1995 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
1996 	if (err != 0) {
1997 		failure = "basic construction of FMRI failed";
1998 		goto boom;
1999 	}
2000 
2001 	numbuf[MAXDIGITIDX] = '\0';
2002 	nullbyte = &numbuf[MAXDIGITIDX];
2003 	i = 0;
2004 
2005 	for (ipp = ipath; ipp->s != NULL; ipp++) {
2006 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2007 		if (err != 0) {
2008 			failure = "alloc of an hc-pair failed";
2009 			goto boom;
2010 		}
2011 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s);
2012 		numstr = ulltostr(ipp->i, nullbyte);
2013 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2014 		if (err != 0) {
2015 			failure = "construction of an hc-pair failed";
2016 			goto boom;
2017 		}
2018 		pa[i++] = p;
2019 	}
2020 
2021 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2022 	if (err == 0) {
2023 		for (i = 0; i < depth; i++)
2024 			if (pa[i] != NULL)
2025 				nvlist_free(pa[i]);
2026 		return (f);
2027 	}
2028 	failure = "addition of hc-pair array to FMRI failed";
2029 
2030 boom:
2031 	for (i = 0; i < depth; i++)
2032 		if (pa[i] != NULL)
2033 			nvlist_free(pa[i]);
2034 	nvlist_free(f);
2035 	out(O_DIE, "%s", failure);
2036 	/*NOTREACHED*/
2037 	return (NULL);
2038 }
2039 
/*
 * avg -- return sum / cnt rounded to the nearest integer (halves round
 * up).  returns 0 when cnt is 0.
 */
static uint_t
avg(uint_t sum, uint_t cnt)
{
	unsigned long long tenths;

	if (cnt == 0)
		return (0);

	/*
	 * widen before scaling: sum * 10 computed in uint_t would wrap
	 * for sums above UINT_MAX / 10.
	 */
	tenths = (unsigned long long)sum * 10 / cnt;

	return ((uint_t)(tenths / 10 + ((tenths % 10 >= 5) ? 1 : 0)));
}
2047 
/*
 * percentof -- return part as a percentage of whole, rounded to the
 * nearest integer (halves round up).  returns 0 when whole is 0.  the
 * result is converted to uint8_t, so it is only meaningful when part
 * does not greatly exceed whole.
 */
static uint8_t
percentof(uint_t part, uint_t whole)
{
	unsigned long long permille;

	if (whole == 0)
		return (0);

	/*
	 * widen before scaling: part * 1000 computed in uint_t would
	 * wrap for parts above UINT_MAX / 1000.
	 */
	permille = (unsigned long long)part * 1000 / whole;

	return ((uint8_t)(permille / 10 + ((permille % 10 >= 5) ? 1 : 0)));
}
2055 
2056 struct rsl {
2057 	struct event *suspect;
2058 	nvlist_t *asru;
2059 	nvlist_t *fru;
2060 	nvlist_t *rsrc;
2061 };
2062 
2063 static void publish_suspects(struct fme *fmep, struct rsl *srl);
2064 
2065 /*
2066  *  rslfree -- free internal members of struct rsl not expected to be
2067  *	freed elsewhere.
2068  */
2069 static void
2070 rslfree(struct rsl *freeme)
2071 {
2072 	if (freeme->asru != NULL)
2073 		nvlist_free(freeme->asru);
2074 	if (freeme->fru != NULL)
2075 		nvlist_free(freeme->fru);
2076 	if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru)
2077 		nvlist_free(freeme->rsrc);
2078 }
2079 
2080 /*
2081  *  rslcmp -- compare two rsl structures.  Use the following
2082  *	comparisons to establish cardinality:
2083  *
2084  *	1. Name of the suspect's class. (simple strcmp)
2085  *	2. Name of the suspect's ASRU. (trickier, since nvlist)
2086  *
2087  */
2088 static int
2089 rslcmp(const void *a, const void *b)
2090 {
2091 	struct rsl *r1 = (struct rsl *)a;
2092 	struct rsl *r2 = (struct rsl *)b;
2093 	int rv;
2094 
2095 	rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s,
2096 	    r2->suspect->enode->u.event.ename->u.name.s);
2097 	if (rv != 0)
2098 		return (rv);
2099 
2100 	if (r1->rsrc == NULL && r2->rsrc == NULL)
2101 		return (0);
2102 	if (r1->rsrc == NULL)
2103 		return (-1);
2104 	if (r2->rsrc == NULL)
2105 		return (1);
2106 	return (evnv_cmpnvl(r1->rsrc, r2->rsrc, 0));
2107 }
2108 
2109 /*
2110  *  rsluniq -- given an array of rsl structures, seek out and "remove"
2111  *	any duplicates.  Dups are "remove"d by NULLing the suspect pointer
2112  *	of the array element.  Removal also means updating the number of
2113  *	problems and the number of problems which are not faults.  User
2114  *	provides the first and last element pointers.
2115  */
2116 static void
2117 rsluniq(struct rsl *first, struct rsl *last, int *nprobs, int *nnonf)
2118 {
2119 	struct rsl *cr;
2120 
2121 	if (*nprobs == 1)
2122 		return;
2123 
2124 	/*
2125 	 *  At this point, we only expect duplicate defects.
2126 	 *  Eversholt's diagnosis algorithm prevents duplicate
2127 	 *  suspects, but we rewrite defects in the platform code after
2128 	 *  the diagnosis is made, and that can introduce new
2129 	 *  duplicates.
2130 	 */
2131 	while (first <= last) {
2132 		if (first->suspect == NULL || !is_defect(first->suspect->t)) {
2133 			first++;
2134 			continue;
2135 		}
2136 		cr = first + 1;
2137 		while (cr <= last) {
2138 			if (is_defect(first->suspect->t)) {
2139 				if (rslcmp(first, cr) == 0) {
2140 					cr->suspect = NULL;
2141 					rslfree(cr);
2142 					(*nprobs)--;
2143 					(*nnonf)--;
2144 				}
2145 			}
2146 			/*
2147 			 * assume all defects are in order after our
2148 			 * sort and short circuit here with "else break" ?
2149 			 */
2150 			cr++;
2151 		}
2152 		first++;
2153 	}
2154 }
2155 
2156 /*
2157  * get_resources -- for a given suspect, determine what ASRU, FRU and
2158  *     RSRC nvlists should be advertised in the final suspect list.
2159  */
2160 void
2161 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot)
2162 {
2163 	struct node *asrudef, *frudef;
2164 	nvlist_t *asru, *fru;
2165 	nvlist_t *rsrc = NULL;
2166 	char *pathstr;
2167 
2168 	/*
2169 	 * First find any ASRU and/or FRU defined in the
2170 	 * initial fault tree.
2171 	 */
2172 	asrudef = eventprop_lookup(sp, L_ASRU);
2173 	frudef = eventprop_lookup(sp, L_FRU);
2174 
2175 	/*
2176 	 * Create FMRIs based on those definitions
2177 	 */
2178 	asru = node2fmri(asrudef);
2179 	fru = node2fmri(frudef);
2180 	pathstr = ipath2str(NULL, sp->ipp);
2181 
2182 	/*
2183 	 *  Allow for platform translations of the FMRIs
2184 	 */
2185 	platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc,
2186 	    pathstr);
2187 
2188 	FREE(pathstr);
2189 	rsrcs->suspect = sp;
2190 	rsrcs->asru = asru;
2191 	rsrcs->fru = fru;
2192 	rsrcs->rsrc = rsrc;
2193 }
2194 
2195 /*
2196  * trim_suspects -- prior to publishing, we may need to remove some
2197  *    suspects from the list.  If we're auto-closing upsets, we don't
2198  *    want any of those in the published list.  If the ASRUs for multiple
2199  *    defects resolve to the same ASRU (driver) we only want to publish
2200  *    that as a single suspect.
2201  */
2202 static int
2203 trim_suspects(struct fme *fmep, struct rsl *begin, struct rsl *begin2,
2204     fmd_event_t *ffep, int *mess_zero_nonfaultp)
2205 {
2206 	struct event *ep;
2207 	struct rsl *rp = begin;
2208 	struct rsl *rp2 = begin2;
2209 	int mess_zero_count = 0;
2210 	int serd_rval;
2211 	uint_t messval;
2212 
2213 	/* remove any unwanted upsets and populate our array */
2214 	for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
2215 		if (is_upset(ep->t))
2216 			continue;
2217 		serd_rval = serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, ep,
2218 		    NULL, NULL);
2219 		if (serd_rval == 0)
2220 			continue;
2221 		if (node2uint(eventprop_lookup(ep, L_message),
2222 		    &messval) == 0 && messval == 0) {
2223 			get_resources(ep, rp2, fmep->config);
2224 			rp2++;
2225 			mess_zero_count++;
2226 			if (!is_fault(ep->t))
2227 				(*mess_zero_nonfaultp)++;
2228 		} else {
2229 			get_resources(ep, rp, fmep->config);
2230 			rp++;
2231 			fmep->nsuspects++;
2232 			if (!is_fault(ep->t))
2233 				fmep->nonfault++;
2234 		}
2235 	}
2236 	return (mess_zero_count);
2237 }
2238 
2239 /*
2240  * addpayloadprop -- add a payload prop to a problem
2241  */
2242 static void
2243 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault)
2244 {
2245 	nvlist_t *rsrc, *hcs;
2246 
2247 	ASSERT(fault != NULL);
2248 	ASSERT(lhs != NULL);
2249 	ASSERT(rhs != NULL);
2250 
2251 	if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, &rsrc) != 0)
2252 		out(O_DIE, "cannot add payloadprop \"%s\" to fault", lhs);
2253 
2254 	if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0) {
2255 		out(O_ALTFP|O_VERB2, "addpayloadprop: create hc_specific");
2256 		if (nvlist_xalloc(&hcs, NV_UNIQUE_NAME, &Eft_nv_hdl) != 0)
2257 			out(O_DIE,
2258 			    "cannot add payloadprop \"%s\" to fault", lhs);
2259 		if (nvlist_add_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, hcs) != 0)
2260 			out(O_DIE,
2261 			    "cannot add payloadprop \"%s\" to fault", lhs);
2262 		nvlist_free(hcs);
2263 		if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0)
2264 			out(O_DIE,
2265 			    "cannot add payloadprop \"%s\" to fault", lhs);
2266 	} else
2267 		out(O_ALTFP|O_VERB2, "addpayloadprop: reuse hc_specific");
2268 
2269 	if (rhs->t == UINT64) {
2270 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v);
2271 
2272 		if (nvlist_add_uint64(hcs, lhs, rhs->v) != 0)
2273 			out(O_DIE,
2274 			    "cannot add payloadprop \"%s\" to fault", lhs);
2275 	} else {
2276 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"",
2277 		    lhs, (char *)(uintptr_t)rhs->v);
2278 
2279 		if (nvlist_add_string(hcs, lhs, (char *)(uintptr_t)rhs->v) != 0)
2280 			out(O_DIE,
2281 			    "cannot add payloadprop \"%s\" to fault", lhs);
2282 	}
2283 }
2284 
/*
 * Scratch state shared by istat_save() and its lut_walk() callbacks:
 * Istatsz is the serialized size accumulated by istataddsize(),
 * Istatbuf is the buffer allocated for that size, and Istatbufptr is
 * the write position advanced by istat2str().
 */
static char *Istatbuf;
static char *Istatbufptr;
static int Istatsz;
2288 
2289 /*
2290  * istataddsize -- calculate size of istat and add it to Istatsz
2291  */
2292 /*ARGSUSED2*/
2293 static void
2294 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2295 {
2296 	int val;
2297 
2298 	ASSERT(lhs != NULL);
2299 	ASSERT(rhs != NULL);
2300 
2301 	if ((val = stats_counter_value(rhs)) == 0)
2302 		return;	/* skip zero-valued stats */
2303 
2304 	/* count up the size of the stat name */
2305 	Istatsz += ipath2strlen(lhs->ename, lhs->ipath);
2306 	Istatsz++;	/* for the trailing NULL byte */
2307 
2308 	/* count up the size of the stat value */
2309 	Istatsz += snprintf(NULL, 0, "%d", val);
2310 	Istatsz++;	/* for the trailing NULL byte */
2311 }
2312 
2313 /*
2314  * istat2str -- serialize an istat, writing result to *Istatbufptr
2315  */
2316 /*ARGSUSED2*/
2317 static void
2318 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2319 {
2320 	char *str;
2321 	int len;
2322 	int val;
2323 
2324 	ASSERT(lhs != NULL);
2325 	ASSERT(rhs != NULL);
2326 
2327 	if ((val = stats_counter_value(rhs)) == 0)
2328 		return;	/* skip zero-valued stats */
2329 
2330 	/* serialize the stat name */
2331 	str = ipath2str(lhs->ename, lhs->ipath);
2332 	len = strlen(str);
2333 
2334 	ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]);
2335 	(void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr);
2336 	Istatbufptr += len;
2337 	FREE(str);
2338 	*Istatbufptr++ = '\0';
2339 
2340 	/* serialize the stat value */
2341 	Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr,
2342 	    "%d", val);
2343 	*Istatbufptr++ = '\0';
2344 
2345 	ASSERT(Istatbufptr <= &Istatbuf[Istatsz]);
2346 }
2347 
2348 void
2349 istat_save()
2350 {
2351 	if (Istat_need_save == 0)
2352 		return;
2353 
2354 	/* figure out how big the serialzed info is */
2355 	Istatsz = 0;
2356 	lut_walk(Istats, (lut_cb)istataddsize, NULL);
2357 
2358 	if (Istatsz == 0) {
2359 		/* no stats to save */
2360 		fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2361 		return;
2362 	}
2363 
2364 	/* create the serialized buffer */
2365 	Istatbufptr = Istatbuf = MALLOC(Istatsz);
2366 	lut_walk(Istats, (lut_cb)istat2str, NULL);
2367 
2368 	/* clear out current saved stats */
2369 	fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2370 
2371 	/* write out the new version */
2372 	fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz);
2373 	FREE(Istatbuf);
2374 
2375 	Istat_need_save = 0;
2376 }
2377 
2378 int
2379 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2)
2380 {
2381 	if (ent1->ename != ent2->ename)
2382 		return (ent2->ename - ent1->ename);
2383 	if (ent1->ipath != ent2->ipath)
2384 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2385 
2386 	return (0);
2387 }
2388 
2389 /*
2390  * istat-verify -- verify the component associated with a stat still exists
2391  *
2392  * if the component no longer exists, this routine resets the stat and
2393  * returns 0.  if the component still exists, it returns 1.
2394  */
2395 static int
2396 istat_verify(struct node *snp, struct istat_entry *entp)
2397 {
2398 	struct stats *statp;
2399 	nvlist_t *fmri;
2400 
2401 	fmri = node2fmri(snp->u.event.epname);
2402 	if (platform_path_exists(fmri)) {
2403 		nvlist_free(fmri);
2404 		return (1);
2405 	}
2406 	nvlist_free(fmri);
2407 
2408 	/* component no longer in system.  zero out the associated stats */
2409 	if ((statp = (struct stats *)
2410 	    lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
2411 	    stats_counter_value(statp) == 0)
2412 		return (0);	/* stat is already reset */
2413 
2414 	Istat_need_save = 1;
2415 	stats_counter_reset(statp);
2416 	return (0);
2417 }
2418 
2419 static void
2420 istat_bump(struct node *snp, int n)
2421 {
2422 	struct stats *statp;
2423 	struct istat_entry ent;
2424 
2425 	ASSERT(snp != NULL);
2426 	ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
2427 	ASSERT(snp->u.event.epname != NULL);
2428 
2429 	/* class name should be hoisted into a single stable entry */
2430 	ASSERT(snp->u.event.ename->u.name.next == NULL);
2431 	ent.ename = snp->u.event.ename->u.name.s;
2432 	ent.ipath = ipath(snp->u.event.epname);
2433 
2434 	if (!istat_verify(snp, &ent)) {
2435 		/* component no longer exists in system, nothing to do */
2436 		return;
2437 	}
2438 
2439 	if ((statp = (struct stats *)
2440 	    lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
2441 		/* need to create the counter */
2442 		int cnt = 0;
2443 		struct node *np;
2444 		char *sname;
2445 		char *snamep;
2446 		struct istat_entry *newentp;
2447 
2448 		/* count up the size of the stat name */
2449 		np = snp->u.event.ename;
2450 		while (np != NULL) {
2451 			cnt += strlen(np->u.name.s);
2452 			cnt++;	/* for the '.' or '@' */
2453 			np = np->u.name.next;
2454 		}
2455 		np = snp->u.event.epname;
2456 		while (np != NULL) {
2457 			cnt += snprintf(NULL, 0, "%s%llu",
2458 			    np->u.name.s, np->u.name.child->u.ull);
2459 			cnt++;	/* for the '/' or trailing NULL byte */
2460 			np = np->u.name.next;
2461 		}
2462 
2463 		/* build the stat name */
2464 		snamep = sname = alloca(cnt);
2465 		np = snp->u.event.ename;
2466 		while (np != NULL) {
2467 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2468 			    "%s", np->u.name.s);
2469 			np = np->u.name.next;
2470 			if (np)
2471 				*snamep++ = '.';
2472 		}
2473 		*snamep++ = '@';
2474 		np = snp->u.event.epname;
2475 		while (np != NULL) {
2476 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2477 			    "%s%llu", np->u.name.s, np->u.name.child->u.ull);
2478 			np = np->u.name.next;
2479 			if (np)
2480 				*snamep++ = '/';
2481 		}
2482 		*snamep++ = '\0';
2483 
2484 		/* create the new stat & add it to our list */
2485 		newentp = MALLOC(sizeof (*newentp));
2486 		*newentp = ent;
2487 		statp = stats_new_counter(NULL, sname, 0);
2488 		Istats = lut_add(Istats, (void *)newentp, (void *)statp,
2489 		    (lut_cmp)istat_cmp);
2490 	}
2491 
2492 	/* if n is non-zero, set that value instead of bumping */
2493 	if (n) {
2494 		stats_counter_reset(statp);
2495 		stats_counter_add(statp, n);
2496 	} else
2497 		stats_counter_bump(statp);
2498 	Istat_need_save = 1;
2499 
2500 	ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
2501 	out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
2502 	    stats_counter_value(statp));
2503 }
2504 
/*
 * istat_destructor -- lut_free() callback for Istats: frees the
 *	istat_entry key (left) and deletes the stats value (right).
 */
/*ARGSUSED*/
static void
istat_destructor(void *left, void *right, void *arg)
{
	struct istat_entry *key = left;
	struct stats *counter = right;

	FREE(key);
	stats_delete(counter);
}
2514 
2515 /*
2516  * Callback used in a walk of the Istats to reset matching stat counters.
2517  */
2518 static void
2519 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
2520     const struct ipath *ipp)
2521 {
2522 	char *path;
2523 
2524 	if (entp->ipath == ipp) {
2525 		path = ipath2str(entp->ename, ipp);
2526 		out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
2527 		FREE(path);
2528 		stats_counter_reset(statp);
2529 		Istat_need_save = 1;
2530 	}
2531 }
2532 
2533 /*ARGSUSED*/
2534 static void
2535 istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp,
2536     void *unused)
2537 {
2538 	char *path;
2539 	nvlist_t *fmri;
2540 
2541 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2542 	if (!platform_path_exists(fmri)) {
2543 		path = ipath2str(entp->ename, entp->ipath);
2544 		out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path);
2545 		FREE(path);
2546 		stats_counter_reset(statp);
2547 		Istat_need_save = 1;
2548 	}
2549 	nvlist_free(fmri);
2550 }
2551 
2552 void
2553 istat_fini(void)
2554 {
2555 	lut_free(Istats, istat_destructor, NULL);
2556 }
2557 
/*
 * Scratch state shared by serd_save() and its lut_walk() callbacks:
 * Serdsz is the serialized size accumulated by serdaddsize(), Serdbuf
 * is the buffer allocated for that size, and Serdbufptr is the write
 * position advanced by serd2str().
 */
static char *Serdbuf;
static char *Serdbufptr;
static int Serdsz;
2561 
2562 /*
2563  * serdaddsize -- calculate size of serd and add it to Serdsz
2564  */
2565 /*ARGSUSED*/
2566 static void
2567 serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2568 {
2569 	ASSERT(lhs != NULL);
2570 
2571 	/* count up the size of the stat name */
2572 	Serdsz += ipath2strlen(lhs->ename, lhs->ipath);
2573 	Serdsz++;	/* for the trailing NULL byte */
2574 }
2575 
2576 /*
2577  * serd2str -- serialize a serd engine, writing result to *Serdbufptr
2578  */
2579 /*ARGSUSED*/
2580 static void
2581 serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2582 {
2583 	char *str;
2584 	int len;
2585 
2586 	ASSERT(lhs != NULL);
2587 
2588 	/* serialize the serd engine name */
2589 	str = ipath2str(lhs->ename, lhs->ipath);
2590 	len = strlen(str);
2591 
2592 	ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]);
2593 	(void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr);
2594 	Serdbufptr += len;
2595 	FREE(str);
2596 	*Serdbufptr++ = '\0';
2597 	ASSERT(Serdbufptr <= &Serdbuf[Serdsz]);
2598 }
2599 
2600 void
2601 serd_save()
2602 {
2603 	if (Serd_need_save == 0)
2604 		return;
2605 
2606 	/* figure out how big the serialzed info is */
2607 	Serdsz = 0;
2608 	lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL);
2609 
2610 	if (Serdsz == 0) {
2611 		/* no serd engines to save */
2612 		fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2613 		return;
2614 	}
2615 
2616 	/* create the serialized buffer */
2617 	Serdbufptr = Serdbuf = MALLOC(Serdsz);
2618 	lut_walk(SerdEngines, (lut_cb)serd2str, NULL);
2619 
2620 	/* clear out current saved stats */
2621 	fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2622 
2623 	/* write out the new version */
2624 	fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz);
2625 	FREE(Serdbuf);
2626 	Serd_need_save = 0;
2627 }
2628 
2629 int
2630 serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2)
2631 {
2632 	if (ent1->ename != ent2->ename)
2633 		return (ent2->ename - ent1->ename);
2634 	if (ent1->ipath != ent2->ipath)
2635 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2636 
2637 	return (0);
2638 }
2639 
2640 void
2641 fme_serd_load(fmd_hdl_t *hdl)
2642 {
2643 	int sz;
2644 	char *sbuf;
2645 	char *sepptr;
2646 	char *ptr;
2647 	struct serd_entry *newentp;
2648 	struct node *epname;
2649 	nvlist_t *fmri;
2650 	char *namestring;
2651 
2652 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0)
2653 		return;
2654 	sbuf = alloca(sz);
2655 	fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz);
2656 	ptr = sbuf;
2657 	while (ptr < &sbuf[sz]) {
2658 		sepptr = strchr(ptr, '@');
2659 		*sepptr = '\0';
2660 		namestring = ptr;
2661 		sepptr++;
2662 		ptr = sepptr;
2663 		ptr += strlen(ptr);
2664 		ptr++;	/* move past the '\0' separating paths */
2665 		epname = pathstring2epnamenp(sepptr);
2666 		fmri = node2fmri(epname);
2667 		if (platform_path_exists(fmri)) {
2668 			newentp = MALLOC(sizeof (*newentp));
2669 			newentp->hdl = hdl;
2670 			newentp->ipath = ipath(epname);
2671 			newentp->ename = stable(namestring);
2672 			SerdEngines = lut_add(SerdEngines, (void *)newentp,
2673 			    (void *)newentp, (lut_cmp)serd_cmp);
2674 		} else
2675 			Serd_need_save = 1;
2676 		tree_free(epname);
2677 		nvlist_free(fmri);
2678 	}
2679 	/* save it back again in case some of the paths no longer exist */
2680 	serd_save();
2681 }
2682 
/*
 * serd_destructor -- lut_free() callback for SerdEngines: frees the
 *	serd_entry passed as left.  right and arg are not used.
 */
/*ARGSUSED*/
static void
serd_destructor(void *left, void *right, void *arg)
{
	struct serd_entry *key = left;

	FREE(key);
}
2690 
2691 /*
2692  * Callback used in a walk of the SerdEngines to reset matching serd engines.
2693  */
2694 /*ARGSUSED*/
2695 static void
2696 serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp)
2697 {
2698 	char *path;
2699 
2700 	if (entp->ipath == ipp) {
2701 		path = ipath2str(entp->ename, ipp);
2702 		out(O_ALTFP, "serd_reset_cb: resetting %s", path);
2703 		fmd_serd_reset(entp->hdl, path);
2704 		FREE(path);
2705 		Serd_need_save = 1;
2706 	}
2707 }
2708 
2709 /*ARGSUSED*/
2710 static void
2711 serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2)
2712 {
2713 	char *path;
2714 	nvlist_t *fmri;
2715 
2716 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2717 	if (!platform_path_exists(fmri)) {
2718 		path = ipath2str(entp->ename, entp->ipath);
2719 		out(O_ALTFP, "serd_topo_chg_cb: not present %s", path);
2720 		fmd_serd_reset(entp->hdl, path);
2721 		FREE(path);
2722 		Serd_need_save = 1;
2723 	}
2724 	nvlist_free(fmri);
2725 }
2726 
2727 void
2728 serd_fini(void)
2729 {
2730 	lut_free(SerdEngines, serd_destructor, NULL);
2731 }
2732 
/*
 * publish_suspects -- add the suspects in srl to the fme's fmd case.
 *
 *	srl is an array of fmep->nsuspects rsl entries.  The array is
 *	sorted with rslcmp() and duplicates are dropped with rsluniq().  A
 *	certainty is then worked out for each remaining entry and a fault
 *	nvlist is created for it, decorated with the suspect's "message",
 *	"retire" and "response" properties and its payload properties,
 *	and added to the case.  The nvlists held by each rsl entry are
 *	freed along the way, and a suspect's "action" property is
 *	evaluated just before its fault is added.
 *
 *	The "count" istats of the suspects are bumped and saved at the
 *	end unless every published suspect was a fault whose ASRU fmd
 *	already reports as faulty.
 */
static void
publish_suspects(struct fme *fmep, struct rsl *srl)
{
	struct rsl *rp;
	nvlist_t *fault;
	uint8_t cert;
	uint_t *frs;
	uint_t fravg, frsum, fr;
	uint_t messval;
	uint_t retireval;
	uint_t responseval;
	struct node *snp;
	int frcnt, fridx;
	boolean_t allfaulty = B_TRUE;
	/* last array element; fixed before rsluniq() lowers nsuspects */
	struct rsl *erl = srl + fmep->nsuspects - 1;

	/*
	 * sort the array
	 */
	qsort(srl, fmep->nsuspects, sizeof (struct rsl), rslcmp);
	rsluniq(srl, erl, &fmep->nsuspects, &fmep->nonfault);

	/*
	 * If the suspect list is all faults, then for a given fault,
	 * say X of N, X's certainty is computed via:
	 *
	 * fitrate(X) / (fitrate(1) + ... + fitrate(N)) * 100
	 *
	 * If none of the suspects are faults, and there are N suspects,
	 * the certainty of a given suspect is 100/N.
	 *
	 * If there are are a mixture of faults and other problems in
	 * the suspect list, we take an average of the faults'
	 * FITrates and treat this average as the FITrate for any
	 * non-faults.  The fitrate of any given suspect is then
	 * computed per the first formula above.
	 */
	if (fmep->nonfault == fmep->nsuspects) {
		/* NO faults in the suspect list */
		cert = percentof(1, fmep->nsuspects);
	} else {
		/* sum the fitrates */
		frs = alloca(fmep->nsuspects * sizeof (uint_t));
		fridx = frcnt = frsum = 0;

		/*
		 * frs[] gets one slot per entry that survived rsluniq(),
		 * in array order: the FITrate for a fault, 0 for now for
		 * anything else.
		 */
		for (rp = srl; rp <= erl; rp++) {
			struct node *n;

			if (rp->suspect == NULL)
				continue;
			if (!is_fault(rp->suspect->t)) {
				frs[fridx++] = 0;
				continue;
			}
			n = eventprop_lookup(rp->suspect, L_FITrate);
			if (node2uint(n, &fr) != 0) {
				out(O_DEBUG|O_NONL, "event ");
				ipath_print(O_DEBUG|O_NONL,
				    rp->suspect->enode->u.event.ename->u.name.s,
				    rp->suspect->ipp);
				out(O_DEBUG, " has no FITrate (using 1)");
				fr = 1;
			} else if (fr == 0) {
				out(O_DEBUG|O_NONL, "event ");
				ipath_print(O_DEBUG|O_NONL,
				    rp->suspect->enode->u.event.ename->u.name.s,
				    rp->suspect->ipp);
				out(O_DEBUG, " has zero FITrate (using 1)");
				fr = 1;
			}

			frs[fridx++] = fr;
			frsum += fr;
			frcnt++;
		}
		/* non-faults (the 0 slots) take the average fault FITrate */
		fravg = avg(frsum, frcnt);
		for (fridx = 0; fridx < fmep->nsuspects; fridx++)
			if (frs[fridx] == 0) {
				frs[fridx] = fravg;
				frsum += fravg;
			}
		/* fridx is now nsuspects; the loop below counts it down */
	}

	/* Add them in reverse order of our sort, as fmd reverses order */
	for (rp = erl; rp >= srl; rp--) {
		if (rp->suspect == NULL)
			continue;
		if (!is_fault(rp->suspect->t))
			allfaulty = B_FALSE;
		/* frs[] is consumed from the end, matching the reverse walk */
		if (fmep->nonfault != fmep->nsuspects)
			cert = percentof(frs[--fridx], frsum);
		fault = fmd_nvl_create_fault(fmep->hdl,
		    rp->suspect->enode->u.event.ename->u.name.s,
		    cert,
		    rp->asru,
		    rp->fru,
		    rp->rsrc);
		if (fault == NULL)
			out(O_DIE, "fault creation failed");
		/* if "message" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_message),
		    &messval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds message=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    messval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_MESSAGE,
			    (messval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-message to fault");
			}
		}

		/* if "retire" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_retire),
		    &retireval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds retire=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    retireval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_RETIRE,
			    (retireval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-retire to fault");
			}
		}

		/* if "response" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_response),
		    &responseval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds response=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    responseval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_RESPONSE,
			    (responseval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-response to fault");
			}
		}

		/* add any payload properties */
		lut_walk(rp->suspect->payloadprops,
		    (lut_cb)addpayloadprop, (void *)fault);
		/* rp's nvlists are not used again; rp->suspect still is */
		rslfree(rp);

		/*
		 * If "action" property exists, evaluate it;  this must be done
		 * before the allfaulty check below since some actions may
		 * modify the asru to be used in fmd_nvl_fmri_has_fault.  This
		 * needs to be restructured if any new actions are introduced
		 * that have effects that we do not want to be visible if
		 * we decide not to publish in the dupclose check below.
		 */
		if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
			struct evalue evalue;

			out(O_ALTFP|O_NONL,
			    "[FME%d, %s action ", fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s);
			ptree_name_iter(O_ALTFP|O_NONL, snp);
			out(O_ALTFP, "]");
			Action_nvl = fault;
			(void) eval_expr(snp, NULL, NULL, NULL, NULL,
			    NULL, 0, &evalue);
		}

		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);

		/*
		 * check if the asru is already marked as "faulty".
		 */
		if (allfaulty) {
			nvlist_t *asru;

			out(O_ALTFP|O_VERB, "FME%d dup check ", fmep->id);
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
			out(O_ALTFP|O_VERB|O_NONL, " ");
			if (nvlist_lookup_nvlist(fault,
			    FM_FAULT_ASRU, &asru) != 0) {
				out(O_ALTFP|O_VERB, "NULL asru");
				allfaulty = B_FALSE;
			} else if (fmd_nvl_fmri_has_fault(fmep->hdl, asru,
			    FMD_HAS_FAULT_ASRU, NULL)) {
				out(O_ALTFP|O_VERB, "faulty");
			} else {
				out(O_ALTFP|O_VERB, "not faulty");
				allfaulty = B_FALSE;
			}
		}

	}

	if (!allfaulty) {
		/*
		 * don't update the count stat if all asrus are already
		 * present and unrepaired in the asru cache
		 */
		for (rp = erl; rp >= srl; rp--) {
			struct event *suspect = rp->suspect;

			if (suspect == NULL)
				continue;

			/* if "count" exists, increment the appropriate stat */
			if ((snp = eventprop_lookup(suspect,
			    L_count)) != NULL) {
				out(O_ALTFP|O_NONL,
				    "[FME%d, %s count ", fmep->id,
				    suspect->enode->u.event.ename->u.name.s);
				ptree_name_iter(O_ALTFP|O_NONL, snp);
				out(O_ALTFP, "]");
				istat_bump(snp, 0);

			}
		}
		istat_save();	/* write out any istat changes */
	}
}
2958 
2959 static void
2960 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase)
2961 {
2962 	struct case_list *newcase;
2963 	nvlist_t *defect;
2964 
2965 	out(O_ALTFP,
2966 	    "[undiagnosable ereport received, "
2967 	    "creating and closing a new case (%s)]",
2968 	    Undiag_reason ? Undiag_reason : "reason not provided");
2969 
2970 	newcase = MALLOC(sizeof (struct case_list));
2971 	newcase->next = NULL;
2972 	newcase->fmcase = fmcase;
2973 	if (Undiagablecaselist != NULL)
2974 		newcase->next = Undiagablecaselist;
2975 	Undiagablecaselist = newcase;
2976 
2977 	if (ffep != NULL)
2978 		fmd_case_add_ereport(hdl, newcase->fmcase, ffep);
2979 
2980 	defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
2981 	    NULL, NULL, NULL);
2982 	if (Undiag_reason != NULL)
2983 		(void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason);
2984 	fmd_case_add_suspect(hdl, newcase->fmcase, defect);
2985 
2986 	fmd_case_solve(hdl, newcase->fmcase);
2987 	fmd_case_close(hdl, newcase->fmcase);
2988 }
2989 
2990 static void
2991 fme_undiagnosable(struct fme *f)
2992 {
2993 	nvlist_t *defect;
2994 
2995 	out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
2996 	    f->id, fmd_case_uuid(f->hdl, f->fmcase),
2997 	    Undiag_reason ? Undiag_reason : "undiagnosable");
2998 
2999 	defect = fmd_nvl_create_fault(f->hdl, UNDIAGNOSABLE_DEFECT, 100,
3000 	    NULL, NULL, NULL);
3001 	if (Undiag_reason != NULL)
3002 		(void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason);
3003 	fmd_case_add_suspect(f->hdl, f->fmcase, defect);
3004 	fmd_case_solve(f->hdl, f->fmcase);
3005 	fmd_case_close(f->hdl, f->fmcase);
3006 }
3007 
3008 /*
3009  * fme_close_case
3010  *
3011  *	Find the requested case amongst our fmes and close it.  Free up
3012  *	the related fme.
3013  */
3014 void
3015 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase)
3016 {
3017 	struct case_list *ucasep, *prevcasep = NULL;
3018 	struct fme *prev = NULL;
3019 	struct fme *fmep;
3020 
3021 	for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) {
3022 		if (fmcase != ucasep->fmcase) {
3023 			prevcasep = ucasep;
3024 			continue;
3025 		}
3026 
3027 		if (prevcasep == NULL)
3028 			Undiagablecaselist = Undiagablecaselist->next;
3029 		else
3030 			prevcasep->next = ucasep->next;
3031 
3032 		FREE(ucasep);
3033 		return;
3034 	}
3035 
3036 	for (fmep = FMElist; fmep; fmep = fmep->next) {
3037 		if (fmep->hdl == hdl && fmep->fmcase == fmcase)
3038 			break;
3039 		prev = fmep;
3040 	}
3041 
3042 	if (fmep == NULL) {
3043 		out(O_WARN, "Eft asked to close unrecognized case [%s].",
3044 		    fmd_case_uuid(hdl, fmcase));
3045 		return;
3046 	}
3047 
3048 	if (EFMElist == fmep)
3049 		EFMElist = prev;
3050 
3051 	if (prev == NULL)
3052 		FMElist = FMElist->next;
3053 	else
3054 		prev->next = fmep->next;
3055 
3056 	fmep->next = NULL;
3057 
3058 	/* Get rid of any timer this fme has set */
3059 	if (fmep->wull != 0)
3060 		fmd_timer_remove(fmep->hdl, fmep->timer);
3061 
3062 	if (ClosedFMEs == NULL) {
3063 		ClosedFMEs = fmep;
3064 	} else {
3065 		fmep->next = ClosedFMEs;
3066 		ClosedFMEs = fmep;
3067 	}
3068 
3069 	Open_fme_count--;
3070 
3071 	/* See if we can close the overflow FME */
3072 	if (Open_fme_count <= Max_fme) {
3073 		for (fmep = FMElist; fmep; fmep = fmep->next) {
3074 			if (fmep->overflow && !(fmd_case_closed(fmep->hdl,
3075 			    fmep->fmcase)))
3076 				break;
3077 		}
3078 
3079 		if (fmep != NULL)
3080 			fmd_case_close(fmep->hdl, fmep->fmcase);
3081 	}
3082 }
3083 
3084 /*
3085  * fme_set_timer()
3086  *	If the time we need to wait for the given FME is less than the
3087  *	current timer, kick that old timer out and establish a new one.
3088  */
3089 static int
3090 fme_set_timer(struct fme *fmep, unsigned long long wull)
3091 {
3092 	out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait ");
3093 	ptree_timeval(O_ALTFP|O_VERB, &wull);
3094 
3095 	if (wull <= fmep->pull) {
3096 		out(O_ALTFP|O_VERB|O_NONL, "already have waited at least ");
3097 		ptree_timeval(O_ALTFP|O_VERB, &fmep->pull);
3098 		out(O_ALTFP|O_VERB, NULL);
3099 		/* we've waited at least wull already, don't need timer */
3100 		return (0);
3101 	}
3102 
3103 	out(O_ALTFP|O_VERB|O_NONL, " currently ");
3104 	if (fmep->wull != 0) {
3105 		out(O_ALTFP|O_VERB|O_NONL, "waiting ");
3106 		ptree_timeval(O_ALTFP|O_VERB, &fmep->wull);
3107 		out(O_ALTFP|O_VERB, NULL);
3108 	} else {
3109 		out(O_ALTFP|O_VERB|O_NONL, "not waiting");
3110 		out(O_ALTFP|O_VERB, NULL);
3111 	}
3112 
3113 	if (fmep->wull != 0)
3114 		if (wull >= fmep->wull)
3115 			/* New timer would fire later than established timer */
3116 			return (0);
3117 
3118 	if (fmep->wull != 0) {
3119 		fmd_timer_remove(fmep->hdl, fmep->timer);
3120 	}
3121 
3122 	fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep,
3123 	    fmep->e0r, wull);
3124 	out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer);
3125 	fmep->wull = wull;
3126 	return (1);
3127 }
3128 
3129 void
3130 fme_timer_fired(struct fme *fmep, id_t tid)
3131 {
3132 	struct fme *ffmep = NULL;
3133 
3134 	for (ffmep = FMElist; ffmep; ffmep = ffmep->next)
3135 		if (ffmep == fmep)
3136 			break;
3137 
3138 	if (ffmep == NULL) {
3139 		out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.",
3140 		    (void *)fmep);
3141 		return;
3142 	}
3143 
3144 	out(O_ALTFP|O_VERB, "Timer fired %lx", tid);
3145 	fmep->pull = fmep->wull;
3146 	fmep->wull = 0;
3147 	fmd_buf_write(fmep->hdl, fmep->fmcase,
3148 	    WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull));
3149 
3150 	fme_eval(fmep, fmep->e0r);
3151 }
3152 
3153 /*
3154  * Preserve the fme's suspect list in its psuspects list, NULLing the
3155  * suspects list in the meantime.
3156  */
3157 static void
3158 save_suspects(struct fme *fmep)
3159 {
3160 	struct event *ep;
3161 	struct event *nextep;
3162 
3163 	/* zero out the previous suspect list */
3164 	for (ep = fmep->psuspects; ep; ep = nextep) {
3165 		nextep = ep->psuspects;
3166 		ep->psuspects = NULL;
3167 	}
3168 	fmep->psuspects = NULL;
3169 
3170 	/* zero out the suspect list, copying it to previous suspect list */
3171 	fmep->psuspects = fmep->suspects;
3172 	for (ep = fmep->suspects; ep; ep = nextep) {
3173 		nextep = ep->suspects;
3174 		ep->psuspects = ep->suspects;
3175 		ep->suspects = NULL;
3176 		ep->is_suspect = 0;
3177 	}
3178 	fmep->suspects = NULL;
3179 	fmep->nsuspects = 0;
3180 	fmep->nonfault = 0;
3181 }
3182 
3183 /*
3184  * Retrieve the fme's suspect list from its psuspects list.
3185  */
3186 static void
3187 restore_suspects(struct fme *fmep)
3188 {
3189 	struct event *ep;
3190 	struct event *nextep;
3191 
3192 	fmep->nsuspects = fmep->nonfault = 0;
3193 	fmep->suspects = fmep->psuspects;
3194 	for (ep = fmep->psuspects; ep; ep = nextep) {
3195 		fmep->nsuspects++;
3196 		if (!is_fault(ep->t))
3197 			fmep->nonfault++;
3198 		nextep = ep->psuspects;
3199 		ep->suspects = ep->psuspects;
3200 	}
3201 }
3202 
3203 /*
3204  * this is what we use to call the Emrys prototype code instead of main()
3205  */
3206 static void
3207 fme_eval(struct fme *fmep, fmd_event_t *ffep)
3208 {
3209 	struct event *ep;
3210 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
3211 	struct rsl *srl = NULL;
3212 	struct rsl *srl2 = NULL;
3213 	int mess_zero_count;
3214 	int mess_zero_nonfault = 0;
3215 	int rpcnt;
3216 
3217 	save_suspects(fmep);
3218 
3219 	out(O_ALTFP, "Evaluate FME %d", fmep->id);
3220 	indent_set("  ");
3221 
3222 	lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
3223 	fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
3224 
3225 	out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
3226 	    fme_state2str(fmep->state));
3227 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
3228 		out(O_ALTFP|O_NONL, " ");
3229 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
3230 	}
3231 	out(O_ALTFP, NULL);
3232 
3233 	switch (fmep->state) {
3234 	case FME_CREDIBLE:
3235 		print_suspects(SLNEW, fmep);
3236 		(void) upsets_eval(fmep, ffep);
3237 
3238 		/*
3239 		 * we may have already posted suspects in upsets_eval() which
3240 		 * can recurse into fme_eval() again. If so then just return.
3241 		 */
3242 		if (fmep->posted_suspects)
3243 			return;
3244 
3245 		stats_counter_bump(fmep->diags);
3246 		rpcnt = fmep->nsuspects;
3247 		save_suspects(fmep);
3248 
3249 		/*
3250 		 * create two lists, one for "message=1" faults and one for
3251 		 * "message=0" faults. If we have a mixture we will generate
3252 		 * two separate suspect lists.
3253 		 */
3254 		srl = MALLOC(rpcnt * sizeof (struct rsl));
3255 		bzero(srl, rpcnt * sizeof (struct rsl));
3256 		srl2 = MALLOC(rpcnt * sizeof (struct rsl));
3257 		bzero(srl2, rpcnt * sizeof (struct rsl));
3258 		mess_zero_count = trim_suspects(fmep, srl, srl2, ffep,
3259 		    &mess_zero_nonfault);
3260 
3261 		/*
3262 		 * If the resulting suspect list has no members, we're
3263 		 * done so simply close the case. Otherwise sort and publish.
3264 		 */
3265 		if (fmep->nsuspects == 0 && mess_zero_count == 0) {
3266 			out(O_ALTFP,
3267 			    "[FME%d, case %s (all suspects are upsets)]",
3268 			    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
3269 			fmd_case_close(fmep->hdl, fmep->fmcase);
3270 		} else if (fmep->nsuspects != 0 && mess_zero_count == 0) {
3271 			publish_suspects(fmep, srl);
3272 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3273 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3274 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3275 		} else if (fmep->nsuspects == 0 && mess_zero_count != 0) {
3276 			fmep->nsuspects = mess_zero_count;
3277 			fmep->nonfault = mess_zero_nonfault;
3278 			publish_suspects(fmep, srl2);
3279 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3280 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3281 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3282 		} else {
3283 			struct event *obsp;
3284 			struct fme *nfmep;
3285 
3286 			publish_suspects(fmep, srl);
3287 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3288 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3289 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3290 
3291 			/*
3292 			 * Got both message=0 and message=1 so create a
3293 			 * duplicate case. Also need a temporary duplicate fme
3294 			 * structure for use by publish_suspects().
3295 			 */
3296 			nfmep = alloc_fme();
3297 			nfmep->id =  Nextid++;
3298 			nfmep->hdl = fmep->hdl;
3299 			nfmep->nsuspects = mess_zero_count;
3300 			nfmep->nonfault = mess_zero_nonfault;
3301 			nfmep->fmcase = fmd_case_open(fmep->hdl, NULL);
3302 			out(O_ALTFP|O_STAMP,
3303 			    "[creating parallel FME%d, case %s]", nfmep->id,
3304 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3305 			Open_fme_count++;
3306 			if (ffep) {
3307 				fmd_case_setprincipal(nfmep->hdl,
3308 				    nfmep->fmcase, ffep);
3309 				fmd_case_add_ereport(nfmep->hdl,
3310 				    nfmep->fmcase, ffep);
3311 			}
3312 			for (obsp = fmep->observations; obsp;
3313 			    obsp = obsp->observations)
3314 				if (obsp->ffep && obsp->ffep != ffep)
3315 					fmd_case_add_ereport(nfmep->hdl,
3316 					    nfmep->fmcase, obsp->ffep);
3317 
3318 			publish_suspects(nfmep, srl2);
3319 			out(O_ALTFP, "[solving FME%d, case %s]", nfmep->id,
3320 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3321 			fmd_case_solve(nfmep->hdl, nfmep->fmcase);
3322 			FREE(nfmep);
3323 		}
3324 		FREE(srl);
3325 		FREE(srl2);
3326 		restore_suspects(fmep);
3327 
3328 		fmep->posted_suspects = 1;
3329 		fmd_buf_write(fmep->hdl, fmep->fmcase,
3330 		    WOBUF_POSTD,
3331 		    (void *)&fmep->posted_suspects,
3332 		    sizeof (fmep->posted_suspects));
3333 
3334 		/*
3335 		 * Now the suspects have been posted, we can clear up
3336 		 * the instance tree as we won't be looking at it again.
3337 		 * Also cancel the timer as the case is now solved.
3338 		 */
3339 		if (fmep->wull != 0) {
3340 			fmd_timer_remove(fmep->hdl, fmep->timer);
3341 			fmep->wull = 0;
3342 		}
3343 		break;
3344 
3345 	case FME_WAIT:
3346 		ASSERT(my_delay > fmep->ull);
3347 		(void) fme_set_timer(fmep, my_delay);
3348 		print_suspects(SLWAIT, fmep);
3349 		itree_prune(fmep->eventtree);
3350 		return;
3351 
3352 	case FME_DISPROVED:
3353 		print_suspects(SLDISPROVED, fmep);
3354 		Undiag_reason = UD_UNSOLVD;
3355 		fme_undiagnosable(fmep);
3356 		break;
3357 	}
3358 
3359 	itree_free(fmep->eventtree);
3360 	fmep->eventtree = NULL;
3361 	structconfig_free(fmep->config);
3362 	fmep->config = NULL;
3363 	destroy_fme_bufs(fmep);
3364 }
3365 
/*
 * Prototypes for routines that are called by the code below before (or
 * without) their definitions having been seen.
 */
static void indent(void);
static int triggered(struct fme *fmep, struct event *ep, int mark);
static enum fme_state effects_test(struct fme *fmep, struct event *fault_event,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state requirements_test(struct fme *fmep,
    struct event *ep, unsigned long long at_latest_by,
    unsigned long long *pdelay);
static enum fme_state causes_test(struct fme *fmep,
    struct event *ep, unsigned long long at_latest_by,
    unsigned long long *pdelay);
3375 
3376 static int
3377 checkconstraints(struct fme *fmep, struct arrow *arrowp)
3378 {
3379 	struct constraintlist *ctp;
3380 	struct evalue value;
3381 	char *sep = "";
3382 
3383 	if (arrowp->forever_false) {
3384 		indent();
3385 		out(O_ALTFP|O_VERB|O_NONL, "  Forever false constraint: ");
3386 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3387 			out(O_ALTFP|O_VERB|O_NONL, sep);
3388 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3389 			sep = ", ";
3390 		}
3391 		out(O_ALTFP|O_VERB, NULL);
3392 		return (0);
3393 	}
3394 	if (arrowp->forever_true) {
3395 		indent();
3396 		out(O_ALTFP|O_VERB|O_NONL, "  Forever true constraint: ");
3397 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3398 			out(O_ALTFP|O_VERB|O_NONL, sep);
3399 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3400 			sep = ", ";
3401 		}
3402 		out(O_ALTFP|O_VERB, NULL);
3403 		return (1);
3404 	}
3405 
3406 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3407 		if (eval_expr(ctp->cnode, NULL, NULL,
3408 		    &fmep->globals, fmep->config,
3409 		    arrowp, 0, &value)) {
3410 			/* evaluation successful */
3411 			if (value.t == UNDEFINED || value.v == 0) {
3412 				/* known false */
3413 				arrowp->forever_false = 1;
3414 				indent();
3415 				out(O_ALTFP|O_VERB|O_NONL,
3416 				    "  False constraint: ");
3417 				ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3418 				out(O_ALTFP|O_VERB, NULL);
3419 				return (0);
3420 			}
3421 		} else {
3422 			/* evaluation unsuccessful -- unknown value */
3423 			indent();
3424 			out(O_ALTFP|O_VERB|O_NONL,
3425 			    "  Deferred constraint: ");
3426 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3427 			out(O_ALTFP|O_VERB, NULL);
3428 			return (1);
3429 		}
3430 	}
3431 	/* known true */
3432 	arrowp->forever_true = 1;
3433 	indent();
3434 	out(O_ALTFP|O_VERB|O_NONL, "  True constraint: ");
3435 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3436 		out(O_ALTFP|O_VERB|O_NONL, sep);
3437 		ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3438 		sep = ", ";
3439 	}
3440 	out(O_ALTFP|O_VERB, NULL);
3441 	return (1);
3442 }
3443 
3444 static int
3445 triggered(struct fme *fmep, struct event *ep, int mark)
3446 {
3447 	struct bubble *bp;
3448 	struct arrowlist *ap;
3449 	int count = 0;
3450 
3451 	stats_counter_bump(fmep->Tcallcount);
3452 	for (bp = itree_next_bubble(ep, NULL); bp;
3453 	    bp = itree_next_bubble(ep, bp)) {
3454 		if (bp->t != B_TO)
3455 			continue;
3456 		for (ap = itree_next_arrow(bp, NULL); ap;
3457 		    ap = itree_next_arrow(bp, ap)) {
3458 			/* check count of marks against K in the bubble */
3459 			if ((ap->arrowp->mark & mark) &&
3460 			    ++count >= bp->nork)
3461 				return (1);
3462 		}
3463 	}
3464 	return (0);
3465 }
3466 
3467 static int
3468 mark_arrows(struct fme *fmep, struct event *ep, int mark,
3469     unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
3470 {
3471 	struct bubble *bp;
3472 	struct arrowlist *ap;
3473 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3474 	unsigned long long my_delay;
3475 	enum fme_state result;
3476 	int retval = 0;
3477 
3478 	for (bp = itree_next_bubble(ep, NULL); bp;
3479 	    bp = itree_next_bubble(ep, bp)) {
3480 		if (bp->t != B_FROM)
3481 			continue;
3482 		stats_counter_bump(fmep->Marrowcount);
3483 		for (ap = itree_next_arrow(bp, NULL); ap;
3484 		    ap = itree_next_arrow(bp, ap)) {
3485 			struct event *ep2 = ap->arrowp->head->myevent;
3486 			/*
3487 			 * if we're clearing marks, we can avoid doing
3488 			 * all that work evaluating constraints.
3489 			 */
3490 			if (mark == 0) {
3491 				if (ap->arrowp->arrow_marked == 0)
3492 					continue;
3493 				ap->arrowp->arrow_marked = 0;
3494 				ap->arrowp->mark &= ~EFFECTS_COUNTER;
3495 				if (keep && (ep2->cached_state &
3496 				    (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
3497 					ep2->keep_in_tree = 1;
3498 				ep2->cached_state &=
3499 				    ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
3500 				(void) mark_arrows(fmep, ep2, mark, 0, NULL,
3501 				    keep);
3502 				continue;
3503 			}
3504 			ap->arrowp->arrow_marked = 1;
3505 			if (ep2->cached_state & REQMNTS_DISPROVED) {
3506 				indent();
3507 				out(O_ALTFP|O_VERB|O_NONL,
3508 				    "  ALREADY DISPROVED ");
3509 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3510 				out(O_ALTFP|O_VERB, NULL);
3511 				continue;
3512 			}
3513 			if (ep2->cached_state & WAIT_EFFECT) {
3514 				indent();
3515 				out(O_ALTFP|O_VERB|O_NONL,
3516 				    "  ALREADY EFFECTS WAIT ");
3517 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3518 				out(O_ALTFP|O_VERB, NULL);
3519 				continue;
3520 			}
3521 			if (ep2->cached_state & CREDIBLE_EFFECT) {
3522 				indent();
3523 				out(O_ALTFP|O_VERB|O_NONL,
3524 				    "  ALREADY EFFECTS CREDIBLE ");
3525 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3526 				out(O_ALTFP|O_VERB, NULL);
3527 				continue;
3528 			}
3529 			if ((ep2->cached_state & PARENT_WAIT) &&
3530 			    (mark & PARENT_WAIT)) {
3531 				indent();
3532 				out(O_ALTFP|O_VERB|O_NONL,
3533 				    "  ALREADY PARENT EFFECTS WAIT ");
3534 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3535 				out(O_ALTFP|O_VERB, NULL);
3536 				continue;
3537 			}
3538 			platform_set_payloadnvp(ep2->nvp);
3539 			if (checkconstraints(fmep, ap->arrowp) == 0) {
3540 				platform_set_payloadnvp(NULL);
3541 				indent();
3542 				out(O_ALTFP|O_VERB|O_NONL,
3543 				    "  CONSTRAINTS FAIL ");
3544 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3545 				out(O_ALTFP|O_VERB, NULL);
3546 				continue;
3547 			}
3548 			platform_set_payloadnvp(NULL);
3549 			ap->arrowp->mark |= EFFECTS_COUNTER;
3550 			if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
3551 				indent();
3552 				out(O_ALTFP|O_VERB|O_NONL,
3553 				    "  K-COUNT NOT YET MET ");
3554 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3555 				out(O_ALTFP|O_VERB, NULL);
3556 				continue;
3557 			}
3558 			ep2->cached_state &= ~PARENT_WAIT;
3559 			/*
3560 			 * if we've reached an ereport and no propagation time
3561 			 * is specified, use the Hesitate value
3562 			 */
3563 			if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
3564 			    ap->arrowp->maxdelay == 0ULL) {
3565 				out(O_ALTFP|O_VERB|O_NONL, "  default wait ");
3566 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3567 				out(O_ALTFP|O_VERB, NULL);
3568 				result = requirements_test(fmep, ep2, Hesitate,
3569 				    &my_delay);
3570 			} else {
3571 				result = requirements_test(fmep, ep2,
3572 				    at_latest_by + ap->arrowp->maxdelay,
3573 				    &my_delay);
3574 			}
3575 			if (result == FME_WAIT) {
3576 				retval = WAIT_EFFECT;
3577 				if (overall_delay > my_delay)
3578 					overall_delay = my_delay;
3579 				ep2->cached_state |= WAIT_EFFECT;
3580 				indent();
3581 				out(O_ALTFP|O_VERB|O_NONL, "  EFFECTS WAIT ");
3582 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3583 				out(O_ALTFP|O_VERB, NULL);
3584 				indent_push("  E");
3585 				if (mark_arrows(fmep, ep2, PARENT_WAIT,
3586 				    at_latest_by, &my_delay, 0) ==
3587 				    WAIT_EFFECT) {
3588 					retval = WAIT_EFFECT;
3589 					if (overall_delay > my_delay)
3590 						overall_delay = my_delay;
3591 				}
3592 				indent_pop();
3593 			} else if (result == FME_DISPROVED) {
3594 				indent();
3595 				out(O_ALTFP|O_VERB|O_NONL,
3596 				    "  EFFECTS DISPROVED ");
3597 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3598 				out(O_ALTFP|O_VERB, NULL);
3599 			} else {
3600 				ep2->cached_state |= mark;
3601 				indent();
3602 				if (mark == CREDIBLE_EFFECT)
3603 					out(O_ALTFP|O_VERB|O_NONL,
3604 					    "  EFFECTS CREDIBLE ");
3605 				else
3606 					out(O_ALTFP|O_VERB|O_NONL,
3607 					    "  PARENT EFFECTS WAIT ");
3608 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3609 				out(O_ALTFP|O_VERB, NULL);
3610 				indent_push("  E");
3611 				if (mark_arrows(fmep, ep2, mark, at_latest_by,
3612 				    &my_delay, 0) == WAIT_EFFECT) {
3613 					retval = WAIT_EFFECT;
3614 					if (overall_delay > my_delay)
3615 						overall_delay = my_delay;
3616 				}
3617 				indent_pop();
3618 			}
3619 		}
3620 	}
3621 	if (retval == WAIT_EFFECT)
3622 		*pdelay = overall_delay;
3623 	return (retval);
3624 }
3625 
3626 static enum fme_state
3627 effects_test(struct fme *fmep, struct event *fault_event,
3628     unsigned long long at_latest_by, unsigned long long *pdelay)
3629 {
3630 	struct event *error_event;
3631 	enum fme_state return_value = FME_CREDIBLE;
3632 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3633 	unsigned long long my_delay;
3634 
3635 	stats_counter_bump(fmep->Ecallcount);
3636 	indent_push("  E");
3637 	indent();
3638 	out(O_ALTFP|O_VERB|O_NONL, "->");
3639 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3640 	out(O_ALTFP|O_VERB, NULL);
3641 
3642 	if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
3643 	    &my_delay, 0) == WAIT_EFFECT) {
3644 		return_value = FME_WAIT;
3645 		if (overall_delay > my_delay)
3646 			overall_delay = my_delay;
3647 	}
3648 	for (error_event = fmep->observations;
3649 	    error_event; error_event = error_event->observations) {
3650 		indent();
3651 		out(O_ALTFP|O_VERB|O_NONL, " ");
3652 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
3653 		if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
3654 			if (error_event->cached_state &
3655 			    (PARENT_WAIT|WAIT_EFFECT)) {
3656 				out(O_ALTFP|O_VERB, " NOT YET triggered");
3657 				continue;
3658 			}
3659 			return_value = FME_DISPROVED;
3660 			out(O_ALTFP|O_VERB, " NOT triggered");
3661 			break;
3662 		} else {
3663 			out(O_ALTFP|O_VERB, " triggered");
3664 		}
3665 	}
3666 	if (return_value == FME_DISPROVED) {
3667 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
3668 	} else {
3669 		fault_event->keep_in_tree = 1;
3670 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
3671 	}
3672 
3673 	indent();
3674 	out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
3675 	    fme_state2str(return_value));
3676 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3677 	out(O_ALTFP|O_VERB, NULL);
3678 	indent_pop();
3679 	if (return_value == FME_WAIT)
3680 		*pdelay = overall_delay;
3681 	return (return_value);
3682 }
3683 
3684 static enum fme_state
3685 requirements_test(struct fme *fmep, struct event *ep,
3686     unsigned long long at_latest_by, unsigned long long *pdelay)
3687 {
3688 	int waiting_events;
3689 	int credible_events;
3690 	int deferred_events;
3691 	enum fme_state return_value = FME_CREDIBLE;
3692 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3693 	unsigned long long arrow_delay;
3694 	unsigned long long my_delay;
3695 	struct event *ep2;
3696 	struct bubble *bp;
3697 	struct arrowlist *ap;
3698 
3699 	if (ep->cached_state & REQMNTS_CREDIBLE) {
3700 		indent();
3701 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY CREDIBLE ");
3702 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3703 		out(O_ALTFP|O_VERB, NULL);
3704 		return (FME_CREDIBLE);
3705 	}
3706 	if (ep->cached_state & REQMNTS_DISPROVED) {
3707 		indent();
3708 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY DISPROVED ");
3709 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3710 		out(O_ALTFP|O_VERB, NULL);
3711 		return (FME_DISPROVED);
3712 	}
3713 	if (ep->cached_state & REQMNTS_WAIT) {
3714 		indent();
3715 		*pdelay = ep->cached_delay;
3716 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY WAIT ");
3717 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3718 		out(O_ALTFP|O_VERB|O_NONL, ", wait for: ");
3719 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3720 		out(O_ALTFP|O_VERB, NULL);
3721 		return (FME_WAIT);
3722 	}
3723 	stats_counter_bump(fmep->Rcallcount);
3724 	indent_push("  R");
3725 	indent();
3726 	out(O_ALTFP|O_VERB|O_NONL, "->");
3727 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3728 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
3729 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3730 	out(O_ALTFP|O_VERB, NULL);
3731 
3732 	if (ep->t == N_EREPORT) {
3733 		if (ep->count == 0) {
3734 			if (fmep->pull >= at_latest_by) {
3735 				return_value = FME_DISPROVED;
3736 			} else {
3737 				ep->cached_delay = *pdelay = at_latest_by;
3738 				return_value = FME_WAIT;
3739 			}
3740 		}
3741 
3742 		indent();
3743 		switch (return_value) {
3744 		case FME_CREDIBLE:
3745 			ep->cached_state |= REQMNTS_CREDIBLE;
3746 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE ");
3747 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3748 			break;
3749 		case FME_DISPROVED:
3750 			ep->cached_state |= REQMNTS_DISPROVED;
3751 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
3752 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3753 			break;
3754 		case FME_WAIT:
3755 			ep->cached_state |= REQMNTS_WAIT;
3756 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT ");
3757 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3758 			out(O_ALTFP|O_VERB|O_NONL, " to ");
3759 			ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3760 			break;
3761 		default:
3762 			out(O_DIE, "requirements_test: unexpected fme_state");
3763 			break;
3764 		}
3765 		out(O_ALTFP|O_VERB, NULL);
3766 		indent_pop();
3767 
3768 		return (return_value);
3769 	}
3770 
3771 	/* this event is not a report, descend the tree */
3772 	for (bp = itree_next_bubble(ep, NULL); bp;
3773 	    bp = itree_next_bubble(ep, bp)) {
3774 		int n;
3775 
3776 		if (bp->t != B_FROM)
3777 			continue;
3778 
3779 		n = bp->nork;
3780 
3781 		credible_events = 0;
3782 		waiting_events = 0;
3783 		deferred_events = 0;
3784 		arrow_delay = TIMEVAL_EVENTUALLY;
3785 		/*
3786 		 * n is -1 for 'A' so adjust it.
3787 		 * XXX just count up the arrows for now.
3788 		 */
3789 		if (n < 0) {
3790 			n = 0;
3791 			for (ap = itree_next_arrow(bp, NULL); ap;
3792 			    ap = itree_next_arrow(bp, ap))
3793 				n++;
3794 			indent();
3795 			out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n);
3796 		} else {
3797 			indent();
3798 			out(O_ALTFP|O_VERB, " Bubble N=%d", n);
3799 		}
3800 
3801 		if (n == 0)
3802 			continue;
3803 		if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) {
3804 			for (ap = itree_next_arrow(bp, NULL); ap;
3805 			    ap = itree_next_arrow(bp, ap)) {
3806 				ep2 = ap->arrowp->head->myevent;
3807 				platform_set_payloadnvp(ep2->nvp);
3808 				(void) checkconstraints(fmep, ap->arrowp);
3809 				if (ap->arrowp->forever_true) {
3810 					/*
3811 					 * if all arrows are invalidated by the
3812 					 * constraints, then we should elide the
3813 					 * whole bubble to be consistant with
3814 					 * the tree creation time behaviour
3815 					 */
3816 					bp->mark |= BUBBLE_OK;
3817 					platform_set_payloadnvp(NULL);
3818 					break;
3819 				}
3820 				platform_set_payloadnvp(NULL);
3821 			}
3822 		}
3823 		for (ap = itree_next_arrow(bp, NULL); ap;
3824 		    ap = itree_next_arrow(bp, ap)) {
3825 			ep2 = ap->arrowp->head->myevent;
3826 			if (n <= credible_events)
3827 				break;
3828 
3829 			ap->arrowp->mark |= REQMNTS_COUNTER;
3830 			if (triggered(fmep, ep2, REQMNTS_COUNTER))
3831 				/* XXX adding max timevals! */
3832 				switch (requirements_test(fmep, ep2,
3833 				    at_latest_by + ap->arrowp->maxdelay,
3834 				    &my_delay)) {
3835 				case FME_DEFERRED:
3836 					deferred_events++;
3837 					break;
3838 				case FME_CREDIBLE:
3839 					credible_events++;
3840 					break;
3841 				case FME_DISPROVED:
3842 					break;
3843 				case FME_WAIT:
3844 					if (my_delay < arrow_delay)
3845 						arrow_delay = my_delay;
3846 					waiting_events++;
3847 					break;
3848 				default:
3849 					out(O_DIE,
3850 					"Bug in requirements_test.");
3851 				}
3852 			else
3853 				deferred_events++;
3854 		}
3855 		if (!(bp->mark & BUBBLE_OK) && waiting_events == 0) {
3856 			bp->mark |= BUBBLE_ELIDED;
3857 			continue;
3858 		}
3859 		indent();
3860 		out(O_ALTFP|O_VERB, " Credible: %d Waiting %d",
3861 		    credible_events + deferred_events, waiting_events);
3862 		if (credible_events + deferred_events + waiting_events < n) {
3863 			/* Can never meet requirements */
3864 			ep->cached_state |= REQMNTS_DISPROVED;
3865 			indent();
3866 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
3867 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3868 			out(O_ALTFP|O_VERB, NULL);
3869 			indent_pop();
3870 			return (FME_DISPROVED);
3871 		}
3872 		if (credible_events + deferred_events < n) {
3873 			/* will have to wait */
3874 			/* wait time is shortest known */
3875 			if (arrow_delay < overall_delay)
3876 				overall_delay = arrow_delay;
3877 			return_value = FME_WAIT;
3878 		} else if (credible_events < n) {
3879 			if (return_value != FME_WAIT)
3880 				return_value = FME_DEFERRED;
3881 		}
3882 	}
3883 
3884 	/*
3885 	 * don't mark as FME_DEFERRED. If this event isn't reached by another
3886 	 * path, then this will be considered FME_CREDIBLE. But if it is
3887 	 * reached by a different path so the K-count is met, then might
3888 	 * get overridden by FME_WAIT or FME_DISPROVED.
3889 	 */
3890 	if (return_value == FME_WAIT) {
3891 		ep->cached_state |= REQMNTS_WAIT;
3892 		ep->cached_delay = *pdelay = overall_delay;
3893 	} else if (return_value == FME_CREDIBLE) {
3894 		ep->cached_state |= REQMNTS_CREDIBLE;
3895 	}
3896 	indent();
3897 	out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ",
3898 	    fme_state2str(return_value));
3899 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3900 	out(O_ALTFP|O_VERB, NULL);
3901 	indent_pop();
3902 	return (return_value);
3903 }
3904 
3905 static enum fme_state
3906 causes_test(struct fme *fmep, struct event *ep,
3907     unsigned long long at_latest_by, unsigned long long *pdelay)
3908 {
3909 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3910 	unsigned long long my_delay;
3911 	int credible_results = 0;
3912 	int waiting_results = 0;
3913 	enum fme_state fstate;
3914 	struct event *tail_event;
3915 	struct bubble *bp;
3916 	struct arrowlist *ap;
3917 	int k = 1;
3918 
3919 	stats_counter_bump(fmep->Ccallcount);
3920 	indent_push("  C");
3921 	indent();
3922 	out(O_ALTFP|O_VERB|O_NONL, "->");
3923 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3924 	out(O_ALTFP|O_VERB, NULL);
3925 
3926 	for (bp = itree_next_bubble(ep, NULL); bp;
3927 	    bp = itree_next_bubble(ep, bp)) {
3928 		if (bp->t != B_TO)
3929 			continue;
3930 		k = bp->nork;	/* remember the K value */
3931 		for (ap = itree_next_arrow(bp, NULL); ap;
3932 		    ap = itree_next_arrow(bp, ap)) {
3933 			int do_not_follow = 0;
3934 
3935 			/*
3936 			 * if we get to the same event multiple times
3937 			 * only worry about the first one.
3938 			 */
3939 			if (ap->arrowp->tail->myevent->cached_state &
3940 			    CAUSES_TESTED) {
3941 				indent();
3942 				out(O_ALTFP|O_VERB|O_NONL,
3943 				    "  causes test already run for ");
3944 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
3945 				    ap->arrowp->tail->myevent);
3946 				out(O_ALTFP|O_VERB, NULL);
3947 				continue;
3948 			}
3949 
3950 			/*
3951 			 * see if false constraint prevents us
3952 			 * from traversing this arrow
3953 			 */
3954 			platform_set_payloadnvp(ep->nvp);
3955 			if (checkconstraints(fmep, ap->arrowp) == 0)
3956 				do_not_follow = 1;
3957 			platform_set_payloadnvp(NULL);
3958 			if (do_not_follow) {
3959 				indent();
3960 				out(O_ALTFP|O_VERB|O_NONL,
3961 				    "  False arrow from ");
3962 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
3963 				    ap->arrowp->tail->myevent);
3964 				out(O_ALTFP|O_VERB, NULL);
3965 				continue;
3966 			}
3967 
3968 			ap->arrowp->tail->myevent->cached_state |=
3969 			    CAUSES_TESTED;
3970 			tail_event = ap->arrowp->tail->myevent;
3971 			fstate = hypothesise(fmep, tail_event, at_latest_by,
3972 			    &my_delay);
3973 
3974 			switch (fstate) {
3975 			case FME_WAIT:
3976 				if (my_delay < overall_delay)
3977 					overall_delay = my_delay;
3978 				waiting_results++;
3979 				break;
3980 			case FME_CREDIBLE:
3981 				credible_results++;
3982 				break;
3983 			case FME_DISPROVED:
3984 				break;
3985 			default:
3986 				out(O_DIE, "Bug in causes_test");
3987 			}
3988 		}
3989 	}
3990 	/* compare against K */
3991 	if (credible_results + waiting_results < k) {
3992 		indent();
3993 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED ");
3994 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3995 		out(O_ALTFP|O_VERB, NULL);
3996 		indent_pop();
3997 		return (FME_DISPROVED);
3998 	}
3999 	if (waiting_results != 0) {
4000 		*pdelay = overall_delay;
4001 		indent();
4002 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT ");
4003 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4004 		out(O_ALTFP|O_VERB|O_NONL, " to ");
4005 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4006 		out(O_ALTFP|O_VERB, NULL);
4007 		indent_pop();
4008 		return (FME_WAIT);
4009 	}
4010 	indent();
4011 	out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE ");
4012 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4013 	out(O_ALTFP|O_VERB, NULL);
4014 	indent_pop();
4015 	return (FME_CREDIBLE);
4016 }
4017 
4018 static enum fme_state
4019 hypothesise(struct fme *fmep, struct event *ep,
4020 	unsigned long long at_latest_by, unsigned long long *pdelay)
4021 {
4022 	enum fme_state rtr, otr;
4023 	unsigned long long my_delay;
4024 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4025 
4026 	stats_counter_bump(fmep->Hcallcount);
4027 	indent_push("  H");
4028 	indent();
4029 	out(O_ALTFP|O_VERB|O_NONL, "->");
4030 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4031 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
4032 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4033 	out(O_ALTFP|O_VERB, NULL);
4034 
4035 	rtr = requirements_test(fmep, ep, at_latest_by, &my_delay);
4036 	if ((rtr == FME_WAIT) && (my_delay < overall_delay))
4037 		overall_delay = my_delay;
4038 	if (rtr != FME_DISPROVED) {
4039 		if (is_problem(ep->t)) {
4040 			otr = effects_test(fmep, ep, at_latest_by, &my_delay);
4041 			if (otr != FME_DISPROVED) {
4042 				if (fmep->peek == 0 && ep->is_suspect == 0) {
4043 					ep->suspects = fmep->suspects;
4044 					ep->is_suspect = 1;
4045 					fmep->suspects = ep;
4046 					fmep->nsuspects++;
4047 					if (!is_fault(ep->t))
4048 						fmep->nonfault++;
4049 				}
4050 			}
4051 		} else
4052 			otr = causes_test(fmep, ep, at_latest_by, &my_delay);
4053 		if ((otr == FME_WAIT) && (my_delay < overall_delay))
4054 			overall_delay = my_delay;
4055 		if ((otr != FME_DISPROVED) &&
4056 		    ((rtr == FME_WAIT) || (otr == FME_WAIT)))
4057 			*pdelay = overall_delay;
4058 	}
4059 	if (rtr == FME_DISPROVED) {
4060 		indent();
4061 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4062 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4063 		out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
4064 		indent_pop();
4065 		return (FME_DISPROVED);
4066 	}
4067 	if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
4068 		indent();
4069 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4070 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4071 		out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
4072 		indent_pop();
4073 		return (FME_DISPROVED);
4074 	}
4075 	if (otr == FME_DISPROVED) {
4076 		indent();
4077 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4078 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4079 		out(O_ALTFP|O_VERB, " (causes are not credible)");
4080 		indent_pop();
4081 		return (FME_DISPROVED);
4082 	}
4083 	if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
4084 		indent();
4085 		out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
4086 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4087 		out(O_ALTFP|O_VERB|O_NONL, " to ");
4088 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
4089 		out(O_ALTFP|O_VERB, NULL);
4090 		indent_pop();
4091 		return (FME_WAIT);
4092 	}
4093 	indent();
4094 	out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
4095 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4096 	out(O_ALTFP|O_VERB, NULL);
4097 	indent_pop();
4098 	return (FME_CREDIBLE);
4099 }
4100 
4101 /*
4102  * fme_istat_load -- reconstitute any persistent istats
4103  */
4104 void
4105 fme_istat_load(fmd_hdl_t *hdl)
4106 {
4107 	int sz;
4108 	char *sbuf;
4109 	char *ptr;
4110 
4111 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) {
4112 		out(O_ALTFP, "fme_istat_load: No stats");
4113 		return;
4114 	}
4115 
4116 	sbuf = alloca(sz);
4117 
4118 	fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz);
4119 
4120 	/*
4121 	 * pick apart the serialized stats
4122 	 *
4123 	 * format is:
4124 	 *	<class-name>, '@', <path>, '\0', <value>, '\0'
4125 	 * for example:
4126 	 *	"stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
4127 	 *
4128 	 * since this is parsing our own serialized data, any parsing issues
4129 	 * are fatal, so we check for them all with ASSERT() below.
4130 	 */
4131 	ptr = sbuf;
4132 	while (ptr < &sbuf[sz]) {
4133 		char *sepptr;
4134 		struct node *np;
4135 		int val;
4136 
4137 		sepptr = strchr(ptr, '@');
4138 		ASSERT(sepptr != NULL);
4139 		*sepptr = '\0';
4140 
4141 		/* construct the event */
4142 		np = newnode(T_EVENT, NULL, 0);
4143 		np->u.event.ename = newnode(T_NAME, NULL, 0);
4144 		np->u.event.ename->u.name.t = N_STAT;
4145 		np->u.event.ename->u.name.s = stable(ptr);
4146 		np->u.event.ename->u.name.it = IT_ENAME;
4147 		np->u.event.ename->u.name.last = np->u.event.ename;
4148 
4149 		ptr = sepptr + 1;
4150 		ASSERT(ptr < &sbuf[sz]);
4151 		ptr += strlen(ptr);
4152 		ptr++;	/* move past the '\0' separating path from value */
4153 		ASSERT(ptr < &sbuf[sz]);
4154 		ASSERT(isdigit(*ptr));
4155 		val = atoi(ptr);
4156 		ASSERT(val > 0);
4157 		ptr += strlen(ptr);
4158 		ptr++;	/* move past the final '\0' for this entry */
4159 
4160 		np->u.event.epname = pathstring2epnamenp(sepptr + 1);
4161 		ASSERT(np->u.event.epname != NULL);
4162 
4163 		istat_bump(np, val);
4164 		tree_free(np);
4165 	}
4166 
4167 	istat_save();
4168 }
4169