xref: /illumos-gate/usr/src/cmd/fm/modules/common/eversholt/fme.c (revision f6f4cb8ada400367a1921f6b93fb9e02f53ac5e6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * fme.c -- fault management exercise module
27  *
28  * this module provides the simulated fault management exercise.
29  */
30 
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <strings.h>
35 #include <ctype.h>
36 #include <alloca.h>
37 #include <libnvpair.h>
38 #include <sys/fm/protocol.h>
39 #include <fm/fmd_api.h>
40 #include "alloc.h"
41 #include "out.h"
42 #include "stats.h"
43 #include "stable.h"
44 #include "literals.h"
45 #include "lut.h"
46 #include "tree.h"
47 #include "ptree.h"
48 #include "itree.h"
49 #include "ipath.h"
50 #include "fme.h"
51 #include "evnv.h"
52 #include "eval.h"
53 #include "config.h"
54 #include "platform.h"
55 #include "esclex.h"
56 
57 /* imported from eft.c... */
58 extern hrtime_t Hesitate;
59 extern char *Serd_Override;
60 extern nv_alloc_t Eft_nv_hdl;
61 extern int Max_fme;
62 extern fmd_hdl_t *Hdl;
63 
64 static int Istat_need_save;
65 static int Serd_need_save;
66 void istat_save(void);
67 void serd_save(void);
68 
69 /* fme under construction is global so we can free it on module abort */
70 static struct fme *Nfmep;
71 
72 static const char *Undiag_reason;
73 
74 static int Nextid = 0;
75 
76 static int Open_fme_count = 0;	/* Count of open FMEs */
77 
/* list of fault management exercises underway */
static struct fme {
	struct fme *next;		/* next exercise */
	unsigned long long ull;		/* time when fme was created */
	int id;				/* FME id */
	struct config *config;		/* cooked configuration data */
	struct lut *eventtree;		/* propagation tree for this FME */
	/*
	 * The initial error report that created this FME is kept in
	 * two forms.  e0 points to the instance tree node and is used
	 * by fme_eval() as the starting point for the inference
	 * algorithm.  e0r is the event handle FMD passed to us when
	 * the ereport first arrived and is used when setting timers,
	 * which are always relative to the time of this initial
	 * report.
	 */
	struct event *e0;		/* instance tree node for e0 */
	fmd_event_t *e0r;		/* fmd's handle for the e0 ereport */

	id_t    timer;			/* for setting an fmd time-out */

	struct event *ecurrent;		/* ereport under consideration */
	struct event *suspects;		/* current suspect list */
	struct event *psuspects;	/* previous suspect list */
	int nsuspects;			/* count of suspects */
	int nonfault;			/* zero if all suspects T_FAULT */
	int posted_suspects;		/* true if we've posted a diagnosis */
	int uniqobs;			/* number of unique events observed */
	int peek;			/* just peeking, don't track suspects */
	int overflow;			/* true if overflow FME */
	enum fme_state {
		FME_NOTHING = 5000,	/* not evaluated yet */
		FME_WAIT,		/* need to wait for more info */
		FME_CREDIBLE,		/* suspect list is credible */
		FME_DISPROVED,		/* no valid suspects found */
		FME_DEFERRED		/* don't know yet (k-count not met) */
	} state;

	unsigned long long pull;	/* time passed since created */
	unsigned long long wull;	/* wait until this time for re-eval */
	struct event *observations;	/* observation list */
	struct lut *globals;		/* values of global variables */
	/* fmd interfacing */
	fmd_hdl_t *hdl;			/* handle for talking with fmd */
	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
	/* stats (names are "fme<id>.<stat>", see fme_ready()) */
	struct stats *Rcount;		/* ereports received */
	struct stats *Hcallcount;	/* calls to hypothesise() */
	struct stats *Rcallcount;	/* calls to requirements_test() */
	struct stats *Ccallcount;	/* calls to causes_test() */
	struct stats *Ecallcount;	/* calls to effects_test() */
	struct stats *Tcallcount;	/* calls to triggered() */
	struct stats *Marrowcount;	/* arrows marked by mark_arrows() */
	struct stats *diags;		/* suspect lists diagnosed */
} *FMElist, *EFMElist, *ClosedFMEs;	/* active head, active tail, closed */

/* cases we couldn't restart; solved/closed as undiagnosable (fme_restart) */
static struct case_list {
	fmd_case_t *fmcase;		/* the unrestartable fmd case */
	struct case_list *next;
} *Undiagablecaselist;
138 
139 static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
140 static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
141 	unsigned long long at_latest_by, unsigned long long *pdelay);
142 static struct node *eventprop_lookup(struct event *ep, const char *propname);
143 static struct node *pathstring2epnamenp(char *path);
144 static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
145 	fmd_case_t *fmcase);
146 static void restore_suspects(struct fme *fmep);
147 static void save_suspects(struct fme *fmep);
148 static void destroy_fme(struct fme *f);
149 static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
150     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
151 static void istat_counter_reset_cb(struct istat_entry *entp,
152     struct stats *statp, const struct ipath *ipp);
153 static void istat_counter_topo_chg_cb(struct istat_entry *entp,
154     struct stats *statp, void *unused);
155 static void serd_reset_cb(struct serd_entry *entp, void *unused,
156     const struct ipath *ipp);
157 static void serd_topo_chg_cb(struct serd_entry *entp, void *unused,
158     void *unused2);
159 static void destroy_fme_bufs(struct fme *fp);
160 
161 static struct fme *
162 alloc_fme(void)
163 {
164 	struct fme *fmep;
165 
166 	fmep = MALLOC(sizeof (*fmep));
167 	bzero(fmep, sizeof (*fmep));
168 	return (fmep);
169 }
170 
171 /*
172  * fme_ready -- called when all initialization of the FME (except for
173  *	stats) has completed successfully.  Adds the fme to global lists
174  *	and establishes its stats.
175  */
176 static struct fme *
177 fme_ready(struct fme *fmep)
178 {
179 	char nbuf[100];
180 
181 	Nfmep = NULL;	/* don't need to free this on module abort now */
182 
183 	if (EFMElist) {
184 		EFMElist->next = fmep;
185 		EFMElist = fmep;
186 	} else
187 		FMElist = EFMElist = fmep;
188 
189 	(void) sprintf(nbuf, "fme%d.Rcount", fmep->id);
190 	fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
191 	(void) sprintf(nbuf, "fme%d.Hcall", fmep->id);
192 	fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1);
193 	(void) sprintf(nbuf, "fme%d.Rcall", fmep->id);
194 	fmep->Rcallcount = stats_new_counter(nbuf,
195 	    "calls to requirements_test()", 1);
196 	(void) sprintf(nbuf, "fme%d.Ccall", fmep->id);
197 	fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1);
198 	(void) sprintf(nbuf, "fme%d.Ecall", fmep->id);
199 	fmep->Ecallcount =
200 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
201 	(void) sprintf(nbuf, "fme%d.Tcall", fmep->id);
202 	fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
203 	(void) sprintf(nbuf, "fme%d.Marrow", fmep->id);
204 	fmep->Marrowcount = stats_new_counter(nbuf,
205 	    "arrows marked by mark_arrows()", 1);
206 	(void) sprintf(nbuf, "fme%d.diags", fmep->id);
207 	fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
208 
209 	out(O_ALTFP|O_VERB2, "newfme: config snapshot contains...");
210 	config_print(O_ALTFP|O_VERB2, fmep->config);
211 
212 	return (fmep);
213 }
214 
215 extern void ipath_dummy_lut(struct arrow *);
216 extern struct lut *itree_create_dummy(const char *, const struct ipath *);
217 
218 /* ARGSUSED */
219 static void
220 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
221 {
222 	struct bubble *bp;
223 	struct arrowlist *ap;
224 
225 	for (bp = itree_next_bubble(ep, NULL); bp;
226 	    bp = itree_next_bubble(ep, bp)) {
227 		if (bp->t != B_FROM)
228 			continue;
229 		for (ap = itree_next_arrow(bp, NULL); ap;
230 		    ap = itree_next_arrow(bp, ap)) {
231 			ap->arrowp->pnode->u.arrow.needed = 1;
232 			ipath_dummy_lut(ap->arrowp);
233 		}
234 	}
235 }
236 
237 /* ARGSUSED */
238 static void
239 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
240 {
241 	struct bubble *bp;
242 	struct arrowlist *ap;
243 
244 	for (bp = itree_next_bubble(ep, NULL); bp;
245 	    bp = itree_next_bubble(ep, bp)) {
246 		if (bp->t != B_FROM)
247 			continue;
248 		for (ap = itree_next_arrow(bp, NULL); ap;
249 		    ap = itree_next_arrow(bp, ap))
250 			ap->arrowp->pnode->u.arrow.needed = 0;
251 	}
252 }
253 
254 static void globals_destructor(void *left, void *right, void *arg);
255 static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep);
256 
257 static void
258 prune_propagations(const char *e0class, const struct ipath *e0ipp)
259 {
260 	char nbuf[100];
261 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
262 	extern struct lut *Usednames;
263 
264 	Nfmep = alloc_fme();
265 	Nfmep->id = Nextid;
266 	Nfmep->state = FME_NOTHING;
267 	Nfmep->eventtree = itree_create_dummy(e0class, e0ipp);
268 	if ((Nfmep->e0 =
269 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
270 		out(O_ALTFP, "prune_propagations: e0 not in instance tree");
271 		itree_free(Nfmep->eventtree);
272 		FREE(Nfmep);
273 		Nfmep = NULL;
274 		return;
275 	}
276 	Nfmep->ecurrent = Nfmep->observations = Nfmep->e0;
277 	Nfmep->e0->count++;
278 
279 	(void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id);
280 	Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
281 	(void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id);
282 	Nfmep->Hcallcount =
283 	    stats_new_counter(nbuf, "calls to hypothesise()", 1);
284 	(void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id);
285 	Nfmep->Rcallcount = stats_new_counter(nbuf,
286 	    "calls to requirements_test()", 1);
287 	(void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id);
288 	Nfmep->Ccallcount =
289 	    stats_new_counter(nbuf, "calls to causes_test()", 1);
290 	(void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id);
291 	Nfmep->Ecallcount =
292 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
293 	(void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id);
294 	Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
295 	(void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id);
296 	Nfmep->Marrowcount = stats_new_counter(nbuf,
297 	    "arrows marked by mark_arrows()", 1);
298 	(void) sprintf(nbuf, "fme%d.diags", Nfmep->id);
299 	Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
300 
301 	Nfmep->peek = 1;
302 	lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep);
303 	lut_free(Usednames, NULL, NULL);
304 	Usednames = NULL;
305 	lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep);
306 	(void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay);
307 	itree_prune(Nfmep->eventtree);
308 	lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep);
309 
310 	stats_delete(Nfmep->Rcount);
311 	stats_delete(Nfmep->Hcallcount);
312 	stats_delete(Nfmep->Rcallcount);
313 	stats_delete(Nfmep->Ccallcount);
314 	stats_delete(Nfmep->Ecallcount);
315 	stats_delete(Nfmep->Tcallcount);
316 	stats_delete(Nfmep->Marrowcount);
317 	stats_delete(Nfmep->diags);
318 	itree_free(Nfmep->eventtree);
319 	lut_free(Nfmep->globals, globals_destructor, NULL);
320 	FREE(Nfmep);
321 }
322 
/*
 * newfme -- create a new FME for the ereport named by e0class/e0ipp.
 *	Snapshots and cooks the platform configuration, builds the
 *	instance tree, and looks up e0 in it.  On success returns the
 *	fme via fme_ready(); on failure sets Undiag_reason and returns
 *	NULL after freeing everything allocated so far.
 */
static struct fme *
newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl,
	fmd_case_t *fmcase)
{
	struct cfgdata *cfgdata;
	int init_size;
	extern int alloc_total();

	/* init_size lets the O_STAMP messages report snapshot memory cost */
	init_size = alloc_total();
	out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size);
	if ((cfgdata = config_snapshot()) == NULL) {
		out(O_ALTFP, "newfme: NULL configuration");
		Undiag_reason = UD_NOCONF;
		return (NULL);
	}
	/* persist the raw config to the case so fme_restart() can redo this */
	platform_save_config(hdl, fmcase);
	out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes",
	    alloc_total() - init_size);

	/* Nfmep is global so fme_fini() can free it on module abort */
	Nfmep = alloc_fme();

	Nfmep->id = Nextid++;
	Nfmep->config = cfgdata->cooked;
	config_free(cfgdata);
	Nfmep->posted_suspects = 0;
	Nfmep->uniqobs = 0;
	Nfmep->state = FME_NOTHING;
	Nfmep->pull = 0ULL;
	Nfmep->overflow = 0;

	Nfmep->fmcase = fmcase;
	Nfmep->hdl = hdl;

	if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) {
		out(O_ALTFP, "newfme: NULL instance tree");
		Undiag_reason = UD_INSTFAIL;
		structconfig_free(Nfmep->config);
		destroy_fme_bufs(Nfmep);
		FREE(Nfmep);
		Nfmep = NULL;
		return (NULL);
	}

	itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);

	/* the triggering ereport must appear in the instance tree */
	if ((Nfmep->e0 =
	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
		out(O_ALTFP, "newfme: e0 not in instance tree");
		Undiag_reason = UD_BADEVENTI;
		itree_free(Nfmep->eventtree);
		structconfig_free(Nfmep->config);
		destroy_fme_bufs(Nfmep);
		FREE(Nfmep);
		Nfmep = NULL;
		return (NULL);
	}

	return (fme_ready(Nfmep));
}
382 
383 void
384 fme_fini(void)
385 {
386 	struct fme *sfp, *fp;
387 	struct case_list *ucasep, *nextcasep;
388 
389 	ucasep = Undiagablecaselist;
390 	while (ucasep != NULL) {
391 		nextcasep = ucasep->next;
392 		FREE(ucasep);
393 		ucasep = nextcasep;
394 	}
395 	Undiagablecaselist = NULL;
396 
397 	/* clean up closed fmes */
398 	fp = ClosedFMEs;
399 	while (fp != NULL) {
400 		sfp = fp->next;
401 		destroy_fme(fp);
402 		fp = sfp;
403 	}
404 	ClosedFMEs = NULL;
405 
406 	fp = FMElist;
407 	while (fp != NULL) {
408 		sfp = fp->next;
409 		destroy_fme(fp);
410 		fp = sfp;
411 	}
412 	FMElist = EFMElist = NULL;
413 
414 	/* if we were in the middle of creating an fme, free it now */
415 	if (Nfmep) {
416 		destroy_fme(Nfmep);
417 		Nfmep = NULL;
418 	}
419 }
420 
421 /*
422  * Allocated space for a buffer name.  20 bytes allows for
423  * a ridiculous 9,999,999 unique observations.
424  */
425 #define	OBBUFNMSZ 20
426 
/*
 *  serialize_observation
 *
 *  Create a recoverable version of the current observation
 *  (f->ecurrent).  We keep a serialized version of each unique
 *  observation in order that we may resume correctly the fme in the
 *  correct state if eft or fmd crashes and we're restarted.
 */
static void
serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
{
	size_t pkdlen;
	char tmpbuf[OBBUFNMSZ];
	char *pkd = NULL;
	char *estr;

	/* save the "class@path" string under case buffer "observed<N>" */
	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
	estr = ipath2str(cls, ipp);
	fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
	fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
	    strlen(estr) + 1);
	FREE(estr);

	/*
	 * If the ereport carried an nvlist, save it XDR-packed under
	 * "observed<N>.nvp" (reconstitute_observations() treats this
	 * buffer as optional).
	 */
	if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
		(void) snprintf(tmpbuf,
		    OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
		if (nvlist_xpack(fp->ecurrent->nvp,
		    &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
			out(O_DIE|O_SYS, "pack of observed nvl failed");
		fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
		fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
		FREE(pkd);
	}

	/* bump the unique-observation count and persist it (WOBUF_NOBS) */
	fp->uniqobs++;
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
	    sizeof (fp->uniqobs));
}
465 
/*
 *  init_fme_bufs -- We keep several bits of state about an fme for
 *	use if eft or fmd crashes and we're restarted.  Each item is
 *	stored in its own named fmd case buffer, read back by
 *	fme_restart().
 */
static void
init_fme_bufs(struct fme *fp)
{
	/* time already waited before any crash/restart */
	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
	    sizeof (fp->pull));

	/* FME id, so replayed FMEs keep their original identity */
	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
	    sizeof (fp->id));

	/* count of serialized observations (see serialize_observation()) */
	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
	    sizeof (fp->uniqobs));

	/* whether a diagnosis has already been posted for this case */
	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
	    sizeof (fp->posted_suspects));
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
	    (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
}
490 
491 static void
492 destroy_fme_bufs(struct fme *fp)
493 {
494 	char tmpbuf[OBBUFNMSZ];
495 	int o;
496 
497 	platform_restore_config(fp->hdl, fp->fmcase);
498 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
499 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
500 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
501 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
502 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
503 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);
504 
505 	for (o = 0; o < fp->uniqobs; o++) {
506 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
507 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
508 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
509 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
510 	}
511 }
512 
513 /*
514  * reconstitute_observations -- convert a case's serialized observations
515  *	back into struct events.  Returns zero if all observations are
516  *	successfully reconstituted.
517  */
518 static int
519 reconstitute_observations(struct fme *fmep)
520 {
521 	struct event *ep;
522 	struct node *epnamenp = NULL;
523 	size_t pkdlen;
524 	char *pkd = NULL;
525 	char *tmpbuf = alloca(OBBUFNMSZ);
526 	char *sepptr;
527 	char *estr;
528 	int ocnt;
529 	int elen;
530 
531 	for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
532 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
533 		elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
534 		if (elen == 0) {
535 			out(O_ALTFP,
536 			    "reconstitute_observation: no %s buffer found.",
537 			    tmpbuf);
538 			Undiag_reason = UD_MISSINGOBS;
539 			break;
540 		}
541 
542 		estr = MALLOC(elen);
543 		fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
544 		sepptr = strchr(estr, '@');
545 		if (sepptr == NULL) {
546 			out(O_ALTFP,
547 			    "reconstitute_observation: %s: "
548 			    "missing @ separator in %s.",
549 			    tmpbuf, estr);
550 			Undiag_reason = UD_MISSINGPATH;
551 			FREE(estr);
552 			break;
553 		}
554 
555 		*sepptr = '\0';
556 		if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
557 			out(O_ALTFP,
558 			    "reconstitute_observation: %s: "
559 			    "trouble converting path string \"%s\" "
560 			    "to internal representation.",
561 			    tmpbuf, sepptr + 1);
562 			Undiag_reason = UD_MISSINGPATH;
563 			FREE(estr);
564 			break;
565 		}
566 
567 		/* construct the event */
568 		ep = itree_lookup(fmep->eventtree,
569 		    stable(estr), ipath(epnamenp));
570 		if (ep == NULL) {
571 			out(O_ALTFP,
572 			    "reconstitute_observation: %s: "
573 			    "lookup of  \"%s\" in itree failed.",
574 			    tmpbuf, ipath2str(estr, ipath(epnamenp)));
575 			Undiag_reason = UD_BADOBS;
576 			tree_free(epnamenp);
577 			FREE(estr);
578 			break;
579 		}
580 		tree_free(epnamenp);
581 
582 		/*
583 		 * We may or may not have a saved nvlist for the observation
584 		 */
585 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
586 		pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
587 		if (pkdlen != 0) {
588 			pkd = MALLOC(pkdlen);
589 			fmd_buf_read(fmep->hdl,
590 			    fmep->fmcase, tmpbuf, pkd, pkdlen);
591 			ASSERT(ep->nvp == NULL);
592 			if (nvlist_xunpack(pkd,
593 			    pkdlen, &ep->nvp, &Eft_nv_hdl) != 0)
594 				out(O_DIE|O_SYS, "pack of observed nvl failed");
595 			FREE(pkd);
596 		}
597 
598 		if (ocnt == 0)
599 			fmep->e0 = ep;
600 
601 		FREE(estr);
602 		fmep->ecurrent = ep;
603 		ep->count++;
604 
605 		/* link it into list of observations seen */
606 		ep->observations = fmep->observations;
607 		fmep->observations = ep;
608 	}
609 
610 	if (ocnt == fmep->uniqobs) {
611 		(void) fme_ready(fmep);
612 		return (0);
613 	}
614 
615 	return (1);
616 }
617 
/*
 * fme_restart -- called during eft initialization.  Reconstitutes
 *	an in-progress fme from the state serialized in its fmd case
 *	buffers (see init_fme_bufs() and serialize_observation()).  If
 *	any piece of saved state is missing or inconsistent, the case
 *	is added to Undiagablecaselist and solved/closed as
 *	undiagnosable.
 */
void
fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress)
{
	nvlist_t *defect;
	struct case_list *bad;
	struct fme *fmep;
	struct cfgdata *cfgdata;
	size_t rawsz;
	struct event *ep;
	char *tmpbuf = alloca(OBBUFNMSZ);
	char *sepptr;
	char *estr;
	int elen;
	struct node *epnamenp = NULL;
	int init_size;
	extern int alloc_total();

	/*
	 * ignore solved or closed cases
	 */
	if (fmd_case_solved(hdl, inprogress) ||
	    fmd_case_closed(hdl, inprogress))
		return;

	fmep = alloc_fme();
	fmep->fmcase = inprogress;
	fmep->hdl = hdl;

	/* restore whether we already posted a diagnosis for this case */
	if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) {
		out(O_ALTFP, "restart_fme: no saved posted status");
		Undiag_reason = UD_MISSINGINFO;
		goto badcase;
	} else {
		fmd_buf_read(hdl, inprogress, WOBUF_POSTD,
		    (void *)&fmep->posted_suspects,
		    sizeof (fmep->posted_suspects));
	}

	/* restore the FME id, keeping Nextid ahead of every restored id */
	if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) {
		out(O_ALTFP, "restart_fme: no saved id");
		Undiag_reason = UD_MISSINGINFO;
		goto badcase;
	} else {
		fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id,
		    sizeof (fmep->id));
	}
	if (Nextid <= fmep->id)
		Nextid = fmep->id + 1;

	out(O_ALTFP, "Replay FME %d", fmep->id);

	/* the length of the saved raw config snapshot */
	if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) {
		out(O_ALTFP, "restart_fme: No config data");
		Undiag_reason = UD_MISSINGINFO;
		goto badcase;
	}
	fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz,
	    sizeof (size_t));

	/* e0r (principal event) is needed for timer bookkeeping */
	if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) {
		out(O_ALTFP, "restart_fme: No event zero");
		Undiag_reason = UD_MISSINGZERO;
		goto badcase;
	}

	if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
		out(O_ALTFP, "restart_fme: no saved wait time");
		Undiag_reason = UD_MISSINGINFO;
		goto badcase;
	} else {
		fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
		    sizeof (fmep->pull));
	}

	if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
		out(O_ALTFP, "restart_fme: no count of observations");
		Undiag_reason = UD_MISSINGINFO;
		goto badcase;
	} else {
		fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
		    (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
	}

	/*
	 * Peek at observation zero so we can prune the propagation tree
	 * before the (expensive) itree_create() below; the full set of
	 * observations is reconstituted later.
	 */
	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed0");
	elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
	if (elen == 0) {
		out(O_ALTFP, "reconstitute_observation: no %s buffer found.",
		    tmpbuf);
		Undiag_reason = UD_MISSINGOBS;
		goto badcase;
	}
	estr = MALLOC(elen);
	fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
	sepptr = strchr(estr, '@');
	if (sepptr == NULL) {
		out(O_ALTFP, "reconstitute_observation: %s: "
		    "missing @ separator in %s.",
		    tmpbuf, estr);
		Undiag_reason = UD_MISSINGPATH;
		FREE(estr);
		goto badcase;
	}
	*sepptr = '\0';
	if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
		out(O_ALTFP, "reconstitute_observation: %s: "
		    "trouble converting path string \"%s\" "
		    "to internal representation.", tmpbuf, sepptr + 1);
		Undiag_reason = UD_MISSINGPATH;
		FREE(estr);
		goto badcase;
	}
	prune_propagations(stable(estr), ipath(epnamenp));
	tree_free(epnamenp);
	FREE(estr);

	/* re-cook the configuration from the saved raw snapshot, if any */
	init_size = alloc_total();
	out(O_ALTFP|O_STAMP, "start config_restore using %d bytes", init_size);
	cfgdata = MALLOC(sizeof (struct cfgdata));
	cfgdata->cooked = NULL;
	cfgdata->devcache = NULL;
	cfgdata->devidcache = NULL;
	cfgdata->cpucache = NULL;
	cfgdata->raw_refcnt = 1;

	if (rawsz > 0) {
		if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) {
			out(O_ALTFP, "restart_fme: Config data size mismatch");
			Undiag_reason = UD_CFGMISMATCH;
			goto badcase;
		}
		cfgdata->begin = MALLOC(rawsz);
		cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz;
		fmd_buf_read(hdl,
		    inprogress, WOBUF_CFG, cfgdata->begin, rawsz);
	} else {
		cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL;
	}

	config_cook(cfgdata);
	fmep->config = cfgdata->cooked;
	config_free(cfgdata);
	out(O_ALTFP|O_STAMP, "config_restore added %d bytes",
	    alloc_total() - init_size);

	if ((fmep->eventtree = itree_create(fmep->config)) == NULL) {
		/* case not properly saved or irretrievable */
		out(O_ALTFP, "restart_fme: NULL instance tree");
		Undiag_reason = UD_INSTFAIL;
		goto badcase;
	}

	itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree);

	if (reconstitute_observations(fmep) != 0)
		goto badcase;

	out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id);
	for (ep = fmep->observations; ep; ep = ep->observations) {
		out(O_ALTFP|O_NONL, " ");
		itree_pevent_brief(O_ALTFP|O_NONL, ep);
	}
	out(O_ALTFP, NULL);

	Open_fme_count++;

	/* give the diagnosis algorithm a shot at the new FME state */
	fme_eval(fmep, fmep->e0r);
	return;

badcase:
	if (fmep->eventtree != NULL)
		itree_free(fmep->eventtree);
	if (fmep->config)
		structconfig_free(fmep->config);
	destroy_fme_bufs(fmep);
	FREE(fmep);

	/*
	 * Since we're unable to restart the case, add it to the undiagable
	 * list and solve and close it as appropriate.
	 */
	bad = MALLOC(sizeof (struct case_list));
	bad->next = NULL;

	if (Undiagablecaselist != NULL)
		bad->next = Undiagablecaselist;
	Undiagablecaselist = bad;
	bad->fmcase = inprogress;

	out(O_ALTFP|O_NONL, "[case %s (unable to restart), ",
	    fmd_case_uuid(hdl, bad->fmcase));

	if (fmd_case_solved(hdl, bad->fmcase)) {
		out(O_ALTFP|O_NONL, "already solved, ");
	} else {
		out(O_ALTFP|O_NONL, "solving, ");
		defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
		    NULL, NULL, NULL);
		if (Undiag_reason != NULL)
			(void) nvlist_add_string(defect,
			    UNDIAG_REASON, Undiag_reason);
		fmd_case_add_suspect(hdl, bad->fmcase, defect);
		fmd_case_solve(hdl, bad->fmcase);
	}

	if (fmd_case_closed(hdl, bad->fmcase)) {
		out(O_ALTFP, "already closed ]");
	} else {
		out(O_ALTFP, "closing ]");
		fmd_case_close(hdl, bad->fmcase);
	}
}
834 
835 /*ARGSUSED*/
836 static void
837 globals_destructor(void *left, void *right, void *arg)
838 {
839 	struct evalue *evp = (struct evalue *)right;
840 	if (evp->t == NODEPTR)
841 		tree_free((struct node *)(uintptr_t)evp->v);
842 	evp->v = (uintptr_t)NULL;
843 	FREE(evp);
844 }
845 
846 void
847 destroy_fme(struct fme *f)
848 {
849 	stats_delete(f->Rcount);
850 	stats_delete(f->Hcallcount);
851 	stats_delete(f->Rcallcount);
852 	stats_delete(f->Ccallcount);
853 	stats_delete(f->Ecallcount);
854 	stats_delete(f->Tcallcount);
855 	stats_delete(f->Marrowcount);
856 	stats_delete(f->diags);
857 
858 	if (f->eventtree != NULL)
859 		itree_free(f->eventtree);
860 	if (f->config)
861 		structconfig_free(f->config);
862 	lut_free(f->globals, globals_destructor, NULL);
863 	FREE(f);
864 }
865 
866 static const char *
867 fme_state2str(enum fme_state s)
868 {
869 	switch (s) {
870 	case FME_NOTHING:	return ("NOTHING");
871 	case FME_WAIT:		return ("WAIT");
872 	case FME_CREDIBLE:	return ("CREDIBLE");
873 	case FME_DISPROVED:	return ("DISPROVED");
874 	case FME_DEFERRED:	return ("DEFERRED");
875 	default:		return ("UNKNOWN");
876 	}
877 }
878 
879 static int
880 is_problem(enum nametype t)
881 {
882 	return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
883 }
884 
885 static int
886 is_fault(enum nametype t)
887 {
888 	return (t == N_FAULT);
889 }
890 
891 static int
892 is_defect(enum nametype t)
893 {
894 	return (t == N_DEFECT);
895 }
896 
897 static int
898 is_upset(enum nametype t)
899 {
900 	return (t == N_UPSET);
901 }
902 
/*
 * fme_print -- dump an fme's state, timing, e0, observation list,
 *	suspect list, and (at O_VERB2) its instance tree, using the
 *	given out() flags.
 */
static void
fme_print(int flags, struct fme *fmep)
{
	struct event *ep;

	out(flags, "Fault Management Exercise %d", fmep->id);
	out(flags, "\t       State: %s", fme_state2str(fmep->state));
	out(flags|O_NONL, "\t  Start time: ");
	ptree_timeval(flags|O_NONL, &fmep->ull);
	out(flags, NULL);
	/* wull is only meaningful when the FME is waiting for re-eval */
	if (fmep->wull) {
		out(flags|O_NONL, "\t   Wait time: ");
		ptree_timeval(flags|O_NONL, &fmep->wull);
		out(flags, NULL);
	}
	out(flags|O_NONL, "\t          E0: ");
	if (fmep->e0)
		itree_pevent_brief(flags|O_NONL, fmep->e0);
	else
		out(flags|O_NONL, "NULL");
	out(flags, NULL);
	out(flags|O_NONL, "\tObservations:");
	for (ep = fmep->observations; ep; ep = ep->observations) {
		out(flags|O_NONL, " ");
		itree_pevent_brief(flags|O_NONL, ep);
	}
	out(flags, NULL);
	out(flags|O_NONL, "\tSuspect list:");
	for (ep = fmep->suspects; ep; ep = ep->suspects) {
		out(flags|O_NONL, " ");
		itree_pevent_brief(flags|O_NONL, ep);
	}
	out(flags, NULL);
	if (fmep->eventtree != NULL) {
		out(flags|O_VERB2, "\t        Tree:");
		itree_ptree(flags|O_VERB2, fmep->eventtree);
	}
}
941 
942 static struct node *
943 pathstring2epnamenp(char *path)
944 {
945 	char *sep = "/";
946 	struct node *ret;
947 	char *ptr;
948 
949 	if ((ptr = strtok(path, sep)) == NULL)
950 		out(O_DIE, "pathstring2epnamenp: invalid empty class");
951 
952 	ret = tree_iname(stable(ptr), NULL, 0);
953 
954 	while ((ptr = strtok(NULL, sep)) != NULL)
955 		ret = tree_name_append(ret,
956 		    tree_iname(stable(ptr), NULL, 0));
957 
958 	return (ret);
959 }
960 
961 /*
962  * for a given upset sp, increment the corresponding SERD engine.  if the
963  * SERD engine trips, return the ename and ipp of the resulting ereport.
964  * returns true if engine tripped and *enamep and *ippp were filled in.
965  */
966 static int
967 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep,
968     fmd_case_t *fmcase, struct event *sp, const char **enamep,
969     const struct ipath **ippp)
970 {
971 	struct node *serdinst;
972 	char *serdname;
973 	char *serdresource;
974 	struct node *nid;
975 	struct serd_entry *newentp;
976 	int i, serdn = -1, serdincrement = 1, len = 0;
977 	char *serdsuffix = NULL, *serdt = NULL, *ptr;
978 	struct evalue *ep;
979 
980 	ASSERT(sp->t == N_UPSET);
981 	ASSERT(ffep != NULL);
982 
983 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
984 	    (void *)"n", (lut_cmp)strcmp)) != NULL) {
985 		ASSERT(ep->t == UINT64);
986 		serdn = (int)ep->v;
987 	}
988 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
989 	    (void *)"t", (lut_cmp)strcmp)) != NULL) {
990 		ASSERT(ep->t == STRING);
991 		serdt = (char *)(uintptr_t)ep->v;
992 	}
993 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
994 	    (void *)"suffix", (lut_cmp)strcmp)) != NULL) {
995 		ASSERT(ep->t == STRING);
996 		serdsuffix = (char *)(uintptr_t)ep->v;
997 	}
998 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
999 	    (void *)"increment", (lut_cmp)strcmp)) != NULL) {
1000 		ASSERT(ep->t == UINT64);
1001 		serdincrement = (int)ep->v;
1002 	}
1003 
1004 	/*
1005 	 * obtain instanced SERD engine from the upset sp.  from this
1006 	 * derive serdname, the string used to identify the SERD engine.
1007 	 */
1008 	serdinst = eventprop_lookup(sp, L_engine);
1009 
1010 	if (serdinst == NULL)
1011 		return (-1);
1012 
1013 	serdname = ipath2str(serdinst->u.stmt.np->u.event.ename->u.name.s,
1014 	    NULL);
1015 	serdresource = ipath2str(NULL,
1016 	    ipath(serdinst->u.stmt.np->u.event.epname));
1017 
1018 	len = strlen(serdname) + strlen(serdresource) + 2;
1019 	if (serdsuffix != NULL)
1020 		len += strlen(serdsuffix);
1021 
1022 	ptr = MALLOC(len);
1023 	if (serdsuffix != NULL) {
1024 		(void) snprintf(ptr, len, "%s%s@%s", serdname, serdsuffix,
1025 		    serdresource);
1026 	} else {
1027 		(void) snprintf(ptr, len, "%s@%s", serdname, serdresource);
1028 	}
1029 	FREE(serdname);
1030 	FREE(serdresource);
1031 	serdname = ptr;
1032 
1033 	/* handle serd engine "id" property, if there is one */
1034 	if ((nid =
1035 	    lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) {
1036 		struct evalue *gval;
1037 		char suffixbuf[200];
1038 		char *suffix;
1039 		char *nserdname;
1040 		size_t nname;
1041 
1042 		out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname);
1043 		ptree_name_iter(O_ALTFP|O_NONL, nid);
1044 
1045 		ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t));
1046 
1047 		if ((gval = lut_lookup(fmep->globals,
1048 		    (void *)nid->u.globid.s, NULL)) == NULL) {
1049 			out(O_ALTFP, " undefined");
1050 		} else if (gval->t == UINT64) {
1051 			out(O_ALTFP, " %llu", gval->v);
1052 			(void) sprintf(suffixbuf, "%llu", gval->v);
1053 			suffix = suffixbuf;
1054 		} else {
1055 			out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v);
1056 			suffix = (char *)(uintptr_t)gval->v;
1057 		}
1058 
1059 		nname = strlen(serdname) + strlen(suffix) + 2;
1060 		nserdname = MALLOC(nname);
1061 		(void) snprintf(nserdname, nname, "%s:%s", serdname, suffix);
1062 		FREE(serdname);
1063 		serdname = nserdname;
1064 	}
1065 
1066 	/*
1067 	 * if the engine is empty, and we have an override for n/t then
1068 	 * destroy and recreate it.
1069 	 */
1070 	if ((serdn != -1 || serdt != NULL) && fmd_serd_exists(hdl, serdname) &&
1071 	    fmd_serd_empty(hdl, serdname))
1072 		fmd_serd_destroy(hdl, serdname);
1073 
1074 	if (!fmd_serd_exists(hdl, serdname)) {
1075 		struct node *nN, *nT;
1076 		const char *s;
1077 		struct node *nodep;
1078 		struct config *cp;
1079 		char *path;
1080 		uint_t nval;
1081 		hrtime_t tval;
1082 		const char *name;
1083 		char *tptr;
1084 		char *serd_name;
1085 		int i;
1086 		int tmplen;
1087 		char *ptr;
1088 		int got_n_override = 0, got_t_override = 0;
1089 
1090 		/* no SERD engine yet, so create it */
1091 		nodep = serdinst->u.stmt.np->u.event.epname;
1092 		tmplen = strlen(serdinst->u.stmt.np->u.event.ename->u.name.s)
1093 		    + 2;
1094 		if (serdsuffix != NULL)
1095 			tmplen += strlen(serdsuffix);
1096 		tptr = MALLOC(tmplen);
1097 		if (serdsuffix != NULL) {
1098 			(void) snprintf(tptr, len, "%s%s",
1099 			    serdinst->u.stmt.np->u.event.ename->u.name.s,
1100 			    serdsuffix);
1101 		} else {
1102 			(void) snprintf(tptr, len, "%s",
1103 			    serdinst->u.stmt.np->u.event.ename->u.name.s);
1104 		}
1105 		name = (const char *)tptr;
1106 		path = ipath2str(NULL, ipath(nodep));
1107 		cp = config_lookup(fmep->config, path, 0);
1108 		FREE((void *)path);
1109 
1110 		/*
1111 		 * We allow serd paramaters to be overridden, either from
1112 		 * eft.conf file values (if Serd_Override is set) or from
1113 		 * driver properties (for "serd.io.device" engines).
1114 		 */
1115 		if (Serd_Override != NULL) {
1116 			char *save_ptr, *ptr1, *ptr2, *ptr3;
1117 			ptr3 = save_ptr = STRDUP(Serd_Override);
1118 			while (*ptr3 != '\0') {
1119 				ptr1 = strchr(ptr3, ',');
1120 				*ptr1 = '\0';
1121 				if (strcmp(ptr3, name) == 0) {
1122 					ptr2 =  strchr(ptr1 + 1, ',');
1123 					*ptr2 = '\0';
1124 					nval = atoi(ptr1 + 1);
1125 					out(O_ALTFP, "serd override %s_n %d",
1126 					    name, nval);
1127 					ptr3 =  strchr(ptr2 + 1, ' ');
1128 					if (ptr3)
1129 						*ptr3 = '\0';
1130 					ptr = STRDUP(ptr2 + 1);
1131 					out(O_ALTFP, "serd override %s_t %s",
1132 					    name, ptr);
1133 					got_n_override = 1;
1134 					got_t_override = 1;
1135 					break;
1136 				} else {
1137 					ptr2 =  strchr(ptr1 + 1, ',');
1138 					ptr3 =  strchr(ptr2 + 1, ' ');
1139 					if (ptr3 == NULL)
1140 						break;
1141 				}
1142 				ptr3++;
1143 			}
1144 			FREE(save_ptr);
1145 		}
1146 
1147 		if (cp && got_n_override == 0) {
1148 			/*
1149 			 * convert serd engine name into property name
1150 			 */
1151 			serd_name = MALLOC(strlen(name) + 3);
1152 			for (i = 0; i < strlen(name); i++) {
1153 				if (name[i] == '.')
1154 					serd_name[i] = '_';
1155 				else
1156 					serd_name[i] = name[i];
1157 			}
1158 			serd_name[i++] = '_';
1159 			serd_name[i++] = 'n';
1160 			serd_name[i] = '\0';
1161 			if (s = config_getprop(cp, serd_name)) {
1162 				nval = atoi(s);
1163 				out(O_ALTFP, "serd override %s_n %s", name, s);
1164 				got_n_override = 1;
1165 			}
1166 			serd_name[i - 1] = 't';
1167 			if (s = config_getprop(cp, serd_name)) {
1168 				ptr = STRDUP(s);
1169 				out(O_ALTFP, "serd override %s_t %s", name, s);
1170 				got_t_override = 1;
1171 			}
1172 			FREE(serd_name);
1173 		}
1174 
1175 		if (serdn != -1 && got_n_override == 0) {
1176 			nval = serdn;
1177 			out(O_ALTFP, "serd override %s_n %d", name, serdn);
1178 			got_n_override = 1;
1179 		}
1180 		if (serdt != NULL && got_t_override == 0) {
1181 			ptr = STRDUP(serdt);
1182 			out(O_ALTFP, "serd override %s_t %s", name, serdt);
1183 			got_t_override = 1;
1184 		}
1185 
1186 		if (!got_n_override) {
1187 			nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N,
1188 			    NULL);
1189 			ASSERT(nN->t == T_NUM);
1190 			nval = (uint_t)nN->u.ull;
1191 		}
1192 		if (!got_t_override) {
1193 			nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T,
1194 			    NULL);
1195 			ASSERT(nT->t == T_TIMEVAL);
1196 			tval = (hrtime_t)nT->u.ull;
1197 		} else {
1198 			const unsigned long long *ullp;
1199 			const char *suffix;
1200 			int len;
1201 
1202 			len = strspn(ptr, "0123456789");
1203 			suffix = stable(&ptr[len]);
1204 			ullp = (unsigned long long *)lut_lookup(Timesuffixlut,
1205 			    (void *)suffix, NULL);
1206 			ptr[len] = '\0';
1207 			tval = strtoull(ptr, NULL, 0) * (ullp ? *ullp : 1ll);
1208 			FREE(ptr);
1209 		}
1210 		fmd_serd_create(hdl, serdname, nval, tval);
1211 		FREE(tptr);
1212 	}
1213 
1214 	newentp = MALLOC(sizeof (*newentp));
1215 	newentp->ename = stable(serdinst->u.stmt.np->u.event.ename->u.name.s);
1216 	newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname);
1217 	newentp->hdl = hdl;
1218 	if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) {
1219 		SerdEngines = lut_add(SerdEngines, (void *)newentp,
1220 		    (void *)newentp, (lut_cmp)serd_cmp);
1221 		Serd_need_save = 1;
1222 		serd_save();
1223 	} else {
1224 		FREE(newentp);
1225 	}
1226 
1227 
1228 	/*
1229 	 * increment SERD engine.  if engine fires, reset serd
1230 	 * engine and return trip_strcode if required.
1231 	 */
1232 	for (i = 0; i < serdincrement; i++) {
1233 		if (fmd_serd_record(hdl, serdname, ffep)) {
1234 			fmd_case_add_serd(hdl, fmcase, serdname);
1235 			fmd_serd_reset(hdl, serdname);
1236 
1237 			if (ippp) {
1238 				struct node *tripinst =
1239 				    lut_lookup(serdinst->u.stmt.lutp,
1240 				    (void *)L_trip, NULL);
1241 				ASSERT(tripinst != NULL);
1242 				*enamep = tripinst->u.event.ename->u.name.s;
1243 				*ippp = ipath(tripinst->u.event.epname);
1244 				out(O_ALTFP|O_NONL,
1245 				    "[engine fired: %s, sending: ", serdname);
1246 				ipath_print(O_ALTFP|O_NONL, *enamep, *ippp);
1247 				out(O_ALTFP, "]");
1248 			} else {
1249 				out(O_ALTFP, "[engine fired: %s, no trip]",
1250 				    serdname);
1251 			}
1252 			FREE(serdname);
1253 			return (1);
1254 		}
1255 	}
1256 
1257 	FREE(serdname);
1258 	return (0);
1259 }
1260 
1261 /*
1262  * search a suspect list for upsets.  feed each upset to serd_eval() and
1263  * build up tripped[], an array of ereports produced by the firing of
1264  * any SERD engines.  then feed each ereport back into
1265  * fme_receive_report().
1266  *
1267  * returns ntrip, the number of these ereports produced.
1268  */
1269 static int
1270 upsets_eval(struct fme *fmep, fmd_event_t *ffep)
1271 {
1272 	/* we build an array of tripped ereports that we send ourselves */
1273 	struct {
1274 		const char *ename;
1275 		const struct ipath *ipp;
1276 	} *tripped;
1277 	struct event *sp;
1278 	int ntrip, nupset, i;
1279 
1280 	/*
1281 	 * count the number of upsets to determine the upper limit on
1282 	 * expected trip ereport strings.  remember that one upset can
1283 	 * lead to at most one ereport.
1284 	 */
1285 	nupset = 0;
1286 	for (sp = fmep->suspects; sp; sp = sp->suspects) {
1287 		if (sp->t == N_UPSET)
1288 			nupset++;
1289 	}
1290 
1291 	if (nupset == 0)
1292 		return (0);
1293 
1294 	/*
1295 	 * get to this point if we have upsets and expect some trip
1296 	 * ereports
1297 	 */
1298 	tripped = alloca(sizeof (*tripped) * nupset);
1299 	bzero((void *)tripped, sizeof (*tripped) * nupset);
1300 
1301 	ntrip = 0;
1302 	for (sp = fmep->suspects; sp; sp = sp->suspects)
1303 		if (sp->t == N_UPSET &&
1304 		    serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp,
1305 		    &tripped[ntrip].ename, &tripped[ntrip].ipp) == 1)
1306 			ntrip++;
1307 
1308 	for (i = 0; i < ntrip; i++) {
1309 		struct event *ep, *nep;
1310 		struct fme *nfmep;
1311 		fmd_case_t *fmcase;
1312 		const struct ipath *ipp;
1313 		const char *eventstring;
1314 		int prev_verbose;
1315 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1316 		enum fme_state state;
1317 
1318 		/*
1319 		 * First try and evaluate a case with the trip ereport plus
1320 		 * all the other ereports that cause the trip. If that fails
1321 		 * to evaluate then try again with just this ereport on its own.
1322 		 */
1323 		out(O_ALTFP|O_NONL, "fme_receive_report_serd: ");
1324 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1325 		out(O_ALTFP|O_STAMP, NULL);
1326 		ep = fmep->e0;
1327 		eventstring = ep->enode->u.event.ename->u.name.s;
1328 		ipp = ep->ipp;
1329 		prune_propagations(eventstring, ipp);
1330 
1331 		/*
1332 		 * create a duplicate fme and case
1333 		 */
1334 		fmcase = fmd_case_open(fmep->hdl, NULL);
1335 		out(O_ALTFP|O_NONL, "duplicate fme for event [");
1336 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1337 		out(O_ALTFP, " ]");
1338 		if ((nfmep = newfme(eventstring, ipp, fmep->hdl,
1339 		    fmcase)) == NULL) {
1340 			out(O_ALTFP|O_NONL, "[");
1341 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1342 			out(O_ALTFP, " CANNOT DIAGNOSE]");
1343 			publish_undiagnosable(fmep->hdl, ffep, fmcase);
1344 			continue;
1345 		}
1346 		Open_fme_count++;
1347 		nfmep->pull = fmep->pull;
1348 		init_fme_bufs(nfmep);
1349 		out(O_ALTFP|O_NONL, "[");
1350 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1351 		out(O_ALTFP, " created FME%d, case %s]", nfmep->id,
1352 		    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
1353 		if (ffep) {
1354 			fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep);
1355 			fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep);
1356 			nfmep->e0r = ffep;
1357 		}
1358 
1359 		/*
1360 		 * add the original ereports
1361 		 */
1362 		for (ep = fmep->observations; ep; ep = ep->observations) {
1363 			eventstring = ep->enode->u.event.ename->u.name.s;
1364 			ipp = ep->ipp;
1365 			out(O_ALTFP|O_NONL, "adding event [");
1366 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1367 			out(O_ALTFP, " ]");
1368 			nep = itree_lookup(nfmep->eventtree, eventstring, ipp);
1369 			if (nep->count++ == 0) {
1370 				nep->observations = nfmep->observations;
1371 				nfmep->observations = nep;
1372 				serialize_observation(nfmep, eventstring, ipp);
1373 				nep->nvp = evnv_dupnvl(ep->nvp);
1374 			}
1375 			if (ep->ffep && ep->ffep != ffep)
1376 				fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase,
1377 				    ep->ffep);
1378 			stats_counter_bump(nfmep->Rcount);
1379 		}
1380 
1381 		/*
1382 		 * add the serd trigger ereport
1383 		 */
1384 		if ((ep = itree_lookup(nfmep->eventtree, tripped[i].ename,
1385 		    tripped[i].ipp)) == NULL) {
1386 			/*
1387 			 * The trigger ereport is not in the instance tree. It
1388 			 * was presumably removed by prune_propagations() as
1389 			 * this combination of events is not present in the
1390 			 * rules.
1391 			 */
1392 			out(O_ALTFP, "upsets_eval: e0 not in instance tree");
1393 			Undiag_reason = UD_BADEVENTI;
1394 			goto retry_lone_ereport;
1395 		}
1396 		out(O_ALTFP|O_NONL, "adding event [");
1397 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1398 		out(O_ALTFP, " ]");
1399 		nfmep->ecurrent = ep;
1400 		ep->nvp = NULL;
1401 		ep->count = 1;
1402 		ep->observations = nfmep->observations;
1403 		nfmep->observations = ep;
1404 
1405 		/*
1406 		 * just peek first.
1407 		 */
1408 		nfmep->peek = 1;
1409 		prev_verbose = Verbose;
1410 		if (Debug == 0)
1411 			Verbose = 0;
1412 		lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep);
1413 		state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay);
1414 		nfmep->peek = 0;
1415 		Verbose = prev_verbose;
1416 		if (state == FME_DISPROVED) {
1417 			out(O_ALTFP, "upsets_eval: hypothesis disproved");
1418 			Undiag_reason = UD_UNSOLVD;
1419 retry_lone_ereport:
1420 			/*
1421 			 * However the trigger ereport on its own might be
1422 			 * diagnosable, so check for that. Undo the new fme
1423 			 * and case we just created and call fme_receive_report.
1424 			 */
1425 			out(O_ALTFP|O_NONL, "[");
1426 			ipath_print(O_ALTFP|O_NONL, tripped[i].ename,
1427 			    tripped[i].ipp);
1428 			out(O_ALTFP, " retrying with just trigger ereport]");
1429 			itree_free(nfmep->eventtree);
1430 			nfmep->eventtree = NULL;
1431 			structconfig_free(nfmep->config);
1432 			nfmep->config = NULL;
1433 			destroy_fme_bufs(nfmep);
1434 			fmd_case_close(nfmep->hdl, nfmep->fmcase);
1435 			fme_receive_report(fmep->hdl, ffep,
1436 			    tripped[i].ename, tripped[i].ipp, NULL);
1437 			continue;
1438 		}
1439 
1440 		/*
1441 		 * and evaluate
1442 		 */
1443 		serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp);
1444 		fme_eval(nfmep, ffep);
1445 	}
1446 
1447 	return (ntrip);
1448 }
1449 
1450 /*
1451  * fme_receive_external_report -- call when an external ereport comes in
1452  *
1453  * this routine just converts the relevant information from the ereport
1454  * into a format used internally and passes it on to fme_receive_report().
1455  */
1456 void
1457 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1458     const char *class)
1459 {
1460 	struct node		*epnamenp;
1461 	fmd_case_t		*fmcase;
1462 	const struct ipath	*ipp;
1463 
1464 	class = stable(class);
1465 
1466 	/* Get the component path from the ereport */
1467 	epnamenp = platform_getpath(nvl);
1468 
1469 	/* See if we ended up without a path. */
1470 	if (epnamenp == NULL) {
1471 		/* See if class permits silent discard on unknown component. */
1472 		if (lut_lookup(Ereportenames_discard, (void *)class, NULL)) {
1473 			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
1474 			    "to component path, but silent discard allowed.",
1475 			    class);
1476 		} else {
1477 			/*
1478 			 * XFILE: Failure to find a component is bad unless
1479 			 * 'discard_if_config_unknown=1' was specified in the
1480 			 * ereport definition. Indicate undiagnosable.
1481 			 */
1482 			out(O_ALTFP, "XFILE: Unable to map \"%s\" ereport "
1483 			    "to component path.", class);
1484 			Undiag_reason = UD_NOPATH;
1485 			fmcase = fmd_case_open(hdl, NULL);
1486 			publish_undiagnosable(hdl, ffep, fmcase);
1487 		}
1488 		return;
1489 	}
1490 
1491 	ipp = ipath(epnamenp);
1492 	tree_free(epnamenp);
1493 	fme_receive_report(hdl, ffep, class, ipp, nvl);
1494 }
1495 
1496 /*ARGSUSED*/
1497 void
1498 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1499     const char *eventstring)
1500 {
1501 	char *uuid;
1502 	nvlist_t **nva;
1503 	uint_t nvc;
1504 	const struct ipath *ipp;
1505 
1506 	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
1507 	    nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
1508 	    &nva, &nvc) != 0) {
1509 		out(O_ALTFP, "No uuid or fault list for list.repaired event");
1510 		return;
1511 	}
1512 
1513 	out(O_ALTFP, "Processing list.repaired from case %s", uuid);
1514 
1515 	while (nvc-- != 0) {
1516 		/*
1517 		 * Reset any istat or serd engine associated with this path.
1518 		 */
1519 		char *path;
1520 
1521 		if ((ipp = platform_fault2ipath(*nva++)) == NULL)
1522 			continue;
1523 
1524 		path = ipath2str(NULL, ipp);
1525 		out(O_ALTFP, "fme_receive_repair_list: resetting state for %s",
1526 		    path);
1527 		FREE(path);
1528 
1529 		lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp);
1530 		istat_save();
1531 
1532 		lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp);
1533 		serd_save();
1534 	}
1535 }
1536 
/*ARGSUSED*/
void
fme_receive_topology_change(void)
{
	/* walk all istats with the topology-change callback, then persist */
	lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL);
	istat_save();

	/* same for all serd engines */
	lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL);
	serd_save();
}
1547 
1548 static int mark_arrows(struct fme *fmep, struct event *ep, int mark,
1549     unsigned long long at_latest_by, unsigned long long *pdelay, int keep);
1550 
1551 /* ARGSUSED */
1552 static void
1553 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
1554 {
1555 	struct bubble *bp;
1556 	struct arrowlist *ap;
1557 
1558 	ep->cached_state = 0;
1559 	ep->keep_in_tree = 0;
1560 	for (bp = itree_next_bubble(ep, NULL); bp;
1561 	    bp = itree_next_bubble(ep, bp)) {
1562 		if (bp->t != B_FROM)
1563 			continue;
1564 		bp->mark = 0;
1565 		for (ap = itree_next_arrow(bp, NULL); ap;
1566 		    ap = itree_next_arrow(bp, ap))
1567 			ap->arrowp->mark = 0;
1568 	}
1569 }
1570 
/*
 * fme_receive_report -- deliver one ereport to the diagnosis engine
 *
 * offer the ereport to every open FME; any FME whose hypothesis is not
 * disproved by it absorbs the ereport and is re-evaluated.  if no open
 * FME explains the ereport, a new FME and fmd case are created for it
 * (subject to the Max_fme limit, beyond which an "overflow" FME absorbs
 * further ereports).
 */
static void
fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
    const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
{
	struct event *ep;
	struct fme *fmep = NULL;
	struct fme *ofmep = NULL;
	struct fme *cfmep, *svfmep;
	int matched = 0;
	nvlist_t *defect;
	fmd_case_t *fmcase;

	out(O_ALTFP|O_NONL, "fme_receive_report: ");
	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
	out(O_ALTFP|O_STAMP, NULL);

	/* decide which FME it goes to */
	for (fmep = FMElist; fmep; fmep = fmep->next) {
		int prev_verbose;
		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
		enum fme_state state;
		nvlist_t *pre_peek_nvp = NULL;

		if (fmep->overflow) {
			/* remember the open overflow FME for later */
			if (!(fmd_case_closed(fmep->hdl, fmep->fmcase)))
				ofmep = fmep;

			continue;
		}

		/*
		 * ignore solved or closed cases
		 */
		if (fmep->posted_suspects ||
		    fmd_case_solved(fmep->hdl, fmep->fmcase) ||
		    fmd_case_closed(fmep->hdl, fmep->fmcase))
			continue;

		/* look up event in event tree for this FME */
		if ((ep = itree_lookup(fmep->eventtree,
		    eventstring, ipp)) == NULL)
			continue;

		/* note observation */
		fmep->ecurrent = ep;
		if (ep->count++ == 0) {
			/* link it into list of observations seen */
			ep->observations = fmep->observations;
			fmep->observations = ep;
			ep->nvp = evnv_dupnvl(nvl);
		} else {
			/* use new payload values for peek */
			pre_peek_nvp = ep->nvp;
			ep->nvp = evnv_dupnvl(nvl);
		}

		/* tell hypothesise() not to mess with suspect list */
		fmep->peek = 1;

		/* don't want this to be verbose (unless Debug is set) */
		prev_verbose = Verbose;
		if (Debug == 0)
			Verbose = 0;

		lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
		state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);

		fmep->peek = 0;

		/* put verbose flag back */
		Verbose = prev_verbose;

		if (state != FME_DISPROVED) {
			/* found an FME that explains the ereport */
			matched++;
			out(O_ALTFP|O_NONL, "[");
			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
			out(O_ALTFP, " explained by FME%d]", fmep->id);

			/* the peeked payload is now permanent */
			if (pre_peek_nvp)
				nvlist_free(pre_peek_nvp);

			if (ep->count == 1)
				serialize_observation(fmep, eventstring, ipp);

			if (ffep) {
				fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
				ep->ffep = ffep;
			}

			stats_counter_bump(fmep->Rcount);

			/* re-eval FME */
			fme_eval(fmep, ffep);
		} else {

			/* not a match, undo noting of observation */
			fmep->ecurrent = NULL;
			if (--ep->count == 0) {
				/* unlink it from observations */
				fmep->observations = ep->observations;
				ep->observations = NULL;
				nvlist_free(ep->nvp);
				ep->nvp = NULL;
			} else {
				/* restore the pre-peek payload */
				nvlist_free(ep->nvp);
				ep->nvp = pre_peek_nvp;
			}
		}
	}

	if (matched)
		return;	/* explained by at least one existing FME */

	/* clean up closed fmes */
	cfmep = ClosedFMEs;
	while (cfmep != NULL) {
		svfmep = cfmep->next;
		destroy_fme(cfmep);
		cfmep = svfmep;
	}
	ClosedFMEs = NULL;
	prune_propagations(eventstring, ipp);

	if (ofmep) {
		/* too many FMEs already; park the ereport on the overflow */
		out(O_ALTFP|O_NONL, "[");
		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
		out(O_ALTFP, " ADDING TO OVERFLOW FME]");
		if (ffep)
			fmd_case_add_ereport(hdl, ofmep->fmcase, ffep);

		return;

	} else if (Max_fme && (Open_fme_count >= Max_fme)) {
		out(O_ALTFP|O_NONL, "[");
		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
		out(O_ALTFP, " MAX OPEN FME REACHED]");

		fmcase = fmd_case_open(hdl, NULL);

		/* Create overflow fme */
		if ((fmep = newfme(eventstring, ipp, hdl, fmcase)) == NULL) {
			out(O_ALTFP|O_NONL, "[");
			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
			out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]");
			publish_undiagnosable(hdl, ffep, fmcase);
			return;
		}

		Open_fme_count++;

		init_fme_bufs(fmep);
		fmep->overflow = B_TRUE;

		if (ffep)
			fmd_case_add_ereport(hdl, fmep->fmcase, ffep);

		/* solve the overflow case immediately as undiagnosable */
		defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
		    NULL, NULL, NULL);
		(void) nvlist_add_string(defect, UNDIAG_REASON, UD_MAXFME);
		fmd_case_add_suspect(hdl, fmep->fmcase, defect);
		fmd_case_solve(hdl, fmep->fmcase);
		return;
	}

	/* open a case */
	fmcase = fmd_case_open(hdl, NULL);

	/* start a new FME */
	if ((fmep = newfme(eventstring, ipp, hdl, fmcase)) == NULL) {
		out(O_ALTFP|O_NONL, "[");
		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
		out(O_ALTFP, " CANNOT DIAGNOSE]");
		publish_undiagnosable(hdl, ffep, fmcase);
		return;
	}

	Open_fme_count++;

	init_fme_bufs(fmep);

	out(O_ALTFP|O_NONL, "[");
	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
	out(O_ALTFP, " created FME%d, case %s]", fmep->id,
	    fmd_case_uuid(hdl, fmep->fmcase));

	ep = fmep->e0;
	ASSERT(ep != NULL);

	/* note observation */
	fmep->ecurrent = ep;
	if (ep->count++ == 0) {
		/* link it into list of observations seen */
		ep->observations = fmep->observations;
		fmep->observations = ep;
		ep->nvp = evnv_dupnvl(nvl);
		serialize_observation(fmep, eventstring, ipp);
	} else {
		/* new payload overrides any previous */
		nvlist_free(ep->nvp);
		ep->nvp = evnv_dupnvl(nvl);
	}

	stats_counter_bump(fmep->Rcount);

	if (ffep) {
		fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
		fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
		fmep->e0r = ffep;
		ep->ffep = ffep;
	}

	/* give the diagnosis algorithm a shot at the new FME state */
	fme_eval(fmep, ffep);
}
1786 
1787 void
1788 fme_status(int flags)
1789 {
1790 	struct fme *fmep;
1791 
1792 	if (FMElist == NULL) {
1793 		out(flags, "No fault management exercises underway.");
1794 		return;
1795 	}
1796 
1797 	for (fmep = FMElist; fmep; fmep = fmep->next)
1798 		fme_print(flags, fmep);
1799 }
1800 
1801 /*
1802  * "indent" routines used mostly for nicely formatted debug output, but also
1803  * for sanity checking for infinite recursion bugs.
1804  */
1805 
#define	MAX_INDENT 1024
static const char *indent_s[MAX_INDENT];	/* stack of indent strings */
static int current_indent;			/* depth of indent_s[] stack */
1809 
1810 static void
1811 indent_push(const char *s)
1812 {
1813 	if (current_indent < MAX_INDENT)
1814 		indent_s[current_indent++] = s;
1815 	else
1816 		out(O_DIE, "unexpected recursion depth (%d)", current_indent);
1817 }
1818 
/* indent_set -- reset the indent stack to contain just s */
static void
indent_set(const char *s)
{
	current_indent = 0;
	indent_push(s);
}
1825 
1826 static void
1827 indent_pop(void)
1828 {
1829 	if (current_indent > 0)
1830 		current_indent--;
1831 	else
1832 		out(O_DIE, "recursion underflow");
1833 }
1834 
1835 static void
1836 indent(void)
1837 {
1838 	int i;
1839 	if (!Verbose)
1840 		return;
1841 	for (i = 0; i < current_indent; i++)
1842 		out(O_ALTFP|O_VERB|O_NONL, indent_s[i]);
1843 }
1844 
/* "circumstance" values passed to print_suspects() */
#define	SLNEW		1	/* diagnosis produced (default wording) */
#define	SLCHANGED	2	/* diagnosis changed */
#define	SLWAIT		3	/* wait timer set, no list printed */
#define	SLDISPROVED	4	/* diagnosis unknown, no list printed */
1849 
1850 static void
1851 print_suspects(int circumstance, struct fme *fmep)
1852 {
1853 	struct event *ep;
1854 
1855 	out(O_ALTFP|O_NONL, "[");
1856 	if (circumstance == SLCHANGED) {
1857 		out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, "
1858 		    "suspect list:", fmep->id, fme_state2str(fmep->state));
1859 	} else if (circumstance == SLWAIT) {
1860 		out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id,
1861 		    fmep->timer);
1862 		ptree_timeval(O_ALTFP|O_NONL, &fmep->wull);
1863 	} else if (circumstance == SLDISPROVED) {
1864 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id);
1865 	} else {
1866 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id);
1867 	}
1868 
1869 	if (circumstance == SLWAIT || circumstance == SLDISPROVED) {
1870 		out(O_ALTFP, "]");
1871 		return;
1872 	}
1873 
1874 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
1875 		out(O_ALTFP|O_NONL, " ");
1876 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
1877 	}
1878 	out(O_ALTFP, "]");
1879 }
1880 
/*
 * eventprop_lookup -- look up a named property in an event's prop lut;
 * returns NULL if the property is not present.
 */
static struct node *
eventprop_lookup(struct event *ep, const char *propname)
{
	return (lut_lookup(ep->props, (void *)propname, NULL));
}
1886 
/*
 * scratch buffer for ulltostr(); callers pass a pointer to the
 * terminating NUL at numbuf[MAXDIGITIDX] and the digits are apparently
 * written backward from there (see node2fmri()/ipath2fmri()).
 */
#define	MAXDIGITIDX	23
static char numbuf[MAXDIGITIDX + 1];
1889 
1890 static int
1891 node2uint(struct node *n, uint_t *valp)
1892 {
1893 	struct evalue value;
1894 	struct lut *globals = NULL;
1895 
1896 	if (n == NULL)
1897 		return (1);
1898 
1899 	/*
1900 	 * check value.v since we are being asked to convert an unsigned
1901 	 * long long int to an unsigned int
1902 	 */
1903 	if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) ||
1904 	    value.t != UINT64 || value.v > (1ULL << 32))
1905 		return (1);
1906 
1907 	*valp = (uint_t)value.v;
1908 
1909 	return (0);
1910 }
1911 
/*
 * node2fmri -- convert a fully-instanced T_NAME node chain (each
 * component carrying a T_NUM instance number) into an hc-scheme FMRI
 * nvlist.  returns NULL if the node is not such a chain; dies on
 * allocation/construction failure.
 */
static nvlist_t *
node2fmri(struct node *n)
{
	nvlist_t **pa, *f, *p;
	struct node *nc;
	uint_t depth = 0;
	char *numstr, *nullbyte;
	char *failure;
	int err, i;

	/* XXX do we need to be able to handle a non-T_NAME node? */
	if (n == NULL || n->t != T_NAME)
		return (NULL);

	/* count components; every one must have a numeric instance */
	for (nc = n; nc != NULL; nc = nc->u.name.next) {
		if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM)
			break;
		depth++;
	}

	if (nc != NULL) {
		/* We bailed early, something went wrong */
		return (NULL);
	}

	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
	pa = alloca(depth * sizeof (nvlist_t *));
	for (i = 0; i < depth; i++)
		pa[i] = NULL;

	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
	if (err != 0) {
		failure = "basic construction of FMRI failed";
		goto boom;
	}

	numbuf[MAXDIGITIDX] = '\0';
	nullbyte = &numbuf[MAXDIGITIDX];
	i = 0;

	/* build one hc-pair (name, instance) per path component */
	for (nc = n; nc != NULL; nc = nc->u.name.next) {
		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
		if (err != 0) {
			failure = "alloc of an hc-pair failed";
			goto boom;
		}
		err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s);
		numstr = ulltostr(nc->u.name.child->u.ull, nullbyte);
		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
		if (err != 0) {
			failure = "construction of an hc-pair failed";
			goto boom;
		}
		pa[i++] = p;
	}

	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
	if (err == 0) {
		/* the array was copied into f; free our working copies */
		for (i = 0; i < depth; i++)
			if (pa[i] != NULL)
				nvlist_free(pa[i]);
		return (f);
	}
	failure = "addition of hc-pair array to FMRI failed";

boom:
	for (i = 0; i < depth; i++)
		if (pa[i] != NULL)
			nvlist_free(pa[i]);
	nvlist_free(f);
	out(O_DIE, "%s", failure);
	/*NOTREACHED*/
	return (NULL);
}
1990 
/*
 * an ipath cache entry is an array of these, with s==NULL at the end
 * (walked by ipath2fmri() below)
 */
struct ipath {
	const char *s;	/* component name (in stable) */
	int i;		/* instance number */
};
1996 
/*
 * ipath2fmri -- convert an ipath array (NULL-terminated, see struct
 * ipath above) into an hc-scheme FMRI nvlist.  dies on allocation or
 * construction failure.  mirrors node2fmri() above.
 */
static nvlist_t *
ipath2fmri(struct ipath *ipath)
{
	nvlist_t **pa, *f, *p;
	uint_t depth = 0;
	char *numstr, *nullbyte;
	char *failure;
	int err, i;
	struct ipath *ipp;

	for (ipp = ipath; ipp->s != NULL; ipp++)
		depth++;

	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
	pa = alloca(depth * sizeof (nvlist_t *));
	for (i = 0; i < depth; i++)
		pa[i] = NULL;

	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
	if (err != 0) {
		failure = "basic construction of FMRI failed";
		goto boom;
	}

	numbuf[MAXDIGITIDX] = '\0';
	nullbyte = &numbuf[MAXDIGITIDX];
	i = 0;

	/* build one hc-pair (name, instance) per path component */
	for (ipp = ipath; ipp->s != NULL; ipp++) {
		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
		if (err != 0) {
			failure = "alloc of an hc-pair failed";
			goto boom;
		}
		err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s);
		numstr = ulltostr(ipp->i, nullbyte);
		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
		if (err != 0) {
			failure = "construction of an hc-pair failed";
			goto boom;
		}
		pa[i++] = p;
	}

	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
	if (err == 0) {
		/* the array was copied into f; free our working copies */
		for (i = 0; i < depth; i++)
			if (pa[i] != NULL)
				nvlist_free(pa[i]);
		return (f);
	}
	failure = "addition of hc-pair array to FMRI failed";

boom:
	for (i = 0; i < depth; i++)
		if (pa[i] != NULL)
			nvlist_free(pa[i]);
	nvlist_free(f);
	out(O_DIE, "%s", failure);
	/*NOTREACHED*/
	return (NULL);
}
2063 
/*
 * avg -- compute the rounded average of sum over cnt.
 *
 * The product is computed in 64-bit arithmetic; previously "sum * 10"
 * was evaluated in 32-bit uint_t before widening, so a sum above
 * UINT_MAX/10 silently overflowed.  cnt must be non-zero (the caller
 * only invokes this after counting at least one fault).
 */
static uint_t
avg(uint_t sum, uint_t cnt)
{
	unsigned long long s = (unsigned long long)sum * 10;

	return ((s / cnt / 10) + (((s / cnt % 10) >= 5) ? 1 : 0));
}
2071 
/*
 * percentof -- return part/whole as a rounded percentage.
 *
 * The product is computed in 64-bit arithmetic; previously
 * "part * 1000" was evaluated in 32-bit uint_t before widening, so a
 * part above UINT_MAX/1000 silently overflowed.  whole must be
 * non-zero; the result is meaningful only when part <= whole (it must
 * fit in the uint8_t return).
 */
static uint8_t
percentof(uint_t part, uint_t whole)
{
	unsigned long long p = (unsigned long long)part * 1000;

	return ((p / whole / 10) + (((p / whole % 10) >= 5) ? 1 : 0));
}
2079 
/*
 * rsl -- one entry in a resolved suspect list: the suspect event plus
 * the FMRIs that will be advertised for it when the case is published.
 */
struct rsl {
	struct event *suspect;	/* NULL once the entry is deduped away */
	nvlist_t *asru;		/* unit to be retired/recovered */
	nvlist_t *fru;		/* field-replaceable unit */
	nvlist_t *rsrc;		/* resource; may alias asru (see rslfree) */
};
2086 
2087 static void publish_suspects(struct fme *fmep, struct rsl *srl);
2088 
2089 /*
2090  *  rslfree -- free internal members of struct rsl not expected to be
2091  *	freed elsewhere.
2092  */
2093 static void
2094 rslfree(struct rsl *freeme)
2095 {
2096 	if (freeme->asru != NULL)
2097 		nvlist_free(freeme->asru);
2098 	if (freeme->fru != NULL)
2099 		nvlist_free(freeme->fru);
2100 	if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru)
2101 		nvlist_free(freeme->rsrc);
2102 }
2103 
2104 /*
2105  *  rslcmp -- compare two rsl structures.  Use the following
2106  *	comparisons to establish cardinality:
2107  *
2108  *	1. Name of the suspect's class. (simple strcmp)
2109  *	2. Name of the suspect's ASRU. (trickier, since nvlist)
2110  *
2111  */
2112 static int
2113 rslcmp(const void *a, const void *b)
2114 {
2115 	struct rsl *r1 = (struct rsl *)a;
2116 	struct rsl *r2 = (struct rsl *)b;
2117 	int rv;
2118 
2119 	rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s,
2120 	    r2->suspect->enode->u.event.ename->u.name.s);
2121 	if (rv != 0)
2122 		return (rv);
2123 
2124 	if (r1->rsrc == NULL && r2->rsrc == NULL)
2125 		return (0);
2126 	if (r1->rsrc == NULL)
2127 		return (-1);
2128 	if (r2->rsrc == NULL)
2129 		return (1);
2130 	return (evnv_cmpnvl(r1->rsrc, r2->rsrc, 0));
2131 }
2132 
2133 /*
2134  *  rsluniq -- given an array of rsl structures, seek out and "remove"
2135  *	any duplicates.  Dups are "remove"d by NULLing the suspect pointer
2136  *	of the array element.  Removal also means updating the number of
2137  *	problems and the number of problems which are not faults.  User
2138  *	provides the first and last element pointers.
2139  */
2140 static void
2141 rsluniq(struct rsl *first, struct rsl *last, int *nprobs, int *nnonf)
2142 {
2143 	struct rsl *cr;
2144 
2145 	if (*nprobs == 1)
2146 		return;
2147 
2148 	/*
2149 	 *  At this point, we only expect duplicate defects.
2150 	 *  Eversholt's diagnosis algorithm prevents duplicate
2151 	 *  suspects, but we rewrite defects in the platform code after
2152 	 *  the diagnosis is made, and that can introduce new
2153 	 *  duplicates.
2154 	 */
2155 	while (first <= last) {
2156 		if (first->suspect == NULL || !is_defect(first->suspect->t)) {
2157 			first++;
2158 			continue;
2159 		}
2160 		cr = first + 1;
2161 		while (cr <= last) {
2162 			if (is_defect(first->suspect->t)) {
2163 				if (rslcmp(first, cr) == 0) {
2164 					cr->suspect = NULL;
2165 					rslfree(cr);
2166 					(*nprobs)--;
2167 					(*nnonf)--;
2168 				}
2169 			}
2170 			/*
2171 			 * assume all defects are in order after our
2172 			 * sort and short circuit here with "else break" ?
2173 			 */
2174 			cr++;
2175 		}
2176 		first++;
2177 	}
2178 }
2179 
2180 /*
2181  * get_resources -- for a given suspect, determine what ASRU, FRU and
2182  *     RSRC nvlists should be advertised in the final suspect list.
2183  */
/*
 * get_resources -- for a given suspect, determine what ASRU, FRU and
 *     RSRC nvlists should be advertised in the final suspect list.
 *
 * Fills in *rsrcs; ownership of the created nvlists passes to the
 * caller (they are released later via rslfree()).
 */
void
get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot)
{
	struct node *asrudef, *frudef;
	nvlist_t *asru, *fru;
	nvlist_t *rsrc = NULL;
	char *pathstr;

	/*
	 * First find any ASRU and/or FRU defined in the
	 * initial fault tree.
	 */
	asrudef = eventprop_lookup(sp, L_ASRU);
	frudef = eventprop_lookup(sp, L_FRU);

	/*
	 * Create FMRIs based on those definitions
	 */
	asru = node2fmri(asrudef);
	fru = node2fmri(frudef);
	pathstr = ipath2str(NULL, sp->ipp);

	/*
	 *  Allow for platform translations of the FMRIs
	 */
	platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc,
	    pathstr);

	FREE(pathstr);
	rsrcs->suspect = sp;
	rsrcs->asru = asru;
	rsrcs->fru = fru;
	rsrcs->rsrc = rsrc;
}
2218 
2219 /*
2220  * trim_suspects -- prior to publishing, we may need to remove some
2221  *    suspects from the list.  If we're auto-closing upsets, we don't
2222  *    want any of those in the published list.  If the ASRUs for multiple
2223  *    defects resolve to the same ASRU (driver) we only want to publish
2224  *    that as a single suspect.
2225  */
/*
 * trim_suspects -- prior to publishing, we may need to remove some
 *    suspects from the list.  If we're auto-closing upsets, we don't
 *    want any of those in the published list.  If the ASRUs for multiple
 *    defects resolve to the same ASRU (driver) we only want to publish
 *    that as a single suspect.
 *
 * Walks fmep->psuspects, splitting survivors into two arrays:  "begin"
 * receives ordinary suspects (counted in fmep->nsuspects/nonfault),
 * while "begin2" receives those whose "message" property is 0.
 * Returns the number of message=0 suspects; *mess_zero_nonfaultp is
 * incremented for each of those that is not a fault.
 */
static int
trim_suspects(struct fme *fmep, struct rsl *begin, struct rsl *begin2,
    fmd_event_t *ffep, int *mess_zero_nonfaultp)
{
	struct event *ep;
	struct rsl *rp = begin;
	struct rsl *rp2 = begin2;
	int mess_zero_count = 0;
	int serd_rval;
	uint_t messval;

	/* remove any unwanted upsets and populate our array */
	for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
		if (is_upset(ep->t))
			continue;
		/* serd_eval() == 0 drops the suspect -- see serd_eval() */
		serd_rval = serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, ep,
		    NULL, NULL);
		if (serd_rval == 0)
			continue;
		if (node2uint(eventprop_lookup(ep, L_message),
		    &messval) == 0 && messval == 0) {
			/* message=0 suspects are collected separately */
			get_resources(ep, rp2, fmep->config);
			rp2++;
			mess_zero_count++;
			if (!is_fault(ep->t))
				(*mess_zero_nonfaultp)++;
		} else {
			get_resources(ep, rp, fmep->config);
			rp++;
			fmep->nsuspects++;
			if (!is_fault(ep->t))
				fmep->nonfault++;
		}
	}
	return (mess_zero_count);
}
2262 
2263 /*
2264  * addpayloadprop -- add a payload prop to a problem
2265  */
/*
 * addpayloadprop -- add a payload prop to a problem
 *
 * The property is stored inside the fault's resource FMRI, under its
 * "hc-specific" member nvlist (created here on first use).  UINT64
 * values are stored as uint64 members; any other evalue type is
 * assumed to carry a string pointer in rhs->v.  Failures are fatal.
 */
static void
addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault)
{
	nvlist_t *rsrc, *hcs;

	ASSERT(fault != NULL);
	ASSERT(lhs != NULL);
	ASSERT(rhs != NULL);

	if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, &rsrc) != 0)
		out(O_DIE, "cannot add payloadprop \"%s\" to fault", lhs);

	if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0) {
		out(O_ALTFP|O_VERB2, "addpayloadprop: create hc_specific");
		if (nvlist_xalloc(&hcs, NV_UNIQUE_NAME, &Eft_nv_hdl) != 0)
			out(O_DIE,
			    "cannot add payloadprop \"%s\" to fault", lhs);
		if (nvlist_add_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, hcs) != 0)
			out(O_DIE,
			    "cannot add payloadprop \"%s\" to fault", lhs);
		/*
		 * nvlist_add_nvlist() stored a copy, so free ours and
		 * look the embedded copy back up to write through it.
		 */
		nvlist_free(hcs);
		if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0)
			out(O_DIE,
			    "cannot add payloadprop \"%s\" to fault", lhs);
	} else
		out(O_ALTFP|O_VERB2, "addpayloadprop: reuse hc_specific");

	if (rhs->t == UINT64) {
		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v);

		if (nvlist_add_uint64(hcs, lhs, rhs->v) != 0)
			out(O_DIE,
			    "cannot add payloadprop \"%s\" to fault", lhs);
	} else {
		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"",
		    lhs, (char *)(uintptr_t)rhs->v);

		if (nvlist_add_string(hcs, lhs, (char *)(uintptr_t)rhs->v) != 0)
			out(O_DIE,
			    "cannot add payloadprop \"%s\" to fault", lhs);
	}
}
2308 
/* scratch state used while serializing Istats into the fmd buffer */
static char *Istatbuf;	/* serialized istat names and values */
static char *Istatbufptr;	/* current write position in Istatbuf */
static int Istatsz;	/* total size of Istatbuf */
2312 
2313 /*
2314  * istataddsize -- calculate size of istat and add it to Istatsz
2315  */
2316 /*ARGSUSED2*/
2317 static void
2318 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2319 {
2320 	int val;
2321 
2322 	ASSERT(lhs != NULL);
2323 	ASSERT(rhs != NULL);
2324 
2325 	if ((val = stats_counter_value(rhs)) == 0)
2326 		return;	/* skip zero-valued stats */
2327 
2328 	/* count up the size of the stat name */
2329 	Istatsz += ipath2strlen(lhs->ename, lhs->ipath);
2330 	Istatsz++;	/* for the trailing NULL byte */
2331 
2332 	/* count up the size of the stat value */
2333 	Istatsz += snprintf(NULL, 0, "%d", val);
2334 	Istatsz++;	/* for the trailing NULL byte */
2335 }
2336 
2337 /*
2338  * istat2str -- serialize an istat, writing result to *Istatbufptr
2339  */
2340 /*ARGSUSED2*/
2341 static void
2342 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2343 {
2344 	char *str;
2345 	int len;
2346 	int val;
2347 
2348 	ASSERT(lhs != NULL);
2349 	ASSERT(rhs != NULL);
2350 
2351 	if ((val = stats_counter_value(rhs)) == 0)
2352 		return;	/* skip zero-valued stats */
2353 
2354 	/* serialize the stat name */
2355 	str = ipath2str(lhs->ename, lhs->ipath);
2356 	len = strlen(str);
2357 
2358 	ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]);
2359 	(void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr);
2360 	Istatbufptr += len;
2361 	FREE(str);
2362 	*Istatbufptr++ = '\0';
2363 
2364 	/* serialize the stat value */
2365 	Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr,
2366 	    "%d", val);
2367 	*Istatbufptr++ = '\0';
2368 
2369 	ASSERT(Istatbufptr <= &Istatbuf[Istatsz]);
2370 }
2371 
/*
 * istat_save -- serialize all non-zero Istats into the WOBUF_ISTATS
 *	fmd buffer, replacing any previously saved copy.  No-op unless
 *	Istat_need_save has been set.
 */
void
istat_save()
{
	if (Istat_need_save == 0)
		return;

	/* figure out how big the serialized info is */
	Istatsz = 0;
	lut_walk(Istats, (lut_cb)istataddsize, NULL);

	if (Istatsz == 0) {
		/* no stats to save */
		fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
		return;
	}

	/* create the serialized buffer */
	Istatbufptr = Istatbuf = MALLOC(Istatsz);
	lut_walk(Istats, (lut_cb)istat2str, NULL);

	/* clear out current saved stats */
	fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);

	/* write out the new version */
	fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz);
	FREE(Istatbuf);

	Istat_need_save = 0;
}
2401 
2402 int
2403 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2)
2404 {
2405 	if (ent1->ename != ent2->ename)
2406 		return (ent2->ename - ent1->ename);
2407 	if (ent1->ipath != ent2->ipath)
2408 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2409 
2410 	return (0);
2411 }
2412 
2413 /*
2414  * istat-verify -- verify the component associated with a stat still exists
2415  *
2416  * if the component no longer exists, this routine resets the stat and
2417  * returns 0.  if the component still exists, it returns 1.
2418  */
2419 static int
2420 istat_verify(struct node *snp, struct istat_entry *entp)
2421 {
2422 	struct stats *statp;
2423 	nvlist_t *fmri;
2424 
2425 	fmri = node2fmri(snp->u.event.epname);
2426 	if (platform_path_exists(fmri)) {
2427 		nvlist_free(fmri);
2428 		return (1);
2429 	}
2430 	nvlist_free(fmri);
2431 
2432 	/* component no longer in system.  zero out the associated stats */
2433 	if ((statp = (struct stats *)
2434 	    lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
2435 	    stats_counter_value(statp) == 0)
2436 		return (0);	/* stat is already reset */
2437 
2438 	Istat_need_save = 1;
2439 	stats_counter_reset(statp);
2440 	return (0);
2441 }
2442 
/*
 * istat_bump -- increment (or, if n is non-zero, set to n) the istat
 *	counter named by event node snp, creating the counter on first
 *	use.  If the component has left the topology the counter is
 *	reset instead (see istat_verify()) and nothing is bumped.
 */
static void
istat_bump(struct node *snp, int n)
{
	struct stats *statp;
	struct istat_entry ent;

	ASSERT(snp != NULL);
	ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
	ASSERT(snp->u.event.epname != NULL);

	/* class name should be hoisted into a single stable entry */
	ASSERT(snp->u.event.ename->u.name.next == NULL);
	ent.ename = snp->u.event.ename->u.name.s;
	ent.ipath = ipath(snp->u.event.epname);

	if (!istat_verify(snp, &ent)) {
		/* component no longer exists in system, nothing to do */
		return;
	}

	if ((statp = (struct stats *)
	    lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
		/* need to create the counter */
		int cnt = 0;
		struct node *np;
		char *sname;
		char *snamep;
		struct istat_entry *newentp;

		/* count up the size of the stat name */
		np = snp->u.event.ename;
		while (np != NULL) {
			cnt += strlen(np->u.name.s);
			cnt++;	/* for the '.' or '@' */
			np = np->u.name.next;
		}
		np = snp->u.event.epname;
		while (np != NULL) {
			cnt += snprintf(NULL, 0, "%s%llu",
			    np->u.name.s, np->u.name.child->u.ull);
			cnt++;	/* for the '/' or trailing NULL byte */
			np = np->u.name.next;
		}

		/* build the stat name:  "ename@comp0/comp1..." */
		snamep = sname = alloca(cnt);
		np = snp->u.event.ename;
		while (np != NULL) {
			snamep += snprintf(snamep, &sname[cnt] - snamep,
			    "%s", np->u.name.s);
			np = np->u.name.next;
			if (np)
				*snamep++ = '.';
		}
		*snamep++ = '@';
		np = snp->u.event.epname;
		while (np != NULL) {
			snamep += snprintf(snamep, &sname[cnt] - snamep,
			    "%s%llu", np->u.name.s, np->u.name.child->u.ull);
			np = np->u.name.next;
			if (np)
				*snamep++ = '/';
		}
		*snamep++ = '\0';

		/* create the new stat & add it to our list */
		newentp = MALLOC(sizeof (*newentp));
		*newentp = ent;
		statp = stats_new_counter(NULL, sname, 0);
		Istats = lut_add(Istats, (void *)newentp, (void *)statp,
		    (lut_cmp)istat_cmp);
	}

	/* if n is non-zero, set that value instead of bumping */
	if (n) {
		stats_counter_reset(statp);
		stats_counter_add(statp, n);
	} else
		stats_counter_bump(statp);
	Istat_need_save = 1;

	ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
	out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
	    stats_counter_value(statp));
}
2528 
/*ARGSUSED*/
static void
istat_destructor(void *left, void *right, void *arg)
{
	/* lut payload: left is the istat_entry key, right its counter */
	FREE((struct istat_entry *)left);
	stats_delete((struct stats *)right);
}
2538 
2539 /*
2540  * Callback used in a walk of the Istats to reset matching stat counters.
2541  */
2542 static void
2543 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
2544     const struct ipath *ipp)
2545 {
2546 	char *path;
2547 
2548 	if (entp->ipath == ipp) {
2549 		path = ipath2str(entp->ename, ipp);
2550 		out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
2551 		FREE(path);
2552 		stats_counter_reset(statp);
2553 		Istat_need_save = 1;
2554 	}
2555 }
2556 
2557 /*ARGSUSED*/
2558 static void
2559 istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp,
2560     void *unused)
2561 {
2562 	char *path;
2563 	nvlist_t *fmri;
2564 
2565 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2566 	if (!platform_path_exists(fmri)) {
2567 		path = ipath2str(entp->ename, entp->ipath);
2568 		out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path);
2569 		FREE(path);
2570 		stats_counter_reset(statp);
2571 		Istat_need_save = 1;
2572 	}
2573 	nvlist_free(fmri);
2574 }
2575 
/*
 * istat_fini -- tear down the Istats lut, freeing every entry key and
 *	its counter via istat_destructor().
 */
void
istat_fini(void)
{
	lut_free(Istats, istat_destructor, NULL);
}
2581 
/* scratch state used while serializing SerdEngines into the fmd buffer */
static char *Serdbuf;	/* serialized serd engine names */
static char *Serdbufptr;	/* current write position in Serdbuf */
static int Serdsz;	/* total size of Serdbuf */
2585 
2586 /*
2587  * serdaddsize -- calculate size of serd and add it to Serdsz
2588  */
2589 /*ARGSUSED*/
2590 static void
2591 serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2592 {
2593 	ASSERT(lhs != NULL);
2594 
2595 	/* count up the size of the stat name */
2596 	Serdsz += ipath2strlen(lhs->ename, lhs->ipath);
2597 	Serdsz++;	/* for the trailing NULL byte */
2598 }
2599 
2600 /*
2601  * serd2str -- serialize a serd engine, writing result to *Serdbufptr
2602  */
/*
 * serd2str -- serialize a serd engine, writing result to *Serdbufptr
 *
 * Only the engine's name is recorded, as a NULL-terminated string;
 * the engine state itself lives in fmd's serd subsystem.
 */
/*ARGSUSED*/
static void
serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg)
{
	char *str;
	int len;

	ASSERT(lhs != NULL);

	/* serialize the serd engine name */
	str = ipath2str(lhs->ename, lhs->ipath);
	len = strlen(str);

	ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]);
	(void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr);
	Serdbufptr += len;
	FREE(str);
	*Serdbufptr++ = '\0';
	ASSERT(Serdbufptr <= &Serdbuf[Serdsz]);
}
2623 
/*
 * serd_save -- serialize the names of all SerdEngines into the
 *	WOBUF_SERDS fmd buffer, replacing any previously saved copy.
 *	No-op unless Serd_need_save has been set.
 */
void
serd_save()
{
	if (Serd_need_save == 0)
		return;

	/* figure out how big the serialized info is */
	Serdsz = 0;
	lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL);

	if (Serdsz == 0) {
		/* no serd engines to save */
		fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
		return;
	}

	/* create the serialized buffer */
	Serdbufptr = Serdbuf = MALLOC(Serdsz);
	lut_walk(SerdEngines, (lut_cb)serd2str, NULL);

	/* clear out current saved stats */
	fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);

	/* write out the new version */
	fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz);
	FREE(Serdbuf);
	Serd_need_save = 0;
}
2652 
2653 int
2654 serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2)
2655 {
2656 	if (ent1->ename != ent2->ename)
2657 		return (ent2->ename - ent1->ename);
2658 	if (ent1->ipath != ent2->ipath)
2659 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2660 
2661 	return (0);
2662 }
2663 
/*
 * fme_serd_load -- restore the SerdEngines lut from the WOBUF_SERDS
 *	fmd buffer.  Engines whose component path no longer exists in
 *	the current topology are dropped and the buffer is rewritten.
 *
 * Buffer format: a sequence of NULL-terminated "ename@path" strings,
 * as written by serd_save()/serd2str().
 */
void
fme_serd_load(fmd_hdl_t *hdl)
{
	int sz;
	char *sbuf;
	char *sepptr;
	char *ptr;
	struct serd_entry *newentp;
	struct node *epname;
	nvlist_t *fmri;
	char *namestring;

	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0)
		return;
	sbuf = alloca(sz);
	fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz);
	ptr = sbuf;
	while (ptr < &sbuf[sz]) {
		/*
		 * split "ename@path" in place; assumes every entry
		 * contains an '@' (ipath2str() output appears to
		 * guarantee this -- TODO confirm)
		 */
		sepptr = strchr(ptr, '@');
		*sepptr = '\0';
		namestring = ptr;
		sepptr++;
		ptr = sepptr;
		ptr += strlen(ptr);
		ptr++;	/* move past the '\0' separating paths */
		epname = pathstring2epnamenp(sepptr);
		fmri = node2fmri(epname);
		if (platform_path_exists(fmri)) {
			newentp = MALLOC(sizeof (*newentp));
			newentp->hdl = hdl;
			newentp->ipath = ipath(epname);
			newentp->ename = stable(namestring);
			SerdEngines = lut_add(SerdEngines, (void *)newentp,
			    (void *)newentp, (lut_cmp)serd_cmp);
		} else
			Serd_need_save = 1;
		tree_free(epname);
		nvlist_free(fmri);
	}
	/* save it back again in case some of the paths no longer exist */
	serd_save();
}
2706 
/*ARGSUSED*/
static void
serd_destructor(void *left, void *right, void *arg)
{
	/* key and value are the same serd_entry; free it once via the key */
	FREE((struct serd_entry *)left);
}
2714 
2715 /*
2716  * Callback used in a walk of the SerdEngines to reset matching serd engines.
2717  */
2718 /*ARGSUSED*/
2719 static void
2720 serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp)
2721 {
2722 	char *path;
2723 
2724 	if (entp->ipath == ipp) {
2725 		path = ipath2str(entp->ename, ipp);
2726 		out(O_ALTFP, "serd_reset_cb: resetting %s", path);
2727 		fmd_serd_reset(entp->hdl, path);
2728 		FREE(path);
2729 		Serd_need_save = 1;
2730 	}
2731 }
2732 
2733 /*ARGSUSED*/
2734 static void
2735 serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2)
2736 {
2737 	char *path;
2738 	nvlist_t *fmri;
2739 
2740 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2741 	if (!platform_path_exists(fmri)) {
2742 		path = ipath2str(entp->ename, entp->ipath);
2743 		out(O_ALTFP, "serd_topo_chg_cb: not present %s", path);
2744 		fmd_serd_reset(entp->hdl, path);
2745 		FREE(path);
2746 		Serd_need_save = 1;
2747 	}
2748 	nvlist_free(fmri);
2749 }
2750 
/*
 * serd_fini -- tear down the SerdEngines lut, freeing every entry via
 *	serd_destructor().
 */
void
serd_fini(void)
{
	lut_free(SerdEngines, serd_destructor, NULL);
}
2756 
/*
 * publish_suspects -- sort and dedup the fme's suspect array srl
 *	(fmep->nsuspects entries), compute a certainty for each
 *	survivor, and add the resulting faults to the fmd case.
 *
 * Certainty assignment:  if no suspect is a fault, each receives an
 * equal share (100/N); otherwise each suspect receives its FITrate's
 * share of the total, with non-faults assigned the average of the
 * fault FITrates (see the block comment below).
 */
static void
publish_suspects(struct fme *fmep, struct rsl *srl)
{
	struct rsl *rp;
	nvlist_t *fault;
	uint8_t cert;
	uint_t *frs;
	uint_t fravg, frsum, fr;
	uint_t messval;
	uint_t retireval;
	uint_t responseval;
	struct node *snp;
	int frcnt, fridx;
	boolean_t allfaulty = B_TRUE;
	struct rsl *erl = srl + fmep->nsuspects - 1;

	/*
	 * sort the array
	 */
	qsort(srl, fmep->nsuspects, sizeof (struct rsl), rslcmp);
	rsluniq(srl, erl, &fmep->nsuspects, &fmep->nonfault);

	/*
	 * If the suspect list is all faults, then for a given fault,
	 * say X of N, X's certainty is computed via:
	 *
	 * fitrate(X) / (fitrate(1) + ... + fitrate(N)) * 100
	 *
	 * If none of the suspects are faults, and there are N suspects,
	 * the certainty of a given suspect is 100/N.
	 *
	 * If there are are a mixture of faults and other problems in
	 * the suspect list, we take an average of the faults'
	 * FITrates and treat this average as the FITrate for any
	 * non-faults.  The fitrate of any given suspect is then
	 * computed per the first formula above.
	 */
	if (fmep->nonfault == fmep->nsuspects) {
		/* NO faults in the suspect list */
		cert = percentof(1, fmep->nsuspects);
	} else {
		/* sum the fitrates */
		frs = alloca(fmep->nsuspects * sizeof (uint_t));
		fridx = frcnt = frsum = 0;

		for (rp = srl; rp <= erl; rp++) {
			struct node *n;

			if (rp->suspect == NULL)
				continue;
			if (!is_fault(rp->suspect->t)) {
				/* mark 0 now; replaced by average below */
				frs[fridx++] = 0;
				continue;
			}
			n = eventprop_lookup(rp->suspect, L_FITrate);
			if (node2uint(n, &fr) != 0) {
				out(O_DEBUG|O_NONL, "event ");
				ipath_print(O_DEBUG|O_NONL,
				    rp->suspect->enode->u.event.ename->u.name.s,
				    rp->suspect->ipp);
				out(O_DEBUG, " has no FITrate (using 1)");
				fr = 1;
			} else if (fr == 0) {
				out(O_DEBUG|O_NONL, "event ");
				ipath_print(O_DEBUG|O_NONL,
				    rp->suspect->enode->u.event.ename->u.name.s,
				    rp->suspect->ipp);
				out(O_DEBUG, " has zero FITrate (using 1)");
				fr = 1;
			}

			frs[fridx++] = fr;
			frsum += fr;
			frcnt++;
		}
		fravg = avg(frsum, frcnt);
		/* substitute the average fault FITrate for each non-fault */
		for (fridx = 0; fridx < fmep->nsuspects; fridx++)
			if (frs[fridx] == 0) {
				frs[fridx] = fravg;
				frsum += fravg;
			}
	}

	/* Add them in reverse order of our sort, as fmd reverses order */
	for (rp = erl; rp >= srl; rp--) {
		if (rp->suspect == NULL)
			continue;
		if (!is_fault(rp->suspect->t))
			allfaulty = B_FALSE;
		/* frs[] was filled front-to-back; consume it back-to-front */
		if (fmep->nonfault != fmep->nsuspects)
			cert = percentof(frs[--fridx], frsum);
		fault = fmd_nvl_create_fault(fmep->hdl,
		    rp->suspect->enode->u.event.ename->u.name.s,
		    cert,
		    rp->asru,
		    rp->fru,
		    rp->rsrc);
		if (fault == NULL)
			out(O_DIE, "fault creation failed");
		/* if "message" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_message),
		    &messval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds message=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    messval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_MESSAGE,
			    (messval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-message to fault");
			}
		}

		/* if "retire" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_retire),
		    &retireval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds retire=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    retireval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_RETIRE,
			    (retireval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-retire to fault");
			}
		}

		/* if "response" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_response),
		    &responseval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds response=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    responseval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_RESPONSE,
			    (responseval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-response to fault");
			}
		}

		/* add any payload properties */
		lut_walk(rp->suspect->payloadprops,
		    (lut_cb)addpayloadprop, (void *)fault);
		rslfree(rp);

		/*
		 * If "action" property exists, evaluate it;  this must be done
		 * before the allfaulty check below since some actions may
		 * modify the asru to be used in fmd_nvl_fmri_has_fault.  This
		 * needs to be restructured if any new actions are introduced
		 * that have effects that we do not want to be visible if
		 * we decide not to publish in the dupclose check below.
		 */
		if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
			struct evalue evalue;

			out(O_ALTFP|O_NONL,
			    "[FME%d, %s action ", fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s);
			ptree_name_iter(O_ALTFP|O_NONL, snp);
			out(O_ALTFP, "]");
			Action_nvl = fault;
			(void) eval_expr(snp, NULL, NULL, NULL, NULL,
			    NULL, 0, &evalue);
		}

		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);

		/*
		 * check if the asru is already marked as "faulty".
		 */
		if (allfaulty) {
			nvlist_t *asru;

			out(O_ALTFP|O_VERB, "FME%d dup check ", fmep->id);
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
			out(O_ALTFP|O_VERB|O_NONL, " ");
			if (nvlist_lookup_nvlist(fault,
			    FM_FAULT_ASRU, &asru) != 0) {
				out(O_ALTFP|O_VERB, "NULL asru");
				allfaulty = B_FALSE;
			} else if (fmd_nvl_fmri_has_fault(fmep->hdl, asru,
			    FMD_HAS_FAULT_ASRU, NULL)) {
				out(O_ALTFP|O_VERB, "faulty");
			} else {
				out(O_ALTFP|O_VERB, "not faulty");
				allfaulty = B_FALSE;
			}
		}

	}

	if (!allfaulty) {
		/*
		 * don't update the count stat if all asrus are already
		 * present and unrepaired in the asru cache
		 */
		for (rp = erl; rp >= srl; rp--) {
			struct event *suspect = rp->suspect;

			if (suspect == NULL)
				continue;

			/* if "count" exists, increment the appropriate stat */
			if ((snp = eventprop_lookup(suspect,
			    L_count)) != NULL) {
				out(O_ALTFP|O_NONL,
				    "[FME%d, %s count ", fmep->id,
				    suspect->enode->u.event.ename->u.name.s);
				ptree_name_iter(O_ALTFP|O_NONL, snp);
				out(O_ALTFP, "]");
				istat_bump(snp, 0);

			}
		}
		istat_save();	/* write out any istat changes */
	}
}
2982 
2983 static void
2984 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase)
2985 {
2986 	struct case_list *newcase;
2987 	nvlist_t *defect;
2988 
2989 	out(O_ALTFP,
2990 	    "[undiagnosable ereport received, "
2991 	    "creating and closing a new case (%s)]",
2992 	    Undiag_reason ? Undiag_reason : "reason not provided");
2993 
2994 	newcase = MALLOC(sizeof (struct case_list));
2995 	newcase->next = NULL;
2996 	newcase->fmcase = fmcase;
2997 	if (Undiagablecaselist != NULL)
2998 		newcase->next = Undiagablecaselist;
2999 	Undiagablecaselist = newcase;
3000 
3001 	if (ffep != NULL)
3002 		fmd_case_add_ereport(hdl, newcase->fmcase, ffep);
3003 
3004 	defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
3005 	    NULL, NULL, NULL);
3006 	if (Undiag_reason != NULL)
3007 		(void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason);
3008 	fmd_case_add_suspect(hdl, newcase->fmcase, defect);
3009 
3010 	fmd_case_solve(hdl, newcase->fmcase);
3011 	fmd_case_close(hdl, newcase->fmcase);
3012 }
3013 
/*
 * fme_undiagnosable -- give up on an existing FME: attach a single
 *	undiagnosable defect (tagged with Undiag_reason when set) to
 *	its case, then solve and close it.
 */
static void
fme_undiagnosable(struct fme *f)
{
	nvlist_t *defect;

	out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
	    f->id, fmd_case_uuid(f->hdl, f->fmcase),
	    Undiag_reason ? Undiag_reason : "undiagnosable");

	defect = fmd_nvl_create_fault(f->hdl, UNDIAGNOSABLE_DEFECT, 100,
	    NULL, NULL, NULL);
	if (Undiag_reason != NULL)
		(void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason);
	fmd_case_add_suspect(f->hdl, f->fmcase, defect);
	fmd_case_solve(f->hdl, f->fmcase);
	fmd_case_close(f->hdl, f->fmcase);
}
3031 
3032 /*
3033  * fme_close_case
3034  *
3035  *	Find the requested case amongst our fmes and close it.  Free up
3036  *	the related fme.
3037  */
/*
 * fme_close_case
 *
 *	Find the requested case amongst our fmes and close it.  Free up
 *	the related fme.
 */
void
fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase)
{
	struct case_list *ucasep, *prevcasep = NULL;
	struct fme *prev = NULL;
	struct fme *fmep;

	/* if it is an undiagnosable case, just unlink and free its entry */
	for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) {
		if (fmcase != ucasep->fmcase) {
			prevcasep = ucasep;
			continue;
		}

		if (prevcasep == NULL)
			Undiagablecaselist = Undiagablecaselist->next;
		else
			prevcasep->next = ucasep->next;

		FREE(ucasep);
		return;
	}

	/* otherwise locate the fme for this case on the active list */
	for (fmep = FMElist; fmep; fmep = fmep->next) {
		if (fmep->hdl == hdl && fmep->fmcase == fmcase)
			break;
		prev = fmep;
	}

	if (fmep == NULL) {
		out(O_WARN, "Eft asked to close unrecognized case [%s].",
		    fmd_case_uuid(hdl, fmcase));
		return;
	}

	/* unlink fmep from FMElist, maintaining the EFMElist tail pointer */
	if (EFMElist == fmep)
		EFMElist = prev;

	if (prev == NULL)
		FMElist = FMElist->next;
	else
		prev->next = fmep->next;

	fmep->next = NULL;

	/* Get rid of any timer this fme has set */
	if (fmep->wull != 0)
		fmd_timer_remove(fmep->hdl, fmep->timer);

	/* move the fme onto the closed list for later reaping */
	if (ClosedFMEs == NULL) {
		ClosedFMEs = fmep;
	} else {
		fmep->next = ClosedFMEs;
		ClosedFMEs = fmep;
	}

	Open_fme_count--;

	/* See if we can close the overflow FME */
	if (Open_fme_count <= Max_fme) {
		for (fmep = FMElist; fmep; fmep = fmep->next) {
			if (fmep->overflow && !(fmd_case_closed(fmep->hdl,
			    fmep->fmcase)))
				break;
		}

		if (fmep != NULL)
			fmd_case_close(fmep->hdl, fmep->fmcase);
	}
}
3107 
3108 /*
3109  * fme_set_timer()
3110  *	If the time we need to wait for the given FME is less than the
3111  *	current timer, kick that old timer out and establish a new one.
3112  */
3113 static int
3114 fme_set_timer(struct fme *fmep, unsigned long long wull)
3115 {
3116 	out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait ");
3117 	ptree_timeval(O_ALTFP|O_VERB, &wull);
3118 
3119 	if (wull <= fmep->pull) {
3120 		out(O_ALTFP|O_VERB|O_NONL, "already have waited at least ");
3121 		ptree_timeval(O_ALTFP|O_VERB, &fmep->pull);
3122 		out(O_ALTFP|O_VERB, NULL);
3123 		/* we've waited at least wull already, don't need timer */
3124 		return (0);
3125 	}
3126 
3127 	out(O_ALTFP|O_VERB|O_NONL, " currently ");
3128 	if (fmep->wull != 0) {
3129 		out(O_ALTFP|O_VERB|O_NONL, "waiting ");
3130 		ptree_timeval(O_ALTFP|O_VERB, &fmep->wull);
3131 		out(O_ALTFP|O_VERB, NULL);
3132 	} else {
3133 		out(O_ALTFP|O_VERB|O_NONL, "not waiting");
3134 		out(O_ALTFP|O_VERB, NULL);
3135 	}
3136 
3137 	if (fmep->wull != 0)
3138 		if (wull >= fmep->wull)
3139 			/* New timer would fire later than established timer */
3140 			return (0);
3141 
3142 	if (fmep->wull != 0) {
3143 		fmd_timer_remove(fmep->hdl, fmep->timer);
3144 	}
3145 
3146 	fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep,
3147 	    fmep->e0r, wull);
3148 	out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer);
3149 	fmep->wull = wull;
3150 	return (1);
3151 }
3152 
3153 void
3154 fme_timer_fired(struct fme *fmep, id_t tid)
3155 {
3156 	struct fme *ffmep = NULL;
3157 
3158 	for (ffmep = FMElist; ffmep; ffmep = ffmep->next)
3159 		if (ffmep == fmep)
3160 			break;
3161 
3162 	if (ffmep == NULL) {
3163 		out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.",
3164 		    (void *)fmep);
3165 		return;
3166 	}
3167 
3168 	out(O_ALTFP|O_VERB, "Timer fired %lx", tid);
3169 	fmep->pull = fmep->wull;
3170 	fmep->wull = 0;
3171 	fmd_buf_write(fmep->hdl, fmep->fmcase,
3172 	    WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull));
3173 
3174 	fme_eval(fmep, fmep->e0r);
3175 }
3176 
3177 /*
3178  * Preserve the fme's suspect list in its psuspects list, NULLing the
3179  * suspects list in the meantime.
3180  */
3181 static void
3182 save_suspects(struct fme *fmep)
3183 {
3184 	struct event *ep;
3185 	struct event *nextep;
3186 
3187 	/* zero out the previous suspect list */
3188 	for (ep = fmep->psuspects; ep; ep = nextep) {
3189 		nextep = ep->psuspects;
3190 		ep->psuspects = NULL;
3191 	}
3192 	fmep->psuspects = NULL;
3193 
3194 	/* zero out the suspect list, copying it to previous suspect list */
3195 	fmep->psuspects = fmep->suspects;
3196 	for (ep = fmep->suspects; ep; ep = nextep) {
3197 		nextep = ep->suspects;
3198 		ep->psuspects = ep->suspects;
3199 		ep->suspects = NULL;
3200 		ep->is_suspect = 0;
3201 	}
3202 	fmep->suspects = NULL;
3203 	fmep->nsuspects = 0;
3204 	fmep->nonfault = 0;
3205 }
3206 
3207 /*
3208  * Retrieve the fme's suspect list from its psuspects list.
3209  */
3210 static void
3211 restore_suspects(struct fme *fmep)
3212 {
3213 	struct event *ep;
3214 	struct event *nextep;
3215 
3216 	fmep->nsuspects = fmep->nonfault = 0;
3217 	fmep->suspects = fmep->psuspects;
3218 	for (ep = fmep->psuspects; ep; ep = nextep) {
3219 		fmep->nsuspects++;
3220 		if (!is_fault(ep->t))
3221 			fmep->nonfault++;
3222 		nextep = ep->psuspects;
3223 		ep->suspects = ep->psuspects;
3224 	}
3225 }
3226 
3227 /*
3228  * this is what we use to call the Emrys prototype code instead of main()
3229  */
/*
 * fme_eval -- run the hypothesis engine over an FME and act on the result:
 * publish and solve a credible suspect list, set a timer and wait, or
 * declare the FME undiagnosable.  ffep is the fmd event that triggered
 * this evaluation (may be NULL for timer-driven calls -- see
 * fme_timer_fired(), which passes fmep->e0r).
 */
static void
fme_eval(struct fme *fmep, fmd_event_t *ffep)
{
	struct event *ep;
	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
	struct rsl *srl = NULL;		/* "message=1" suspect list */
	struct rsl *srl2 = NULL;	/* "message=0" suspect list */
	int mess_zero_count;
	int mess_zero_nonfault = 0;
	int rpcnt;

	save_suspects(fmep);

	out(O_ALTFP, "Evaluate FME %d", fmep->id);
	indent_set("  ");

	/* clear per-evaluation arrow state, then re-run the hypothesis */
	lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
	fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);

	out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
	    fme_state2str(fmep->state));
	for (ep = fmep->suspects; ep; ep = ep->suspects) {
		out(O_ALTFP|O_NONL, " ");
		itree_pevent_brief(O_ALTFP|O_NONL, ep);
	}
	out(O_ALTFP, NULL);

	switch (fmep->state) {
	case FME_CREDIBLE:
		print_suspects(SLNEW, fmep);
		(void) upsets_eval(fmep, ffep);

		/*
		 * we may have already posted suspects in upsets_eval() which
		 * can recurse into fme_eval() again. If so then just return.
		 */
		if (fmep->posted_suspects)
			return;

		stats_counter_bump(fmep->diags);
		rpcnt = fmep->nsuspects;
		save_suspects(fmep);

		/*
		 * create two lists, one for "message=1" faults and one for
		 * "message=0" faults. If we have a mixture we will generate
		 * two separate suspect lists.
		 */
		srl = MALLOC(rpcnt * sizeof (struct rsl));
		bzero(srl, rpcnt * sizeof (struct rsl));
		srl2 = MALLOC(rpcnt * sizeof (struct rsl));
		bzero(srl2, rpcnt * sizeof (struct rsl));
		mess_zero_count = trim_suspects(fmep, srl, srl2, ffep,
		    &mess_zero_nonfault);

		/*
		 * If the resulting suspect list has no members, we're
		 * done so simply close the case. Otherwise sort and publish.
		 */
		if (fmep->nsuspects == 0 && mess_zero_count == 0) {
			out(O_ALTFP,
			    "[FME%d, case %s (all suspects are upsets)]",
			    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
			fmd_case_close(fmep->hdl, fmep->fmcase);
		} else if (fmep->nsuspects != 0 && mess_zero_count == 0) {
			/* only message=1 suspects: publish them on this case */
			publish_suspects(fmep, srl);
			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
			fmd_case_solve(fmep->hdl, fmep->fmcase);
		} else if (fmep->nsuspects == 0 && mess_zero_count != 0) {
			/* only message=0 suspects: publish the second list */
			fmep->nsuspects = mess_zero_count;
			fmep->nonfault = mess_zero_nonfault;
			publish_suspects(fmep, srl2);
			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
			fmd_case_solve(fmep->hdl, fmep->fmcase);
		} else {
			struct event *obsp;
			struct fme *nfmep;

			publish_suspects(fmep, srl);
			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
			fmd_case_solve(fmep->hdl, fmep->fmcase);

			/*
			 * Got both message=0 and message=1 so create a
			 * duplicate case. Also need a temporary duplicate fme
			 * structure for use by publish_suspects().
			 */
			nfmep = alloc_fme();
			nfmep->id =  Nextid++;
			nfmep->hdl = fmep->hdl;
			nfmep->nsuspects = mess_zero_count;
			nfmep->nonfault = mess_zero_nonfault;
			nfmep->fmcase = fmd_case_open(fmep->hdl, NULL);
			out(O_ALTFP|O_STAMP,
			    "[creating parallel FME%d, case %s]", nfmep->id,
			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
			Open_fme_count++;
			if (ffep) {
				fmd_case_setprincipal(nfmep->hdl,
				    nfmep->fmcase, ffep);
				fmd_case_add_ereport(nfmep->hdl,
				    nfmep->fmcase, ffep);
			}
			/* attach all other observed ereports to the new case */
			for (obsp = fmep->observations; obsp;
			    obsp = obsp->observations)
				if (obsp->ffep && obsp->ffep != ffep)
					fmd_case_add_ereport(nfmep->hdl,
					    nfmep->fmcase, obsp->ffep);

			publish_suspects(nfmep, srl2);
			out(O_ALTFP, "[solving FME%d, case %s]", nfmep->id,
			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
			fmd_case_solve(nfmep->hdl, nfmep->fmcase);
			FREE(nfmep);
		}
		FREE(srl);
		FREE(srl2);
		restore_suspects(fmep);

		/* remember that we have posted, and persist that fact */
		fmep->posted_suspects = 1;
		fmd_buf_write(fmep->hdl, fmep->fmcase,
		    WOBUF_POSTD,
		    (void *)&fmep->posted_suspects,
		    sizeof (fmep->posted_suspects));

		/*
		 * Now the suspects have been posted, we can clear up
		 * the instance tree as we won't be looking at it again.
		 * Also cancel the timer as the case is now solved.
		 */
		if (fmep->wull != 0) {
			fmd_timer_remove(fmep->hdl, fmep->timer);
			fmep->wull = 0;
		}
		break;

	case FME_WAIT:
		/*
		 * not enough information yet; arm a timer and keep the
		 * (pruned) instance tree around for the next evaluation.
		 */
		ASSERT(my_delay > fmep->ull);
		(void) fme_set_timer(fmep, my_delay);
		print_suspects(SLWAIT, fmep);
		itree_prune(fmep->eventtree);
		return;

	case FME_DISPROVED:
		print_suspects(SLDISPROVED, fmep);
		Undiag_reason = UD_UNSOLVD;
		fme_undiagnosable(fmep);
		break;
	}

	/* credible or disproved: the instance tree is no longer needed */
	itree_free(fmep->eventtree);
	fmep->eventtree = NULL;
	structconfig_free(fmep->config);
	fmep->config = NULL;
	destroy_fme_bufs(fmep);
}
3389 
3390 static void indent(void);
3391 static int triggered(struct fme *fmep, struct event *ep, int mark);
3392 static enum fme_state effects_test(struct fme *fmep,
3393     struct event *fault_event, unsigned long long at_latest_by,
3394     unsigned long long *pdelay);
3395 static enum fme_state requirements_test(struct fme *fmep, struct event *ep,
3396     unsigned long long at_latest_by, unsigned long long *pdelay);
3397 static enum fme_state causes_test(struct fme *fmep, struct event *ep,
3398     unsigned long long at_latest_by, unsigned long long *pdelay);
3399 
3400 static int
3401 checkconstraints(struct fme *fmep, struct arrow *arrowp)
3402 {
3403 	struct constraintlist *ctp;
3404 	struct evalue value;
3405 	char *sep = "";
3406 
3407 	if (arrowp->forever_false) {
3408 		indent();
3409 		out(O_ALTFP|O_VERB|O_NONL, "  Forever false constraint: ");
3410 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3411 			out(O_ALTFP|O_VERB|O_NONL, sep);
3412 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3413 			sep = ", ";
3414 		}
3415 		out(O_ALTFP|O_VERB, NULL);
3416 		return (0);
3417 	}
3418 	if (arrowp->forever_true) {
3419 		indent();
3420 		out(O_ALTFP|O_VERB|O_NONL, "  Forever true constraint: ");
3421 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3422 			out(O_ALTFP|O_VERB|O_NONL, sep);
3423 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3424 			sep = ", ";
3425 		}
3426 		out(O_ALTFP|O_VERB, NULL);
3427 		return (1);
3428 	}
3429 
3430 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3431 		if (eval_expr(ctp->cnode, NULL, NULL,
3432 		    &fmep->globals, fmep->config,
3433 		    arrowp, 0, &value)) {
3434 			/* evaluation successful */
3435 			if (value.t == UNDEFINED || value.v == 0) {
3436 				/* known false */
3437 				arrowp->forever_false = 1;
3438 				indent();
3439 				out(O_ALTFP|O_VERB|O_NONL,
3440 				    "  False constraint: ");
3441 				ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3442 				out(O_ALTFP|O_VERB, NULL);
3443 				return (0);
3444 			}
3445 		} else {
3446 			/* evaluation unsuccessful -- unknown value */
3447 			indent();
3448 			out(O_ALTFP|O_VERB|O_NONL,
3449 			    "  Deferred constraint: ");
3450 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3451 			out(O_ALTFP|O_VERB, NULL);
3452 			return (1);
3453 		}
3454 	}
3455 	/* known true */
3456 	arrowp->forever_true = 1;
3457 	indent();
3458 	out(O_ALTFP|O_VERB|O_NONL, "  True constraint: ");
3459 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3460 		out(O_ALTFP|O_VERB|O_NONL, sep);
3461 		ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3462 		sep = ", ";
3463 	}
3464 	out(O_ALTFP|O_VERB, NULL);
3465 	return (1);
3466 }
3467 
3468 static int
3469 triggered(struct fme *fmep, struct event *ep, int mark)
3470 {
3471 	struct bubble *bp;
3472 	struct arrowlist *ap;
3473 	int count = 0;
3474 
3475 	stats_counter_bump(fmep->Tcallcount);
3476 	for (bp = itree_next_bubble(ep, NULL); bp;
3477 	    bp = itree_next_bubble(ep, bp)) {
3478 		if (bp->t != B_TO)
3479 			continue;
3480 		for (ap = itree_next_arrow(bp, NULL); ap;
3481 		    ap = itree_next_arrow(bp, ap)) {
3482 			/* check count of marks against K in the bubble */
3483 			if ((ap->arrowp->mark & mark) &&
3484 			    ++count >= bp->nork)
3485 				return (1);
3486 		}
3487 	}
3488 	return (0);
3489 }
3490 
/*
 * mark_arrows -- recursively propagate effect marks along B_FROM arrows
 * leaving ep.
 *	mark == 0 clears previously-set marks (keep != 0 flags events
 *	carrying effect state with keep_in_tree before clearing);
 *	otherwise mark is CREDIBLE_EFFECT or PARENT_WAIT and is applied
 *	to downstream events whose constraints and K-counts allow it.
 *	Returns WAIT_EFFECT (with *pdelay set to the shortest wait) if
 *	any downstream requirement is still waiting, else 0.
 */
static int
mark_arrows(struct fme *fmep, struct event *ep, int mark,
    unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
{
	struct bubble *bp;
	struct arrowlist *ap;
	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
	unsigned long long my_delay;
	enum fme_state result;
	int retval = 0;

	for (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		if (bp->t != B_FROM)
			continue;
		stats_counter_bump(fmep->Marrowcount);
		for (ap = itree_next_arrow(bp, NULL); ap;
		    ap = itree_next_arrow(bp, ap)) {
			struct event *ep2 = ap->arrowp->head->myevent;
			/*
			 * if we're clearing marks, we can avoid doing
			 * all that work evaluating constraints.
			 */
			if (mark == 0) {
				if (ap->arrowp->arrow_marked == 0)
					continue;
				ap->arrowp->arrow_marked = 0;
				ap->arrowp->mark &= ~EFFECTS_COUNTER;
				if (keep && (ep2->cached_state &
				    (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
					ep2->keep_in_tree = 1;
				ep2->cached_state &=
				    ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
				(void) mark_arrows(fmep, ep2, mark, 0, NULL,
				    keep);
				continue;
			}
			ap->arrowp->arrow_marked = 1;
			/* skip events already resolved on a previous visit */
			if (ep2->cached_state & REQMNTS_DISPROVED) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  ALREADY DISPROVED ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			if (ep2->cached_state & WAIT_EFFECT) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  ALREADY EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			if (ep2->cached_state & CREDIBLE_EFFECT) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  ALREADY EFFECTS CREDIBLE ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			if ((ep2->cached_state & PARENT_WAIT) &&
			    (mark & PARENT_WAIT)) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  ALREADY PARENT EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			/* constraints are evaluated against ep2's payload */
			platform_set_payloadnvp(ep2->nvp);
			if (checkconstraints(fmep, ap->arrowp) == 0) {
				platform_set_payloadnvp(NULL);
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  CONSTRAINTS FAIL ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			platform_set_payloadnvp(NULL);
			ap->arrowp->mark |= EFFECTS_COUNTER;
			if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  K-COUNT NOT YET MET ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			ep2->cached_state &= ~PARENT_WAIT;
			/*
			 * if we've reached an ereport and no propagation time
			 * is specified, use the Hesitate value
			 */
			if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
			    ap->arrowp->maxdelay == 0ULL) {
				out(O_ALTFP|O_VERB|O_NONL, "  default wait ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				result = requirements_test(fmep, ep2, Hesitate,
				    &my_delay);
			} else {
				result = requirements_test(fmep, ep2,
				    at_latest_by + ap->arrowp->maxdelay,
				    &my_delay);
			}
			if (result == FME_WAIT) {
				/* keep the shortest of all pending waits */
				retval = WAIT_EFFECT;
				if (overall_delay > my_delay)
					overall_delay = my_delay;
				ep2->cached_state |= WAIT_EFFECT;
				indent();
				out(O_ALTFP|O_VERB|O_NONL, "  EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				indent_push("  E");
				if (mark_arrows(fmep, ep2, PARENT_WAIT,
				    at_latest_by, &my_delay, 0) ==
				    WAIT_EFFECT) {
					retval = WAIT_EFFECT;
					if (overall_delay > my_delay)
						overall_delay = my_delay;
				}
				indent_pop();
			} else if (result == FME_DISPROVED) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  EFFECTS DISPROVED ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
			} else {
				/* credible: mark ep2 and recurse downstream */
				ep2->cached_state |= mark;
				indent();
				if (mark == CREDIBLE_EFFECT)
					out(O_ALTFP|O_VERB|O_NONL,
					    "  EFFECTS CREDIBLE ");
				else
					out(O_ALTFP|O_VERB|O_NONL,
					    "  PARENT EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				indent_push("  E");
				if (mark_arrows(fmep, ep2, mark, at_latest_by,
				    &my_delay, 0) == WAIT_EFFECT) {
					retval = WAIT_EFFECT;
					if (overall_delay > my_delay)
						overall_delay = my_delay;
				}
				indent_pop();
			}
		}
	}
	if (retval == WAIT_EFFECT)
		*pdelay = overall_delay;
	return (retval);
}
3649 
3650 static enum fme_state
3651 effects_test(struct fme *fmep, struct event *fault_event,
3652     unsigned long long at_latest_by, unsigned long long *pdelay)
3653 {
3654 	struct event *error_event;
3655 	enum fme_state return_value = FME_CREDIBLE;
3656 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3657 	unsigned long long my_delay;
3658 
3659 	stats_counter_bump(fmep->Ecallcount);
3660 	indent_push("  E");
3661 	indent();
3662 	out(O_ALTFP|O_VERB|O_NONL, "->");
3663 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3664 	out(O_ALTFP|O_VERB, NULL);
3665 
3666 	if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
3667 	    &my_delay, 0) == WAIT_EFFECT) {
3668 		return_value = FME_WAIT;
3669 		if (overall_delay > my_delay)
3670 			overall_delay = my_delay;
3671 	}
3672 	for (error_event = fmep->observations;
3673 	    error_event; error_event = error_event->observations) {
3674 		indent();
3675 		out(O_ALTFP|O_VERB|O_NONL, " ");
3676 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
3677 		if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
3678 			if (error_event->cached_state &
3679 			    (PARENT_WAIT|WAIT_EFFECT)) {
3680 				out(O_ALTFP|O_VERB, " NOT YET triggered");
3681 				continue;
3682 			}
3683 			return_value = FME_DISPROVED;
3684 			out(O_ALTFP|O_VERB, " NOT triggered");
3685 			break;
3686 		} else {
3687 			out(O_ALTFP|O_VERB, " triggered");
3688 		}
3689 	}
3690 	if (return_value == FME_DISPROVED) {
3691 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
3692 	} else {
3693 		fault_event->keep_in_tree = 1;
3694 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
3695 	}
3696 
3697 	indent();
3698 	out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
3699 	    fme_state2str(return_value));
3700 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3701 	out(O_ALTFP|O_VERB, NULL);
3702 	indent_pop();
3703 	if (return_value == FME_WAIT)
3704 		*pdelay = overall_delay;
3705 	return (return_value);
3706 }
3707 
/*
 * requirements_test -- check whether the requirements (B_FROM bubbles)
 * of an event can be met by at_latest_by.
 *	Returns FME_CREDIBLE, FME_DISPROVED, FME_WAIT (with *pdelay set
 *	to the shortest wait time) or FME_DEFERRED.  Results other than
 *	FME_DEFERRED are cached in ep->cached_state.
 */
static enum fme_state
requirements_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay)
{
	int waiting_events;
	int credible_events;
	int deferred_events;
	enum fme_state return_value = FME_CREDIBLE;
	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
	unsigned long long arrow_delay;
	unsigned long long my_delay;
	struct event *ep2;
	struct bubble *bp;
	struct arrowlist *ap;

	/* consult the cache before doing any real work */
	if (ep->cached_state & REQMNTS_CREDIBLE) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY CREDIBLE ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB, NULL);
		return (FME_CREDIBLE);
	}
	if (ep->cached_state & REQMNTS_DISPROVED) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY DISPROVED ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB, NULL);
		return (FME_DISPROVED);
	}
	if (ep->cached_state & REQMNTS_WAIT) {
		indent();
		*pdelay = ep->cached_delay;
		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY WAIT ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB|O_NONL, ", wait for: ");
		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
		out(O_ALTFP|O_VERB, NULL);
		return (FME_WAIT);
	}
	stats_counter_bump(fmep->Rcallcount);
	indent_push("  R");
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "->");
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
	out(O_ALTFP|O_VERB, NULL);

	/*
	 * an unobserved ereport is credible only if there's still time
	 * left for it to arrive (wait), otherwise it's disproved.
	 */
	if (ep->t == N_EREPORT) {
		if (ep->count == 0) {
			if (fmep->pull >= at_latest_by) {
				return_value = FME_DISPROVED;
			} else {
				ep->cached_delay = *pdelay = at_latest_by;
				return_value = FME_WAIT;
			}
		}

		indent();
		switch (return_value) {
		case FME_CREDIBLE:
			ep->cached_state |= REQMNTS_CREDIBLE;
			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE ");
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
			break;
		case FME_DISPROVED:
			ep->cached_state |= REQMNTS_DISPROVED;
			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
			break;
		case FME_WAIT:
			ep->cached_state |= REQMNTS_WAIT;
			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT ");
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
			out(O_ALTFP|O_VERB|O_NONL, " to ");
			ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
			break;
		default:
			out(O_DIE, "requirements_test: unexpected fme_state");
			break;
		}
		out(O_ALTFP|O_VERB, NULL);
		indent_pop();

		return (return_value);
	}

	/* this event is not a report, descend the tree */
	for (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		int n;

		if (bp->t != B_FROM)
			continue;

		n = bp->nork;

		credible_events = 0;
		waiting_events = 0;
		deferred_events = 0;
		arrow_delay = TIMEVAL_EVENTUALLY;
		/*
		 * n is -1 for 'A' so adjust it.
		 * XXX just count up the arrows for now.
		 */
		if (n < 0) {
			n = 0;
			for (ap = itree_next_arrow(bp, NULL); ap;
			    ap = itree_next_arrow(bp, ap))
				n++;
			indent();
			out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n);
		} else {
			indent();
			out(O_ALTFP|O_VERB, " Bubble N=%d", n);
		}

		if (n == 0)
			continue;
		if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) {
			for (ap = itree_next_arrow(bp, NULL); ap;
			    ap = itree_next_arrow(bp, ap)) {
				ep2 = ap->arrowp->head->myevent;
				platform_set_payloadnvp(ep2->nvp);
				(void) checkconstraints(fmep, ap->arrowp);
				if (ap->arrowp->forever_true) {
					/*
					 * if all arrows are invalidated by the
					 * constraints, then we should elide the
					 * whole bubble to be consistent with
					 * the tree creation time behaviour
					 */
					bp->mark |= BUBBLE_OK;
					platform_set_payloadnvp(NULL);
					break;
				}
				platform_set_payloadnvp(NULL);
			}
		}
		for (ap = itree_next_arrow(bp, NULL); ap;
		    ap = itree_next_arrow(bp, ap)) {
			ep2 = ap->arrowp->head->myevent;
			if (n <= credible_events)
				break;

			ap->arrowp->mark |= REQMNTS_COUNTER;
			if (triggered(fmep, ep2, REQMNTS_COUNTER))
				/* XXX adding max timevals! */
				switch (requirements_test(fmep, ep2,
				    at_latest_by + ap->arrowp->maxdelay,
				    &my_delay)) {
				case FME_DEFERRED:
					deferred_events++;
					break;
				case FME_CREDIBLE:
					credible_events++;
					break;
				case FME_DISPROVED:
					break;
				case FME_WAIT:
					if (my_delay < arrow_delay)
						arrow_delay = my_delay;
					waiting_events++;
					break;
				default:
					out(O_DIE,
					"Bug in requirements_test.");
				}
			else
				deferred_events++;
		}
		if (!(bp->mark & BUBBLE_OK) && waiting_events == 0) {
			bp->mark |= BUBBLE_ELIDED;
			continue;
		}
		indent();
		out(O_ALTFP|O_VERB, " Credible: %d Waiting %d",
		    credible_events + deferred_events, waiting_events);
		if (credible_events + deferred_events + waiting_events < n) {
			/* Can never meet requirements */
			ep->cached_state |= REQMNTS_DISPROVED;
			indent();
			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
			out(O_ALTFP|O_VERB, NULL);
			indent_pop();
			return (FME_DISPROVED);
		}
		if (credible_events + deferred_events < n) {
			/* will have to wait */
			/* wait time is shortest known */
			if (arrow_delay < overall_delay)
				overall_delay = arrow_delay;
			return_value = FME_WAIT;
		} else if (credible_events < n) {
			if (return_value != FME_WAIT)
				return_value = FME_DEFERRED;
		}
	}

	/*
	 * don't mark as FME_DEFERRED. If this event isn't reached by another
	 * path, then this will be considered FME_CREDIBLE. But if it is
	 * reached by a different path so the K-count is met, then might
	 * get overridden by FME_WAIT or FME_DISPROVED.
	 */
	if (return_value == FME_WAIT) {
		ep->cached_state |= REQMNTS_WAIT;
		ep->cached_delay = *pdelay = overall_delay;
	} else if (return_value == FME_CREDIBLE) {
		ep->cached_state |= REQMNTS_CREDIBLE;
	}
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ",
	    fme_state2str(return_value));
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB, NULL);
	indent_pop();
	return (return_value);
}
3928 
/*
 * causes_test -- check whether enough upstream causes of an event are
 * credible (or still waiting) to satisfy the K value of its "to" bubbles.
 *	Each candidate cause is hypothesised recursively; returns
 *	FME_CREDIBLE, FME_WAIT (with *pdelay set to the shortest wait)
 *	or FME_DISPROVED.
 */
static enum fme_state
causes_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay)
{
	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
	unsigned long long my_delay;
	int credible_results = 0;
	int waiting_results = 0;
	enum fme_state fstate;
	struct event *tail_event;
	struct bubble *bp;
	struct arrowlist *ap;
	int k = 1;	/* default K when no B_TO bubble is found */

	stats_counter_bump(fmep->Ccallcount);
	indent_push("  C");
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "->");
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB, NULL);

	for (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		if (bp->t != B_TO)
			continue;
		k = bp->nork;	/* remember the K value */
		for (ap = itree_next_arrow(bp, NULL); ap;
		    ap = itree_next_arrow(bp, ap)) {
			int do_not_follow = 0;

			/*
			 * if we get to the same event multiple times
			 * only worry about the first one.
			 */
			if (ap->arrowp->tail->myevent->cached_state &
			    CAUSES_TESTED) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  causes test already run for ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
				    ap->arrowp->tail->myevent);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}

			/*
			 * see if false constraint prevents us
			 * from traversing this arrow
			 */
			platform_set_payloadnvp(ep->nvp);
			if (checkconstraints(fmep, ap->arrowp) == 0)
				do_not_follow = 1;
			platform_set_payloadnvp(NULL);
			if (do_not_follow) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  False arrow from ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
				    ap->arrowp->tail->myevent);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}

			/* mark before recursing to stop repeat visits */
			ap->arrowp->tail->myevent->cached_state |=
			    CAUSES_TESTED;
			tail_event = ap->arrowp->tail->myevent;
			fstate = hypothesise(fmep, tail_event, at_latest_by,
			    &my_delay);

			switch (fstate) {
			case FME_WAIT:
				if (my_delay < overall_delay)
					overall_delay = my_delay;
				waiting_results++;
				break;
			case FME_CREDIBLE:
				credible_results++;
				break;
			case FME_DISPROVED:
				break;
			default:
				out(O_DIE, "Bug in causes_test");
			}
		}
	}
	/* compare against K */
	if (credible_results + waiting_results < k) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB, NULL);
		indent_pop();
		return (FME_DISPROVED);
	}
	if (waiting_results != 0) {
		*pdelay = overall_delay;
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB|O_NONL, " to ");
		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
		out(O_ALTFP|O_VERB, NULL);
		indent_pop();
		return (FME_WAIT);
	}
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE ");
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB, NULL);
	indent_pop();
	return (FME_CREDIBLE);
}
4041 
/*
 * hypothesise -- test whether an event is a credible hypothesis by
 * combining requirements_test() with either effects_test() (for
 * problems) or causes_test() (for other events).
 *	Credible problems are prepended to fmep->suspects (unless
 *	fmep->peek is set or the event is already a suspect).  Returns
 *	FME_CREDIBLE, FME_DISPROVED or FME_WAIT (with *pdelay set to
 *	the shortest wait time).
 */
static enum fme_state
hypothesise(struct fme *fmep, struct event *ep,
	unsigned long long at_latest_by, unsigned long long *pdelay)
{
	enum fme_state rtr, otr;	/* requirements and other results */
	unsigned long long my_delay;
	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;

	stats_counter_bump(fmep->Hcallcount);
	indent_push("  H");
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "->");
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
	out(O_ALTFP|O_VERB, NULL);

	rtr = requirements_test(fmep, ep, at_latest_by, &my_delay);
	if ((rtr == FME_WAIT) && (my_delay < overall_delay))
		overall_delay = my_delay;
	if (rtr != FME_DISPROVED) {
		if (is_problem(ep->t)) {
			otr = effects_test(fmep, ep, at_latest_by, &my_delay);
			if (otr != FME_DISPROVED) {
				/* link ep onto the suspect list */
				if (fmep->peek == 0 && ep->is_suspect == 0) {
					ep->suspects = fmep->suspects;
					ep->is_suspect = 1;
					fmep->suspects = ep;
					fmep->nsuspects++;
					if (!is_fault(ep->t))
						fmep->nonfault++;
				}
			}
		} else
			otr = causes_test(fmep, ep, at_latest_by, &my_delay);
		if ((otr == FME_WAIT) && (my_delay < overall_delay))
			overall_delay = my_delay;
		if ((otr != FME_DISPROVED) &&
		    ((rtr == FME_WAIT) || (otr == FME_WAIT)))
			*pdelay = overall_delay;
	}
	/* otr is only examined below when rtr != FME_DISPROVED, so it is set */
	if (rtr == FME_DISPROVED) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
		indent_pop();
		return (FME_DISPROVED);
	}
	if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
		indent_pop();
		return (FME_DISPROVED);
	}
	if (otr == FME_DISPROVED) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB, " (causes are not credible)");
		indent_pop();
		return (FME_DISPROVED);
	}
	if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
		out(O_ALTFP|O_VERB|O_NONL, " to ");
		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
		out(O_ALTFP|O_VERB, NULL);
		indent_pop();
		return (FME_WAIT);
	}
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	out(O_ALTFP|O_VERB, NULL);
	indent_pop();
	return (FME_CREDIBLE);
}
4124 
4125 /*
4126  * fme_istat_load -- reconstitute any persistent istats
4127  */
4128 void
4129 fme_istat_load(fmd_hdl_t *hdl)
4130 {
4131 	int sz;
4132 	char *sbuf;
4133 	char *ptr;
4134 
4135 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) {
4136 		out(O_ALTFP, "fme_istat_load: No stats");
4137 		return;
4138 	}
4139 
4140 	sbuf = alloca(sz);
4141 
4142 	fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz);
4143 
4144 	/*
4145 	 * pick apart the serialized stats
4146 	 *
4147 	 * format is:
4148 	 *	<class-name>, '@', <path>, '\0', <value>, '\0'
4149 	 * for example:
4150 	 *	"stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
4151 	 *
4152 	 * since this is parsing our own serialized data, any parsing issues
4153 	 * are fatal, so we check for them all with ASSERT() below.
4154 	 */
4155 	ptr = sbuf;
4156 	while (ptr < &sbuf[sz]) {
4157 		char *sepptr;
4158 		struct node *np;
4159 		int val;
4160 
4161 		sepptr = strchr(ptr, '@');
4162 		ASSERT(sepptr != NULL);
4163 		*sepptr = '\0';
4164 
4165 		/* construct the event */
4166 		np = newnode(T_EVENT, NULL, 0);
4167 		np->u.event.ename = newnode(T_NAME, NULL, 0);
4168 		np->u.event.ename->u.name.t = N_STAT;
4169 		np->u.event.ename->u.name.s = stable(ptr);
4170 		np->u.event.ename->u.name.it = IT_ENAME;
4171 		np->u.event.ename->u.name.last = np->u.event.ename;
4172 
4173 		ptr = sepptr + 1;
4174 		ASSERT(ptr < &sbuf[sz]);
4175 		ptr += strlen(ptr);
4176 		ptr++;	/* move past the '\0' separating path from value */
4177 		ASSERT(ptr < &sbuf[sz]);
4178 		ASSERT(isdigit(*ptr));
4179 		val = atoi(ptr);
4180 		ASSERT(val > 0);
4181 		ptr += strlen(ptr);
4182 		ptr++;	/* move past the final '\0' for this entry */
4183 
4184 		np->u.event.epname = pathstring2epnamenp(sepptr + 1);
4185 		ASSERT(np->u.event.epname != NULL);
4186 
4187 		istat_bump(np, val);
4188 		tree_free(np);
4189 	}
4190 
4191 	istat_save();
4192 }
4193