xref: /illumos-gate/usr/src/cmd/fm/modules/common/eversholt/fme.c (revision da40b2648878aa9434c7199422846fe5a7032714)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2012 Milan Jurik. All rights reserved.
25  * Copyright (c) 2018, Joyent, Inc.
26  *
27  * fme.c -- fault management exercise module
28  *
29  * this module provides the simulated fault management exercise.
30  */
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <strings.h>
36 #include <ctype.h>
37 #include <alloca.h>
38 #include <libnvpair.h>
39 #include <sys/fm/protocol.h>
40 #include <fm/fmd_api.h>
41 #include <fm/libtopo.h>
42 #include "alloc.h"
43 #include "out.h"
44 #include "stats.h"
45 #include "stable.h"
46 #include "literals.h"
47 #include "lut.h"
48 #include "tree.h"
49 #include "ptree.h"
50 #include "itree.h"
51 #include "ipath.h"
52 #include "fme.h"
53 #include "evnv.h"
54 #include "eval.h"
55 #include "config.h"
56 #include "platform.h"
57 #include "esclex.h"
58 
/* imported from eft.c... */
extern hrtime_t Hesitate;
extern char *Serd_Override;
/* allocator handle passed to nvlist_xpack()/nvlist_xunpack() in this file */
extern nv_alloc_t Eft_nv_hdl;
extern int Max_fme;
extern fmd_hdl_t *Hdl;

/* NOTE(review): presumably "dirty" flags for istat_save()/serd_save() */
static int Istat_need_save;
static int Serd_need_save;
void istat_save(void);
void serd_save(void);

/* fme under construction is global so we can free it on module abort */
static struct fme *Nfmep;

/*
 * Reason code (UD_VAL_*) recorded just before a case is declared
 * undiagnosable; fme_restart() resets it to UD_VAL_UNKNOWN once consumed.
 */
static int Undiag_reason = UD_VAL_UNKNOWN;

/* id handed to the next FME; kept above any id restored by fme_restart() */
static int Nextid = 0;

static int Open_fme_count = 0;	/* Count of open FMEs */
79 
/*
 * list of fault management exercises underway
 *
 * FMElist is the head of the list and EFMElist its tail (fme_ready()
 * appends new exercises at EFMElist).  ClosedFMEs is a separate list of
 * closed exercises that fme_fini() destroys.
 */
static struct fme {
	struct fme *next;		/* next exercise */
	unsigned long long ull;		/* time when fme was created */
	int id;				/* FME id */
	struct config *config;		/* cooked configuration data */
	struct lut *eventtree;		/* propagation tree for this FME */
	/*
	 * The initial error report that created this FME is kept in
	 * two forms.  e0 points to the instance tree node and is used
	 * by fme_eval() as the starting point for the inference
	 * algorithm.  e0r is the event handle FMD passed to us when
	 * the ereport first arrived and is used when setting timers,
	 * which are always relative to the time of this initial
	 * report.
	 */
	struct event *e0;
	fmd_event_t *e0r;

	id_t    timer;			/* for setting an fmd time-out */

	struct event *ecurrent;		/* ereport under consideration */
	struct event *suspects;		/* current suspect list */
	struct event *psuspects;	/* previous suspect list */
	int nsuspects;			/* count of suspects */
	int posted_suspects;		/* true if we've posted a diagnosis */
	int uniqobs;			/* number of unique events observed */
	int peek;			/* just peeking, don't track suspects */
	int overflow;			/* true if overflow FME */
	enum fme_state {
		FME_NOTHING = 5000,	/* not evaluated yet */
		FME_WAIT,		/* need to wait for more info */
		FME_CREDIBLE,		/* suspect list is credible */
		FME_DISPROVED,		/* no valid suspects found */
		FME_DEFERRED		/* don't know yet (k-count not met) */
	} state;

	unsigned long long pull;	/* time passed since created */
	unsigned long long wull;	/* wait until this time for re-eval */
	struct event *observations;	/* observation list */
	struct lut *globals;		/* values of global variables */
	/* fmd interfacing */
	fmd_hdl_t *hdl;			/* handle for talking with fmd */
	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
	/* stats */
	struct stats *Rcount;		/* ereports received */
	struct stats *Hcallcount;	/* calls to hypothesise() */
	struct stats *Rcallcount;	/* calls to requirements_test() */
	struct stats *Ccallcount;	/* calls to causes_test() */
	struct stats *Ecallcount;	/* calls to effects_test() */
	struct stats *Tcallcount;	/* calls to triggered() */
	struct stats *Marrowcount;	/* arrows marked by mark_arrows() */
	struct stats *diags;		/* suspect lists diagnosed */
} *FMElist, *EFMElist, *ClosedFMEs;
134 
/*
 * Singly-linked list of fmd cases that fme_restart() was unable to
 * restart.  New entries are pushed on the front; fme_fini() frees the
 * list nodes.
 */
static struct case_list {
	fmd_case_t *fmcase;
	struct case_list *next;
} *Undiagablecaselist;
139 
/* forward declarations of file-local functions used before definition */
static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
	unsigned long long at_latest_by, unsigned long long *pdelay);
static struct node *eventprop_lookup(struct event *ep, const char *propname);
static struct node *pathstring2epnamenp(char *path);
static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
	fmd_case_t *fmcase, nvlist_t *detector, char *arg);
static char *undiag_2reason_str(int ud, char *arg);
static const char *undiag_2defect_str(int ud);
static void restore_suspects(struct fme *fmep);
static void save_suspects(struct fme *fmep);
static void destroy_fme(struct fme *f);
static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
    const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
static void istat_counter_reset_cb(struct istat_entry *entp,
    struct stats *statp, const struct ipath *ipp);
static void istat_counter_topo_chg_cb(struct istat_entry *entp,
    struct stats *statp, void *unused);
static void serd_reset_cb(struct serd_entry *entp, void *unused,
    const struct ipath *ipp);
static void serd_topo_chg_cb(struct serd_entry *entp, void *unused,
    void *unused2);
static void destroy_fme_bufs(struct fme *fp);
163 
164 static struct fme *
165 alloc_fme(void)
166 {
167 	struct fme *fmep;
168 
169 	fmep = MALLOC(sizeof (*fmep));
170 	bzero(fmep, sizeof (*fmep));
171 	return (fmep);
172 }
173 
174 /*
175  * fme_ready -- called when all initialization of the FME (except for
176  *	stats) has completed successfully.  Adds the fme to global lists
177  *	and establishes its stats.
178  */
179 static struct fme *
180 fme_ready(struct fme *fmep)
181 {
182 	char nbuf[100];
183 
184 	Nfmep = NULL;	/* don't need to free this on module abort now */
185 
186 	if (EFMElist) {
187 		EFMElist->next = fmep;
188 		EFMElist = fmep;
189 	} else
190 		FMElist = EFMElist = fmep;
191 
192 	(void) sprintf(nbuf, "fme%d.Rcount", fmep->id);
193 	fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
194 	(void) sprintf(nbuf, "fme%d.Hcall", fmep->id);
195 	fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1);
196 	(void) sprintf(nbuf, "fme%d.Rcall", fmep->id);
197 	fmep->Rcallcount = stats_new_counter(nbuf,
198 	    "calls to requirements_test()", 1);
199 	(void) sprintf(nbuf, "fme%d.Ccall", fmep->id);
200 	fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1);
201 	(void) sprintf(nbuf, "fme%d.Ecall", fmep->id);
202 	fmep->Ecallcount =
203 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
204 	(void) sprintf(nbuf, "fme%d.Tcall", fmep->id);
205 	fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
206 	(void) sprintf(nbuf, "fme%d.Marrow", fmep->id);
207 	fmep->Marrowcount = stats_new_counter(nbuf,
208 	    "arrows marked by mark_arrows()", 1);
209 	(void) sprintf(nbuf, "fme%d.diags", fmep->id);
210 	fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
211 
212 	out(O_ALTFP|O_VERB2, "newfme: config snapshot contains...");
213 	config_print(O_ALTFP|O_VERB2, fmep->config);
214 
215 	return (fmep);
216 }
217 
/* helpers defined outside this file, used for the dummy-tree pruning pass */
extern void ipath_dummy_lut(struct arrow *);
extern struct lut *itree_create_dummy(const char *, const struct ipath *);
220 
221 /* ARGSUSED */
222 static void
223 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
224 {
225 	struct bubble *bp;
226 	struct arrowlist *ap;
227 
228 	for (bp = itree_next_bubble(ep, NULL); bp;
229 	    bp = itree_next_bubble(ep, bp)) {
230 		if (bp->t != B_FROM)
231 			continue;
232 		for (ap = itree_next_arrow(bp, NULL); ap;
233 		    ap = itree_next_arrow(bp, ap)) {
234 			ap->arrowp->pnode->u.arrow.needed = 1;
235 			ipath_dummy_lut(ap->arrowp);
236 		}
237 	}
238 }
239 
240 /* ARGSUSED */
241 static void
242 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
243 {
244 	struct bubble *bp;
245 	struct arrowlist *ap;
246 
247 	for (bp = itree_next_bubble(ep, NULL); bp;
248 	    bp = itree_next_bubble(ep, bp)) {
249 		if (bp->t != B_FROM)
250 			continue;
251 		for (ap = itree_next_arrow(bp, NULL); ap;
252 		    ap = itree_next_arrow(bp, ap))
253 			ap->arrowp->pnode->u.arrow.needed = 0;
254 	}
255 }
256 
/* forward declarations needed by prune_propagations() below */
static void globals_destructor(void *left, void *right, void *arg);
static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep);
259 
260 static boolean_t
261 prune_propagations(const char *e0class, const struct ipath *e0ipp)
262 {
263 	char nbuf[100];
264 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
265 	extern struct lut *Usednames;
266 
267 	Nfmep = alloc_fme();
268 	Nfmep->id = Nextid;
269 	Nfmep->state = FME_NOTHING;
270 	Nfmep->eventtree = itree_create_dummy(e0class, e0ipp);
271 	if ((Nfmep->e0 =
272 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
273 		itree_free(Nfmep->eventtree);
274 		FREE(Nfmep);
275 		Nfmep = NULL;
276 		return (B_FALSE);
277 	}
278 	Nfmep->ecurrent = Nfmep->observations = Nfmep->e0;
279 	Nfmep->e0->count++;
280 
281 	(void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id);
282 	Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
283 	(void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id);
284 	Nfmep->Hcallcount =
285 	    stats_new_counter(nbuf, "calls to hypothesise()", 1);
286 	(void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id);
287 	Nfmep->Rcallcount = stats_new_counter(nbuf,
288 	    "calls to requirements_test()", 1);
289 	(void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id);
290 	Nfmep->Ccallcount =
291 	    stats_new_counter(nbuf, "calls to causes_test()", 1);
292 	(void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id);
293 	Nfmep->Ecallcount =
294 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
295 	(void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id);
296 	Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
297 	(void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id);
298 	Nfmep->Marrowcount = stats_new_counter(nbuf,
299 	    "arrows marked by mark_arrows()", 1);
300 	(void) sprintf(nbuf, "fme%d.diags", Nfmep->id);
301 	Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
302 
303 	Nfmep->peek = 1;
304 	lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep);
305 	lut_free(Usednames, NULL, NULL);
306 	Usednames = NULL;
307 	lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep);
308 	(void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay);
309 	itree_prune(Nfmep->eventtree);
310 	lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep);
311 
312 	stats_delete(Nfmep->Rcount);
313 	stats_delete(Nfmep->Hcallcount);
314 	stats_delete(Nfmep->Rcallcount);
315 	stats_delete(Nfmep->Ccallcount);
316 	stats_delete(Nfmep->Ecallcount);
317 	stats_delete(Nfmep->Tcallcount);
318 	stats_delete(Nfmep->Marrowcount);
319 	stats_delete(Nfmep->diags);
320 	itree_free(Nfmep->eventtree);
321 	lut_free(Nfmep->globals, globals_destructor, NULL);
322 	FREE(Nfmep);
323 	return (B_TRUE);
324 }
325 
326 static struct fme *
327 newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl,
328     fmd_case_t *fmcase, fmd_event_t *ffep, nvlist_t *nvl)
329 {
330 	struct cfgdata *cfgdata;
331 	int init_size;
332 	extern int alloc_total();
333 	nvlist_t *detector = NULL;
334 	char *pathstr;
335 	char *arg;
336 
337 	/*
338 	 * First check if e0ipp is actually in the topology so we can give a
339 	 * more useful error message.
340 	 */
341 	ipathlastcomp(e0ipp);
342 	pathstr = ipath2str(NULL, e0ipp);
343 	cfgdata = config_snapshot();
344 	platform_unit_translate(0, cfgdata->cooked, TOPO_PROP_RESOURCE,
345 	    &detector, pathstr);
346 	FREE(pathstr);
347 	structconfig_free(cfgdata->cooked);
348 	config_free(cfgdata);
349 	if (detector == NULL) {
350 		/* See if class permits silent discard on unknown component. */
351 		if (lut_lookup(Ereportenames_discard, (void *)e0class, NULL)) {
352 			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
353 			    "to component path, but silent discard allowed.",
354 			    e0class);
355 			fmd_case_close(hdl, fmcase);
356 		} else {
357 			Undiag_reason = UD_VAL_BADEVENTPATH;
358 			(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
359 			    &detector);
360 			arg = ipath2str(e0class, e0ipp);
361 			publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
362 			FREE(arg);
363 		}
364 		return (NULL);
365 	}
366 
367 	/*
368 	 * Next run a quick first pass of the rules with a dummy config. This
369 	 * allows us to prune those rules which can't possibly cause this
370 	 * ereport.
371 	 */
372 	if (!prune_propagations(e0class, e0ipp)) {
373 		/*
374 		 * The fault class must have been in the rules or we would
375 		 * not have registered for it (and got a "nosub"), and the
376 		 * pathname must be in the topology or we would have failed the
377 		 * previous test. So to get here means the combination of
378 		 * class and pathname in the ereport must be invalid.
379 		 */
380 		Undiag_reason = UD_VAL_BADEVENTCLASS;
381 		arg = ipath2str(e0class, e0ipp);
382 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
383 		nvlist_free(detector);
384 		FREE(arg);
385 		return (NULL);
386 	}
387 
388 	/*
389 	 * Now go ahead and create the real fme using the pruned rules.
390 	 */
391 	init_size = alloc_total();
392 	out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size);
393 	nvlist_free(detector);
394 	pathstr = ipath2str(NULL, e0ipp);
395 	cfgdata = config_snapshot();
396 	platform_unit_translate(0, cfgdata->cooked, TOPO_PROP_RESOURCE,
397 	    &detector, pathstr);
398 	FREE(pathstr);
399 	platform_save_config(hdl, fmcase);
400 	out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes",
401 	    alloc_total() - init_size);
402 
403 	Nfmep = alloc_fme();
404 
405 	Nfmep->id = Nextid++;
406 	Nfmep->config = cfgdata->cooked;
407 	config_free(cfgdata);
408 	Nfmep->posted_suspects = 0;
409 	Nfmep->uniqobs = 0;
410 	Nfmep->state = FME_NOTHING;
411 	Nfmep->pull = 0ULL;
412 	Nfmep->overflow = 0;
413 
414 	Nfmep->fmcase = fmcase;
415 	Nfmep->hdl = hdl;
416 
417 	if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) {
418 		Undiag_reason = UD_VAL_INSTFAIL;
419 		arg = ipath2str(e0class, e0ipp);
420 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
421 		nvlist_free(detector);
422 		FREE(arg);
423 		structconfig_free(Nfmep->config);
424 		destroy_fme_bufs(Nfmep);
425 		FREE(Nfmep);
426 		Nfmep = NULL;
427 		return (NULL);
428 	}
429 
430 	itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);
431 
432 	if ((Nfmep->e0 =
433 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
434 		Undiag_reason = UD_VAL_BADEVENTI;
435 		arg = ipath2str(e0class, e0ipp);
436 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
437 		nvlist_free(detector);
438 		FREE(arg);
439 		itree_free(Nfmep->eventtree);
440 		structconfig_free(Nfmep->config);
441 		destroy_fme_bufs(Nfmep);
442 		FREE(Nfmep);
443 		Nfmep = NULL;
444 		return (NULL);
445 	}
446 
447 	nvlist_free(detector);
448 	return (fme_ready(Nfmep));
449 }
450 
451 void
452 fme_fini(void)
453 {
454 	struct fme *sfp, *fp;
455 	struct case_list *ucasep, *nextcasep;
456 
457 	ucasep = Undiagablecaselist;
458 	while (ucasep != NULL) {
459 		nextcasep = ucasep->next;
460 		FREE(ucasep);
461 		ucasep = nextcasep;
462 	}
463 	Undiagablecaselist = NULL;
464 
465 	/* clean up closed fmes */
466 	fp = ClosedFMEs;
467 	while (fp != NULL) {
468 		sfp = fp->next;
469 		destroy_fme(fp);
470 		fp = sfp;
471 	}
472 	ClosedFMEs = NULL;
473 
474 	fp = FMElist;
475 	while (fp != NULL) {
476 		sfp = fp->next;
477 		destroy_fme(fp);
478 		fp = sfp;
479 	}
480 	FMElist = EFMElist = NULL;
481 
482 	/* if we were in the middle of creating an fme, free it now */
483 	if (Nfmep) {
484 		destroy_fme(Nfmep);
485 		Nfmep = NULL;
486 	}
487 }
488 
/*
 * Space allocated for an observation buffer name.  The longest name
 * built in this file is "observed%d.nvp", so 20 bytes allows for a
 * ridiculous 9,999,999 unique observations.
 */
#define	OBBUFNMSZ 20
494 
495 /*
496  *  serialize_observation
497  *
498  *  Create a recoverable version of the current observation
499  *  (f->ecurrent).  We keep a serialized version of each unique
500  *  observation in order that we may resume correctly the fme in the
501  *  correct state if eft or fmd crashes and we're restarted.
502  */
503 static void
504 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
505 {
506 	size_t pkdlen;
507 	char tmpbuf[OBBUFNMSZ];
508 	char *pkd = NULL;
509 	char *estr;
510 
511 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
512 	estr = ipath2str(cls, ipp);
513 	fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
514 	fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
515 	    strlen(estr) + 1);
516 	FREE(estr);
517 
518 	if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
519 		(void) snprintf(tmpbuf,
520 		    OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
521 		if (nvlist_xpack(fp->ecurrent->nvp,
522 		    &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
523 			out(O_DIE|O_SYS, "pack of observed nvl failed");
524 		fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
525 		fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
526 		FREE(pkd);
527 	}
528 
529 	fp->uniqobs++;
530 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
531 	    sizeof (fp->uniqobs));
532 }
533 
534 /*
535  *  init_fme_bufs -- We keep several bits of state about an fme for
536  *	use if eft or fmd crashes and we're restarted.
537  */
538 static void
539 init_fme_bufs(struct fme *fp)
540 {
541 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
542 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
543 	    sizeof (fp->pull));
544 
545 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
546 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
547 	    sizeof (fp->id));
548 
549 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
550 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
551 	    sizeof (fp->uniqobs));
552 
553 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
554 	    sizeof (fp->posted_suspects));
555 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
556 	    (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
557 }
558 
559 static void
560 destroy_fme_bufs(struct fme *fp)
561 {
562 	char tmpbuf[OBBUFNMSZ];
563 	int o;
564 
565 	platform_restore_config(fp->hdl, fp->fmcase);
566 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
567 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
568 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
569 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
570 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
571 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);
572 
573 	for (o = 0; o < fp->uniqobs; o++) {
574 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
575 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
576 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
577 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
578 	}
579 }
580 
581 /*
582  * reconstitute_observations -- convert a case's serialized observations
583  *	back into struct events.  Returns zero if all observations are
584  *	successfully reconstituted.
585  */
586 static int
587 reconstitute_observations(struct fme *fmep)
588 {
589 	struct event *ep;
590 	struct node *epnamenp = NULL;
591 	size_t pkdlen;
592 	char *pkd = NULL;
593 	char *tmpbuf = alloca(OBBUFNMSZ);
594 	char *sepptr;
595 	char *estr;
596 	int ocnt;
597 	int elen;
598 
599 	for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
600 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
601 		elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
602 		if (elen == 0) {
603 			out(O_ALTFP,
604 			    "reconstitute_observation: no %s buffer found.",
605 			    tmpbuf);
606 			Undiag_reason = UD_VAL_MISSINGOBS;
607 			break;
608 		}
609 
610 		estr = MALLOC(elen);
611 		fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
612 		sepptr = strchr(estr, '@');
613 		if (sepptr == NULL) {
614 			out(O_ALTFP,
615 			    "reconstitute_observation: %s: "
616 			    "missing @ separator in %s.",
617 			    tmpbuf, estr);
618 			Undiag_reason = UD_VAL_MISSINGPATH;
619 			FREE(estr);
620 			break;
621 		}
622 
623 		*sepptr = '\0';
624 		if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
625 			out(O_ALTFP,
626 			    "reconstitute_observation: %s: "
627 			    "trouble converting path string \"%s\" "
628 			    "to internal representation.",
629 			    tmpbuf, sepptr + 1);
630 			Undiag_reason = UD_VAL_MISSINGPATH;
631 			FREE(estr);
632 			break;
633 		}
634 
635 		/* construct the event */
636 		ep = itree_lookup(fmep->eventtree,
637 		    stable(estr), ipath(epnamenp));
638 		if (ep == NULL) {
639 			out(O_ALTFP,
640 			    "reconstitute_observation: %s: "
641 			    "lookup of  \"%s\" in itree failed.",
642 			    tmpbuf, ipath2str(estr, ipath(epnamenp)));
643 			Undiag_reason = UD_VAL_BADOBS;
644 			tree_free(epnamenp);
645 			FREE(estr);
646 			break;
647 		}
648 		tree_free(epnamenp);
649 
650 		/*
651 		 * We may or may not have a saved nvlist for the observation
652 		 */
653 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
654 		pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
655 		if (pkdlen != 0) {
656 			pkd = MALLOC(pkdlen);
657 			fmd_buf_read(fmep->hdl,
658 			    fmep->fmcase, tmpbuf, pkd, pkdlen);
659 			ASSERT(ep->nvp == NULL);
660 			if (nvlist_xunpack(pkd,
661 			    pkdlen, &ep->nvp, &Eft_nv_hdl) != 0)
662 				out(O_DIE|O_SYS, "pack of observed nvl failed");
663 			FREE(pkd);
664 		}
665 
666 		if (ocnt == 0)
667 			fmep->e0 = ep;
668 
669 		FREE(estr);
670 		fmep->ecurrent = ep;
671 		ep->count++;
672 
673 		/* link it into list of observations seen */
674 		ep->observations = fmep->observations;
675 		fmep->observations = ep;
676 	}
677 
678 	if (ocnt == fmep->uniqobs) {
679 		(void) fme_ready(fmep);
680 		return (0);
681 	}
682 
683 	return (1);
684 }
685 
686 /*
687  * restart_fme -- called during eft initialization.  Reconstitutes
688  *	an in-progress fme.
689  */
690 void
691 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress)
692 {
693 	nvlist_t *defect;
694 	struct case_list *bad;
695 	struct fme *fmep;
696 	struct cfgdata *cfgdata;
697 	size_t rawsz;
698 	struct event *ep;
699 	char *tmpbuf = alloca(OBBUFNMSZ);
700 	char *sepptr;
701 	char *estr;
702 	int elen;
703 	struct node *epnamenp = NULL;
704 	int init_size;
705 	extern int alloc_total();
706 	char *reason;
707 
708 	/*
709 	 * ignore solved or closed cases
710 	 */
711 	if (fmd_case_solved(hdl, inprogress) ||
712 	    fmd_case_closed(hdl, inprogress))
713 		return;
714 
715 	fmep = alloc_fme();
716 	fmep->fmcase = inprogress;
717 	fmep->hdl = hdl;
718 
719 	if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) {
720 		out(O_ALTFP, "restart_fme: no saved posted status");
721 		Undiag_reason = UD_VAL_MISSINGINFO;
722 		goto badcase;
723 	} else {
724 		fmd_buf_read(hdl, inprogress, WOBUF_POSTD,
725 		    (void *)&fmep->posted_suspects,
726 		    sizeof (fmep->posted_suspects));
727 	}
728 
729 	if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) {
730 		out(O_ALTFP, "restart_fme: no saved id");
731 		Undiag_reason = UD_VAL_MISSINGINFO;
732 		goto badcase;
733 	} else {
734 		fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id,
735 		    sizeof (fmep->id));
736 	}
737 	if (Nextid <= fmep->id)
738 		Nextid = fmep->id + 1;
739 
740 	out(O_ALTFP, "Replay FME %d", fmep->id);
741 
742 	if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) {
743 		out(O_ALTFP, "restart_fme: No config data");
744 		Undiag_reason = UD_VAL_MISSINGINFO;
745 		goto badcase;
746 	}
747 	fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz,
748 	    sizeof (size_t));
749 
750 	if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) {
751 		out(O_ALTFP, "restart_fme: No event zero");
752 		Undiag_reason = UD_VAL_MISSINGZERO;
753 		goto badcase;
754 	}
755 
756 	if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
757 		out(O_ALTFP, "restart_fme: no saved wait time");
758 		Undiag_reason = UD_VAL_MISSINGINFO;
759 		goto badcase;
760 	} else {
761 		fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
762 		    sizeof (fmep->pull));
763 	}
764 
765 	if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
766 		out(O_ALTFP, "restart_fme: no count of observations");
767 		Undiag_reason = UD_VAL_MISSINGINFO;
768 		goto badcase;
769 	} else {
770 		fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
771 		    (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
772 	}
773 
774 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed0");
775 	elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
776 	if (elen == 0) {
777 		out(O_ALTFP, "reconstitute_observation: no %s buffer found.",
778 		    tmpbuf);
779 		Undiag_reason = UD_VAL_MISSINGOBS;
780 		goto badcase;
781 	}
782 	estr = MALLOC(elen);
783 	fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
784 	sepptr = strchr(estr, '@');
785 	if (sepptr == NULL) {
786 		out(O_ALTFP, "reconstitute_observation: %s: "
787 		    "missing @ separator in %s.",
788 		    tmpbuf, estr);
789 		Undiag_reason = UD_VAL_MISSINGPATH;
790 		FREE(estr);
791 		goto badcase;
792 	}
793 	*sepptr = '\0';
794 	if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
795 		out(O_ALTFP, "reconstitute_observation: %s: "
796 		    "trouble converting path string \"%s\" "
797 		    "to internal representation.", tmpbuf, sepptr + 1);
798 		Undiag_reason = UD_VAL_MISSINGPATH;
799 		FREE(estr);
800 		goto badcase;
801 	}
802 	(void) prune_propagations(stable(estr), ipath(epnamenp));
803 	tree_free(epnamenp);
804 	FREE(estr);
805 
806 	init_size = alloc_total();
807 	out(O_ALTFP|O_STAMP, "start config_restore using %d bytes", init_size);
808 	cfgdata = MALLOC(sizeof (struct cfgdata));
809 	cfgdata->cooked = NULL;
810 	cfgdata->devcache = NULL;
811 	cfgdata->devidcache = NULL;
812 	cfgdata->tpcache = NULL;
813 	cfgdata->cpucache = NULL;
814 	cfgdata->raw_refcnt = 1;
815 
816 	if (rawsz > 0) {
817 		if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) {
818 			out(O_ALTFP, "restart_fme: Config data size mismatch");
819 			Undiag_reason = UD_VAL_CFGMISMATCH;
820 			goto badcase;
821 		}
822 		cfgdata->begin = MALLOC(rawsz);
823 		cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz;
824 		fmd_buf_read(hdl,
825 		    inprogress, WOBUF_CFG, cfgdata->begin, rawsz);
826 	} else {
827 		cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL;
828 	}
829 
830 	config_cook(cfgdata);
831 	fmep->config = cfgdata->cooked;
832 	config_free(cfgdata);
833 	out(O_ALTFP|O_STAMP, "config_restore added %d bytes",
834 	    alloc_total() - init_size);
835 
836 	if ((fmep->eventtree = itree_create(fmep->config)) == NULL) {
837 		/* case not properly saved or irretrievable */
838 		out(O_ALTFP, "restart_fme: NULL instance tree");
839 		Undiag_reason = UD_VAL_INSTFAIL;
840 		goto badcase;
841 	}
842 
843 	itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree);
844 
845 	if (reconstitute_observations(fmep) != 0)
846 		goto badcase;
847 
848 	out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id);
849 	for (ep = fmep->observations; ep; ep = ep->observations) {
850 		out(O_ALTFP|O_NONL, " ");
851 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
852 	}
853 	out(O_ALTFP, NULL);
854 
855 	Open_fme_count++;
856 
857 	/* give the diagnosis algorithm a shot at the new FME state */
858 	fme_eval(fmep, fmep->e0r);
859 	return;
860 
861 badcase:
862 	if (fmep->eventtree != NULL)
863 		itree_free(fmep->eventtree);
864 	if (fmep->config)
865 		structconfig_free(fmep->config);
866 	destroy_fme_bufs(fmep);
867 	FREE(fmep);
868 
869 	/*
870 	 * Since we're unable to restart the case, add it to the undiagable
871 	 * list and solve and close it as appropriate.
872 	 */
873 	bad = MALLOC(sizeof (struct case_list));
874 	bad->next = NULL;
875 
876 	if (Undiagablecaselist != NULL)
877 		bad->next = Undiagablecaselist;
878 	Undiagablecaselist = bad;
879 	bad->fmcase = inprogress;
880 
881 	out(O_ALTFP|O_NONL, "[case %s (unable to restart), ",
882 	    fmd_case_uuid(hdl, bad->fmcase));
883 
884 	if (fmd_case_solved(hdl, bad->fmcase)) {
885 		out(O_ALTFP|O_NONL, "already solved, ");
886 	} else {
887 		out(O_ALTFP|O_NONL, "solving, ");
888 		defect = fmd_nvl_create_fault(hdl,
889 		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
890 		reason = undiag_2reason_str(Undiag_reason, NULL);
891 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
892 		FREE(reason);
893 		fmd_case_add_suspect(hdl, bad->fmcase, defect);
894 		fmd_case_solve(hdl, bad->fmcase);
895 		Undiag_reason = UD_VAL_UNKNOWN;
896 	}
897 
898 	if (fmd_case_closed(hdl, bad->fmcase)) {
899 		out(O_ALTFP, "already closed ]");
900 	} else {
901 		out(O_ALTFP, "closing ]");
902 		fmd_case_close(hdl, bad->fmcase);
903 	}
904 }
905 
906 /*ARGSUSED*/
907 static void
908 globals_destructor(void *left, void *right, void *arg)
909 {
910 	struct evalue *evp = (struct evalue *)right;
911 	if (evp->t == NODEPTR)
912 		tree_free((struct node *)(uintptr_t)evp->v);
913 	evp->v = (uintptr_t)NULL;
914 	FREE(evp);
915 }
916 
917 void
918 destroy_fme(struct fme *f)
919 {
920 	stats_delete(f->Rcount);
921 	stats_delete(f->Hcallcount);
922 	stats_delete(f->Rcallcount);
923 	stats_delete(f->Ccallcount);
924 	stats_delete(f->Ecallcount);
925 	stats_delete(f->Tcallcount);
926 	stats_delete(f->Marrowcount);
927 	stats_delete(f->diags);
928 
929 	if (f->eventtree != NULL)
930 		itree_free(f->eventtree);
931 	if (f->config)
932 		structconfig_free(f->config);
933 	lut_free(f->globals, globals_destructor, NULL);
934 	FREE(f);
935 }
936 
937 static const char *
938 fme_state2str(enum fme_state s)
939 {
940 	switch (s) {
941 	case FME_NOTHING:	return ("NOTHING");
942 	case FME_WAIT:		return ("WAIT");
943 	case FME_CREDIBLE:	return ("CREDIBLE");
944 	case FME_DISPROVED:	return ("DISPROVED");
945 	case FME_DEFERRED:	return ("DEFERRED");
946 	default:		return ("UNKNOWN");
947 	}
948 }
949 
950 static int
951 is_problem(enum nametype t)
952 {
953 	return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
954 }
955 
956 static int
957 is_defect(enum nametype t)
958 {
959 	return (t == N_DEFECT);
960 }
961 
962 static int
963 is_upset(enum nametype t)
964 {
965 	return (t == N_UPSET);
966 }
967 
968 static void
969 fme_print(int flags, struct fme *fmep)
970 {
971 	struct event *ep;
972 
973 	out(flags, "Fault Management Exercise %d", fmep->id);
974 	out(flags, "\t       State: %s", fme_state2str(fmep->state));
975 	out(flags|O_NONL, "\t  Start time: ");
976 	ptree_timeval(flags|O_NONL, &fmep->ull);
977 	out(flags, NULL);
978 	if (fmep->wull) {
979 		out(flags|O_NONL, "\t   Wait time: ");
980 		ptree_timeval(flags|O_NONL, &fmep->wull);
981 		out(flags, NULL);
982 	}
983 	out(flags|O_NONL, "\t          E0: ");
984 	if (fmep->e0)
985 		itree_pevent_brief(flags|O_NONL, fmep->e0);
986 	else
987 		out(flags|O_NONL, "NULL");
988 	out(flags, NULL);
989 	out(flags|O_NONL, "\tObservations:");
990 	for (ep = fmep->observations; ep; ep = ep->observations) {
991 		out(flags|O_NONL, " ");
992 		itree_pevent_brief(flags|O_NONL, ep);
993 	}
994 	out(flags, NULL);
995 	out(flags|O_NONL, "\tSuspect list:");
996 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
997 		out(flags|O_NONL, " ");
998 		itree_pevent_brief(flags|O_NONL, ep);
999 	}
1000 	out(flags, NULL);
1001 	if (fmep->eventtree != NULL) {
1002 		out(flags|O_VERB2, "\t        Tree:");
1003 		itree_ptree(flags|O_VERB2, fmep->eventtree);
1004 	}
1005 }
1006 
1007 static struct node *
1008 pathstring2epnamenp(char *path)
1009 {
1010 	char *sep = "/";
1011 	struct node *ret;
1012 	char *ptr;
1013 
1014 	if ((ptr = strtok(path, sep)) == NULL)
1015 		out(O_DIE, "pathstring2epnamenp: invalid empty class");
1016 
1017 	ret = tree_iname(stable(ptr), NULL, 0);
1018 
1019 	while ((ptr = strtok(NULL, sep)) != NULL)
1020 		ret = tree_name_append(ret,
1021 		    tree_iname(stable(ptr), NULL, 0));
1022 
1023 	return (ret);
1024 }
1025 
1026 /*
1027  * for a given upset sp, increment the corresponding SERD engine.  if the
1028  * SERD engine trips, return the ename and ipp of the resulting ereport.
1029  * returns true if engine tripped and *enamep and *ippp were filled in.
1030  */
1031 static int
1032 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep,
1033     fmd_case_t *fmcase, struct event *sp, const char **enamep,
1034     const struct ipath **ippp)
1035 {
1036 	struct node *serdinst;
1037 	char *serdname;
1038 	char *serdresource;
1039 	char *serdclass;
1040 	struct node *nid;
1041 	struct serd_entry *newentp;
1042 	int i, serdn = -1, serdincrement = 1, len = 0;
1043 	char *serdsuffix = NULL, *serdt = NULL;
1044 	struct evalue *ep;
1045 
1046 	ASSERT(sp->t == N_UPSET);
1047 	ASSERT(ffep != NULL);
1048 
1049 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1050 	    (void *)"n", (lut_cmp)strcmp)) != NULL) {
1051 		ASSERT(ep->t == UINT64);
1052 		serdn = (int)ep->v;
1053 	}
1054 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1055 	    (void *)"t", (lut_cmp)strcmp)) != NULL) {
1056 		ASSERT(ep->t == STRING);
1057 		serdt = (char *)(uintptr_t)ep->v;
1058 	}
1059 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1060 	    (void *)"suffix", (lut_cmp)strcmp)) != NULL) {
1061 		ASSERT(ep->t == STRING);
1062 		serdsuffix = (char *)(uintptr_t)ep->v;
1063 	}
1064 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1065 	    (void *)"increment", (lut_cmp)strcmp)) != NULL) {
1066 		ASSERT(ep->t == UINT64);
1067 		serdincrement = (int)ep->v;
1068 	}
1069 
1070 	/*
1071 	 * obtain instanced SERD engine from the upset sp.  from this
1072 	 * derive serdname, the string used to identify the SERD engine.
1073 	 */
1074 	serdinst = eventprop_lookup(sp, L_engine);
1075 
1076 	if (serdinst == NULL)
1077 		return (-1);
1078 
1079 	len = strlen(serdinst->u.stmt.np->u.event.ename->u.name.s) + 1;
1080 	if (serdsuffix != NULL)
1081 		len += strlen(serdsuffix);
1082 	serdclass = MALLOC(len);
1083 	if (serdsuffix != NULL)
1084 		(void) snprintf(serdclass, len, "%s%s",
1085 		    serdinst->u.stmt.np->u.event.ename->u.name.s, serdsuffix);
1086 	else
1087 		(void) snprintf(serdclass, len, "%s",
1088 		    serdinst->u.stmt.np->u.event.ename->u.name.s);
1089 	serdresource = ipath2str(NULL,
1090 	    ipath(serdinst->u.stmt.np->u.event.epname));
1091 	len += strlen(serdresource) + 1;
1092 	serdname = MALLOC(len);
1093 	(void) snprintf(serdname, len, "%s@%s", serdclass, serdresource);
1094 	FREE(serdresource);
1095 
1096 	/* handle serd engine "id" property, if there is one */
1097 	if ((nid =
1098 	    lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) {
1099 		struct evalue *gval;
1100 		char suffixbuf[200];
1101 		char *suffix;
1102 		char *nserdname;
1103 		size_t nname;
1104 
1105 		out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname);
1106 		ptree_name_iter(O_ALTFP|O_NONL, nid);
1107 
1108 		ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t));
1109 
1110 		if ((gval = lut_lookup(fmep->globals,
1111 		    (void *)nid->u.globid.s, NULL)) == NULL) {
1112 			out(O_ALTFP, " undefined");
1113 		} else if (gval->t == UINT64) {
1114 			out(O_ALTFP, " %llu", gval->v);
1115 			(void) sprintf(suffixbuf, "%llu", gval->v);
1116 			suffix = suffixbuf;
1117 		} else {
1118 			out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v);
1119 			suffix = (char *)(uintptr_t)gval->v;
1120 		}
1121 
1122 		nname = strlen(serdname) + strlen(suffix) + 2;
1123 		nserdname = MALLOC(nname);
1124 		(void) snprintf(nserdname, nname, "%s:%s", serdname, suffix);
1125 		FREE(serdname);
1126 		serdname = nserdname;
1127 	}
1128 
1129 	/*
1130 	 * if the engine is empty, and we have an override for n/t then
1131 	 * destroy and recreate it.
1132 	 */
1133 	if ((serdn != -1 || serdt != NULL) && fmd_serd_exists(hdl, serdname) &&
1134 	    fmd_serd_empty(hdl, serdname))
1135 		fmd_serd_destroy(hdl, serdname);
1136 
1137 	if (!fmd_serd_exists(hdl, serdname)) {
1138 		struct node *nN, *nT;
1139 		const char *s;
1140 		struct node *nodep;
1141 		struct config *cp;
1142 		char *path;
1143 		uint_t nval;
1144 		hrtime_t tval;
1145 		int i;
1146 		char *ptr;
1147 		int got_n_override = 0, got_t_override = 0;
1148 
1149 		/* no SERD engine yet, so create it */
1150 		nodep = serdinst->u.stmt.np->u.event.epname;
1151 		path = ipath2str(NULL, ipath(nodep));
1152 		cp = config_lookup(fmep->config, path, 0);
1153 		FREE((void *)path);
1154 
1155 		/*
1156 		 * We allow serd paramaters to be overridden, either from
1157 		 * eft.conf file values (if Serd_Override is set) or from
1158 		 * driver properties (for "serd.io.device" engines).
1159 		 */
1160 		if (Serd_Override != NULL) {
1161 			char *save_ptr, *ptr1, *ptr2, *ptr3;
1162 			ptr3 = save_ptr = STRDUP(Serd_Override);
1163 			while (*ptr3 != '\0') {
1164 				ptr1 = strchr(ptr3, ',');
1165 				*ptr1 = '\0';
1166 				if (strcmp(ptr3, serdclass) == 0) {
1167 					ptr2 =  strchr(ptr1 + 1, ',');
1168 					*ptr2 = '\0';
1169 					nval = atoi(ptr1 + 1);
1170 					out(O_ALTFP, "serd override %s_n %d",
1171 					    serdclass, nval);
1172 					ptr3 =  strchr(ptr2 + 1, ' ');
1173 					if (ptr3)
1174 						*ptr3 = '\0';
1175 					ptr = STRDUP(ptr2 + 1);
1176 					out(O_ALTFP, "serd override %s_t %s",
1177 					    serdclass, ptr);
1178 					got_n_override = 1;
1179 					got_t_override = 1;
1180 					break;
1181 				} else {
1182 					ptr2 =  strchr(ptr1 + 1, ',');
1183 					ptr3 =  strchr(ptr2 + 1, ' ');
1184 					if (ptr3 == NULL)
1185 						break;
1186 				}
1187 				ptr3++;
1188 			}
1189 			FREE(save_ptr);
1190 		}
1191 
1192 		if (cp && got_n_override == 0) {
1193 			/*
1194 			 * convert serd engine class into property name
1195 			 */
1196 			char *prop_name = MALLOC(strlen(serdclass) + 3);
1197 			for (i = 0; i < strlen(serdclass); i++) {
1198 				if (serdclass[i] == '.')
1199 					prop_name[i] = '_';
1200 				else
1201 					prop_name[i] = serdclass[i];
1202 			}
1203 			prop_name[i++] = '_';
1204 			prop_name[i++] = 'n';
1205 			prop_name[i] = '\0';
1206 			if (s = config_getprop(cp, prop_name)) {
1207 				nval = atoi(s);
1208 				out(O_ALTFP, "serd override %s_n %s",
1209 				    serdclass, s);
1210 				got_n_override = 1;
1211 			}
1212 			prop_name[i - 1] = 't';
1213 			if (s = config_getprop(cp, prop_name)) {
1214 				ptr = STRDUP(s);
1215 				out(O_ALTFP, "serd override %s_t %s",
1216 				    serdclass, s);
1217 				got_t_override = 1;
1218 			}
1219 			FREE(prop_name);
1220 		}
1221 
1222 		if (serdn != -1 && got_n_override == 0) {
1223 			nval = serdn;
1224 			out(O_ALTFP, "serd override %s_n %d", serdclass, serdn);
1225 			got_n_override = 1;
1226 		}
1227 		if (serdt != NULL && got_t_override == 0) {
1228 			ptr = STRDUP(serdt);
1229 			out(O_ALTFP, "serd override %s_t %s", serdclass, serdt);
1230 			got_t_override = 1;
1231 		}
1232 
1233 		if (!got_n_override) {
1234 			nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N,
1235 			    NULL);
1236 			ASSERT(nN->t == T_NUM);
1237 			nval = (uint_t)nN->u.ull;
1238 		}
1239 		if (!got_t_override) {
1240 			nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T,
1241 			    NULL);
1242 			ASSERT(nT->t == T_TIMEVAL);
1243 			tval = (hrtime_t)nT->u.ull;
1244 		} else {
1245 			const unsigned long long *ullp;
1246 			const char *suffix;
1247 			int len;
1248 
1249 			len = strspn(ptr, "0123456789");
1250 			suffix = stable(&ptr[len]);
1251 			ullp = (unsigned long long *)lut_lookup(Timesuffixlut,
1252 			    (void *)suffix, NULL);
1253 			ptr[len] = '\0';
1254 			tval = strtoull(ptr, NULL, 0) * (ullp ? *ullp : 1ll);
1255 			FREE(ptr);
1256 		}
1257 		fmd_serd_create(hdl, serdname, nval, tval);
1258 	}
1259 
1260 	newentp = MALLOC(sizeof (*newentp));
1261 	newentp->ename = stable(serdclass);
1262 	FREE(serdclass);
1263 	newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname);
1264 	newentp->hdl = hdl;
1265 	if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) {
1266 		SerdEngines = lut_add(SerdEngines, (void *)newentp,
1267 		    (void *)newentp, (lut_cmp)serd_cmp);
1268 		Serd_need_save = 1;
1269 		serd_save();
1270 	} else {
1271 		FREE(newentp);
1272 	}
1273 
1274 
1275 	/*
1276 	 * increment SERD engine.  if engine fires, reset serd
1277 	 * engine and return trip_strcode if required.
1278 	 */
1279 	for (i = 0; i < serdincrement; i++) {
1280 		if (fmd_serd_record(hdl, serdname, ffep)) {
1281 			fmd_case_add_serd(hdl, fmcase, serdname);
1282 			fmd_serd_reset(hdl, serdname);
1283 
1284 			if (ippp) {
1285 				struct node *tripinst =
1286 				    lut_lookup(serdinst->u.stmt.lutp,
1287 				    (void *)L_trip, NULL);
1288 				ASSERT(tripinst != NULL);
1289 				*enamep = tripinst->u.event.ename->u.name.s;
1290 				*ippp = ipath(tripinst->u.event.epname);
1291 				out(O_ALTFP|O_NONL,
1292 				    "[engine fired: %s, sending: ", serdname);
1293 				ipath_print(O_ALTFP|O_NONL, *enamep, *ippp);
1294 				out(O_ALTFP, "]");
1295 			} else {
1296 				out(O_ALTFP, "[engine fired: %s, no trip]",
1297 				    serdname);
1298 			}
1299 			FREE(serdname);
1300 			return (1);
1301 		}
1302 	}
1303 
1304 	FREE(serdname);
1305 	return (0);
1306 }
1307 
1308 /*
1309  * search a suspect list for upsets.  feed each upset to serd_eval() and
1310  * build up tripped[], an array of ereports produced by the firing of
1311  * any SERD engines.  then feed each ereport back into
1312  * fme_receive_report().
1313  *
1314  * returns ntrip, the number of these ereports produced.
1315  */
1316 static int
1317 upsets_eval(struct fme *fmep, fmd_event_t *ffep)
1318 {
1319 	/* we build an array of tripped ereports that we send ourselves */
1320 	struct {
1321 		const char *ename;
1322 		const struct ipath *ipp;
1323 	} *tripped;
1324 	struct event *sp;
1325 	int ntrip, nupset, i;
1326 
1327 	/*
1328 	 * count the number of upsets to determine the upper limit on
1329 	 * expected trip ereport strings.  remember that one upset can
1330 	 * lead to at most one ereport.
1331 	 */
1332 	nupset = 0;
1333 	for (sp = fmep->suspects; sp; sp = sp->suspects) {
1334 		if (sp->t == N_UPSET)
1335 			nupset++;
1336 	}
1337 
1338 	if (nupset == 0)
1339 		return (0);
1340 
1341 	/*
1342 	 * get to this point if we have upsets and expect some trip
1343 	 * ereports
1344 	 */
1345 	tripped = alloca(sizeof (*tripped) * nupset);
1346 	bzero((void *)tripped, sizeof (*tripped) * nupset);
1347 
1348 	ntrip = 0;
1349 	for (sp = fmep->suspects; sp; sp = sp->suspects)
1350 		if (sp->t == N_UPSET &&
1351 		    serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp,
1352 		    &tripped[ntrip].ename, &tripped[ntrip].ipp) == 1)
1353 			ntrip++;
1354 
1355 	for (i = 0; i < ntrip; i++) {
1356 		struct event *ep, *nep;
1357 		struct fme *nfmep;
1358 		fmd_case_t *fmcase;
1359 		const struct ipath *ipp;
1360 		const char *eventstring;
1361 		int prev_verbose;
1362 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1363 		enum fme_state state;
1364 
1365 		/*
1366 		 * First try and evaluate a case with the trip ereport plus
1367 		 * all the other ereports that cause the trip. If that fails
1368 		 * to evaluate then try again with just this ereport on its own.
1369 		 */
1370 		out(O_ALTFP|O_NONL, "fme_receive_report_serd: ");
1371 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1372 		out(O_ALTFP|O_STAMP, NULL);
1373 		ep = fmep->e0;
1374 		eventstring = ep->enode->u.event.ename->u.name.s;
1375 		ipp = ep->ipp;
1376 
1377 		/*
1378 		 * create a duplicate fme and case
1379 		 */
1380 		fmcase = fmd_case_open(fmep->hdl, NULL);
1381 		out(O_ALTFP|O_NONL, "duplicate fme for event [");
1382 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1383 		out(O_ALTFP, " ]");
1384 
1385 		if ((nfmep = newfme(eventstring, ipp, fmep->hdl,
1386 		    fmcase, ffep, ep->nvp)) == NULL) {
1387 			out(O_ALTFP|O_NONL, "[");
1388 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1389 			out(O_ALTFP, " CANNOT DIAGNOSE]");
1390 			continue;
1391 		}
1392 
1393 		Open_fme_count++;
1394 		nfmep->pull = fmep->pull;
1395 		init_fme_bufs(nfmep);
1396 		out(O_ALTFP|O_NONL, "[");
1397 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1398 		out(O_ALTFP, " created FME%d, case %s]", nfmep->id,
1399 		    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
1400 		if (ffep) {
1401 			fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep);
1402 			fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep);
1403 			nfmep->e0r = ffep;
1404 		}
1405 
1406 		/*
1407 		 * add the original ereports
1408 		 */
1409 		for (ep = fmep->observations; ep; ep = ep->observations) {
1410 			eventstring = ep->enode->u.event.ename->u.name.s;
1411 			ipp = ep->ipp;
1412 			out(O_ALTFP|O_NONL, "adding event [");
1413 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1414 			out(O_ALTFP, " ]");
1415 			nep = itree_lookup(nfmep->eventtree, eventstring, ipp);
1416 			if (nep->count++ == 0) {
1417 				nep->observations = nfmep->observations;
1418 				nfmep->observations = nep;
1419 				serialize_observation(nfmep, eventstring, ipp);
1420 				nep->nvp = evnv_dupnvl(ep->nvp);
1421 			}
1422 			if (ep->ffep && ep->ffep != ffep)
1423 				fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase,
1424 				    ep->ffep);
1425 			stats_counter_bump(nfmep->Rcount);
1426 		}
1427 
1428 		/*
1429 		 * add the serd trigger ereport
1430 		 */
1431 		if ((ep = itree_lookup(nfmep->eventtree, tripped[i].ename,
1432 		    tripped[i].ipp)) == NULL) {
1433 			/*
1434 			 * The trigger ereport is not in the instance tree. It
1435 			 * was presumably removed by prune_propagations() as
1436 			 * this combination of events is not present in the
1437 			 * rules.
1438 			 */
1439 			out(O_ALTFP, "upsets_eval: e0 not in instance tree");
1440 			Undiag_reason = UD_VAL_BADEVENTI;
1441 			goto retry_lone_ereport;
1442 		}
1443 		out(O_ALTFP|O_NONL, "adding event [");
1444 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1445 		out(O_ALTFP, " ]");
1446 		nfmep->ecurrent = ep;
1447 		ep->nvp = NULL;
1448 		ep->count = 1;
1449 		ep->observations = nfmep->observations;
1450 		nfmep->observations = ep;
1451 
1452 		/*
1453 		 * just peek first.
1454 		 */
1455 		nfmep->peek = 1;
1456 		prev_verbose = Verbose;
1457 		if (Debug == 0)
1458 			Verbose = 0;
1459 		lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep);
1460 		state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay);
1461 		nfmep->peek = 0;
1462 		Verbose = prev_verbose;
1463 		if (state == FME_DISPROVED) {
1464 			out(O_ALTFP, "upsets_eval: hypothesis disproved");
1465 			Undiag_reason = UD_VAL_UNSOLVD;
1466 retry_lone_ereport:
1467 			/*
1468 			 * However the trigger ereport on its own might be
1469 			 * diagnosable, so check for that. Undo the new fme
1470 			 * and case we just created and call fme_receive_report.
1471 			 */
1472 			out(O_ALTFP|O_NONL, "[");
1473 			ipath_print(O_ALTFP|O_NONL, tripped[i].ename,
1474 			    tripped[i].ipp);
1475 			out(O_ALTFP, " retrying with just trigger ereport]");
1476 			itree_free(nfmep->eventtree);
1477 			nfmep->eventtree = NULL;
1478 			structconfig_free(nfmep->config);
1479 			nfmep->config = NULL;
1480 			destroy_fme_bufs(nfmep);
1481 			fmd_case_close(nfmep->hdl, nfmep->fmcase);
1482 			fme_receive_report(fmep->hdl, ffep,
1483 			    tripped[i].ename, tripped[i].ipp, NULL);
1484 			continue;
1485 		}
1486 
1487 		/*
1488 		 * and evaluate
1489 		 */
1490 		serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp);
1491 		fme_eval(nfmep, ffep);
1492 	}
1493 
1494 	return (ntrip);
1495 }
1496 
1497 /*
1498  * fme_receive_external_report -- call when an external ereport comes in
1499  *
1500  * this routine just converts the relevant information from the ereport
1501  * into a format used internally and passes it on to fme_receive_report().
1502  */
1503 void
1504 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1505     const char *class)
1506 {
1507 	struct node		*epnamenp;
1508 	fmd_case_t		*fmcase;
1509 	const struct ipath	*ipp;
1510 	nvlist_t		*detector = NULL;
1511 
1512 	class = stable(class);
1513 
1514 	/* Get the component path from the ereport */
1515 	epnamenp = platform_getpath(nvl);
1516 
1517 	/* See if we ended up without a path. */
1518 	if (epnamenp == NULL) {
1519 		/* See if class permits silent discard on unknown component. */
1520 		if (lut_lookup(Ereportenames_discard, (void *)class, NULL)) {
1521 			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
1522 			    "to component path, but silent discard allowed.",
1523 			    class);
1524 		} else {
1525 			/*
1526 			 * XFILE: Failure to find a component is bad unless
1527 			 * 'discard_if_config_unknown=1' was specified in the
1528 			 * ereport definition. Indicate undiagnosable.
1529 			 */
1530 			Undiag_reason = UD_VAL_NOPATH;
1531 			fmcase = fmd_case_open(hdl, NULL);
1532 
1533 			/*
1534 			 * We don't have a component path here (which means that
1535 			 * the detector was not in hc-scheme and couldn't be
1536 			 * converted to hc-scheme. Report the raw detector as
1537 			 * the suspect resource if there is one.
1538 			 */
1539 			(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
1540 			    &detector);
1541 			publish_undiagnosable(hdl, ffep, fmcase, detector,
1542 			    (char *)class);
1543 		}
1544 		return;
1545 	}
1546 
1547 	ipp = ipath(epnamenp);
1548 	tree_free(epnamenp);
1549 	fme_receive_report(hdl, ffep, class, ipp, nvl);
1550 }
1551 
1552 /*ARGSUSED*/
1553 void
1554 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1555     const char *eventstring)
1556 {
1557 	char *uuid;
1558 	nvlist_t **nva;
1559 	uint_t nvc;
1560 	const struct ipath *ipp;
1561 
1562 	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
1563 	    nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
1564 	    &nva, &nvc) != 0) {
1565 		out(O_ALTFP, "No uuid or fault list for list.repaired event");
1566 		return;
1567 	}
1568 
1569 	out(O_ALTFP, "Processing list.repaired from case %s", uuid);
1570 
1571 	while (nvc-- != 0) {
1572 		/*
1573 		 * Reset any istat or serd engine associated with this path.
1574 		 */
1575 		char *path;
1576 
1577 		if ((ipp = platform_fault2ipath(*nva++)) == NULL)
1578 			continue;
1579 
1580 		path = ipath2str(NULL, ipp);
1581 		out(O_ALTFP, "fme_receive_repair_list: resetting state for %s",
1582 		    path);
1583 		FREE(path);
1584 
1585 		lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp);
1586 		istat_save();
1587 
1588 		lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp);
1589 		serd_save();
1590 	}
1591 }
1592 
1593 /*ARGSUSED*/
1594 void
1595 fme_receive_topology_change(void)
1596 {
1597 	lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL);
1598 	istat_save();
1599 
1600 	lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL);
1601 	serd_save();
1602 }
1603 
1604 static int mark_arrows(struct fme *fmep, struct event *ep, int mark,
1605     unsigned long long at_latest_by, unsigned long long *pdelay, int keep);
1606 
1607 /* ARGSUSED */
1608 static void
1609 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
1610 {
1611 	struct bubble *bp;
1612 	struct arrowlist *ap;
1613 
1614 	ep->cached_state = 0;
1615 	ep->keep_in_tree = 0;
1616 	for (bp = itree_next_bubble(ep, NULL); bp;
1617 	    bp = itree_next_bubble(ep, bp)) {
1618 		if (bp->t != B_FROM)
1619 			continue;
1620 		bp->mark = 0;
1621 		for (ap = itree_next_arrow(bp, NULL); ap;
1622 		    ap = itree_next_arrow(bp, ap))
1623 			ap->arrowp->mark = 0;
1624 	}
1625 }
1626 
1627 static void
1628 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
1629     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
1630 {
1631 	struct event *ep;
1632 	struct fme *fmep = NULL;
1633 	struct fme *ofmep = NULL;
1634 	struct fme *cfmep, *svfmep;
1635 	int matched = 0;
1636 	nvlist_t *defect;
1637 	fmd_case_t *fmcase;
1638 	char *reason;
1639 
1640 	out(O_ALTFP|O_NONL, "fme_receive_report: ");
1641 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1642 	out(O_ALTFP|O_STAMP, NULL);
1643 
1644 	/* decide which FME it goes to */
1645 	for (fmep = FMElist; fmep; fmep = fmep->next) {
1646 		int prev_verbose;
1647 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1648 		enum fme_state state;
1649 		nvlist_t *pre_peek_nvp = NULL;
1650 
1651 		if (fmep->overflow) {
1652 			if (!(fmd_case_closed(fmep->hdl, fmep->fmcase)))
1653 				ofmep = fmep;
1654 
1655 			continue;
1656 		}
1657 
1658 		/*
1659 		 * ignore solved or closed cases
1660 		 */
1661 		if (fmep->posted_suspects ||
1662 		    fmd_case_solved(fmep->hdl, fmep->fmcase) ||
1663 		    fmd_case_closed(fmep->hdl, fmep->fmcase))
1664 			continue;
1665 
1666 		/* look up event in event tree for this FME */
1667 		if ((ep = itree_lookup(fmep->eventtree,
1668 		    eventstring, ipp)) == NULL)
1669 			continue;
1670 
1671 		/* note observation */
1672 		fmep->ecurrent = ep;
1673 		if (ep->count++ == 0) {
1674 			/* link it into list of observations seen */
1675 			ep->observations = fmep->observations;
1676 			fmep->observations = ep;
1677 			ep->nvp = evnv_dupnvl(nvl);
1678 		} else {
1679 			/* use new payload values for peek */
1680 			pre_peek_nvp = ep->nvp;
1681 			ep->nvp = evnv_dupnvl(nvl);
1682 		}
1683 
1684 		/* tell hypothesise() not to mess with suspect list */
1685 		fmep->peek = 1;
1686 
1687 		/* don't want this to be verbose (unless Debug is set) */
1688 		prev_verbose = Verbose;
1689 		if (Debug == 0)
1690 			Verbose = 0;
1691 
1692 		lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
1693 		state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
1694 
1695 		fmep->peek = 0;
1696 
1697 		/* put verbose flag back */
1698 		Verbose = prev_verbose;
1699 
1700 		if (state != FME_DISPROVED) {
1701 			/* found an FME that explains the ereport */
1702 			matched++;
1703 			out(O_ALTFP|O_NONL, "[");
1704 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1705 			out(O_ALTFP, " explained by FME%d]", fmep->id);
1706 
1707 			nvlist_free(pre_peek_nvp);
1708 
1709 			if (ep->count == 1)
1710 				serialize_observation(fmep, eventstring, ipp);
1711 
1712 			if (ffep) {
1713 				fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1714 				ep->ffep = ffep;
1715 			}
1716 
1717 			stats_counter_bump(fmep->Rcount);
1718 
1719 			/* re-eval FME */
1720 			fme_eval(fmep, ffep);
1721 		} else {
1722 
1723 			/* not a match, undo noting of observation */
1724 			fmep->ecurrent = NULL;
1725 			if (--ep->count == 0) {
1726 				/* unlink it from observations */
1727 				fmep->observations = ep->observations;
1728 				ep->observations = NULL;
1729 				nvlist_free(ep->nvp);
1730 				ep->nvp = NULL;
1731 			} else {
1732 				nvlist_free(ep->nvp);
1733 				ep->nvp = pre_peek_nvp;
1734 			}
1735 		}
1736 	}
1737 
1738 	if (matched)
1739 		return;	/* explained by at least one existing FME */
1740 
1741 	/* clean up closed fmes */
1742 	cfmep = ClosedFMEs;
1743 	while (cfmep != NULL) {
1744 		svfmep = cfmep->next;
1745 		destroy_fme(cfmep);
1746 		cfmep = svfmep;
1747 	}
1748 	ClosedFMEs = NULL;
1749 
1750 	if (ofmep) {
1751 		out(O_ALTFP|O_NONL, "[");
1752 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1753 		out(O_ALTFP, " ADDING TO OVERFLOW FME]");
1754 		if (ffep)
1755 			fmd_case_add_ereport(hdl, ofmep->fmcase, ffep);
1756 
1757 		return;
1758 
1759 	} else if (Max_fme && (Open_fme_count >= Max_fme)) {
1760 		out(O_ALTFP|O_NONL, "[");
1761 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1762 		out(O_ALTFP, " MAX OPEN FME REACHED]");
1763 
1764 		fmcase = fmd_case_open(hdl, NULL);
1765 
1766 		/* Create overflow fme */
1767 		if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep,
1768 		    nvl)) == NULL) {
1769 			out(O_ALTFP|O_NONL, "[");
1770 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1771 			out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]");
1772 			return;
1773 		}
1774 
1775 		Open_fme_count++;
1776 
1777 		init_fme_bufs(fmep);
1778 		fmep->overflow = B_TRUE;
1779 
1780 		if (ffep)
1781 			fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1782 
1783 		Undiag_reason = UD_VAL_MAXFME;
1784 		defect = fmd_nvl_create_fault(hdl,
1785 		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
1786 		reason = undiag_2reason_str(Undiag_reason, NULL);
1787 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
1788 		FREE(reason);
1789 		fmd_case_add_suspect(hdl, fmep->fmcase, defect);
1790 		fmd_case_solve(hdl, fmep->fmcase);
1791 		Undiag_reason = UD_VAL_UNKNOWN;
1792 		return;
1793 	}
1794 
1795 	/* open a case */
1796 	fmcase = fmd_case_open(hdl, NULL);
1797 
1798 	/* start a new FME */
1799 	if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep, nvl)) == NULL) {
1800 		out(O_ALTFP|O_NONL, "[");
1801 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1802 		out(O_ALTFP, " CANNOT DIAGNOSE]");
1803 		return;
1804 	}
1805 
1806 	Open_fme_count++;
1807 
1808 	init_fme_bufs(fmep);
1809 
1810 	out(O_ALTFP|O_NONL, "[");
1811 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1812 	out(O_ALTFP, " created FME%d, case %s]", fmep->id,
1813 	    fmd_case_uuid(hdl, fmep->fmcase));
1814 
1815 	ep = fmep->e0;
1816 	ASSERT(ep != NULL);
1817 
1818 	/* note observation */
1819 	fmep->ecurrent = ep;
1820 	if (ep->count++ == 0) {
1821 		/* link it into list of observations seen */
1822 		ep->observations = fmep->observations;
1823 		fmep->observations = ep;
1824 		ep->nvp = evnv_dupnvl(nvl);
1825 		serialize_observation(fmep, eventstring, ipp);
1826 	} else {
1827 		/* new payload overrides any previous */
1828 		nvlist_free(ep->nvp);
1829 		ep->nvp = evnv_dupnvl(nvl);
1830 	}
1831 
1832 	stats_counter_bump(fmep->Rcount);
1833 
1834 	if (ffep) {
1835 		fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1836 		fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
1837 		fmep->e0r = ffep;
1838 		ep->ffep = ffep;
1839 	}
1840 
1841 	/* give the diagnosis algorithm a shot at the new FME state */
1842 	fme_eval(fmep, ffep);
1843 }
1844 
1845 void
1846 fme_status(int flags)
1847 {
1848 	struct fme *fmep;
1849 
1850 	if (FMElist == NULL) {
1851 		out(flags, "No fault management exercises underway.");
1852 		return;
1853 	}
1854 
1855 	for (fmep = FMElist; fmep; fmep = fmep->next)
1856 		fme_print(flags, fmep);
1857 }
1858 
1859 /*
1860  * "indent" routines used mostly for nicely formatted debug output, but also
1861  * for sanity checking for infinite recursion bugs.
1862  */
1863 
1864 #define	MAX_INDENT 1024
1865 static const char *indent_s[MAX_INDENT];
1866 static int current_indent;
1867 
1868 static void
1869 indent_push(const char *s)
1870 {
1871 	if (current_indent < MAX_INDENT)
1872 		indent_s[current_indent++] = s;
1873 	else
1874 		out(O_DIE, "unexpected recursion depth (%d)", current_indent);
1875 }
1876 
1877 static void
1878 indent_set(const char *s)
1879 {
1880 	current_indent = 0;
1881 	indent_push(s);
1882 }
1883 
1884 static void
1885 indent_pop(void)
1886 {
1887 	if (current_indent > 0)
1888 		current_indent--;
1889 	else
1890 		out(O_DIE, "recursion underflow");
1891 }
1892 
1893 static void
1894 indent(void)
1895 {
1896 	int i;
1897 	if (!Verbose)
1898 		return;
1899 	for (i = 0; i < current_indent; i++)
1900 		out(O_ALTFP|O_VERB|O_NONL, indent_s[i]);
1901 }
1902 
1903 #define	SLNEW		1
1904 #define	SLCHANGED	2
1905 #define	SLWAIT		3
1906 #define	SLDISPROVED	4
1907 
1908 static void
1909 print_suspects(int circumstance, struct fme *fmep)
1910 {
1911 	struct event *ep;
1912 
1913 	out(O_ALTFP|O_NONL, "[");
1914 	if (circumstance == SLCHANGED) {
1915 		out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, "
1916 		    "suspect list:", fmep->id, fme_state2str(fmep->state));
1917 	} else if (circumstance == SLWAIT) {
1918 		out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id,
1919 		    fmep->timer);
1920 		ptree_timeval(O_ALTFP|O_NONL, &fmep->wull);
1921 	} else if (circumstance == SLDISPROVED) {
1922 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id);
1923 	} else {
1924 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id);
1925 	}
1926 
1927 	if (circumstance == SLWAIT || circumstance == SLDISPROVED) {
1928 		out(O_ALTFP, "]");
1929 		return;
1930 	}
1931 
1932 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
1933 		out(O_ALTFP|O_NONL, " ");
1934 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
1935 	}
1936 	out(O_ALTFP, "]");
1937 }
1938 
1939 static struct node *
1940 eventprop_lookup(struct event *ep, const char *propname)
1941 {
1942 	return (lut_lookup(ep->props, (void *)propname, NULL));
1943 }
1944 
1945 #define	MAXDIGITIDX	23
1946 static char numbuf[MAXDIGITIDX + 1];
1947 
1948 static int
1949 node2uint(struct node *n, uint_t *valp)
1950 {
1951 	struct evalue value;
1952 	struct lut *globals = NULL;
1953 
1954 	if (n == NULL)
1955 		return (1);
1956 
1957 	/*
1958 	 * check value.v since we are being asked to convert an unsigned
1959 	 * long long int to an unsigned int
1960 	 */
1961 	if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) ||
1962 	    value.t != UINT64 || value.v > (1ULL << 32))
1963 		return (1);
1964 
1965 	*valp = (uint_t)value.v;
1966 
1967 	return (0);
1968 }
1969 
1970 static nvlist_t *
1971 node2fmri(struct node *n)
1972 {
1973 	nvlist_t **pa, *f, *p;
1974 	struct node *nc;
1975 	uint_t depth = 0;
1976 	char *numstr, *nullbyte;
1977 	char *failure;
1978 	int err, i;
1979 
1980 	/* XXX do we need to be able to handle a non-T_NAME node? */
1981 	if (n == NULL || n->t != T_NAME)
1982 		return (NULL);
1983 
1984 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
1985 		if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM)
1986 			break;
1987 		depth++;
1988 	}
1989 
1990 	if (nc != NULL) {
1991 		/* We bailed early, something went wrong */
1992 		return (NULL);
1993 	}
1994 
1995 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
1996 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
1997 	pa = alloca(depth * sizeof (nvlist_t *));
1998 	for (i = 0; i < depth; i++)
1999 		pa[i] = NULL;
2000 
2001 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
2002 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
2003 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
2004 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
2005 	if (err != 0) {
2006 		failure = "basic construction of FMRI failed";
2007 		goto boom;
2008 	}
2009 
2010 	numbuf[MAXDIGITIDX] = '\0';
2011 	nullbyte = &numbuf[MAXDIGITIDX];
2012 	i = 0;
2013 
2014 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
2015 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2016 		if (err != 0) {
2017 			failure = "alloc of an hc-pair failed";
2018 			goto boom;
2019 		}
2020 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s);
2021 		numstr = ulltostr(nc->u.name.child->u.ull, nullbyte);
2022 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2023 		if (err != 0) {
2024 			failure = "construction of an hc-pair failed";
2025 			goto boom;
2026 		}
2027 		pa[i++] = p;
2028 	}
2029 
2030 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2031 	if (err == 0) {
2032 		for (i = 0; i < depth; i++)
2033 			nvlist_free(pa[i]);
2034 		return (f);
2035 	}
2036 	failure = "addition of hc-pair array to FMRI failed";
2037 
2038 boom:
2039 	for (i = 0; i < depth; i++)
2040 		nvlist_free(pa[i]);
2041 	nvlist_free(f);
2042 	out(O_DIE, "%s", failure);
2043 	/*NOTREACHED*/
2044 	return (NULL);
2045 }
2046 
2047 /* an ipath cache entry is an array of these, with s==NULL at the end */
2048 struct ipath {
2049 	const char *s;	/* component name (in stable) */
2050 	int i;		/* instance number */
2051 };
2052 
2053 static nvlist_t *
2054 ipath2fmri(struct ipath *ipath)
2055 {
2056 	nvlist_t **pa, *f, *p;
2057 	uint_t depth = 0;
2058 	char *numstr, *nullbyte;
2059 	char *failure;
2060 	int err, i;
2061 	struct ipath *ipp;
2062 
2063 	for (ipp = ipath; ipp->s != NULL; ipp++)
2064 		depth++;
2065 
2066 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
2067 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
2068 	pa = alloca(depth * sizeof (nvlist_t *));
2069 	for (i = 0; i < depth; i++)
2070 		pa[i] = NULL;
2071 
2072 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
2073 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
2074 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
2075 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
2076 	if (err != 0) {
2077 		failure = "basic construction of FMRI failed";
2078 		goto boom;
2079 	}
2080 
2081 	numbuf[MAXDIGITIDX] = '\0';
2082 	nullbyte = &numbuf[MAXDIGITIDX];
2083 	i = 0;
2084 
2085 	for (ipp = ipath; ipp->s != NULL; ipp++) {
2086 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2087 		if (err != 0) {
2088 			failure = "alloc of an hc-pair failed";
2089 			goto boom;
2090 		}
2091 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s);
2092 		numstr = ulltostr(ipp->i, nullbyte);
2093 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2094 		if (err != 0) {
2095 			failure = "construction of an hc-pair failed";
2096 			goto boom;
2097 		}
2098 		pa[i++] = p;
2099 	}
2100 
2101 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2102 	if (err == 0) {
2103 		for (i = 0; i < depth; i++)
2104 			nvlist_free(pa[i]);
2105 		return (f);
2106 	}
2107 	failure = "addition of hc-pair array to FMRI failed";
2108 
2109 boom:
2110 	for (i = 0; i < depth; i++)
2111 		nvlist_free(pa[i]);
2112 	nvlist_free(f);
2113 	out(O_DIE, "%s", failure);
2114 	/*NOTREACHED*/
2115 	return (NULL);
2116 }
2117 
/*
 * percentof -- express part as a percentage of whole, rounded to the
 *	nearest integer with halves rounding up.
 *
 * The scaling is done in 64 bits: multiplying two uint_t values would
 * wrap at 32 bits for any part above roughly 4.29 million and yield a
 * bogus percentage.  A zero whole returns 0 instead of dividing by zero.
 */
static uint8_t
percentof(uint_t part, uint_t whole)
{
	unsigned long long tenths;

	if (whole == 0)
		return (0);

	/* widen before multiplying; tenths is the ratio in 0.1% units */
	tenths = (unsigned long long)part * 1000 / whole;

	return ((uint8_t)((tenths / 10) + (((tenths % 10) >= 5) ? 1 : 0)));
}
2125 
2126 struct rsl {
2127 	struct event *suspect;
2128 	nvlist_t *asru;
2129 	nvlist_t *fru;
2130 	nvlist_t *rsrc;
2131 };
2132 
2133 static void publish_suspects(struct fme *fmep, struct rsl *srl);
2134 
2135 /*
2136  *  rslfree -- free internal members of struct rsl not expected to be
2137  *	freed elsewhere.
2138  */
2139 static void
2140 rslfree(struct rsl *freeme)
2141 {
2142 	nvlist_free(freeme->asru);
2143 	nvlist_free(freeme->fru);
2144 	if (freeme->rsrc != freeme->asru)
2145 		nvlist_free(freeme->rsrc);
2146 }
2147 
2148 /*
2149  *  rslcmp -- compare two rsl structures.  Use the following
2150  *	comparisons to establish cardinality:
2151  *
2152  *	1. Name of the suspect's class. (simple strcmp)
2153  *	2. Name of the suspect's ASRU. (trickier, since nvlist)
2154  *
2155  */
2156 static int
2157 rslcmp(const void *a, const void *b)
2158 {
2159 	struct rsl *r1 = (struct rsl *)a;
2160 	struct rsl *r2 = (struct rsl *)b;
2161 	int rv;
2162 
2163 	rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s,
2164 	    r2->suspect->enode->u.event.ename->u.name.s);
2165 	if (rv != 0)
2166 		return (rv);
2167 
2168 	if (r1->rsrc == NULL && r2->rsrc == NULL)
2169 		return (0);
2170 	if (r1->rsrc == NULL)
2171 		return (-1);
2172 	if (r2->rsrc == NULL)
2173 		return (1);
2174 	return (evnv_cmpnvl(r1->rsrc, r2->rsrc, 0));
2175 }
2176 
2177 /*
2178  * get_resources -- for a given suspect, determine what ASRU, FRU and
2179  *     RSRC nvlists should be advertised in the final suspect list.
2180  */
2181 void
2182 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot)
2183 {
2184 	struct node *asrudef, *frudef;
2185 	const struct ipath *asrupath, *frupath;
2186 	nvlist_t *asru = NULL, *fru = NULL;
2187 	nvlist_t *rsrc = NULL;
2188 	char *pathstr;
2189 
2190 	/*
2191 	 * First find any ASRU and/or FRU defined in the
2192 	 * initial fault tree.
2193 	 */
2194 	asrudef = eventprop_lookup(sp, L_ASRU);
2195 	frudef = eventprop_lookup(sp, L_FRU);
2196 
2197 	/*
2198 	 * Create ipaths based on those definitions
2199 	 */
2200 	asrupath = ipath(asrudef);
2201 	frupath = ipath(frudef);
2202 
2203 	/*
2204 	 *  Allow for platform translations of the FMRIs
2205 	 */
2206 	pathstr = ipath2str(NULL, sp->ipp);
2207 	platform_unit_translate(is_defect(sp->t), croot, TOPO_PROP_RESOURCE,
2208 	    &rsrc, pathstr);
2209 	FREE(pathstr);
2210 
2211 	pathstr = ipath2str(NULL, asrupath);
2212 	platform_unit_translate(is_defect(sp->t), croot, TOPO_PROP_ASRU,
2213 	    &asru, pathstr);
2214 	FREE(pathstr);
2215 
2216 	pathstr = ipath2str(NULL, frupath);
2217 	platform_unit_translate(is_defect(sp->t), croot, TOPO_PROP_FRU,
2218 	    &fru, pathstr);
2219 	FREE(pathstr);
2220 
2221 	rsrcs->suspect = sp;
2222 	rsrcs->asru = asru;
2223 	rsrcs->fru = fru;
2224 	rsrcs->rsrc = rsrc;
2225 }
2226 
2227 /*
2228  * trim_suspects -- prior to publishing, we may need to remove some
2229  *    suspects from the list.  If we're auto-closing upsets, we don't
2230  *    want any of those in the published list.  If the ASRUs for multiple
2231  *    defects resolve to the same ASRU (driver) we only want to publish
2232  *    that as a single suspect.
2233  */
2234 static int
2235 trim_suspects(struct fme *fmep, struct rsl *begin, struct rsl *begin2,
2236     fmd_event_t *ffep)
2237 {
2238 	struct event *ep;
2239 	struct rsl *rp = begin;
2240 	struct rsl *rp2 = begin2;
2241 	int mess_zero_count = 0;
2242 	int serd_rval;
2243 	uint_t messval;
2244 
2245 	/* remove any unwanted upsets and populate our array */
2246 	for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
2247 		if (is_upset(ep->t))
2248 			continue;
2249 		serd_rval = serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, ep,
2250 		    NULL, NULL);
2251 		if (serd_rval == 0)
2252 			continue;
2253 		if (node2uint(eventprop_lookup(ep, L_message),
2254 		    &messval) == 0 && messval == 0) {
2255 			get_resources(ep, rp2, fmep->config);
2256 			rp2++;
2257 			mess_zero_count++;
2258 		} else {
2259 			get_resources(ep, rp, fmep->config);
2260 			rp++;
2261 			fmep->nsuspects++;
2262 		}
2263 	}
2264 	return (mess_zero_count);
2265 }
2266 
2267 /*
2268  * addpayloadprop -- add a payload prop to a problem
2269  */
2270 static void
2271 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault)
2272 {
2273 	nvlist_t *rsrc, *hcs;
2274 
2275 	ASSERT(fault != NULL);
2276 	ASSERT(lhs != NULL);
2277 	ASSERT(rhs != NULL);
2278 
2279 	if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, &rsrc) != 0)
2280 		out(O_DIE, "cannot add payloadprop \"%s\" to fault", lhs);
2281 
2282 	if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0) {
2283 		out(O_ALTFP|O_VERB2, "addpayloadprop: create hc_specific");
2284 		if (nvlist_xalloc(&hcs, NV_UNIQUE_NAME, &Eft_nv_hdl) != 0)
2285 			out(O_DIE,
2286 			    "cannot add payloadprop \"%s\" to fault", lhs);
2287 		if (nvlist_add_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, hcs) != 0)
2288 			out(O_DIE,
2289 			    "cannot add payloadprop \"%s\" to fault", lhs);
2290 		nvlist_free(hcs);
2291 		if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0)
2292 			out(O_DIE,
2293 			    "cannot add payloadprop \"%s\" to fault", lhs);
2294 	} else
2295 		out(O_ALTFP|O_VERB2, "addpayloadprop: reuse hc_specific");
2296 
2297 	if (rhs->t == UINT64) {
2298 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v);
2299 
2300 		if (nvlist_add_uint64(hcs, lhs, rhs->v) != 0)
2301 			out(O_DIE,
2302 			    "cannot add payloadprop \"%s\" to fault", lhs);
2303 	} else {
2304 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"",
2305 		    lhs, (char *)(uintptr_t)rhs->v);
2306 
2307 		if (nvlist_add_string(hcs, lhs, (char *)(uintptr_t)rhs->v) != 0)
2308 			out(O_DIE,
2309 			    "cannot add payloadprop \"%s\" to fault", lhs);
2310 	}
2311 }
2312 
2313 static char *Istatbuf;
2314 static char *Istatbufptr;
2315 static int Istatsz;
2316 
2317 /*
2318  * istataddsize -- calculate size of istat and add it to Istatsz
2319  */
2320 /*ARGSUSED2*/
2321 static void
2322 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2323 {
2324 	int val;
2325 
2326 	ASSERT(lhs != NULL);
2327 	ASSERT(rhs != NULL);
2328 
2329 	if ((val = stats_counter_value(rhs)) == 0)
2330 		return;	/* skip zero-valued stats */
2331 
2332 	/* count up the size of the stat name */
2333 	Istatsz += ipath2strlen(lhs->ename, lhs->ipath);
2334 	Istatsz++;	/* for the trailing NULL byte */
2335 
2336 	/* count up the size of the stat value */
2337 	Istatsz += snprintf(NULL, 0, "%d", val);
2338 	Istatsz++;	/* for the trailing NULL byte */
2339 }
2340 
2341 /*
2342  * istat2str -- serialize an istat, writing result to *Istatbufptr
2343  */
2344 /*ARGSUSED2*/
2345 static void
2346 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2347 {
2348 	char *str;
2349 	int len;
2350 	int val;
2351 
2352 	ASSERT(lhs != NULL);
2353 	ASSERT(rhs != NULL);
2354 
2355 	if ((val = stats_counter_value(rhs)) == 0)
2356 		return;	/* skip zero-valued stats */
2357 
2358 	/* serialize the stat name */
2359 	str = ipath2str(lhs->ename, lhs->ipath);
2360 	len = strlen(str);
2361 
2362 	ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]);
2363 	(void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr);
2364 	Istatbufptr += len;
2365 	FREE(str);
2366 	*Istatbufptr++ = '\0';
2367 
2368 	/* serialize the stat value */
2369 	Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr,
2370 	    "%d", val);
2371 	*Istatbufptr++ = '\0';
2372 
2373 	ASSERT(Istatbufptr <= &Istatbuf[Istatsz]);
2374 }
2375 
2376 void
2377 istat_save()
2378 {
2379 	if (Istat_need_save == 0)
2380 		return;
2381 
2382 	/* figure out how big the serialzed info is */
2383 	Istatsz = 0;
2384 	lut_walk(Istats, (lut_cb)istataddsize, NULL);
2385 
2386 	if (Istatsz == 0) {
2387 		/* no stats to save */
2388 		fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2389 		return;
2390 	}
2391 
2392 	/* create the serialized buffer */
2393 	Istatbufptr = Istatbuf = MALLOC(Istatsz);
2394 	lut_walk(Istats, (lut_cb)istat2str, NULL);
2395 
2396 	/* clear out current saved stats */
2397 	fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2398 
2399 	/* write out the new version */
2400 	fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz);
2401 	FREE(Istatbuf);
2402 
2403 	Istat_need_save = 0;
2404 }
2405 
2406 int
2407 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2)
2408 {
2409 	if (ent1->ename != ent2->ename)
2410 		return (ent2->ename - ent1->ename);
2411 	if (ent1->ipath != ent2->ipath)
2412 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2413 
2414 	return (0);
2415 }
2416 
2417 /*
2418  * istat-verify -- verify the component associated with a stat still exists
2419  *
2420  * if the component no longer exists, this routine resets the stat and
2421  * returns 0.  if the component still exists, it returns 1.
2422  */
2423 static int
2424 istat_verify(struct node *snp, struct istat_entry *entp)
2425 {
2426 	struct stats *statp;
2427 	nvlist_t *fmri;
2428 
2429 	fmri = node2fmri(snp->u.event.epname);
2430 	if (platform_path_exists(fmri)) {
2431 		nvlist_free(fmri);
2432 		return (1);
2433 	}
2434 	nvlist_free(fmri);
2435 
2436 	/* component no longer in system.  zero out the associated stats */
2437 	if ((statp = (struct stats *)
2438 	    lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
2439 	    stats_counter_value(statp) == 0)
2440 		return (0);	/* stat is already reset */
2441 
2442 	Istat_need_save = 1;
2443 	stats_counter_reset(statp);
2444 	return (0);
2445 }
2446 
2447 static void
2448 istat_bump(struct node *snp, int n)
2449 {
2450 	struct stats *statp;
2451 	struct istat_entry ent;
2452 
2453 	ASSERT(snp != NULL);
2454 	ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
2455 	ASSERT(snp->u.event.epname != NULL);
2456 
2457 	/* class name should be hoisted into a single stable entry */
2458 	ASSERT(snp->u.event.ename->u.name.next == NULL);
2459 	ent.ename = snp->u.event.ename->u.name.s;
2460 	ent.ipath = ipath(snp->u.event.epname);
2461 
2462 	if (!istat_verify(snp, &ent)) {
2463 		/* component no longer exists in system, nothing to do */
2464 		return;
2465 	}
2466 
2467 	if ((statp = (struct stats *)
2468 	    lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
2469 		/* need to create the counter */
2470 		int cnt = 0;
2471 		struct node *np;
2472 		char *sname;
2473 		char *snamep;
2474 		struct istat_entry *newentp;
2475 
2476 		/* count up the size of the stat name */
2477 		np = snp->u.event.ename;
2478 		while (np != NULL) {
2479 			cnt += strlen(np->u.name.s);
2480 			cnt++;	/* for the '.' or '@' */
2481 			np = np->u.name.next;
2482 		}
2483 		np = snp->u.event.epname;
2484 		while (np != NULL) {
2485 			cnt += snprintf(NULL, 0, "%s%llu",
2486 			    np->u.name.s, np->u.name.child->u.ull);
2487 			cnt++;	/* for the '/' or trailing NULL byte */
2488 			np = np->u.name.next;
2489 		}
2490 
2491 		/* build the stat name */
2492 		snamep = sname = alloca(cnt);
2493 		np = snp->u.event.ename;
2494 		while (np != NULL) {
2495 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2496 			    "%s", np->u.name.s);
2497 			np = np->u.name.next;
2498 			if (np)
2499 				*snamep++ = '.';
2500 		}
2501 		*snamep++ = '@';
2502 		np = snp->u.event.epname;
2503 		while (np != NULL) {
2504 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2505 			    "%s%llu", np->u.name.s, np->u.name.child->u.ull);
2506 			np = np->u.name.next;
2507 			if (np)
2508 				*snamep++ = '/';
2509 		}
2510 		*snamep++ = '\0';
2511 
2512 		/* create the new stat & add it to our list */
2513 		newentp = MALLOC(sizeof (*newentp));
2514 		*newentp = ent;
2515 		statp = stats_new_counter(NULL, sname, 0);
2516 		Istats = lut_add(Istats, (void *)newentp, (void *)statp,
2517 		    (lut_cmp)istat_cmp);
2518 	}
2519 
2520 	/* if n is non-zero, set that value instead of bumping */
2521 	if (n) {
2522 		stats_counter_reset(statp);
2523 		stats_counter_add(statp, n);
2524 	} else
2525 		stats_counter_bump(statp);
2526 	Istat_need_save = 1;
2527 
2528 	ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
2529 	out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
2530 	    stats_counter_value(statp));
2531 }
2532 
/*
 * istat_destructor -- lut_free() callback for Istats: frees the
 *	istat_entry key and deletes the stats counter stored with it.
 */
/*ARGSUSED*/
static void
istat_destructor(void *left, void *right, void *arg)
{
	struct stats *counter = (struct stats *)right;
	struct istat_entry *key = (struct istat_entry *)left;

	FREE(key);
	stats_delete(counter);
}
2542 
2543 /*
2544  * Callback used in a walk of the Istats to reset matching stat counters.
2545  */
2546 static void
2547 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
2548     const struct ipath *ipp)
2549 {
2550 	char *path;
2551 
2552 	if (entp->ipath == ipp) {
2553 		path = ipath2str(entp->ename, ipp);
2554 		out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
2555 		FREE(path);
2556 		stats_counter_reset(statp);
2557 		Istat_need_save = 1;
2558 	}
2559 }
2560 
2561 /*ARGSUSED*/
2562 static void
2563 istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp,
2564     void *unused)
2565 {
2566 	char *path;
2567 	nvlist_t *fmri;
2568 
2569 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2570 	if (!platform_path_exists(fmri)) {
2571 		path = ipath2str(entp->ename, entp->ipath);
2572 		out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path);
2573 		FREE(path);
2574 		stats_counter_reset(statp);
2575 		Istat_need_save = 1;
2576 	}
2577 	nvlist_free(fmri);
2578 }
2579 
2580 void
2581 istat_fini(void)
2582 {
2583 	lut_free(Istats, istat_destructor, NULL);
2584 }
2585 
2586 static char *Serdbuf;
2587 static char *Serdbufptr;
2588 static int Serdsz;
2589 
2590 /*
2591  * serdaddsize -- calculate size of serd and add it to Serdsz
2592  */
2593 /*ARGSUSED*/
2594 static void
2595 serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2596 {
2597 	ASSERT(lhs != NULL);
2598 
2599 	/* count up the size of the stat name */
2600 	Serdsz += ipath2strlen(lhs->ename, lhs->ipath);
2601 	Serdsz++;	/* for the trailing NULL byte */
2602 }
2603 
2604 /*
2605  * serd2str -- serialize a serd engine, writing result to *Serdbufptr
2606  */
2607 /*ARGSUSED*/
2608 static void
2609 serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2610 {
2611 	char *str;
2612 	int len;
2613 
2614 	ASSERT(lhs != NULL);
2615 
2616 	/* serialize the serd engine name */
2617 	str = ipath2str(lhs->ename, lhs->ipath);
2618 	len = strlen(str);
2619 
2620 	ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]);
2621 	(void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr);
2622 	Serdbufptr += len;
2623 	FREE(str);
2624 	*Serdbufptr++ = '\0';
2625 	ASSERT(Serdbufptr <= &Serdbuf[Serdsz]);
2626 }
2627 
2628 void
2629 serd_save()
2630 {
2631 	if (Serd_need_save == 0)
2632 		return;
2633 
2634 	/* figure out how big the serialzed info is */
2635 	Serdsz = 0;
2636 	lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL);
2637 
2638 	if (Serdsz == 0) {
2639 		/* no serd engines to save */
2640 		fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2641 		return;
2642 	}
2643 
2644 	/* create the serialized buffer */
2645 	Serdbufptr = Serdbuf = MALLOC(Serdsz);
2646 	lut_walk(SerdEngines, (lut_cb)serd2str, NULL);
2647 
2648 	/* clear out current saved stats */
2649 	fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2650 
2651 	/* write out the new version */
2652 	fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz);
2653 	FREE(Serdbuf);
2654 	Serd_need_save = 0;
2655 }
2656 
2657 int
2658 serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2)
2659 {
2660 	if (ent1->ename != ent2->ename)
2661 		return (ent2->ename - ent1->ename);
2662 	if (ent1->ipath != ent2->ipath)
2663 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2664 
2665 	return (0);
2666 }
2667 
2668 void
2669 fme_serd_load(fmd_hdl_t *hdl)
2670 {
2671 	int sz;
2672 	char *sbuf;
2673 	char *sepptr;
2674 	char *ptr;
2675 	struct serd_entry *newentp;
2676 	struct node *epname;
2677 	nvlist_t *fmri;
2678 	char *namestring;
2679 
2680 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0)
2681 		return;
2682 	sbuf = alloca(sz);
2683 	fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz);
2684 	ptr = sbuf;
2685 	while (ptr < &sbuf[sz]) {
2686 		sepptr = strchr(ptr, '@');
2687 		*sepptr = '\0';
2688 		namestring = ptr;
2689 		sepptr++;
2690 		ptr = sepptr;
2691 		ptr += strlen(ptr);
2692 		ptr++;	/* move past the '\0' separating paths */
2693 		epname = pathstring2epnamenp(sepptr);
2694 		fmri = node2fmri(epname);
2695 		if (platform_path_exists(fmri)) {
2696 			newentp = MALLOC(sizeof (*newentp));
2697 			newentp->hdl = hdl;
2698 			newentp->ipath = ipath(epname);
2699 			newentp->ename = stable(namestring);
2700 			SerdEngines = lut_add(SerdEngines, (void *)newentp,
2701 			    (void *)newentp, (lut_cmp)serd_cmp);
2702 		} else
2703 			Serd_need_save = 1;
2704 		tree_free(epname);
2705 		nvlist_free(fmri);
2706 	}
2707 	/* save it back again in case some of the paths no longer exist */
2708 	serd_save();
2709 }
2710 
/*
 * serd_destructor -- lut_free() callback for SerdEngines: frees the
 *	serd_entry stored as the key.  The value is not touched here.
 */
/*ARGSUSED*/
static void
serd_destructor(void *left, void *right, void *arg)
{
	struct serd_entry *key = (struct serd_entry *)left;

	FREE(key);
}
2718 
2719 /*
2720  * Callback used in a walk of the SerdEngines to reset matching serd engines.
2721  */
2722 /*ARGSUSED*/
2723 static void
2724 serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp)
2725 {
2726 	char *path;
2727 
2728 	if (entp->ipath == ipp) {
2729 		path = ipath2str(entp->ename, ipp);
2730 		out(O_ALTFP, "serd_reset_cb: resetting %s", path);
2731 		fmd_serd_reset(entp->hdl, path);
2732 		FREE(path);
2733 		Serd_need_save = 1;
2734 	}
2735 }
2736 
2737 /*ARGSUSED*/
2738 static void
2739 serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2)
2740 {
2741 	char *path;
2742 	nvlist_t *fmri;
2743 
2744 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2745 	if (!platform_path_exists(fmri)) {
2746 		path = ipath2str(entp->ename, entp->ipath);
2747 		out(O_ALTFP, "serd_topo_chg_cb: not present %s", path);
2748 		fmd_serd_reset(entp->hdl, path);
2749 		FREE(path);
2750 		Serd_need_save = 1;
2751 	}
2752 	nvlist_free(fmri);
2753 }
2754 
2755 void
2756 serd_fini(void)
2757 {
2758 	lut_free(SerdEngines, serd_destructor, NULL);
2759 }
2760 
2761 static void
2762 publish_suspects(struct fme *fmep, struct rsl *srl)
2763 {
2764 	struct rsl *rp;
2765 	nvlist_t *fault;
2766 	uint8_t cert;
2767 	uint_t *frs;
2768 	uint_t frsum, fr;
2769 	uint_t messval;
2770 	uint_t retireval;
2771 	uint_t responseval;
2772 	struct node *snp;
2773 	int frcnt, fridx;
2774 	boolean_t allfaulty = B_TRUE;
2775 	struct rsl *erl = srl + fmep->nsuspects - 1;
2776 
2777 	/*
2778 	 * sort the array
2779 	 */
2780 	qsort(srl, fmep->nsuspects, sizeof (struct rsl), rslcmp);
2781 
2782 	/* sum the fitrates */
2783 	frs = alloca(fmep->nsuspects * sizeof (uint_t));
2784 	fridx = frcnt = frsum = 0;
2785 
2786 	for (rp = srl; rp <= erl; rp++) {
2787 		struct node *n;
2788 
2789 		n = eventprop_lookup(rp->suspect, L_FITrate);
2790 		if (node2uint(n, &fr) != 0) {
2791 			out(O_DEBUG|O_NONL, "event ");
2792 			ipath_print(O_DEBUG|O_NONL,
2793 			    rp->suspect->enode->u.event.ename->u.name.s,
2794 			    rp->suspect->ipp);
2795 			out(O_VERB, " has no FITrate (using 1)");
2796 			fr = 1;
2797 		} else if (fr == 0) {
2798 			out(O_DEBUG|O_NONL, "event ");
2799 			ipath_print(O_DEBUG|O_NONL,
2800 			    rp->suspect->enode->u.event.ename->u.name.s,
2801 			    rp->suspect->ipp);
2802 			out(O_VERB, " has zero FITrate (using 1)");
2803 			fr = 1;
2804 		}
2805 
2806 		frs[fridx++] = fr;
2807 		frsum += fr;
2808 		frcnt++;
2809 	}
2810 
2811 	/* Add them in reverse order of our sort, as fmd reverses order */
2812 	for (rp = erl; rp >= srl; rp--) {
2813 		cert = percentof(frs[--fridx], frsum);
2814 		fault = fmd_nvl_create_fault(fmep->hdl,
2815 		    rp->suspect->enode->u.event.ename->u.name.s,
2816 		    cert,
2817 		    rp->asru,
2818 		    rp->fru,
2819 		    rp->rsrc);
2820 		if (fault == NULL)
2821 			out(O_DIE, "fault creation failed");
2822 		/* if "message" property exists, add it to the fault */
2823 		if (node2uint(eventprop_lookup(rp->suspect, L_message),
2824 		    &messval) == 0) {
2825 
2826 			out(O_ALTFP,
2827 			    "[FME%d, %s adds message=%d to suspect list]",
2828 			    fmep->id,
2829 			    rp->suspect->enode->u.event.ename->u.name.s,
2830 			    messval);
2831 			if (nvlist_add_boolean_value(fault,
2832 			    FM_SUSPECT_MESSAGE,
2833 			    (messval) ? B_TRUE : B_FALSE) != 0) {
2834 				out(O_DIE, "cannot add no-message to fault");
2835 			}
2836 		}
2837 
2838 		/* if "retire" property exists, add it to the fault */
2839 		if (node2uint(eventprop_lookup(rp->suspect, L_retire),
2840 		    &retireval) == 0) {
2841 
2842 			out(O_ALTFP,
2843 			    "[FME%d, %s adds retire=%d to suspect list]",
2844 			    fmep->id,
2845 			    rp->suspect->enode->u.event.ename->u.name.s,
2846 			    retireval);
2847 			if (nvlist_add_boolean_value(fault,
2848 			    FM_SUSPECT_RETIRE,
2849 			    (retireval) ? B_TRUE : B_FALSE) != 0) {
2850 				out(O_DIE, "cannot add no-retire to fault");
2851 			}
2852 		}
2853 
2854 		/* if "response" property exists, add it to the fault */
2855 		if (node2uint(eventprop_lookup(rp->suspect, L_response),
2856 		    &responseval) == 0) {
2857 
2858 			out(O_ALTFP,
2859 			    "[FME%d, %s adds response=%d to suspect list]",
2860 			    fmep->id,
2861 			    rp->suspect->enode->u.event.ename->u.name.s,
2862 			    responseval);
2863 			if (nvlist_add_boolean_value(fault,
2864 			    FM_SUSPECT_RESPONSE,
2865 			    (responseval) ? B_TRUE : B_FALSE) != 0) {
2866 				out(O_DIE, "cannot add no-response to fault");
2867 			}
2868 		}
2869 
2870 		/* add any payload properties */
2871 		lut_walk(rp->suspect->payloadprops,
2872 		    (lut_cb)addpayloadprop, (void *)fault);
2873 		rslfree(rp);
2874 
2875 		/*
2876 		 * If "action" property exists, evaluate it;  this must be done
2877 		 * before the allfaulty check below since some actions may
2878 		 * modify the asru to be used in fmd_nvl_fmri_has_fault.  This
2879 		 * needs to be restructured if any new actions are introduced
2880 		 * that have effects that we do not want to be visible if
2881 		 * we decide not to publish in the dupclose check below.
2882 		 */
2883 		if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
2884 			struct evalue evalue;
2885 
2886 			out(O_ALTFP|O_NONL,
2887 			    "[FME%d, %s action ", fmep->id,
2888 			    rp->suspect->enode->u.event.ename->u.name.s);
2889 			ptree_name_iter(O_ALTFP|O_NONL, snp);
2890 			out(O_ALTFP, "]");
2891 			Action_nvl = fault;
2892 			(void) eval_expr(snp, NULL, NULL, NULL, NULL,
2893 			    NULL, 0, &evalue);
2894 		}
2895 
2896 		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);
2897 
2898 		/*
2899 		 * check if the asru is already marked as "faulty".
2900 		 */
2901 		if (allfaulty) {
2902 			nvlist_t *asru;
2903 
2904 			out(O_ALTFP|O_VERB, "FME%d dup check ", fmep->id);
2905 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
2906 			out(O_ALTFP|O_VERB|O_NONL, " ");
2907 			if (nvlist_lookup_nvlist(fault,
2908 			    FM_FAULT_ASRU, &asru) != 0) {
2909 				out(O_ALTFP|O_VERB, "NULL asru");
2910 				allfaulty = B_FALSE;
2911 			} else if (fmd_nvl_fmri_has_fault(fmep->hdl, asru,
2912 			    FMD_HAS_FAULT_ASRU, NULL)) {
2913 				out(O_ALTFP|O_VERB, "faulty");
2914 			} else {
2915 				out(O_ALTFP|O_VERB, "not faulty");
2916 				allfaulty = B_FALSE;
2917 			}
2918 		}
2919 
2920 	}
2921 
2922 	if (!allfaulty) {
2923 		/*
2924 		 * don't update the count stat if all asrus are already
2925 		 * present and unrepaired in the asru cache
2926 		 */
2927 		for (rp = erl; rp >= srl; rp--) {
2928 			struct event *suspect = rp->suspect;
2929 
2930 			if (suspect == NULL)
2931 				continue;
2932 
2933 			/* if "count" exists, increment the appropriate stat */
2934 			if ((snp = eventprop_lookup(suspect,
2935 			    L_count)) != NULL) {
2936 				out(O_ALTFP|O_NONL,
2937 				    "[FME%d, %s count ", fmep->id,
2938 				    suspect->enode->u.event.ename->u.name.s);
2939 				ptree_name_iter(O_ALTFP|O_NONL, snp);
2940 				out(O_ALTFP, "]");
2941 				istat_bump(snp, 0);
2942 
2943 			}
2944 		}
2945 		istat_save();	/* write out any istat changes */
2946 	}
2947 }
2948 
2949 static const char *
2950 undiag_2defect_str(int ud)
2951 {
2952 	switch (ud) {
2953 	case UD_VAL_MISSINGINFO:
2954 	case UD_VAL_MISSINGOBS:
2955 	case UD_VAL_MISSINGPATH:
2956 	case UD_VAL_MISSINGZERO:
2957 	case UD_VAL_BADOBS:
2958 	case UD_VAL_CFGMISMATCH:
2959 		return (UNDIAG_DEFECT_CHKPT);
2960 
2961 	case UD_VAL_BADEVENTI:
2962 	case UD_VAL_BADEVENTPATH:
2963 	case UD_VAL_BADEVENTCLASS:
2964 	case UD_VAL_INSTFAIL:
2965 	case UD_VAL_NOPATH:
2966 	case UD_VAL_UNSOLVD:
2967 		return (UNDIAG_DEFECT_FME);
2968 
2969 	case UD_VAL_MAXFME:
2970 		return (UNDIAG_DEFECT_LIMIT);
2971 
2972 	case UD_VAL_UNKNOWN:
2973 	default:
2974 		return (UNDIAG_DEFECT_UNKNOWN);
2975 	}
2976 }
2977 
2978 static const char *
2979 undiag_2fault_str(int ud)
2980 {
2981 	switch (ud) {
2982 	case UD_VAL_BADEVENTI:
2983 	case UD_VAL_BADEVENTPATH:
2984 	case UD_VAL_BADEVENTCLASS:
2985 	case UD_VAL_INSTFAIL:
2986 	case UD_VAL_NOPATH:
2987 	case UD_VAL_UNSOLVD:
2988 		return (UNDIAG_FAULT_FME);
2989 	default:
2990 		return (NULL);
2991 	}
2992 }
2993 
2994 static char *
2995 undiag_2reason_str(int ud, char *arg)
2996 {
2997 	const char *ptr;
2998 	char *buf;
2999 	int with_arg = 0;
3000 
3001 	switch (ud) {
3002 	case UD_VAL_BADEVENTPATH:
3003 		ptr = UD_STR_BADEVENTPATH;
3004 		with_arg = 1;
3005 		break;
3006 	case UD_VAL_BADEVENTCLASS:
3007 		ptr = UD_STR_BADEVENTCLASS;
3008 		with_arg = 1;
3009 		break;
3010 	case UD_VAL_BADEVENTI:
3011 		ptr = UD_STR_BADEVENTI;
3012 		with_arg = 1;
3013 		break;
3014 	case UD_VAL_BADOBS:
3015 		ptr = UD_STR_BADOBS;
3016 		break;
3017 	case UD_VAL_CFGMISMATCH:
3018 		ptr = UD_STR_CFGMISMATCH;
3019 		break;
3020 	case UD_VAL_INSTFAIL:
3021 		ptr = UD_STR_INSTFAIL;
3022 		with_arg = 1;
3023 		break;
3024 	case UD_VAL_MAXFME:
3025 		ptr = UD_STR_MAXFME;
3026 		break;
3027 	case UD_VAL_MISSINGINFO:
3028 		ptr = UD_STR_MISSINGINFO;
3029 		break;
3030 	case UD_VAL_MISSINGOBS:
3031 		ptr = UD_STR_MISSINGOBS;
3032 		break;
3033 	case UD_VAL_MISSINGPATH:
3034 		ptr = UD_STR_MISSINGPATH;
3035 		break;
3036 	case UD_VAL_MISSINGZERO:
3037 		ptr = UD_STR_MISSINGZERO;
3038 		break;
3039 	case UD_VAL_NOPATH:
3040 		ptr = UD_STR_NOPATH;
3041 		with_arg = 1;
3042 		break;
3043 	case UD_VAL_UNSOLVD:
3044 		ptr = UD_STR_UNSOLVD;
3045 		break;
3046 	case UD_VAL_UNKNOWN:
3047 	default:
3048 		ptr = UD_STR_UNKNOWN;
3049 		break;
3050 	}
3051 	if (with_arg) {
3052 		buf = MALLOC(strlen(ptr) + strlen(arg) - 1);
3053 		(void) sprintf(buf, ptr, arg);
3054 	} else {
3055 		buf = MALLOC(strlen(ptr) + 1);
3056 		(void) sprintf(buf, ptr);
3057 	}
3058 	return (buf);
3059 }
3060 
3061 static void
3062 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase,
3063     nvlist_t *detector, char *arg)
3064 {
3065 	struct case_list *newcase;
3066 	nvlist_t *defect, *fault;
3067 	const char *faultstr;
3068 	char *reason = undiag_2reason_str(Undiag_reason, arg);
3069 
3070 	out(O_ALTFP,
3071 	    "[undiagnosable ereport received, "
3072 	    "creating and closing a new case (%s)]", reason);
3073 
3074 	newcase = MALLOC(sizeof (struct case_list));
3075 	newcase->next = NULL;
3076 	newcase->fmcase = fmcase;
3077 	if (Undiagablecaselist != NULL)
3078 		newcase->next = Undiagablecaselist;
3079 	Undiagablecaselist = newcase;
3080 
3081 	if (ffep != NULL)
3082 		fmd_case_add_ereport(hdl, newcase->fmcase, ffep);
3083 
3084 	/* add defect */
3085 	defect = fmd_nvl_create_fault(hdl,
3086 	    undiag_2defect_str(Undiag_reason), 50, NULL, NULL, detector);
3087 	(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
3088 	(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE, B_FALSE);
3089 	(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE, B_FALSE);
3090 	fmd_case_add_suspect(hdl, newcase->fmcase, defect);
3091 
3092 	/* add fault if appropriate */
3093 	faultstr = undiag_2fault_str(Undiag_reason);
3094 	if (faultstr != NULL) {
3095 		fault = fmd_nvl_create_fault(hdl, faultstr, 50, NULL, NULL,
3096 		    detector);
3097 		(void) nvlist_add_string(fault, UNDIAG_REASON, reason);
3098 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
3099 		    B_FALSE);
3100 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
3101 		    B_FALSE);
3102 		fmd_case_add_suspect(hdl, newcase->fmcase, fault);
3103 	}
3104 	FREE(reason);
3105 
3106 	/* solve and close case */
3107 	fmd_case_solve(hdl, newcase->fmcase);
3108 	fmd_case_close(hdl, newcase->fmcase);
3109 	Undiag_reason = UD_VAL_UNKNOWN;
3110 }
3111 
3112 static void
3113 fme_undiagnosable(struct fme *f)
3114 {
3115 	nvlist_t *defect, *fault, *detector = NULL;
3116 	struct event *ep;
3117 	char *pathstr;
3118 	const char *faultstr;
3119 	char *reason = undiag_2reason_str(Undiag_reason, NULL);
3120 
3121 	out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
3122 	    f->id, fmd_case_uuid(f->hdl, f->fmcase), reason);
3123 
3124 	for (ep = f->observations; ep; ep = ep->observations) {
3125 
3126 		if (ep->ffep != f->e0r)
3127 			fmd_case_add_ereport(f->hdl, f->fmcase, ep->ffep);
3128 
3129 		pathstr = ipath2str(NULL, ipath(platform_getpath(ep->nvp)));
3130 		platform_unit_translate(0, f->config, TOPO_PROP_RESOURCE,
3131 		    &detector, pathstr);
3132 		FREE(pathstr);
3133 
3134 		/* add defect */
3135 		defect = fmd_nvl_create_fault(f->hdl,
3136 		    undiag_2defect_str(Undiag_reason), 50 / f->uniqobs,
3137 		    NULL, NULL, detector);
3138 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
3139 		(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE,
3140 		    B_FALSE);
3141 		(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE,
3142 		    B_FALSE);
3143 		fmd_case_add_suspect(f->hdl, f->fmcase, defect);
3144 
3145 		/* add fault if appropriate */
3146 		faultstr = undiag_2fault_str(Undiag_reason);
3147 		if (faultstr == NULL)
3148 			continue;
3149 		fault = fmd_nvl_create_fault(f->hdl, faultstr, 50 / f->uniqobs,
3150 		    NULL, NULL, detector);
3151 		(void) nvlist_add_string(fault, UNDIAG_REASON, reason);
3152 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
3153 		    B_FALSE);
3154 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
3155 		    B_FALSE);
3156 		fmd_case_add_suspect(f->hdl, f->fmcase, fault);
3157 		nvlist_free(detector);
3158 	}
3159 	FREE(reason);
3160 	fmd_case_solve(f->hdl, f->fmcase);
3161 	fmd_case_close(f->hdl, f->fmcase);
3162 	Undiag_reason = UD_VAL_UNKNOWN;
3163 }
3164 
3165 /*
3166  * fme_close_case
3167  *
3168  *	Find the requested case amongst our fmes and close it.  Free up
3169  *	the related fme.
3170  */
3171 void
3172 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase)
3173 {
3174 	struct case_list *ucasep, *prevcasep = NULL;
3175 	struct fme *prev = NULL;
3176 	struct fme *fmep;
3177 
3178 	for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) {
3179 		if (fmcase != ucasep->fmcase) {
3180 			prevcasep = ucasep;
3181 			continue;
3182 		}
3183 
3184 		if (prevcasep == NULL)
3185 			Undiagablecaselist = Undiagablecaselist->next;
3186 		else
3187 			prevcasep->next = ucasep->next;
3188 
3189 		FREE(ucasep);
3190 		return;
3191 	}
3192 
3193 	for (fmep = FMElist; fmep; fmep = fmep->next) {
3194 		if (fmep->hdl == hdl && fmep->fmcase == fmcase)
3195 			break;
3196 		prev = fmep;
3197 	}
3198 
3199 	if (fmep == NULL) {
3200 		out(O_WARN, "Eft asked to close unrecognized case [%s].",
3201 		    fmd_case_uuid(hdl, fmcase));
3202 		return;
3203 	}
3204 
3205 	if (EFMElist == fmep)
3206 		EFMElist = prev;
3207 
3208 	if (prev == NULL)
3209 		FMElist = FMElist->next;
3210 	else
3211 		prev->next = fmep->next;
3212 
3213 	fmep->next = NULL;
3214 
3215 	/* Get rid of any timer this fme has set */
3216 	if (fmep->wull != 0)
3217 		fmd_timer_remove(fmep->hdl, fmep->timer);
3218 
3219 	if (ClosedFMEs == NULL) {
3220 		ClosedFMEs = fmep;
3221 	} else {
3222 		fmep->next = ClosedFMEs;
3223 		ClosedFMEs = fmep;
3224 	}
3225 
3226 	Open_fme_count--;
3227 
3228 	/* See if we can close the overflow FME */
3229 	if (Open_fme_count <= Max_fme) {
3230 		for (fmep = FMElist; fmep; fmep = fmep->next) {
3231 			if (fmep->overflow && !(fmd_case_closed(fmep->hdl,
3232 			    fmep->fmcase)))
3233 				break;
3234 		}
3235 
3236 		if (fmep != NULL)
3237 			fmd_case_close(fmep->hdl, fmep->fmcase);
3238 	}
3239 }
3240 
3241 /*
3242  * fme_set_timer()
3243  *	If the time we need to wait for the given FME is less than the
3244  *	current timer, kick that old timer out and establish a new one.
3245  */
3246 static int
3247 fme_set_timer(struct fme *fmep, unsigned long long wull)
3248 {
3249 	out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait ");
3250 	ptree_timeval(O_ALTFP|O_VERB, &wull);
3251 
3252 	if (wull <= fmep->pull) {
3253 		out(O_ALTFP|O_VERB|O_NONL, "already have waited at least ");
3254 		ptree_timeval(O_ALTFP|O_VERB, &fmep->pull);
3255 		out(O_ALTFP|O_VERB, NULL);
3256 		/* we've waited at least wull already, don't need timer */
3257 		return (0);
3258 	}
3259 
3260 	out(O_ALTFP|O_VERB|O_NONL, " currently ");
3261 	if (fmep->wull != 0) {
3262 		out(O_ALTFP|O_VERB|O_NONL, "waiting ");
3263 		ptree_timeval(O_ALTFP|O_VERB, &fmep->wull);
3264 		out(O_ALTFP|O_VERB, NULL);
3265 	} else {
3266 		out(O_ALTFP|O_VERB|O_NONL, "not waiting");
3267 		out(O_ALTFP|O_VERB, NULL);
3268 	}
3269 
3270 	if (fmep->wull != 0)
3271 		if (wull >= fmep->wull)
3272 			/* New timer would fire later than established timer */
3273 			return (0);
3274 
3275 	if (fmep->wull != 0) {
3276 		fmd_timer_remove(fmep->hdl, fmep->timer);
3277 	}
3278 
3279 	fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep,
3280 	    fmep->e0r, wull);
3281 	out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer);
3282 	fmep->wull = wull;
3283 	return (1);
3284 }
3285 
3286 void
3287 fme_timer_fired(struct fme *fmep, id_t tid)
3288 {
3289 	struct fme *ffmep = NULL;
3290 
3291 	for (ffmep = FMElist; ffmep; ffmep = ffmep->next)
3292 		if (ffmep == fmep)
3293 			break;
3294 
3295 	if (ffmep == NULL) {
3296 		out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.",
3297 		    (void *)fmep);
3298 		return;
3299 	}
3300 
3301 	out(O_ALTFP|O_VERB, "Timer fired %lx", tid);
3302 	fmep->pull = fmep->wull;
3303 	fmep->wull = 0;
3304 	fmd_buf_write(fmep->hdl, fmep->fmcase,
3305 	    WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull));
3306 
3307 	fme_eval(fmep, fmep->e0r);
3308 }
3309 
3310 /*
3311  * Preserve the fme's suspect list in its psuspects list, NULLing the
3312  * suspects list in the meantime.
3313  */
3314 static void
3315 save_suspects(struct fme *fmep)
3316 {
3317 	struct event *ep;
3318 	struct event *nextep;
3319 
3320 	/* zero out the previous suspect list */
3321 	for (ep = fmep->psuspects; ep; ep = nextep) {
3322 		nextep = ep->psuspects;
3323 		ep->psuspects = NULL;
3324 	}
3325 	fmep->psuspects = NULL;
3326 
3327 	/* zero out the suspect list, copying it to previous suspect list */
3328 	fmep->psuspects = fmep->suspects;
3329 	for (ep = fmep->suspects; ep; ep = nextep) {
3330 		nextep = ep->suspects;
3331 		ep->psuspects = ep->suspects;
3332 		ep->suspects = NULL;
3333 		ep->is_suspect = 0;
3334 	}
3335 	fmep->suspects = NULL;
3336 	fmep->nsuspects = 0;
3337 }
3338 
3339 /*
3340  * Retrieve the fme's suspect list from its psuspects list.
3341  */
3342 static void
3343 restore_suspects(struct fme *fmep)
3344 {
3345 	struct event *ep;
3346 	struct event *nextep;
3347 
3348 	fmep->nsuspects = 0;
3349 	fmep->suspects = fmep->psuspects;
3350 	for (ep = fmep->psuspects; ep; ep = nextep) {
3351 		fmep->nsuspects++;
3352 		nextep = ep->psuspects;
3353 		ep->suspects = ep->psuspects;
3354 	}
3355 }
3356 
3357 /*
3358  * this is what we use to call the Emrys prototype code instead of main()
3359  */
3360 static void
3361 fme_eval(struct fme *fmep, fmd_event_t *ffep)
3362 {
3363 	struct event *ep;
3364 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
3365 	struct rsl *srl = NULL;
3366 	struct rsl *srl2 = NULL;
3367 	int mess_zero_count;
3368 	int rpcnt;
3369 
3370 	save_suspects(fmep);
3371 
3372 	out(O_ALTFP, "Evaluate FME %d", fmep->id);
3373 	indent_set("  ");
3374 
3375 	lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
3376 	fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
3377 
3378 	out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
3379 	    fme_state2str(fmep->state));
3380 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
3381 		out(O_ALTFP|O_NONL, " ");
3382 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
3383 	}
3384 	out(O_ALTFP, NULL);
3385 
3386 	switch (fmep->state) {
3387 	case FME_CREDIBLE:
3388 		print_suspects(SLNEW, fmep);
3389 		(void) upsets_eval(fmep, ffep);
3390 
3391 		/*
3392 		 * we may have already posted suspects in upsets_eval() which
3393 		 * can recurse into fme_eval() again. If so then just return.
3394 		 */
3395 		if (fmep->posted_suspects)
3396 			return;
3397 
3398 		stats_counter_bump(fmep->diags);
3399 		rpcnt = fmep->nsuspects;
3400 		save_suspects(fmep);
3401 
3402 		/*
3403 		 * create two lists, one for "message=1" faults and one for
3404 		 * "message=0" faults. If we have a mixture we will generate
3405 		 * two separate suspect lists.
3406 		 */
3407 		srl = MALLOC(rpcnt * sizeof (struct rsl));
3408 		bzero(srl, rpcnt * sizeof (struct rsl));
3409 		srl2 = MALLOC(rpcnt * sizeof (struct rsl));
3410 		bzero(srl2, rpcnt * sizeof (struct rsl));
3411 		mess_zero_count = trim_suspects(fmep, srl, srl2, ffep);
3412 
3413 		/*
3414 		 * If the resulting suspect list has no members, we're
3415 		 * done so simply close the case. Otherwise sort and publish.
3416 		 */
3417 		if (fmep->nsuspects == 0 && mess_zero_count == 0) {
3418 			out(O_ALTFP,
3419 			    "[FME%d, case %s (all suspects are upsets)]",
3420 			    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
3421 			fmd_case_close(fmep->hdl, fmep->fmcase);
3422 		} else if (fmep->nsuspects != 0 && mess_zero_count == 0) {
3423 			publish_suspects(fmep, srl);
3424 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3425 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3426 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3427 		} else if (fmep->nsuspects == 0 && mess_zero_count != 0) {
3428 			fmep->nsuspects = mess_zero_count;
3429 			publish_suspects(fmep, srl2);
3430 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3431 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3432 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3433 		} else {
3434 			struct event *obsp;
3435 			struct fme *nfmep;
3436 
3437 			publish_suspects(fmep, srl);
3438 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3439 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3440 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3441 
3442 			/*
3443 			 * Got both message=0 and message=1 so create a
3444 			 * duplicate case. Also need a temporary duplicate fme
3445 			 * structure for use by publish_suspects().
3446 			 */
3447 			nfmep = alloc_fme();
3448 			nfmep->id =  Nextid++;
3449 			nfmep->hdl = fmep->hdl;
3450 			nfmep->nsuspects = mess_zero_count;
3451 			nfmep->fmcase = fmd_case_open(fmep->hdl, NULL);
3452 			out(O_ALTFP|O_STAMP,
3453 			    "[creating parallel FME%d, case %s]", nfmep->id,
3454 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3455 			Open_fme_count++;
3456 			if (ffep) {
3457 				fmd_case_setprincipal(nfmep->hdl,
3458 				    nfmep->fmcase, ffep);
3459 				fmd_case_add_ereport(nfmep->hdl,
3460 				    nfmep->fmcase, ffep);
3461 			}
3462 			for (obsp = fmep->observations; obsp;
3463 			    obsp = obsp->observations)
3464 				if (obsp->ffep && obsp->ffep != ffep)
3465 					fmd_case_add_ereport(nfmep->hdl,
3466 					    nfmep->fmcase, obsp->ffep);
3467 
3468 			publish_suspects(nfmep, srl2);
3469 			out(O_ALTFP, "[solving FME%d, case %s]", nfmep->id,
3470 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3471 			fmd_case_solve(nfmep->hdl, nfmep->fmcase);
3472 			FREE(nfmep);
3473 		}
3474 		FREE(srl);
3475 		FREE(srl2);
3476 		restore_suspects(fmep);
3477 
3478 		fmep->posted_suspects = 1;
3479 		fmd_buf_write(fmep->hdl, fmep->fmcase,
3480 		    WOBUF_POSTD,
3481 		    (void *)&fmep->posted_suspects,
3482 		    sizeof (fmep->posted_suspects));
3483 
3484 		/*
3485 		 * Now the suspects have been posted, we can clear up
3486 		 * the instance tree as we won't be looking at it again.
3487 		 * Also cancel the timer as the case is now solved.
3488 		 */
3489 		if (fmep->wull != 0) {
3490 			fmd_timer_remove(fmep->hdl, fmep->timer);
3491 			fmep->wull = 0;
3492 		}
3493 		break;
3494 
3495 	case FME_WAIT:
3496 		ASSERT(my_delay > fmep->ull);
3497 		(void) fme_set_timer(fmep, my_delay);
3498 		print_suspects(SLWAIT, fmep);
3499 		itree_prune(fmep->eventtree);
3500 		return;
3501 
3502 	case FME_DISPROVED:
3503 		print_suspects(SLDISPROVED, fmep);
3504 		Undiag_reason = UD_VAL_UNSOLVD;
3505 		fme_undiagnosable(fmep);
3506 		break;
3507 	}
3508 
3509 	itree_free(fmep->eventtree);
3510 	fmep->eventtree = NULL;
3511 	structconfig_free(fmep->config);
3512 	fmep->config = NULL;
3513 	destroy_fme_bufs(fmep);
3514 }
3515 
3516 static void indent(void);
3517 static int triggered(struct fme *fmep, struct event *ep, int mark);
3518 static enum fme_state effects_test(struct fme *fmep,
3519     struct event *fault_event, unsigned long long at_latest_by,
3520     unsigned long long *pdelay);
3521 static enum fme_state requirements_test(struct fme *fmep, struct event *ep,
3522     unsigned long long at_latest_by, unsigned long long *pdelay);
3523 static enum fme_state causes_test(struct fme *fmep, struct event *ep,
3524     unsigned long long at_latest_by, unsigned long long *pdelay);
3525 
3526 static int
3527 checkconstraints(struct fme *fmep, struct arrow *arrowp)
3528 {
3529 	struct constraintlist *ctp;
3530 	struct evalue value;
3531 	char *sep = "";
3532 
3533 	if (arrowp->forever_false) {
3534 		indent();
3535 		out(O_ALTFP|O_VERB|O_NONL, "  Forever false constraint: ");
3536 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3537 			out(O_ALTFP|O_VERB|O_NONL, sep);
3538 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3539 			sep = ", ";
3540 		}
3541 		out(O_ALTFP|O_VERB, NULL);
3542 		return (0);
3543 	}
3544 	if (arrowp->forever_true) {
3545 		indent();
3546 		out(O_ALTFP|O_VERB|O_NONL, "  Forever true constraint: ");
3547 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3548 			out(O_ALTFP|O_VERB|O_NONL, sep);
3549 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3550 			sep = ", ";
3551 		}
3552 		out(O_ALTFP|O_VERB, NULL);
3553 		return (1);
3554 	}
3555 
3556 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3557 		if (eval_expr(ctp->cnode, NULL, NULL,
3558 		    &fmep->globals, fmep->config,
3559 		    arrowp, 0, &value)) {
3560 			/* evaluation successful */
3561 			if (value.t == UNDEFINED || value.v == 0) {
3562 				/* known false */
3563 				arrowp->forever_false = 1;
3564 				indent();
3565 				out(O_ALTFP|O_VERB|O_NONL,
3566 				    "  False constraint: ");
3567 				ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3568 				out(O_ALTFP|O_VERB, NULL);
3569 				return (0);
3570 			}
3571 		} else {
3572 			/* evaluation unsuccessful -- unknown value */
3573 			indent();
3574 			out(O_ALTFP|O_VERB|O_NONL,
3575 			    "  Deferred constraint: ");
3576 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3577 			out(O_ALTFP|O_VERB, NULL);
3578 			return (1);
3579 		}
3580 	}
3581 	/* known true */
3582 	arrowp->forever_true = 1;
3583 	indent();
3584 	out(O_ALTFP|O_VERB|O_NONL, "  True constraint: ");
3585 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3586 		out(O_ALTFP|O_VERB|O_NONL, sep);
3587 		ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3588 		sep = ", ";
3589 	}
3590 	out(O_ALTFP|O_VERB, NULL);
3591 	return (1);
3592 }
3593 
3594 static int
3595 triggered(struct fme *fmep, struct event *ep, int mark)
3596 {
3597 	struct bubble *bp;
3598 	struct arrowlist *ap;
3599 	int count = 0;
3600 
3601 	stats_counter_bump(fmep->Tcallcount);
3602 	for (bp = itree_next_bubble(ep, NULL); bp;
3603 	    bp = itree_next_bubble(ep, bp)) {
3604 		if (bp->t != B_TO)
3605 			continue;
3606 		for (ap = itree_next_arrow(bp, NULL); ap;
3607 		    ap = itree_next_arrow(bp, ap)) {
3608 			/* check count of marks against K in the bubble */
3609 			if ((ap->arrowp->mark & mark) &&
3610 			    ++count >= bp->nork)
3611 				return (1);
3612 		}
3613 	}
3614 	return (0);
3615 }
3616 
3617 static int
3618 mark_arrows(struct fme *fmep, struct event *ep, int mark,
3619     unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
3620 {
3621 	struct bubble *bp;
3622 	struct arrowlist *ap;
3623 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3624 	unsigned long long my_delay;
3625 	enum fme_state result;
3626 	int retval = 0;
3627 
3628 	for (bp = itree_next_bubble(ep, NULL); bp;
3629 	    bp = itree_next_bubble(ep, bp)) {
3630 		if (bp->t != B_FROM)
3631 			continue;
3632 		stats_counter_bump(fmep->Marrowcount);
3633 		for (ap = itree_next_arrow(bp, NULL); ap;
3634 		    ap = itree_next_arrow(bp, ap)) {
3635 			struct event *ep2 = ap->arrowp->head->myevent;
3636 			/*
3637 			 * if we're clearing marks, we can avoid doing
3638 			 * all that work evaluating constraints.
3639 			 */
3640 			if (mark == 0) {
3641 				if (ap->arrowp->arrow_marked == 0)
3642 					continue;
3643 				ap->arrowp->arrow_marked = 0;
3644 				ap->arrowp->mark &= ~EFFECTS_COUNTER;
3645 				if (keep && (ep2->cached_state &
3646 				    (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
3647 					ep2->keep_in_tree = 1;
3648 				ep2->cached_state &=
3649 				    ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
3650 				(void) mark_arrows(fmep, ep2, mark, 0, NULL,
3651 				    keep);
3652 				continue;
3653 			}
3654 			ap->arrowp->arrow_marked = 1;
3655 			if (ep2->cached_state & REQMNTS_DISPROVED) {
3656 				indent();
3657 				out(O_ALTFP|O_VERB|O_NONL,
3658 				    "  ALREADY DISPROVED ");
3659 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3660 				out(O_ALTFP|O_VERB, NULL);
3661 				continue;
3662 			}
3663 			if (ep2->cached_state & WAIT_EFFECT) {
3664 				indent();
3665 				out(O_ALTFP|O_VERB|O_NONL,
3666 				    "  ALREADY EFFECTS WAIT ");
3667 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3668 				out(O_ALTFP|O_VERB, NULL);
3669 				continue;
3670 			}
3671 			if (ep2->cached_state & CREDIBLE_EFFECT) {
3672 				indent();
3673 				out(O_ALTFP|O_VERB|O_NONL,
3674 				    "  ALREADY EFFECTS CREDIBLE ");
3675 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3676 				out(O_ALTFP|O_VERB, NULL);
3677 				continue;
3678 			}
3679 			if ((ep2->cached_state & PARENT_WAIT) &&
3680 			    (mark & PARENT_WAIT)) {
3681 				indent();
3682 				out(O_ALTFP|O_VERB|O_NONL,
3683 				    "  ALREADY PARENT EFFECTS WAIT ");
3684 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3685 				out(O_ALTFP|O_VERB, NULL);
3686 				continue;
3687 			}
3688 			platform_set_payloadnvp(ep2->nvp);
3689 			if (checkconstraints(fmep, ap->arrowp) == 0) {
3690 				platform_set_payloadnvp(NULL);
3691 				indent();
3692 				out(O_ALTFP|O_VERB|O_NONL,
3693 				    "  CONSTRAINTS FAIL ");
3694 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3695 				out(O_ALTFP|O_VERB, NULL);
3696 				continue;
3697 			}
3698 			platform_set_payloadnvp(NULL);
3699 			ap->arrowp->mark |= EFFECTS_COUNTER;
3700 			if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
3701 				indent();
3702 				out(O_ALTFP|O_VERB|O_NONL,
3703 				    "  K-COUNT NOT YET MET ");
3704 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3705 				out(O_ALTFP|O_VERB, NULL);
3706 				continue;
3707 			}
3708 			ep2->cached_state &= ~PARENT_WAIT;
3709 			/*
3710 			 * if we've reached an ereport and no propagation time
3711 			 * is specified, use the Hesitate value
3712 			 */
3713 			if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
3714 			    ap->arrowp->maxdelay == 0ULL) {
3715 				out(O_ALTFP|O_VERB|O_NONL, "  default wait ");
3716 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3717 				out(O_ALTFP|O_VERB, NULL);
3718 				result = requirements_test(fmep, ep2, Hesitate,
3719 				    &my_delay);
3720 			} else {
3721 				result = requirements_test(fmep, ep2,
3722 				    at_latest_by + ap->arrowp->maxdelay,
3723 				    &my_delay);
3724 			}
3725 			if (result == FME_WAIT) {
3726 				retval = WAIT_EFFECT;
3727 				if (overall_delay > my_delay)
3728 					overall_delay = my_delay;
3729 				ep2->cached_state |= WAIT_EFFECT;
3730 				indent();
3731 				out(O_ALTFP|O_VERB|O_NONL, "  EFFECTS WAIT ");
3732 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3733 				out(O_ALTFP|O_VERB, NULL);
3734 				indent_push("  E");
3735 				if (mark_arrows(fmep, ep2, PARENT_WAIT,
3736 				    at_latest_by, &my_delay, 0) ==
3737 				    WAIT_EFFECT) {
3738 					retval = WAIT_EFFECT;
3739 					if (overall_delay > my_delay)
3740 						overall_delay = my_delay;
3741 				}
3742 				indent_pop();
3743 			} else if (result == FME_DISPROVED) {
3744 				indent();
3745 				out(O_ALTFP|O_VERB|O_NONL,
3746 				    "  EFFECTS DISPROVED ");
3747 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3748 				out(O_ALTFP|O_VERB, NULL);
3749 			} else {
3750 				ep2->cached_state |= mark;
3751 				indent();
3752 				if (mark == CREDIBLE_EFFECT)
3753 					out(O_ALTFP|O_VERB|O_NONL,
3754 					    "  EFFECTS CREDIBLE ");
3755 				else
3756 					out(O_ALTFP|O_VERB|O_NONL,
3757 					    "  PARENT EFFECTS WAIT ");
3758 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3759 				out(O_ALTFP|O_VERB, NULL);
3760 				indent_push("  E");
3761 				if (mark_arrows(fmep, ep2, mark, at_latest_by,
3762 				    &my_delay, 0) == WAIT_EFFECT) {
3763 					retval = WAIT_EFFECT;
3764 					if (overall_delay > my_delay)
3765 						overall_delay = my_delay;
3766 				}
3767 				indent_pop();
3768 			}
3769 		}
3770 	}
3771 	if (retval == WAIT_EFFECT)
3772 		*pdelay = overall_delay;
3773 	return (retval);
3774 }
3775 
3776 static enum fme_state
3777 effects_test(struct fme *fmep, struct event *fault_event,
3778     unsigned long long at_latest_by, unsigned long long *pdelay)
3779 {
3780 	struct event *error_event;
3781 	enum fme_state return_value = FME_CREDIBLE;
3782 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3783 	unsigned long long my_delay;
3784 
3785 	stats_counter_bump(fmep->Ecallcount);
3786 	indent_push("  E");
3787 	indent();
3788 	out(O_ALTFP|O_VERB|O_NONL, "->");
3789 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3790 	out(O_ALTFP|O_VERB, NULL);
3791 
3792 	if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
3793 	    &my_delay, 0) == WAIT_EFFECT) {
3794 		return_value = FME_WAIT;
3795 		if (overall_delay > my_delay)
3796 			overall_delay = my_delay;
3797 	}
3798 	for (error_event = fmep->observations;
3799 	    error_event; error_event = error_event->observations) {
3800 		indent();
3801 		out(O_ALTFP|O_VERB|O_NONL, " ");
3802 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
3803 		if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
3804 			if (error_event->cached_state &
3805 			    (PARENT_WAIT|WAIT_EFFECT)) {
3806 				out(O_ALTFP|O_VERB, " NOT YET triggered");
3807 				continue;
3808 			}
3809 			return_value = FME_DISPROVED;
3810 			out(O_ALTFP|O_VERB, " NOT triggered");
3811 			break;
3812 		} else {
3813 			out(O_ALTFP|O_VERB, " triggered");
3814 		}
3815 	}
3816 	if (return_value == FME_DISPROVED) {
3817 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
3818 	} else {
3819 		fault_event->keep_in_tree = 1;
3820 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
3821 	}
3822 
3823 	indent();
3824 	out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
3825 	    fme_state2str(return_value));
3826 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3827 	out(O_ALTFP|O_VERB, NULL);
3828 	indent_pop();
3829 	if (return_value == FME_WAIT)
3830 		*pdelay = overall_delay;
3831 	return (return_value);
3832 }
3833 
3834 static enum fme_state
3835 requirements_test(struct fme *fmep, struct event *ep,
3836     unsigned long long at_latest_by, unsigned long long *pdelay)
3837 {
3838 	int waiting_events;
3839 	int credible_events;
3840 	int deferred_events;
3841 	enum fme_state return_value = FME_CREDIBLE;
3842 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3843 	unsigned long long arrow_delay;
3844 	unsigned long long my_delay;
3845 	struct event *ep2;
3846 	struct bubble *bp;
3847 	struct arrowlist *ap;
3848 
3849 	if (ep->cached_state & REQMNTS_CREDIBLE) {
3850 		indent();
3851 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY CREDIBLE ");
3852 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3853 		out(O_ALTFP|O_VERB, NULL);
3854 		return (FME_CREDIBLE);
3855 	}
3856 	if (ep->cached_state & REQMNTS_DISPROVED) {
3857 		indent();
3858 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY DISPROVED ");
3859 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3860 		out(O_ALTFP|O_VERB, NULL);
3861 		return (FME_DISPROVED);
3862 	}
3863 	if (ep->cached_state & REQMNTS_WAIT) {
3864 		indent();
3865 		*pdelay = ep->cached_delay;
3866 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY WAIT ");
3867 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3868 		out(O_ALTFP|O_VERB|O_NONL, ", wait for: ");
3869 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3870 		out(O_ALTFP|O_VERB, NULL);
3871 		return (FME_WAIT);
3872 	}
3873 	stats_counter_bump(fmep->Rcallcount);
3874 	indent_push("  R");
3875 	indent();
3876 	out(O_ALTFP|O_VERB|O_NONL, "->");
3877 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3878 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
3879 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3880 	out(O_ALTFP|O_VERB, NULL);
3881 
3882 	if (ep->t == N_EREPORT) {
3883 		if (ep->count == 0) {
3884 			if (fmep->pull >= at_latest_by) {
3885 				return_value = FME_DISPROVED;
3886 			} else {
3887 				ep->cached_delay = *pdelay = at_latest_by;
3888 				return_value = FME_WAIT;
3889 			}
3890 		}
3891 
3892 		indent();
3893 		switch (return_value) {
3894 		case FME_CREDIBLE:
3895 			ep->cached_state |= REQMNTS_CREDIBLE;
3896 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE ");
3897 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3898 			break;
3899 		case FME_DISPROVED:
3900 			ep->cached_state |= REQMNTS_DISPROVED;
3901 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
3902 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3903 			break;
3904 		case FME_WAIT:
3905 			ep->cached_state |= REQMNTS_WAIT;
3906 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT ");
3907 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3908 			out(O_ALTFP|O_VERB|O_NONL, " to ");
3909 			ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3910 			break;
3911 		default:
3912 			out(O_DIE, "requirements_test: unexpected fme_state");
3913 			break;
3914 		}
3915 		out(O_ALTFP|O_VERB, NULL);
3916 		indent_pop();
3917 
3918 		return (return_value);
3919 	}
3920 
3921 	/* this event is not a report, descend the tree */
3922 	for (bp = itree_next_bubble(ep, NULL); bp;
3923 	    bp = itree_next_bubble(ep, bp)) {
3924 		int n;
3925 
3926 		if (bp->t != B_FROM)
3927 			continue;
3928 
3929 		n = bp->nork;
3930 
3931 		credible_events = 0;
3932 		waiting_events = 0;
3933 		deferred_events = 0;
3934 		arrow_delay = TIMEVAL_EVENTUALLY;
3935 		/*
3936 		 * n is -1 for 'A' so adjust it.
3937 		 * XXX just count up the arrows for now.
3938 		 */
3939 		if (n < 0) {
3940 			n = 0;
3941 			for (ap = itree_next_arrow(bp, NULL); ap;
3942 			    ap = itree_next_arrow(bp, ap))
3943 				n++;
3944 			indent();
3945 			out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n);
3946 		} else {
3947 			indent();
3948 			out(O_ALTFP|O_VERB, " Bubble N=%d", n);
3949 		}
3950 
3951 		if (n == 0)
3952 			continue;
3953 		if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) {
3954 			for (ap = itree_next_arrow(bp, NULL); ap;
3955 			    ap = itree_next_arrow(bp, ap)) {
3956 				ep2 = ap->arrowp->head->myevent;
3957 				platform_set_payloadnvp(ep2->nvp);
3958 				(void) checkconstraints(fmep, ap->arrowp);
3959 				if (!ap->arrowp->forever_false) {
3960 					/*
3961 					 * if all arrows are invalidated by the
3962 					 * constraints, then we should elide the
3963 					 * whole bubble to be consistant with
3964 					 * the tree creation time behaviour
3965 					 */
3966 					bp->mark |= BUBBLE_OK;
3967 					platform_set_payloadnvp(NULL);
3968 					break;
3969 				}
3970 				platform_set_payloadnvp(NULL);
3971 			}
3972 		}
3973 		for (ap = itree_next_arrow(bp, NULL); ap;
3974 		    ap = itree_next_arrow(bp, ap)) {
3975 			ep2 = ap->arrowp->head->myevent;
3976 			if (n <= credible_events)
3977 				break;
3978 
3979 			ap->arrowp->mark |= REQMNTS_COUNTER;
3980 			if (triggered(fmep, ep2, REQMNTS_COUNTER))
3981 				/* XXX adding max timevals! */
3982 				switch (requirements_test(fmep, ep2,
3983 				    at_latest_by + ap->arrowp->maxdelay,
3984 				    &my_delay)) {
3985 				case FME_DEFERRED:
3986 					deferred_events++;
3987 					break;
3988 				case FME_CREDIBLE:
3989 					credible_events++;
3990 					break;
3991 				case FME_DISPROVED:
3992 					break;
3993 				case FME_WAIT:
3994 					if (my_delay < arrow_delay)
3995 						arrow_delay = my_delay;
3996 					waiting_events++;
3997 					break;
3998 				default:
3999 					out(O_DIE,
4000 					"Bug in requirements_test.");
4001 				}
4002 			else
4003 				deferred_events++;
4004 		}
4005 		if (!(bp->mark & BUBBLE_OK) && waiting_events == 0) {
4006 			bp->mark |= BUBBLE_ELIDED;
4007 			continue;
4008 		}
4009 		indent();
4010 		out(O_ALTFP|O_VERB, " Credible: %d Waiting %d",
4011 		    credible_events + deferred_events, waiting_events);
4012 		if (credible_events + deferred_events + waiting_events < n) {
4013 			/* Can never meet requirements */
4014 			ep->cached_state |= REQMNTS_DISPROVED;
4015 			indent();
4016 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
4017 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4018 			out(O_ALTFP|O_VERB, NULL);
4019 			indent_pop();
4020 			return (FME_DISPROVED);
4021 		}
4022 		if (credible_events + deferred_events < n) {
4023 			/* will have to wait */
4024 			/* wait time is shortest known */
4025 			if (arrow_delay < overall_delay)
4026 				overall_delay = arrow_delay;
4027 			return_value = FME_WAIT;
4028 		} else if (credible_events < n) {
4029 			if (return_value != FME_WAIT)
4030 				return_value = FME_DEFERRED;
4031 		}
4032 	}
4033 
4034 	/*
4035 	 * don't mark as FME_DEFERRED. If this event isn't reached by another
4036 	 * path, then this will be considered FME_CREDIBLE. But if it is
4037 	 * reached by a different path so the K-count is met, then might
4038 	 * get overridden by FME_WAIT or FME_DISPROVED.
4039 	 */
4040 	if (return_value == FME_WAIT) {
4041 		ep->cached_state |= REQMNTS_WAIT;
4042 		ep->cached_delay = *pdelay = overall_delay;
4043 	} else if (return_value == FME_CREDIBLE) {
4044 		ep->cached_state |= REQMNTS_CREDIBLE;
4045 	}
4046 	indent();
4047 	out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ",
4048 	    fme_state2str(return_value));
4049 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4050 	out(O_ALTFP|O_VERB, NULL);
4051 	indent_pop();
4052 	return (return_value);
4053 }
4054 
4055 static enum fme_state
4056 causes_test(struct fme *fmep, struct event *ep,
4057     unsigned long long at_latest_by, unsigned long long *pdelay)
4058 {
4059 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4060 	unsigned long long my_delay;
4061 	int credible_results = 0;
4062 	int waiting_results = 0;
4063 	enum fme_state fstate;
4064 	struct event *tail_event;
4065 	struct bubble *bp;
4066 	struct arrowlist *ap;
4067 	int k = 1;
4068 
4069 	stats_counter_bump(fmep->Ccallcount);
4070 	indent_push("  C");
4071 	indent();
4072 	out(O_ALTFP|O_VERB|O_NONL, "->");
4073 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4074 	out(O_ALTFP|O_VERB, NULL);
4075 
4076 	for (bp = itree_next_bubble(ep, NULL); bp;
4077 	    bp = itree_next_bubble(ep, bp)) {
4078 		if (bp->t != B_TO)
4079 			continue;
4080 		k = bp->nork;	/* remember the K value */
4081 		for (ap = itree_next_arrow(bp, NULL); ap;
4082 		    ap = itree_next_arrow(bp, ap)) {
4083 			int do_not_follow = 0;
4084 
4085 			/*
4086 			 * if we get to the same event multiple times
4087 			 * only worry about the first one.
4088 			 */
4089 			if (ap->arrowp->tail->myevent->cached_state &
4090 			    CAUSES_TESTED) {
4091 				indent();
4092 				out(O_ALTFP|O_VERB|O_NONL,
4093 				    "  causes test already run for ");
4094 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4095 				    ap->arrowp->tail->myevent);
4096 				out(O_ALTFP|O_VERB, NULL);
4097 				continue;
4098 			}
4099 
4100 			/*
4101 			 * see if false constraint prevents us
4102 			 * from traversing this arrow
4103 			 */
4104 			platform_set_payloadnvp(ep->nvp);
4105 			if (checkconstraints(fmep, ap->arrowp) == 0)
4106 				do_not_follow = 1;
4107 			platform_set_payloadnvp(NULL);
4108 			if (do_not_follow) {
4109 				indent();
4110 				out(O_ALTFP|O_VERB|O_NONL,
4111 				    "  False arrow from ");
4112 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4113 				    ap->arrowp->tail->myevent);
4114 				out(O_ALTFP|O_VERB, NULL);
4115 				continue;
4116 			}
4117 
4118 			ap->arrowp->tail->myevent->cached_state |=
4119 			    CAUSES_TESTED;
4120 			tail_event = ap->arrowp->tail->myevent;
4121 			fstate = hypothesise(fmep, tail_event, at_latest_by,
4122 			    &my_delay);
4123 
4124 			switch (fstate) {
4125 			case FME_WAIT:
4126 				if (my_delay < overall_delay)
4127 					overall_delay = my_delay;
4128 				waiting_results++;
4129 				break;
4130 			case FME_CREDIBLE:
4131 				credible_results++;
4132 				break;
4133 			case FME_DISPROVED:
4134 				break;
4135 			default:
4136 				out(O_DIE, "Bug in causes_test");
4137 			}
4138 		}
4139 	}
4140 	/* compare against K */
4141 	if (credible_results + waiting_results < k) {
4142 		indent();
4143 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED ");
4144 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4145 		out(O_ALTFP|O_VERB, NULL);
4146 		indent_pop();
4147 		return (FME_DISPROVED);
4148 	}
4149 	if (waiting_results != 0) {
4150 		*pdelay = overall_delay;
4151 		indent();
4152 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT ");
4153 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4154 		out(O_ALTFP|O_VERB|O_NONL, " to ");
4155 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4156 		out(O_ALTFP|O_VERB, NULL);
4157 		indent_pop();
4158 		return (FME_WAIT);
4159 	}
4160 	indent();
4161 	out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE ");
4162 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4163 	out(O_ALTFP|O_VERB, NULL);
4164 	indent_pop();
4165 	return (FME_CREDIBLE);
4166 }
4167 
4168 static enum fme_state
4169 hypothesise(struct fme *fmep, struct event *ep,
4170     unsigned long long at_latest_by, unsigned long long *pdelay)
4171 {
4172 	enum fme_state rtr, otr;
4173 	unsigned long long my_delay;
4174 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4175 
4176 	stats_counter_bump(fmep->Hcallcount);
4177 	indent_push("  H");
4178 	indent();
4179 	out(O_ALTFP|O_VERB|O_NONL, "->");
4180 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4181 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
4182 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4183 	out(O_ALTFP|O_VERB, NULL);
4184 
4185 	rtr = requirements_test(fmep, ep, at_latest_by, &my_delay);
4186 	if ((rtr == FME_WAIT) && (my_delay < overall_delay))
4187 		overall_delay = my_delay;
4188 	if (rtr != FME_DISPROVED) {
4189 		if (is_problem(ep->t)) {
4190 			otr = effects_test(fmep, ep, at_latest_by, &my_delay);
4191 			if (otr != FME_DISPROVED) {
4192 				if (fmep->peek == 0 && ep->is_suspect == 0) {
4193 					ep->suspects = fmep->suspects;
4194 					ep->is_suspect = 1;
4195 					fmep->suspects = ep;
4196 					fmep->nsuspects++;
4197 				}
4198 			}
4199 		} else
4200 			otr = causes_test(fmep, ep, at_latest_by, &my_delay);
4201 		if ((otr == FME_WAIT) && (my_delay < overall_delay))
4202 			overall_delay = my_delay;
4203 		if ((otr != FME_DISPROVED) &&
4204 		    ((rtr == FME_WAIT) || (otr == FME_WAIT)))
4205 			*pdelay = overall_delay;
4206 	}
4207 	if (rtr == FME_DISPROVED) {
4208 		indent();
4209 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4210 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4211 		out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
4212 		indent_pop();
4213 		return (FME_DISPROVED);
4214 	}
4215 	if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
4216 		indent();
4217 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4218 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4219 		out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
4220 		indent_pop();
4221 		return (FME_DISPROVED);
4222 	}
4223 	if (otr == FME_DISPROVED) {
4224 		indent();
4225 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4226 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4227 		out(O_ALTFP|O_VERB, " (causes are not credible)");
4228 		indent_pop();
4229 		return (FME_DISPROVED);
4230 	}
4231 	if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
4232 		indent();
4233 		out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
4234 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4235 		out(O_ALTFP|O_VERB|O_NONL, " to ");
4236 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
4237 		out(O_ALTFP|O_VERB, NULL);
4238 		indent_pop();
4239 		return (FME_WAIT);
4240 	}
4241 	indent();
4242 	out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
4243 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4244 	out(O_ALTFP|O_VERB, NULL);
4245 	indent_pop();
4246 	return (FME_CREDIBLE);
4247 }
4248 
4249 /*
4250  * fme_istat_load -- reconstitute any persistent istats
4251  */
4252 void
4253 fme_istat_load(fmd_hdl_t *hdl)
4254 {
4255 	int sz;
4256 	char *sbuf;
4257 	char *ptr;
4258 
4259 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) {
4260 		out(O_ALTFP, "fme_istat_load: No stats");
4261 		return;
4262 	}
4263 
4264 	sbuf = alloca(sz);
4265 
4266 	fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz);
4267 
4268 	/*
4269 	 * pick apart the serialized stats
4270 	 *
4271 	 * format is:
4272 	 *	<class-name>, '@', <path>, '\0', <value>, '\0'
4273 	 * for example:
4274 	 *	"stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
4275 	 *
4276 	 * since this is parsing our own serialized data, any parsing issues
4277 	 * are fatal, so we check for them all with ASSERT() below.
4278 	 */
4279 	ptr = sbuf;
4280 	while (ptr < &sbuf[sz]) {
4281 		char *sepptr;
4282 		struct node *np;
4283 		int val;
4284 
4285 		sepptr = strchr(ptr, '@');
4286 		ASSERT(sepptr != NULL);
4287 		*sepptr = '\0';
4288 
4289 		/* construct the event */
4290 		np = newnode(T_EVENT, NULL, 0);
4291 		np->u.event.ename = newnode(T_NAME, NULL, 0);
4292 		np->u.event.ename->u.name.t = N_STAT;
4293 		np->u.event.ename->u.name.s = stable(ptr);
4294 		np->u.event.ename->u.name.it = IT_ENAME;
4295 		np->u.event.ename->u.name.last = np->u.event.ename;
4296 
4297 		ptr = sepptr + 1;
4298 		ASSERT(ptr < &sbuf[sz]);
4299 		ptr += strlen(ptr);
4300 		ptr++;	/* move past the '\0' separating path from value */
4301 		ASSERT(ptr < &sbuf[sz]);
4302 		ASSERT(isdigit(*ptr));
4303 		val = atoi(ptr);
4304 		ASSERT(val > 0);
4305 		ptr += strlen(ptr);
4306 		ptr++;	/* move past the final '\0' for this entry */
4307 
4308 		np->u.event.epname = pathstring2epnamenp(sepptr + 1);
4309 		ASSERT(np->u.event.epname != NULL);
4310 
4311 		istat_bump(np, val);
4312 		tree_free(np);
4313 	}
4314 
4315 	istat_save();
4316 }
4317