xref: /titanic_52/usr/src/cmd/fm/modules/common/eversholt/fme.c (revision 0b38a8bdfd75ac6144f9d462bb38d0c1b3f0ca50)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * fme.c -- fault management exercise module
27  *
28  * this module provides the simulated fault management exercise.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <strings.h>
37 #include <ctype.h>
38 #include <alloca.h>
39 #include <libnvpair.h>
40 #include <sys/fm/protocol.h>
41 #include <fm/fmd_api.h>
42 #include "alloc.h"
43 #include "out.h"
44 #include "stats.h"
45 #include "stable.h"
46 #include "literals.h"
47 #include "lut.h"
48 #include "tree.h"
49 #include "ptree.h"
50 #include "itree.h"
51 #include "ipath.h"
52 #include "fme.h"
53 #include "evnv.h"
54 #include "eval.h"
55 #include "config.h"
56 #include "platform.h"
57 
/* imported from eft.c... */
extern int Autoconvict;
extern char *Autoclose;
extern hrtime_t Hesitate;
extern nv_alloc_t Eft_nv_hdl;

/* fme under construction is global so we can free it on module abort */
static struct fme *Nfmep;

/*
 * reason code for the most recent failure to diagnose; when non-NULL it
 * is attached to the "undiagnosable" defect built in fme_restart().
 */
static const char *Undiag_reason;

/*
 * id handed to the next FME created by newfme().  fme_restart() pushes
 * it past the id of any FME it restores so ids are not reused.
 */
static int Nextid = 0;
70 
/*
 * list of fault management exercises underway
 *
 * FMElist is the head of the active list and EFMElist its tail (new
 * FMEs are appended there by fme_ready()).  ClosedFMEs is a separate
 * list that is torn down by fme_fini() and by fme_receive_report()
 * just before a new FME is started.
 */
static struct fme {
	struct fme *next;		/* next exercise */
	unsigned long long ull;		/* time when fme was created */
	int id;				/* FME id */
	struct cfgdata *cfgdata;	/* full configuration data */
	struct lut *eventtree;		/* propagation tree for this FME */
	/*
	 * The initial error report that created this FME is kept in
	 * two forms.  e0 points to the instance tree node and is used
	 * by fme_eval() as the starting point for the inference
	 * algorithm.  e0r is the event handle FMD passed to us when
	 * the ereport first arrived and is used when setting timers,
	 * which are always relative to the time of this initial
	 * report.
	 */
	struct event *e0;
	fmd_event_t *e0r;

	id_t    timer;			/* for setting an fmd time-out */
	id_t	htid;			/* for setting hesitation timer */

	struct event *ecurrent;		/* ereport under consideration */
	struct event *suspects;		/* current suspect list */
	struct event *psuspects;	/* previous suspect list */
	int nsuspects;			/* count of suspects */
	int nonfault;			/* zero if all suspects T_FAULT */
	int posted_suspects;		/* true if we've posted a diagnosis */
	int hesitated;			/* true if we hesitated */
	int uniqobs;			/* number of unique events observed */
	int peek;			/* just peeking, don't track suspects */
	enum fme_state {
		FME_NOTHING = 5000,	/* not evaluated yet */
		FME_WAIT,		/* need to wait for more info */
		FME_CREDIBLE,		/* suspect list is credible */
		FME_DISPROVED		/* no valid suspects found */
	} state;

	unsigned long long pull;	/* time passed since created */
	unsigned long long wull;	/* wait until this time for re-eval */
	struct event *observations;	/* observation list */
	struct lut *globals;		/* values of global variables */
	/* fmd interfacing */
	fmd_hdl_t *hdl;			/* handle for talking with fmd */
	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
	/* stats -- per-FME counters, created in fme_ready() */
	struct stats *Rcount;		/* ereports received */
	struct stats *Hcallcount;	/* calls to hypothesise() */
	struct stats *Rcallcount;	/* calls to requirements_test() */
	struct stats *Ccallcount;	/* calls to causes_test() */
	struct stats *Ecallcount;	/* calls to effects_test() */
	struct stats *Tcallcount;	/* calls to triggered() */
	struct stats *Marrowcount;	/* arrows marked by mark_arrows() */
	struct stats *diags;		/* suspect lists diagnosed */
} *FMElist, *EFMElist, *ClosedFMEs;
126 
/*
 * list of fmd cases that fme_restart() was unable to reconstitute.
 * entries are pushed on the front by fme_restart() and freed by
 * fme_fini().
 */
static struct case_list {
	fmd_case_t *fmcase;
	struct case_list *next;
} *Undiagablecaselist;
131 
/* forward declarations for routines used before they are defined */
static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
	unsigned long long at_latest_by, unsigned long long *pdelay,
	struct arrow *arrowp);
static struct node *eventprop_lookup(struct event *ep, const char *propname);
static struct node *pathstring2epnamenp(char *path);
static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep);
static void restore_suspects(struct fme *fmep);
static void save_suspects(struct fme *fmep);
static void destroy_fme(struct fme *f);
static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
    const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
144 
145 static struct fme *
146 alloc_fme(void)
147 {
148 	struct fme *fmep;
149 
150 	fmep = MALLOC(sizeof (*fmep));
151 	bzero(fmep, sizeof (*fmep));
152 	return (fmep);
153 }
154 
155 /*
156  * fme_ready -- called when all initialization of the FME (except for
157  *	stats) has completed successfully.  Adds the fme to global lists
158  *	and establishes its stats.
159  */
160 static struct fme *
161 fme_ready(struct fme *fmep)
162 {
163 	char nbuf[100];
164 
165 	Nfmep = NULL;	/* don't need to free this on module abort now */
166 
167 	if (EFMElist) {
168 		EFMElist->next = fmep;
169 		EFMElist = fmep;
170 	} else
171 		FMElist = EFMElist = fmep;
172 
173 	(void) sprintf(nbuf, "fme%d.Rcount", fmep->id);
174 	fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
175 	(void) sprintf(nbuf, "fme%d.Hcall", fmep->id);
176 	fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1);
177 	(void) sprintf(nbuf, "fme%d.Rcall", fmep->id);
178 	fmep->Rcallcount = stats_new_counter(nbuf,
179 	    "calls to requirements_test()", 1);
180 	(void) sprintf(nbuf, "fme%d.Ccall", fmep->id);
181 	fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1);
182 	(void) sprintf(nbuf, "fme%d.Ecall", fmep->id);
183 	fmep->Ecallcount =
184 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
185 	(void) sprintf(nbuf, "fme%d.Tcall", fmep->id);
186 	fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
187 	(void) sprintf(nbuf, "fme%d.Marrow", fmep->id);
188 	fmep->Marrowcount = stats_new_counter(nbuf,
189 	    "arrows marked by mark_arrows()", 1);
190 	(void) sprintf(nbuf, "fme%d.diags", fmep->id);
191 	fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
192 
193 	out(O_ALTFP|O_VERB2, "newfme: config snapshot contains...");
194 	config_print(O_ALTFP|O_VERB2, fmep->cfgdata->cooked);
195 
196 	return (fmep);
197 }
198 
199 static struct fme *
200 newfme(const char *e0class, const struct ipath *e0ipp)
201 {
202 	struct cfgdata *cfgdata;
203 
204 	if ((cfgdata = config_snapshot()) == NULL) {
205 		out(O_ALTFP, "newfme: NULL configuration");
206 		Undiag_reason = UD_NOCONF;
207 		return (NULL);
208 	}
209 
210 	Nfmep = alloc_fme();
211 
212 	Nfmep->id = Nextid++;
213 	Nfmep->cfgdata = cfgdata;
214 	Nfmep->posted_suspects = 0;
215 	Nfmep->uniqobs = 0;
216 	Nfmep->state = FME_NOTHING;
217 	Nfmep->pull = 0ULL;
218 
219 	Nfmep->fmcase = NULL;
220 	Nfmep->hdl = NULL;
221 
222 	if ((Nfmep->eventtree = itree_create(cfgdata->cooked)) == NULL) {
223 		out(O_ALTFP, "newfme: NULL instance tree");
224 		Undiag_reason = UD_INSTFAIL;
225 		config_free(cfgdata);
226 		FREE(Nfmep);
227 		Nfmep = NULL;
228 		return (NULL);
229 	}
230 
231 	itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);
232 
233 	if ((Nfmep->e0 =
234 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
235 		out(O_ALTFP, "newfme: e0 not in instance tree");
236 		Undiag_reason = UD_BADEVENTI;
237 		itree_free(Nfmep->eventtree);
238 		config_free(cfgdata);
239 		FREE(Nfmep);
240 		Nfmep = NULL;
241 		return (NULL);
242 	}
243 
244 	return (fme_ready(Nfmep));
245 }
246 
247 void
248 fme_fini(void)
249 {
250 	struct fme *sfp, *fp;
251 	struct case_list *ucasep, *nextcasep;
252 
253 	ucasep = Undiagablecaselist;
254 	while (ucasep != NULL) {
255 		nextcasep = ucasep->next;
256 		FREE(ucasep);
257 		ucasep = nextcasep;
258 	}
259 	Undiagablecaselist = NULL;
260 
261 	/* clean up closed fmes */
262 	fp = ClosedFMEs;
263 	while (fp != NULL) {
264 		sfp = fp->next;
265 		destroy_fme(fp);
266 		fp = sfp;
267 	}
268 	ClosedFMEs = NULL;
269 
270 	fp = FMElist;
271 	while (fp != NULL) {
272 		sfp = fp->next;
273 		destroy_fme(fp);
274 		fp = sfp;
275 	}
276 	FMElist = EFMElist = NULL;
277 
278 	/* if we were in the middle of creating an fme, free it now */
279 	if (Nfmep) {
280 		destroy_fme(Nfmep);
281 		Nfmep = NULL;
282 	}
283 }
284 
285 /*
286  * Allocated space for a buffer name.  20 bytes allows for
287  * a ridiculous 9,999,999 unique observations.
288  */
289 #define	OBBUFNMSZ 20
290 
291 /*
292  *  serialize_observation
293  *
294  *  Create a recoverable version of the current observation
295  *  (f->ecurrent).  We keep a serialized version of each unique
296  *  observation in order that we may resume correctly the fme in the
297  *  correct state if eft or fmd crashes and we're restarted.
298  */
299 static void
300 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
301 {
302 	size_t pkdlen;
303 	char tmpbuf[OBBUFNMSZ];
304 	char *pkd = NULL;
305 	char *estr;
306 
307 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
308 	estr = ipath2str(cls, ipp);
309 	fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
310 	fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
311 	    strlen(estr) + 1);
312 	FREE(estr);
313 
314 	if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
315 		(void) snprintf(tmpbuf,
316 		    OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
317 		if (nvlist_xpack(fp->ecurrent->nvp,
318 		    &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
319 			out(O_DIE|O_SYS, "pack of observed nvl failed");
320 		fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
321 		fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
322 		FREE(pkd);
323 	}
324 
325 	fp->uniqobs++;
326 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
327 	    sizeof (fp->uniqobs));
328 }
329 
330 /*
331  *  init_fme_bufs -- We keep several bits of state about an fme for
332  *	use if eft or fmd crashes and we're restarted.
333  */
334 static void
335 init_fme_bufs(struct fme *fp)
336 {
337 	size_t cfglen = fp->cfgdata->nextfree - fp->cfgdata->begin;
338 
339 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_CFGLEN, sizeof (cfglen));
340 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_CFGLEN, (void *)&cfglen,
341 	    sizeof (cfglen));
342 	if (cfglen != 0) {
343 		fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_CFG, cfglen);
344 		fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_CFG,
345 		    fp->cfgdata->begin, cfglen);
346 	}
347 
348 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
349 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
350 	    sizeof (fp->pull));
351 
352 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
353 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
354 	    sizeof (fp->id));
355 
356 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
357 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
358 	    sizeof (fp->uniqobs));
359 
360 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
361 	    sizeof (fp->posted_suspects));
362 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
363 	    (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
364 }
365 
366 static void
367 destroy_fme_bufs(struct fme *fp)
368 {
369 	char tmpbuf[OBBUFNMSZ];
370 	int o;
371 
372 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
373 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
374 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
375 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
376 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
377 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);
378 
379 	for (o = 0; o < fp->uniqobs; o++) {
380 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
381 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
382 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
383 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
384 	}
385 }
386 
387 /*
388  * reconstitute_observations -- convert a case's serialized observations
389  *	back into struct events.  Returns zero if all observations are
390  *	successfully reconstituted.
391  */
392 static int
393 reconstitute_observations(struct fme *fmep)
394 {
395 	struct event *ep;
396 	struct node *epnamenp = NULL;
397 	size_t pkdlen;
398 	char *pkd = NULL;
399 	char *tmpbuf = alloca(OBBUFNMSZ);
400 	char *sepptr;
401 	char *estr;
402 	int ocnt;
403 	int elen;
404 
405 	for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
406 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
407 		elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
408 		if (elen == 0) {
409 			out(O_ALTFP,
410 			    "reconstitute_observation: no %s buffer found.",
411 			    tmpbuf);
412 			Undiag_reason = UD_MISSINGOBS;
413 			break;
414 		}
415 
416 		estr = MALLOC(elen);
417 		fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
418 		sepptr = strchr(estr, '@');
419 		if (sepptr == NULL) {
420 			out(O_ALTFP,
421 			    "reconstitute_observation: %s: "
422 			    "missing @ separator in %s.",
423 			    tmpbuf, estr);
424 			Undiag_reason = UD_MISSINGPATH;
425 			FREE(estr);
426 			break;
427 		}
428 
429 		*sepptr = '\0';
430 		if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
431 			out(O_ALTFP,
432 			    "reconstitute_observation: %s: "
433 			    "trouble converting path string \"%s\" "
434 			    "to internal representation.",
435 			    tmpbuf, sepptr + 1);
436 			Undiag_reason = UD_MISSINGPATH;
437 			FREE(estr);
438 			break;
439 		}
440 
441 		/* construct the event */
442 		ep = itree_lookup(fmep->eventtree,
443 		    stable(estr), ipath(epnamenp));
444 		if (ep == NULL) {
445 			out(O_ALTFP,
446 			    "reconstitute_observation: %s: "
447 			    "lookup of  \"%s\" in itree failed.",
448 			    tmpbuf, ipath2str(estr, ipath(epnamenp)));
449 			Undiag_reason = UD_BADOBS;
450 			tree_free(epnamenp);
451 			FREE(estr);
452 			break;
453 		}
454 		tree_free(epnamenp);
455 
456 		/*
457 		 * We may or may not have a saved nvlist for the observation
458 		 */
459 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
460 		pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
461 		if (pkdlen != 0) {
462 			pkd = MALLOC(pkdlen);
463 			fmd_buf_read(fmep->hdl,
464 			    fmep->fmcase, tmpbuf, pkd, pkdlen);
465 			if (nvlist_xunpack(pkd,
466 			    pkdlen, &ep->nvp, &Eft_nv_hdl) != 0)
467 				out(O_DIE|O_SYS, "pack of observed nvl failed");
468 			FREE(pkd);
469 		}
470 
471 		if (ocnt == 0)
472 			fmep->e0 = ep;
473 
474 		FREE(estr);
475 		fmep->ecurrent = ep;
476 		ep->count++;
477 
478 		/* link it into list of observations seen */
479 		ep->observations = fmep->observations;
480 		fmep->observations = ep;
481 	}
482 
483 	if (ocnt == fmep->uniqobs) {
484 		(void) fme_ready(fmep);
485 		return (0);
486 	}
487 
488 	return (1);
489 }
490 
491 /*
492  * restart_fme -- called during eft initialization.  Reconstitutes
493  *	an in-progress fme.
494  */
495 void
496 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress)
497 {
498 	nvlist_t *defect;
499 	struct case_list *bad;
500 	struct fme *fmep;
501 	struct cfgdata *cfgdata = NULL;
502 	size_t rawsz;
503 
504 	fmep = alloc_fme();
505 	fmep->fmcase = inprogress;
506 	fmep->hdl = hdl;
507 
508 	if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) {
509 		out(O_ALTFP, "restart_fme: No config data");
510 		Undiag_reason = UD_MISSINGINFO;
511 		goto badcase;
512 	}
513 	fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz,
514 	    sizeof (size_t));
515 
516 	if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) {
517 		out(O_ALTFP, "restart_fme: No event zero");
518 		Undiag_reason = UD_MISSINGZERO;
519 		goto badcase;
520 	}
521 
522 	cfgdata = MALLOC(sizeof (struct cfgdata));
523 	cfgdata->cooked = NULL;
524 	cfgdata->devcache = NULL;
525 	cfgdata->cpucache = NULL;
526 	cfgdata->refcnt = 1;
527 
528 	if (rawsz > 0) {
529 		if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) {
530 			out(O_ALTFP, "restart_fme: Config data size mismatch");
531 			Undiag_reason = UD_CFGMISMATCH;
532 			goto badcase;
533 		}
534 		cfgdata->begin = MALLOC(rawsz);
535 		cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz;
536 		fmd_buf_read(hdl,
537 		    inprogress, WOBUF_CFG, cfgdata->begin, rawsz);
538 	} else {
539 		cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL;
540 	}
541 	fmep->cfgdata = cfgdata;
542 
543 	config_cook(cfgdata);
544 	if ((fmep->eventtree = itree_create(cfgdata->cooked)) == NULL) {
545 		/* case not properly saved or irretrievable */
546 		out(O_ALTFP, "restart_fme: NULL instance tree");
547 		Undiag_reason = UD_INSTFAIL;
548 		goto badcase;
549 	}
550 
551 	itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree);
552 
553 	if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
554 		out(O_ALTFP, "restart_fme: no saved wait time");
555 		Undiag_reason = UD_MISSINGINFO;
556 		goto badcase;
557 	} else {
558 		fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
559 		    sizeof (fmep->pull));
560 	}
561 
562 	if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) {
563 		out(O_ALTFP, "restart_fme: no saved posted status");
564 		Undiag_reason = UD_MISSINGINFO;
565 		goto badcase;
566 	} else {
567 		fmd_buf_read(hdl, inprogress, WOBUF_POSTD,
568 		    (void *)&fmep->posted_suspects,
569 		    sizeof (fmep->posted_suspects));
570 	}
571 
572 	if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) {
573 		out(O_ALTFP, "restart_fme: no saved id");
574 		Undiag_reason = UD_MISSINGINFO;
575 		goto badcase;
576 	} else {
577 		fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id,
578 		    sizeof (fmep->id));
579 	}
580 	if (Nextid <= fmep->id)
581 		Nextid = fmep->id + 1;
582 
583 	if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
584 		out(O_ALTFP, "restart_fme: no count of observations");
585 		Undiag_reason = UD_MISSINGINFO;
586 		goto badcase;
587 	} else {
588 		fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
589 		    (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
590 	}
591 
592 	if (reconstitute_observations(fmep) != 0)
593 		goto badcase;
594 
595 	/* give the diagnosis algorithm a shot at the new FME state */
596 	fme_eval(fmep, NULL);
597 	return;
598 
599 badcase:
600 	if (fmep->eventtree != NULL)
601 		itree_free(fmep->eventtree);
602 	config_free(cfgdata);
603 	destroy_fme_bufs(fmep);
604 	FREE(fmep);
605 
606 	/*
607 	 * Since we're unable to restart the case, add it to the undiagable
608 	 * list and solve and close it as appropriate.
609 	 */
610 	bad = MALLOC(sizeof (struct case_list));
611 	bad->next = NULL;
612 
613 	if (Undiagablecaselist != NULL)
614 		bad->next = Undiagablecaselist;
615 	Undiagablecaselist = bad;
616 	bad->fmcase = inprogress;
617 
618 	out(O_ALTFP, "[case %s (unable to restart), ",
619 	    fmd_case_uuid(hdl, bad->fmcase));
620 
621 	if (fmd_case_solved(hdl, bad->fmcase)) {
622 		out(O_ALTFP, "already solved, ");
623 	} else {
624 		out(O_ALTFP, "solving, ");
625 		defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
626 		    NULL, NULL, NULL);
627 		if (Undiag_reason != NULL)
628 			(void) nvlist_add_string(defect,
629 			    UNDIAG_REASON, Undiag_reason);
630 		fmd_case_add_suspect(hdl, bad->fmcase, defect);
631 		fmd_case_solve(hdl, bad->fmcase);
632 	}
633 
634 	if (fmd_case_closed(hdl, bad->fmcase)) {
635 		out(O_ALTFP, "already closed ]");
636 	} else {
637 		out(O_ALTFP, "closing ]");
638 		fmd_case_close(hdl, bad->fmcase);
639 	}
640 }
641 
642 void
643 destroy_fme(struct fme *f)
644 {
645 	stats_delete(f->Rcount);
646 	stats_delete(f->Hcallcount);
647 	stats_delete(f->Rcallcount);
648 	stats_delete(f->Ccallcount);
649 	stats_delete(f->Ecallcount);
650 	stats_delete(f->Tcallcount);
651 	stats_delete(f->Marrowcount);
652 	stats_delete(f->diags);
653 
654 	itree_free(f->eventtree);
655 	config_free(f->cfgdata);
656 	FREE(f);
657 }
658 
659 static const char *
660 fme_state2str(enum fme_state s)
661 {
662 	switch (s) {
663 	case FME_NOTHING:	return ("NOTHING");
664 	case FME_WAIT:		return ("WAIT");
665 	case FME_CREDIBLE:	return ("CREDIBLE");
666 	case FME_DISPROVED:	return ("DISPROVED");
667 	default:		return ("UNKNOWN");
668 	}
669 }
670 
671 static int
672 is_problem(enum nametype t)
673 {
674 	return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
675 }
676 
677 static int
678 is_fault(enum nametype t)
679 {
680 	return (t == N_FAULT);
681 }
682 
683 static int
684 is_defect(enum nametype t)
685 {
686 	return (t == N_DEFECT);
687 }
688 
689 static int
690 is_upset(enum nametype t)
691 {
692 	return (t == N_UPSET);
693 }
694 
695 /*ARGSUSED*/
696 static void
697 clear_causes_tested(struct event *lhs, struct event *ep, void *arg)
698 {
699 	struct bubble *bp;
700 	struct arrowlist *ap;
701 
702 	for (bp = itree_next_bubble(ep, NULL); bp;
703 	    bp = itree_next_bubble(ep, bp)) {
704 		if (bp->t != B_FROM)
705 			continue;
706 		for (ap = itree_next_arrow(bp, NULL); ap;
707 		    ap = itree_next_arrow(bp, ap))
708 			ap->arrowp->causes_tested = 0;
709 	}
710 }
711 
712 /*
713  * call this function with initcode set to 0 to initialize cycle tracking
714  */
715 static void
716 initialize_cycles(struct fme *fmep)
717 {
718 	lut_walk(fmep->eventtree, (lut_cb)clear_causes_tested, NULL);
719 }
720 
721 static void
722 fme_print(int flags, struct fme *fmep)
723 {
724 	struct event *ep;
725 
726 	out(flags, "Fault Management Exercise %d", fmep->id);
727 	out(flags, "\t       State: %s", fme_state2str(fmep->state));
728 	out(flags|O_NONL, "\t  Start time: ");
729 	ptree_timeval(flags|O_NONL, &fmep->ull);
730 	out(flags, NULL);
731 	if (fmep->wull) {
732 		out(flags|O_NONL, "\t   Wait time: ");
733 		ptree_timeval(flags|O_NONL, &fmep->wull);
734 		out(flags, NULL);
735 	}
736 	out(flags|O_NONL, "\t          E0: ");
737 	if (fmep->e0)
738 		itree_pevent_brief(flags|O_NONL, fmep->e0);
739 	else
740 		out(flags|O_NONL, "NULL");
741 	out(flags, NULL);
742 	out(flags|O_NONL, "\tObservations:");
743 	for (ep = fmep->observations; ep; ep = ep->observations) {
744 		out(flags|O_NONL, " ");
745 		itree_pevent_brief(flags|O_NONL, ep);
746 	}
747 	out(flags, NULL);
748 	out(flags|O_NONL, "\tSuspect list:");
749 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
750 		out(flags|O_NONL, " ");
751 		itree_pevent_brief(flags|O_NONL, ep);
752 	}
753 	out(flags, NULL);
754 	out(flags|O_VERB2, "\t        Tree:");
755 	itree_ptree(flags|O_VERB2, fmep->eventtree);
756 }
757 
758 static struct node *
759 pathstring2epnamenp(char *path)
760 {
761 	char *sep = "/";
762 	struct node *ret;
763 	char *ptr;
764 
765 	if ((ptr = strtok(path, sep)) == NULL)
766 		out(O_DIE, "pathstring2epnamenp: invalid empty class");
767 
768 	ret = tree_iname(stable(ptr), NULL, 0);
769 
770 	while ((ptr = strtok(NULL, sep)) != NULL)
771 		ret = tree_name_append(ret,
772 		    tree_iname(stable(ptr), NULL, 0));
773 
774 	return (ret);
775 }
776 
777 /*
778  * for a given upset sp, increment the corresponding SERD engine.  if the
779  * SERD engine trips, return the ename and ipp of the resulting ereport.
780  * returns true if engine tripped and *enamep and *ippp were filled in.
781  */
782 static int
783 serd_eval(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase,
784 	struct event *sp, const char **enamep, const struct ipath **ippp)
785 {
786 	struct node *serdinst;
787 	char *serdname;
788 
789 	ASSERT(sp->t == N_UPSET);
790 	ASSERT(ffep != NULL);
791 
792 	/*
793 	 * obtain instanced SERD engine from the upset sp.  from this
794 	 * derive serdname, the string used to identify the SERD engine.
795 	 */
796 	serdinst = eventprop_lookup(sp, L_engine);
797 
798 	if (serdinst == NULL)
799 		return (NULL);
800 
801 	serdname = ipath2str(serdinst->u.stmt.np->u.event.ename->u.name.s,
802 	    ipath(serdinst->u.stmt.np->u.event.epname));
803 
804 	if (!fmd_serd_exists(hdl, serdname)) {
805 		struct node *nN, *nT;
806 
807 		/* no SERD engine yet, so create it */
808 		nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N, NULL);
809 		nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T, NULL);
810 
811 		ASSERT(nN->t == T_NUM);
812 		ASSERT(nT->t == T_TIMEVAL);
813 
814 		fmd_serd_create(hdl, serdname, (uint_t)nN->u.ull,
815 		    (hrtime_t)nT->u.ull);
816 	}
817 
818 
819 	/*
820 	 * increment SERD engine.  if engine fires, reset serd
821 	 * engine and return trip_strcode
822 	 */
823 	if (fmd_serd_record(hdl, serdname, ffep)) {
824 		struct node *tripinst = lut_lookup(serdinst->u.stmt.lutp,
825 		    (void *)L_trip, NULL);
826 
827 		ASSERT(tripinst != NULL);
828 
829 		*enamep = tripinst->u.event.ename->u.name.s;
830 		*ippp = ipath(tripinst->u.event.epname);
831 
832 		fmd_case_add_serd(hdl, fmcase, serdname);
833 		fmd_serd_reset(hdl, serdname);
834 		out(O_ALTFP|O_NONL, "[engine fired: %s, sending: ", serdname);
835 		ipath_print(O_ALTFP|O_NONL, *enamep, *ippp);
836 		out(O_ALTFP, "]");
837 
838 		FREE(serdname);
839 		return (1);
840 	}
841 
842 	FREE(serdname);
843 	return (0);
844 }
845 
846 /*
847  * search a suspect list for upsets.  feed each upset to serd_eval() and
848  * build up tripped[], an array of ereports produced by the firing of
849  * any SERD engines.  then feed each ereport back into
850  * fme_receive_report().
851  *
852  * returns ntrip, the number of these ereports produced.
853  */
854 static int
855 upsets_eval(struct fme *fmep, fmd_event_t *ffep)
856 {
857 	/* we build an array of tripped ereports that we send ourselves */
858 	struct {
859 		const char *ename;
860 		const struct ipath *ipp;
861 	} *tripped;
862 	struct event *sp;
863 	int ntrip, nupset, i;
864 
865 	/*
866 	 * we avoid recursion by calling fme_receive_report() at the end of
867 	 * this function with a NULL ffep
868 	 */
869 	if (ffep == NULL)
870 		return (0);
871 
872 	/*
873 	 * count the number of upsets to determine the upper limit on
874 	 * expected trip ereport strings.  remember that one upset can
875 	 * lead to at most one ereport.
876 	 */
877 	nupset = 0;
878 	for (sp = fmep->suspects; sp; sp = sp->suspects) {
879 		if (sp->t == N_UPSET)
880 			nupset++;
881 	}
882 
883 	if (nupset == 0)
884 		return (0);
885 
886 	/*
887 	 * get to this point if we have upsets and expect some trip
888 	 * ereports
889 	 */
890 	tripped = alloca(sizeof (*tripped) * nupset);
891 	bzero((void *)tripped, sizeof (*tripped) * nupset);
892 
893 	ntrip = 0;
894 	for (sp = fmep->suspects; sp; sp = sp->suspects)
895 		if (sp->t == N_UPSET &&
896 		    serd_eval(fmep->hdl, ffep, fmep->fmcase, sp,
897 			    &tripped[ntrip].ename, &tripped[ntrip].ipp))
898 			ntrip++;
899 
900 	for (i = 0; i < ntrip; i++)
901 		fme_receive_report(fmep->hdl, NULL,
902 		    tripped[i].ename, tripped[i].ipp, NULL);
903 
904 	return (ntrip);
905 }
906 
907 /*
908  * fme_receive_external_report -- call when an external ereport comes in
909  *
910  * this routine just converts the relevant information from the ereport
911  * into a format used internally and passes it on to fme_receive_report().
912  */
913 void
914 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
915     const char *eventstring)
916 {
917 	struct node *epnamenp = platform_getpath(nvl);
918 	const struct ipath *ipp;
919 
920 	/*
921 	 * XFILE: If we ended up without a path, it's an X-file.
922 	 * For now, use our undiagnosable interface.
923 	 */
924 	if (epnamenp == NULL) {
925 		out(O_ALTFP, "XFILE: Unable to get path from ereport");
926 		Undiag_reason = UD_NOPATH;
927 		publish_undiagnosable(hdl, ffep);
928 		return;
929 	}
930 
931 	ipp = ipath(epnamenp);
932 	tree_free(epnamenp);
933 	fme_receive_report(hdl, ffep, stable(eventstring), ipp, nvl);
934 }
935 
936 static void
937 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
938     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
939 {
940 	struct event *ep;
941 	struct fme *fmep = NULL;
942 	struct fme *ofmep, *svfmep;
943 	int matched = 0;
944 
945 	out(O_ALTFP|O_NONL, "fme_receive_report: ");
946 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
947 	out(O_ALTFP|O_STAMP, NULL);
948 
949 	/* decide which FME it goes to */
950 	for (fmep = FMElist; fmep; fmep = fmep->next) {
951 		int prev_verbose;
952 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
953 		enum fme_state state;
954 
955 		/* look up event in event tree for this FME */
956 		if ((ep = itree_lookup(fmep->eventtree,
957 		    eventstring, ipp)) == NULL)
958 			continue;
959 
960 		/* note observation */
961 		fmep->ecurrent = ep;
962 		if (ep->count++ == 0) {
963 			/* link it into list of observations seen */
964 			ep->observations = fmep->observations;
965 			fmep->observations = ep;
966 			ep->nvp = evnv_dupnvl(nvl);
967 		}
968 
969 		/* tell hypothesise() not to mess with suspect list */
970 		fmep->peek = 1;
971 
972 		/* don't want this to be verbose (unless Debug is set) */
973 		prev_verbose = Verbose;
974 		if (Debug == 0)
975 			Verbose = 0;
976 
977 		initialize_cycles(fmep);
978 		state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay, NULL);
979 
980 		fmep->peek = 0;
981 
982 		/* put verbose flag back */
983 		Verbose = prev_verbose;
984 
985 		if (state != FME_DISPROVED) {
986 			/* found an FME that explains the ereport */
987 			matched++;
988 			out(O_ALTFP|O_NONL, "[");
989 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
990 			out(O_ALTFP, " explained by FME%d]", fmep->id);
991 
992 			if (ep->count == 1)
993 				serialize_observation(fmep, eventstring, ipp);
994 
995 			if (ffep)
996 				fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
997 
998 			stats_counter_bump(fmep->Rcount);
999 
1000 			/* re-eval FME */
1001 			fme_eval(fmep, ffep);
1002 		} else {
1003 
1004 			/* not a match, undo noting of observation */
1005 			fmep->ecurrent = NULL;
1006 			if (--ep->count == 0) {
1007 				/* unlink it from observations */
1008 				fmep->observations = ep->observations;
1009 				ep->observations = NULL;
1010 				nvlist_free(ep->nvp);
1011 				ep->nvp = NULL;
1012 			}
1013 		}
1014 	}
1015 
1016 	if (matched)
1017 		return;	/* explained by at least one existing FME */
1018 
1019 	/* clean up closed fmes */
1020 	ofmep = ClosedFMEs;
1021 	while (ofmep != NULL) {
1022 		svfmep = ofmep->next;
1023 		destroy_fme(ofmep);
1024 		ofmep = svfmep;
1025 	}
1026 	ClosedFMEs = NULL;
1027 
1028 	/* start a new FME */
1029 	if ((fmep = newfme(eventstring, ipp)) == NULL) {
1030 		out(O_ALTFP|O_NONL, "[");
1031 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1032 		out(O_ALTFP, " CANNOT DIAGNOSE]");
1033 		publish_undiagnosable(hdl, ffep);
1034 		return;
1035 	}
1036 
1037 	/* open a case */
1038 	fmep->fmcase = fmd_case_open(hdl, NULL);
1039 	fmep->hdl = hdl;
1040 	init_fme_bufs(fmep);
1041 
1042 	out(O_ALTFP|O_NONL, "[");
1043 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1044 	out(O_ALTFP, " created FME%d, case %s]", fmep->id,
1045 	    fmd_case_uuid(hdl, fmep->fmcase));
1046 
1047 	ep = fmep->e0;
1048 	ASSERT(ep != NULL);
1049 
1050 	/* note observation */
1051 	fmep->ecurrent = ep;
1052 	if (ep->count++ == 0) {
1053 		/* link it into list of observations seen */
1054 		ep->observations = fmep->observations;
1055 		fmep->observations = ep;
1056 		ep->nvp = evnv_dupnvl(nvl);
1057 		serialize_observation(fmep, eventstring, ipp);
1058 	}
1059 
1060 	stats_counter_bump(fmep->Rcount);
1061 
1062 	if (ffep) {
1063 		fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1064 		fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
1065 		fmep->e0r = ffep;
1066 	}
1067 
1068 	/* give the diagnosis algorithm a shot at the new FME state */
1069 	fme_eval(fmep, ffep);
1070 }
1071 
1072 void
1073 fme_status(int flags)
1074 {
1075 	struct fme *fmep;
1076 
1077 	if (FMElist == NULL) {
1078 		out(flags, "No fault management exercises underway.");
1079 		return;
1080 	}
1081 
1082 	for (fmep = FMElist; fmep; fmep = fmep->next)
1083 		fme_print(flags, fmep);
1084 }
1085 
1086 /*
1087  * "indent" routines used mostly for nicely formatted debug output, but also
1088  * for sanity checking for infinite recursion bugs.
1089  */
1090 
1091 #define	MAX_INDENT 1024
1092 static const char *indent_s[MAX_INDENT];
1093 static int current_indent;
1094 
1095 static void
1096 indent_push(const char *s)
1097 {
1098 	if (current_indent < MAX_INDENT)
1099 		indent_s[current_indent++] = s;
1100 	else
1101 		out(O_DIE, "unexpected recursion depth (%d)", current_indent);
1102 }
1103 
1104 static void
1105 indent_set(const char *s)
1106 {
1107 	current_indent = 0;
1108 	indent_push(s);
1109 }
1110 
1111 static void
1112 indent_pop(void)
1113 {
1114 	if (current_indent > 0)
1115 		current_indent--;
1116 	else
1117 		out(O_DIE, "recursion underflow");
1118 }
1119 
1120 static void
1121 indent(void)
1122 {
1123 	int i;
1124 	if (!Verbose)
1125 		return;
1126 	for (i = 0; i < current_indent; i++)
1127 		out(O_ALTFP|O_VERB|O_NONL, indent_s[i]);
1128 }
1129 
1130 static int
1131 suspects_changed(struct fme *fmep)
1132 {
1133 	struct event *suspects = fmep->suspects;
1134 	struct event *psuspects = fmep->psuspects;
1135 
1136 	while (suspects != NULL && psuspects != NULL) {
1137 		if (suspects != psuspects)
1138 			return (1);
1139 		suspects = suspects->suspects;
1140 		psuspects = psuspects->psuspects;
1141 	}
1142 
1143 	return (suspects != psuspects);
1144 }
1145 
1146 #define	SLNEW		1
1147 #define	SLCHANGED	2
1148 #define	SLWAIT		3
1149 #define	SLDISPROVED	4
1150 
1151 static void
1152 print_suspects(int circumstance, struct fme *fmep)
1153 {
1154 	struct event *ep;
1155 
1156 	out(O_ALTFP|O_NONL, "[");
1157 	if (circumstance == SLCHANGED) {
1158 		out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, "
1159 		    "suspect list:", fmep->id, fme_state2str(fmep->state));
1160 	} else if (circumstance == SLWAIT) {
1161 		out(O_ALTFP|O_NONL, "FME%d set wait timer ", fmep->id);
1162 		ptree_timeval(O_ALTFP|O_NONL, &fmep->wull);
1163 	} else if (circumstance == SLDISPROVED) {
1164 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id);
1165 	} else {
1166 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id);
1167 	}
1168 
1169 	if (circumstance == SLWAIT || circumstance == SLDISPROVED) {
1170 		out(O_ALTFP, "]");
1171 		return;
1172 	}
1173 
1174 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
1175 		out(O_ALTFP|O_NONL, " ");
1176 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
1177 	}
1178 	out(O_ALTFP, "]");
1179 }
1180 
1181 static struct node *
1182 eventprop_lookup(struct event *ep, const char *propname)
1183 {
1184 	return (lut_lookup(ep->props, (void *)propname, NULL));
1185 }
1186 
1187 #define	MAXDIGITIDX	23
1188 static char numbuf[MAXDIGITIDX + 1];
1189 
1190 static int
1191 node2uint(struct node *n, uint_t *valp)
1192 {
1193 	struct evalue value;
1194 	struct lut *globals = NULL;
1195 
1196 	if (n == NULL)
1197 		return (1);
1198 
1199 	/*
1200 	 * check value.v since we are being asked to convert an unsigned
1201 	 * long long int to an unsigned int
1202 	 */
1203 	if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) ||
1204 	    value.t != UINT64 || value.v > (1ULL << 32))
1205 		return (1);
1206 
1207 	*valp = (uint_t)value.v;
1208 
1209 	return (0);
1210 }
1211 
1212 static nvlist_t *
1213 node2fmri(struct node *n)
1214 {
1215 	nvlist_t **pa, *f, *p;
1216 	struct node *nc;
1217 	uint_t depth = 0;
1218 	char *numstr, *nullbyte;
1219 	char *failure;
1220 	int err, i;
1221 
1222 	/* XXX do we need to be able to handle a non-T_NAME node? */
1223 	if (n == NULL || n->t != T_NAME)
1224 		return (NULL);
1225 
1226 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
1227 		if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM)
1228 			break;
1229 		depth++;
1230 	}
1231 
1232 	if (nc != NULL) {
1233 		/* We bailed early, something went wrong */
1234 		return (NULL);
1235 	}
1236 
1237 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
1238 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
1239 	pa = alloca(depth * sizeof (nvlist_t *));
1240 	for (i = 0; i < depth; i++)
1241 		pa[i] = NULL;
1242 
1243 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
1244 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
1245 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
1246 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
1247 	if (err != 0) {
1248 		failure = "basic construction of FMRI failed";
1249 		goto boom;
1250 	}
1251 
1252 	numbuf[MAXDIGITIDX] = '\0';
1253 	nullbyte = &numbuf[MAXDIGITIDX];
1254 	i = 0;
1255 
1256 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
1257 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
1258 		if (err != 0) {
1259 			failure = "alloc of an hc-pair failed";
1260 			goto boom;
1261 		}
1262 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s);
1263 		numstr = ulltostr(nc->u.name.child->u.ull, nullbyte);
1264 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
1265 		if (err != 0) {
1266 			failure = "construction of an hc-pair failed";
1267 			goto boom;
1268 		}
1269 		pa[i++] = p;
1270 	}
1271 
1272 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
1273 	if (err == 0) {
1274 		for (i = 0; i < depth; i++)
1275 			if (pa[i] != NULL)
1276 				nvlist_free(pa[i]);
1277 		return (f);
1278 	}
1279 	failure = "addition of hc-pair array to FMRI failed";
1280 
1281 boom:
1282 	for (i = 0; i < depth; i++)
1283 		if (pa[i] != NULL)
1284 			nvlist_free(pa[i]);
1285 	nvlist_free(f);
1286 	out(O_DIE, "%s", failure);
1287 	/*NOTREACHED*/
1288 }
1289 
/*
 * avg -- return sum / cnt rounded to the nearest integer (halves round
 *	up).
 *
 *	The scaling by 10 is done in 64 bits so a large sum cannot wrap
 *	before it is widened.  A cnt of zero yields 0 rather than dividing
 *	by zero.
 */
static uint_t
avg(uint_t sum, uint_t cnt)
{
	unsigned long long s;

	if (cnt == 0)
		return (0);

	s = (unsigned long long)sum * 10;

	return ((s / cnt / 10) + (((s / cnt % 10) >= 5) ? 1 : 0));
}
1297 
/*
 * percentof -- return part as a percentage of whole, rounded to the
 *	nearest integer (halves round up).
 *
 *	The scaling by 1000 is done in 64 bits so a large part cannot wrap
 *	before it is widened.  A whole of zero yields 0 rather than dividing
 *	by zero.
 */
static uint8_t
percentof(uint_t part, uint_t whole)
{
	unsigned long long p;

	if (whole == 0)
		return (0);

	p = (unsigned long long)part * 1000;

	return ((p / whole / 10) + (((p / whole % 10) >= 5) ? 1 : 0));
}
1305 
1306 static struct rsl {
1307 	struct event *suspect;
1308 	nvlist_t *asru;
1309 	nvlist_t *fru;
1310 	nvlist_t *rsrc;
1311 };
1312 
1313 /*
1314  *  rslfree -- free internal members of struct rsl not expected to be
1315  *	freed elsewhere.
1316  */
1317 static void
1318 rslfree(struct rsl *freeme)
1319 {
1320 	if (freeme->asru != NULL)
1321 		nvlist_free(freeme->asru);
1322 	if (freeme->fru != NULL)
1323 		nvlist_free(freeme->fru);
1324 	if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru)
1325 		nvlist_free(freeme->rsrc);
1326 }
1327 
1328 /*
1329  *  rslcmp -- compare two rsl structures.  Use the following
1330  *	comparisons to establish cardinality:
1331  *
1332  *	1. Name of the suspect's class. (simple strcmp)
1333  *	2. Name of the suspect's ASRU. (trickier, since nvlist)
1334  *
1335  */
1336 static int
1337 rslcmp(const void *a, const void *b)
1338 {
1339 	struct rsl *r1 = (struct rsl *)a;
1340 	struct rsl *r2 = (struct rsl *)b;
1341 	int rv;
1342 
1343 	rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s,
1344 	    r2->suspect->enode->u.event.ename->u.name.s);
1345 	if (rv != 0)
1346 		return (rv);
1347 
1348 	if (r1->asru == NULL && r2->asru == NULL)
1349 		return (0);
1350 	if (r1->asru == NULL)
1351 		return (-1);
1352 	if (r2->asru == NULL)
1353 		return (1);
1354 	return (evnv_cmpnvl(r1->asru, r2->asru, 0));
1355 }
1356 
1357 /*
1358  *  rsluniq -- given an array of rsl structures, seek out and "remove"
1359  *	any duplicates.  Dups are "remove"d by NULLing the suspect pointer
1360  *	of the array element.  Removal also means updating the number of
1361  *	problems and the number of problems which are not faults.  User
1362  *	provides the first and last element pointers.
1363  */
1364 static void
1365 rsluniq(struct rsl *first, struct rsl *last, int *nprobs, int *nnonf)
1366 {
1367 	struct rsl *cr;
1368 
1369 	if (*nprobs == 1)
1370 		return;
1371 
1372 	/*
1373 	 *  At this point, we only expect duplicate defects.
1374 	 *  Eversholt's diagnosis algorithm prevents duplicate
1375 	 *  suspects, but we rewrite defects in the platform code after
1376 	 *  the diagnosis is made, and that can introduce new
1377 	 *  duplicates.
1378 	 */
1379 	while (first <= last) {
1380 		if (first->suspect == NULL || !is_defect(first->suspect->t)) {
1381 			first++;
1382 			continue;
1383 		}
1384 		cr = first + 1;
1385 		while (cr <= last) {
1386 			if (is_defect(first->suspect->t)) {
1387 				if (rslcmp(first, cr) == 0) {
1388 					cr->suspect = NULL;
1389 					rslfree(cr);
1390 					(*nprobs)--;
1391 					(*nnonf)--;
1392 				}
1393 			}
1394 			/*
1395 			 * assume all defects are in order after our
1396 			 * sort and short circuit here with "else break" ?
1397 			 */
1398 			cr++;
1399 		}
1400 		first++;
1401 	}
1402 }
1403 
1404 /*
1405  * get_resources -- for a given suspect, determine what ASRU, FRU and
1406  *     RSRC nvlists should be advertised in the final suspect list.
1407  */
1408 void
1409 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot)
1410 {
1411 	struct node *asrudef, *frudef;
1412 	nvlist_t *asru, *fru;
1413 	nvlist_t *rsrc = NULL;
1414 	char *pathstr;
1415 
1416 	/*
1417 	 * First find any ASRU and/or FRU defined in the
1418 	 * initial fault tree.
1419 	 */
1420 	asrudef = eventprop_lookup(sp, L_ASRU);
1421 	frudef = eventprop_lookup(sp, L_FRU);
1422 
1423 	/*
1424 	 * Create FMRIs based on those definitions
1425 	 */
1426 	asru = node2fmri(asrudef);
1427 	fru = node2fmri(frudef);
1428 	pathstr = ipath2str(NULL, sp->ipp);
1429 
1430 	/*
1431 	 * Allow for platform translations of the FMRIs
1432 	 */
1433 	platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc,
1434 	    pathstr);
1435 
1436 	FREE(pathstr);
1437 	rsrcs->suspect = sp;
1438 	rsrcs->asru = asru;
1439 	rsrcs->fru = fru;
1440 	rsrcs->rsrc = rsrc;
1441 }
1442 
1443 /*
1444  * trim_suspects -- prior to publishing, we may need to remove some
1445  *    suspects from the list.  If we're auto-closing upsets, we don't
1446  *    want any of those in the published list.  If the ASRUs for multiple
1447  *    defects resolve to the same ASRU (driver) we only want to publish
1448  *    that as a single suspect.
1449  */
1450 static void
1451 trim_suspects(struct fme *fmep, boolean_t no_upsets, struct rsl **begin,
1452     struct rsl **end)
1453 {
1454 	struct event *ep;
1455 	struct rsl *rp;
1456 	int rpcnt;
1457 
1458 	/*
1459 	 * First save the suspects in the psuspects, then copy back
1460 	 * only the ones we wish to retain.  This resets nsuspects to
1461 	 * zero.
1462 	 */
1463 	rpcnt = fmep->nsuspects;
1464 	save_suspects(fmep);
1465 
1466 	/*
1467 	 * allocate an array of resource pointers for the suspects.
1468 	 * We may end up using less than the full allocation, but this
1469 	 * is a very short-lived array.  publish_suspects() will free
1470 	 * this array when it's done using it.
1471 	 */
1472 	rp = *begin = MALLOC(rpcnt * sizeof (struct rsl));
1473 	bzero(rp, rpcnt * sizeof (struct rsl));
1474 
1475 	/* first pass, remove any unwanted upsets and populate our array */
1476 	for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
1477 		if (no_upsets && is_upset(ep->t))
1478 			continue;
1479 		get_resources(ep, rp, fmep->cfgdata->cooked);
1480 		rp++;
1481 		fmep->nsuspects++;
1482 		if (!is_fault(ep->t))
1483 			fmep->nonfault++;
1484 	}
1485 
1486 	/* if all we had was unwanted upsets, we're done */
1487 	if (fmep->nsuspects == 0)
1488 		return;
1489 
1490 	*end = rp - 1;
1491 
1492 	/* sort the array */
1493 	qsort(*begin, fmep->nsuspects, sizeof (struct rsl), rslcmp);
1494 	rsluniq(*begin, *end, &fmep->nsuspects, &fmep->nonfault);
1495 }
1496 
1497 static void
1498 publish_suspects(struct fme *fmep)
1499 {
1500 	struct event *ep;
1501 	struct rsl *srl = NULL;
1502 	struct rsl *erl;
1503 	struct rsl *rp;
1504 	nvlist_t *fault;
1505 	uint8_t cert;
1506 	uint_t *frs;
1507 	uint_t fravg, frsum, fr;
1508 	int frcnt, fridx;
1509 	boolean_t no_upsets = B_FALSE;
1510 
1511 	stats_counter_bump(fmep->diags);
1512 
1513 	/*
1514 	 * The current fmd interfaces don't allow us to solve a case
1515 	 * that's already solved.  If we make a new case, what of the
1516 	 * ereports?  We don't appear to have an interface that allows
1517 	 * us to access the ereports attached to a case (if we wanted
1518 	 * to copy the original case's ereport attachments to the new
1519 	 * case) and it's also a bit unclear if there would be any
1520 	 * problems with having ereports attached to multiple cases
1521 	 * and/or attaching DIAGNOSED ereports to a case.  For now,
1522 	 * we'll just output a message.
1523 	 */
1524 	if (fmep->posted_suspects ||
1525 	    fmd_case_solved(fmep->hdl, fmep->fmcase)) {
1526 		out(O_ALTFP|O_NONL, "Revised diagnosis for case %s: ",
1527 		    fmd_case_uuid(fmep->hdl, fmep->fmcase));
1528 		for (ep = fmep->suspects; ep; ep = ep->suspects) {
1529 			out(O_ALTFP|O_NONL, " ");
1530 			itree_pevent_brief(O_ALTFP|O_NONL, ep);
1531 		}
1532 		out(O_ALTFP, NULL);
1533 		return;
1534 	}
1535 
1536 	/*
1537 	 * If we're auto-closing upsets, we don't want to include them
1538 	 * in any produced suspect lists or certainty accounting.
1539 	 */
1540 	if (Autoclose != NULL)
1541 		if (strcmp(Autoclose, "true") == 0 ||
1542 		    strcmp(Autoclose, "all") == 0 ||
1543 		    strcmp(Autoclose, "upsets") == 0)
1544 			no_upsets = B_TRUE;
1545 
1546 	trim_suspects(fmep, no_upsets, &srl, &erl);
1547 
1548 	/*
1549 	 * If the resulting suspect list has no members, we're
1550 	 * done.  Returning here will simply close the case.
1551 	 */
1552 	if (fmep->nsuspects == 0) {
1553 		out(O_ALTFP,
1554 		    "[FME%d, case %s (all suspects are upsets)]",
1555 		    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
1556 		FREE(srl);
1557 		restore_suspects(fmep);
1558 		return;
1559 	}
1560 
1561 	/*
1562 	 * If the suspect list is all faults, then for a given fault,
1563 	 * say X of N, X's certainty is computed via:
1564 	 *
1565 	 * fitrate(X) / (fitrate(1) + ... + fitrate(N)) * 100
1566 	 *
1567 	 * If none of the suspects are faults, and there are N suspects,
1568 	 * the certainty of a given suspect is 100/N.
1569 	 *
1570 	 * If there are are a mixture of faults and other problems in
1571 	 * the suspect list, we take an average of the faults'
1572 	 * FITrates and treat this average as the FITrate for any
1573 	 * non-faults.  The fitrate of any given suspect is then
1574 	 * computed per the first formula above.
1575 	 */
1576 	if (fmep->nonfault == fmep->nsuspects) {
1577 		/* NO faults in the suspect list */
1578 		cert = percentof(1, fmep->nsuspects);
1579 	} else {
1580 		/* sum the fitrates */
1581 		frs = alloca(fmep->nsuspects * sizeof (uint_t));
1582 		fridx = frcnt = frsum = 0;
1583 
1584 		for (rp = srl; rp <= erl; rp++) {
1585 			struct node *n;
1586 
1587 			if (rp->suspect == NULL)
1588 				continue;
1589 			if (!is_fault(rp->suspect->t)) {
1590 				frs[fridx++] = 0;
1591 				continue;
1592 			}
1593 			n = eventprop_lookup(rp->suspect, L_FITrate);
1594 			if (node2uint(n, &fr) != 0) {
1595 				out(O_DEBUG|O_NONL, "event ");
1596 				ipath_print(O_DEBUG|O_NONL,
1597 				    ep->enode->u.event.ename->u.name.s,
1598 				    ep->ipp);
1599 				out(O_DEBUG, " has no FITrate (using 1)");
1600 				fr = 1;
1601 			} else if (fr == 0) {
1602 				out(O_DEBUG|O_NONL, "event ");
1603 				ipath_print(O_DEBUG|O_NONL,
1604 				    ep->enode->u.event.ename->u.name.s,
1605 				    ep->ipp);
1606 				out(O_DEBUG, " has zero FITrate (using 1)");
1607 				fr = 1;
1608 			}
1609 
1610 			frs[fridx++] = fr;
1611 			frsum += fr;
1612 			frcnt++;
1613 		}
1614 		fravg = avg(frsum, frcnt);
1615 		for (fridx = 0; fridx < fmep->nsuspects; fridx++)
1616 			if (frs[fridx] == 0) {
1617 				frs[fridx] = fravg;
1618 				frsum += fravg;
1619 			}
1620 	}
1621 
1622 	/* Add them in reverse order of our sort, as fmd reverses order */
1623 	for (rp = erl; rp >= srl; rp--) {
1624 		if (rp->suspect == NULL)
1625 			continue;
1626 		if (fmep->nonfault != fmep->nsuspects)
1627 			cert = percentof(frs[--fridx], frsum);
1628 		fault = fmd_nvl_create_fault(fmep->hdl,
1629 		    rp->suspect->enode->u.event.ename->u.name.s,
1630 		    cert,
1631 		    rp->asru,
1632 		    rp->fru,
1633 		    rp->rsrc);
1634 		if (fault == NULL)
1635 			out(O_DIE, "fault creation failed");
1636 		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);
1637 		rp->suspect->fault = fault;
1638 		rslfree(rp);
1639 	}
1640 	fmd_case_solve(fmep->hdl, fmep->fmcase);
1641 	out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
1642 	    fmd_case_uuid(fmep->hdl, fmep->fmcase));
1643 
1644 	if (Autoconvict) {
1645 		for (rp = srl; rp <= erl; rp++) {
1646 			if (rp->suspect == NULL)
1647 				continue;
1648 			fmd_case_convict(fmep->hdl,
1649 			    fmep->fmcase, rp->suspect->fault);
1650 		}
1651 		out(O_ALTFP, "[convicting FME%d, case %s]", fmep->id,
1652 		    fmd_case_uuid(fmep->hdl, fmep->fmcase));
1653 	}
1654 
1655 	/*
1656 	 * revert to the original suspect list
1657 	 */
1658 	FREE(srl);
1659 	restore_suspects(fmep);
1660 }
1661 
1662 static void
1663 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep)
1664 {
1665 	struct case_list *newcase;
1666 	nvlist_t *defect;
1667 
1668 	out(O_ALTFP,
1669 	    "[undiagnosable ereport received, "
1670 	    "creating and closing a new case (%s)]",
1671 	    Undiag_reason ? Undiag_reason : "reason not provided");
1672 
1673 	newcase = MALLOC(sizeof (struct case_list));
1674 	newcase->next = NULL;
1675 
1676 	newcase->fmcase = fmd_case_open(hdl, NULL);
1677 	if (Undiagablecaselist != NULL)
1678 		newcase->next = Undiagablecaselist;
1679 	Undiagablecaselist = newcase;
1680 
1681 	if (ffep != NULL)
1682 		fmd_case_add_ereport(hdl, newcase->fmcase, ffep);
1683 
1684 	defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
1685 	    NULL, NULL, NULL);
1686 	if (Undiag_reason != NULL)
1687 		(void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason);
1688 	fmd_case_add_suspect(hdl, newcase->fmcase, defect);
1689 
1690 	fmd_case_solve(hdl, newcase->fmcase);
1691 	fmd_case_close(hdl, newcase->fmcase);
1692 }
1693 
1694 static void
1695 fme_undiagnosable(struct fme *f)
1696 {
1697 	nvlist_t *defect;
1698 
1699 	out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
1700 	    f->id, fmd_case_uuid(f->hdl, f->fmcase),
1701 	    Undiag_reason ? Undiag_reason : "undiagnosable");
1702 
1703 	defect = fmd_nvl_create_fault(f->hdl, UNDIAGNOSABLE_DEFECT, 100,
1704 	    NULL, NULL, NULL);
1705 	if (Undiag_reason != NULL)
1706 		(void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason);
1707 	fmd_case_add_suspect(f->hdl, f->fmcase, defect);
1708 	fmd_case_solve(f->hdl, f->fmcase);
1709 	destroy_fme_bufs(f);
1710 	fmd_case_close(f->hdl, f->fmcase);
1711 }
1712 
1713 /*
1714  * fme_close_case
1715  *
1716  *	Find the requested case amongst our fmes and close it.  Free up
1717  *	the related fme.
1718  */
1719 void
1720 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase)
1721 {
1722 	struct case_list *ucasep, *prevcasep = NULL;
1723 	struct fme *prev = NULL;
1724 	struct fme *fmep;
1725 
1726 	for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) {
1727 		if (fmcase != ucasep->fmcase) {
1728 			prevcasep = ucasep;
1729 			continue;
1730 		}
1731 
1732 		if (prevcasep == NULL)
1733 			Undiagablecaselist = Undiagablecaselist->next;
1734 		else
1735 			prevcasep->next = ucasep->next;
1736 
1737 		FREE(ucasep);
1738 		return;
1739 	}
1740 
1741 	for (fmep = FMElist; fmep; fmep = fmep->next) {
1742 		if (fmep->hdl == hdl && fmep->fmcase == fmcase)
1743 			break;
1744 		prev = fmep;
1745 	}
1746 
1747 	if (fmep == NULL) {
1748 		out(O_WARN, "Eft asked to close unrecognized case [%s].",
1749 		    fmd_case_uuid(hdl, fmcase));
1750 		return;
1751 	}
1752 
1753 	if (EFMElist == fmep)
1754 		EFMElist = prev;
1755 
1756 	if (prev == NULL)
1757 		FMElist = FMElist->next;
1758 	else
1759 		prev->next = fmep->next;
1760 
1761 	fmep->next = NULL;
1762 
1763 	/* Get rid of any timer this fme has set */
1764 	if (fmep->wull != 0)
1765 		fmd_timer_remove(fmep->hdl, fmep->timer);
1766 
1767 	if (ClosedFMEs == NULL) {
1768 		ClosedFMEs = fmep;
1769 	} else {
1770 		fmep->next = ClosedFMEs;
1771 		ClosedFMEs = fmep;
1772 	}
1773 }
1774 
1775 /*
1776  * fme_set_timer()
1777  *	If the time we need to wait for the given FME is less than the
1778  *	current timer, kick that old timer out and establish a new one.
1779  */
1780 static void
1781 fme_set_timer(struct fme *fmep, unsigned long long wull)
1782 {
1783 	out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait ");
1784 	ptree_timeval(O_ALTFP|O_VERB, &wull);
1785 
1786 	if (wull <= fmep->pull) {
1787 		out(O_ALTFP|O_VERB|O_NONL, "already have waited at least ");
1788 		ptree_timeval(O_ALTFP|O_VERB, &fmep->pull);
1789 		out(O_ALTFP|O_VERB, NULL);
1790 		/* we've waited at least wull already, don't need timer */
1791 		return;
1792 	}
1793 
1794 	out(O_ALTFP|O_VERB|O_NONL, " currently ");
1795 	if (fmep->wull != 0) {
1796 		out(O_ALTFP|O_VERB|O_NONL, "waiting ");
1797 		ptree_timeval(O_ALTFP|O_VERB, &fmep->wull);
1798 		out(O_ALTFP|O_VERB, NULL);
1799 	} else {
1800 		out(O_ALTFP|O_VERB|O_NONL, "not waiting");
1801 		out(O_ALTFP|O_VERB, NULL);
1802 	}
1803 
1804 	if (fmep->wull != 0)
1805 		if (wull >= fmep->wull)
1806 			/* New timer would fire later than established timer */
1807 			return;
1808 
1809 	if (fmep->wull != 0)
1810 		fmd_timer_remove(fmep->hdl, fmep->timer);
1811 
1812 	fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep,
1813 	    fmep->e0r, wull);
1814 	out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer);
1815 	fmep->wull = wull;
1816 }
1817 
1818 void
1819 fme_timer_fired(struct fme *fmep, id_t tid)
1820 {
1821 	struct fme *ffmep = NULL;
1822 
1823 	for (ffmep = FMElist; ffmep; ffmep = ffmep->next)
1824 		if (ffmep == fmep)
1825 			break;
1826 
1827 	if (ffmep == NULL) {
1828 		out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.",
1829 		    (void *)fmep);
1830 		return;
1831 	}
1832 
1833 	if (tid != fmep->htid) {
1834 		/*
1835 		 * normal timer (not the hesitation timer
1836 		 */
1837 		fmep->pull = fmep->wull;
1838 		fmep->wull = 0;
1839 		fmd_buf_write(fmep->hdl, fmep->fmcase,
1840 		    WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull));
1841 	} else {
1842 		fmep->hesitated = 1;
1843 	}
1844 	fme_eval(fmep, NULL);
1845 }
1846 
1847 /*
1848  * Preserve the fme's suspect list in its psuspects list, NULLing the
1849  * suspects list in the meantime.
1850  */
1851 static void
1852 save_suspects(struct fme *fmep)
1853 {
1854 	struct event *ep;
1855 	struct event *nextep;
1856 
1857 	/* zero out the previous suspect list */
1858 	for (ep = fmep->psuspects; ep; ep = nextep) {
1859 		nextep = ep->psuspects;
1860 		ep->psuspects = NULL;
1861 	}
1862 	fmep->psuspects = NULL;
1863 
1864 	/* zero out the suspect list, copying it to previous suspect list */
1865 	fmep->psuspects = fmep->suspects;
1866 	for (ep = fmep->suspects; ep; ep = nextep) {
1867 		nextep = ep->suspects;
1868 		ep->psuspects = ep->suspects;
1869 		ep->suspects = NULL;
1870 		ep->is_suspect = 0;
1871 	}
1872 	fmep->suspects = NULL;
1873 	fmep->nsuspects = 0;
1874 	fmep->nonfault = 0;
1875 }
1876 
1877 /*
1878  * Retrieve the fme's suspect list from its psuspects list.
1879  */
1880 static void
1881 restore_suspects(struct fme *fmep)
1882 {
1883 	struct event *ep;
1884 	struct event *nextep;
1885 
1886 	fmep->nsuspects = fmep->nonfault = 0;
1887 	fmep->suspects = fmep->psuspects;
1888 	for (ep = fmep->psuspects; ep; ep = nextep) {
1889 		fmep->nsuspects++;
1890 		if (!is_fault(ep->t))
1891 			fmep->nonfault++;
1892 		nextep = ep->psuspects;
1893 		ep->suspects = ep->psuspects;
1894 	}
1895 }
1896 
/*
 * fme_eval -- (re)evaluate an FME and act on the resulting state.
 *
 * this is what we use to call the Emrys prototype code instead of main()
 *
 * The current suspect list is first moved to psuspects, then
 * hypothesise() is run from the FME's initial event (e0) to produce a new
 * state and suspect list.  What happens next depends on whether a
 * diagnosis has already been posted:
 *
 *   - already posted: if the suspect list changed, the change is logged
 *     and publish_suspects() is called.
 *   - not yet posted: FME_CREDIBLE publishes the suspects (possibly after
 *     a hesitation period), FME_WAIT either publishes a singleton list or
 *     sets a wait timer, and FME_DISPROVED closes the case as
 *     undiagnosable.
 *
 * Finally, if a diagnosis has been posted and Autoclose asks for it, the
 * case is closed.
 *
 * ffep is the fmd event that prompted this evaluation; it is only passed
 * on to upsets_eval() and is NULL when called from fme_timer_fired().
 */
static void
fme_eval(struct fme *fmep, fmd_event_t *ffep)
{
	struct event *ep;
	unsigned long long my_delay = TIMEVAL_EVENTUALLY;

	/* keep the old list in psuspects so suspects_changed() can compare */
	save_suspects(fmep);

	out(O_ALTFP|O_VERB, "Evaluate FME %d", fmep->id);
	/* restart the indent stack for this evaluation's verbose output */
	indent_set("  ");

	initialize_cycles(fmep);
	fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay, NULL);

	out(O_ALTFP|O_VERB|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
	    fme_state2str(fmep->state));
	for (ep = fmep->suspects; ep; ep = ep->suspects) {
		out(O_ALTFP|O_VERB|O_NONL, " ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
	}
	out(O_ALTFP|O_VERB, NULL);

	if (fmep->posted_suspects) {
		/*
		 * this FME has already posted a diagnosis, so see if
		 * the event changed the diagnosis and print a warning
		 * if it did.
		 *
		 */
		if (suspects_changed(fmep)) {
			print_suspects(SLCHANGED, fmep);
			publish_suspects(fmep);
		}
	} else {
		switch (fmep->state) {
		case FME_CREDIBLE:
			/*
			 * if the suspect list contains any upsets, we
			 * turn off the hesitation logic (by setting
			 * the hesitate flag which normally indicates
			 * we've already done the hesitate logic).
			 * this is done because hesitating with upsets
			 * causes us to explain away additional soft errors
			 * while the upset FME stays open.
			 */
			if (fmep->hesitated == 0) {
				struct event *s;

				for (s = fmep->suspects; s; s = s->suspects) {
					if (s->t == N_UPSET) {
						fmep->hesitated = 1;
						break;
					}
				}
			}

			/*
			 * hesitate only when enabled, when there is more
			 * than one suspect, and when we haven't hesitated
			 * (or waived hesitation) already
			 */
			if (Hesitate &&
			    fmep->suspects != NULL &&
			    fmep->suspects->suspects != NULL &&
			    fmep->hesitated == 0) {
				/*
				 * about to publish multi-entry suspect list,
				 * set the hesitation timer if not already set.
				 */
				if (fmep->htid == 0) {
					out(O_ALTFP|O_NONL,
					    "[hesitate FME%d, case %s ",
					    fmep->id,
					    fmd_case_uuid(fmep->hdl,
					    fmep->fmcase));
					ptree_timeval(O_ALTFP|O_NONL,
					    (unsigned long long *)&Hesitate);
					out(O_ALTFP, "]");
					fme_set_timer(fmep, my_delay);
					fmep->htid =
					    fmd_timer_install(fmep->hdl,
					    (void *)fmep, NULL, Hesitate);
				} else {
					out(O_ALTFP,
					    "[still hesitating FME%d, case %s]",
					    fmep->id,
					    fmd_case_uuid(fmep->hdl,
					    fmep->fmcase));
				}
			} else {
				/* publish now and record that we did */
				print_suspects(SLNEW, fmep);
				(void) upsets_eval(fmep, ffep);
				publish_suspects(fmep);
				fmep->posted_suspects = 1;
				fmd_buf_write(fmep->hdl, fmep->fmcase,
				    WOBUF_POSTD,
				    (void *)&fmep->posted_suspects,
				    sizeof (fmep->posted_suspects));
			}
			break;

		case FME_WAIT:
			/*
			 * singleton suspect list implies
			 * no point in waiting
			 */
			if (fmep->suspects &&
			    fmep->suspects->suspects == NULL) {
				print_suspects(SLNEW, fmep);
				(void) upsets_eval(fmep, ffep);
				publish_suspects(fmep);
				fmep->posted_suspects = 1;
				fmd_buf_write(fmep->hdl, fmep->fmcase,
				    WOBUF_POSTD,
				    (void *)&fmep->posted_suspects,
				    sizeof (fmep->posted_suspects));
				fmep->state = FME_CREDIBLE;
			} else {
				ASSERT(my_delay > fmep->ull);
				fme_set_timer(fmep, my_delay);
				print_suspects(SLWAIT, fmep);
			}
			break;

		case FME_DISPROVED:
			/* nothing explains the observations; give up */
			print_suspects(SLDISPROVED, fmep);
			Undiag_reason = UD_UNSOLVD;
			fme_undiagnosable(fmep);
			break;
		}
	}

	/*
	 * Autoclose "true" or "all" closes any case with a posted
	 * diagnosis; "upsets" closes it only when every suspect is an
	 * upset.
	 */
	if (fmep->posted_suspects == 1 && Autoclose != NULL) {
		int doclose = 0;

		if (strcmp(Autoclose, "true") == 0 ||
		    strcmp(Autoclose, "all") == 0)
			doclose = 1;

		if (strcmp(Autoclose, "upsets") == 0) {
			doclose = 1;
			for (ep = fmep->suspects; ep; ep = ep->suspects) {
				if (ep->t != N_UPSET) {
					doclose = 0;
					break;
				}
			}
		}

		if (doclose) {
			out(O_ALTFP, "[closing FME%d, case %s (autoclose)]",
			    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));

			destroy_fme_bufs(fmep);
			fmd_case_close(fmep->hdl, fmep->fmcase);
		}
	}
}
2053 
2054 /*
2055  * below here is the code derived from the Emrys prototype
2056  */
2057 
/*
 * Forward declarations for the hypothesis-testing routines.  indent() is
 * already defined earlier in this file; triggered(), mark_arrows() and
 * effects_test() are defined immediately below.
 */
static void indent(void);
static int triggered(struct fme *fmep, struct event *ep, int mark);
static void mark_arrows(struct fme *fmep, struct event *ep, int mark);
static enum fme_state effects_test(struct fme *fmep,
    struct event *fault_event);
static enum fme_state requirements_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay,
    struct arrow *arrowp);
static enum fme_state causes_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
2068 
2069 static int
2070 triggered(struct fme *fmep, struct event *ep, int mark)
2071 {
2072 	struct bubble *bp;
2073 	struct arrowlist *ap;
2074 	int count = 0;
2075 
2076 	stats_counter_bump(fmep->Tcallcount);
2077 	for (bp = itree_next_bubble(ep, NULL); bp;
2078 	    bp = itree_next_bubble(ep, bp)) {
2079 		if (bp->t != B_TO)
2080 			continue;
2081 		for (ap = itree_next_arrow(bp, NULL); ap;
2082 		    ap = itree_next_arrow(bp, ap)) {
2083 			/* check count of marks against K in the bubble */
2084 			if (ap->arrowp->tail->mark == mark &&
2085 			    ++count >= bp->nork)
2086 				return (1);
2087 		}
2088 	}
2089 	return (0);
2090 }
2091 
/*
 * mark_arrows -- set the mark on every B_FROM bubble of event ep and
 *	propagate it recursively along the arrows leaving those bubbles.
 *
 *	A bubble that already carries the requested mark is skipped, which
 *	also keeps the recursion from revisiting it.  An arrow is not
 *	followed when one of its constraints evaluates to false, to
 *	UNDEFINED, or cannot be evaluated.  Otherwise, the event at the
 *	arrow's head is recursed into only if triggered() says it is
 *	triggered for this mark.
 *
 *	effects_test() calls this with mark 1 to propagate from a candidate
 *	fault and again with mark 0 afterwards to undo the marking.
 */
static void
mark_arrows(struct fme *fmep, struct event *ep, int mark)
{
	struct bubble *bp;
	struct arrowlist *ap;

	for (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		if (bp->t != B_FROM)
			continue;
		if (bp->mark != mark) {
			stats_counter_bump(fmep->Marrowcount);
			bp->mark = mark;
			for (ap = itree_next_arrow(bp, NULL); ap;
			    ap = itree_next_arrow(bp, ap)) {
				struct constraintlist *ctp;
				struct evalue value;
				int do_not_follow = 0;
				/*
				 * see if false constraint prevents us
				 * from traversing this arrow, but don't
				 * bother if the event is an ereport we
				 * haven't seen
				 */
				if (ap->arrowp->head->myevent->t != N_EREPORT ||
				    ap->arrowp->head->myevent->count != 0) {
					/*
					 * the head event's payload is set
					 * only while its constraints are
					 * being evaluated
					 */
					platform_set_payloadnvp(
					    ap->arrowp->head->myevent->nvp);
					for (ctp = ap->arrowp->constraints;
					    ctp != NULL; ctp = ctp->next) {
						if (eval_expr(ctp->cnode,
						    NULL, NULL,
						    &fmep->globals,
						    fmep->cfgdata->cooked,
						    ap->arrowp, 0,
						    &value) == 0 ||
						    value.t == UNDEFINED ||
						    value.v == 0) {
							do_not_follow = 1;
							break;
						}
					}
					platform_set_payloadnvp(NULL);
				}

				if (do_not_follow) {
					/*
					 * ctp still points at the constraint
					 * that failed (the loop above broke
					 * out on it), so it can be printed
					 */
					indent();
					out(O_ALTFP|O_VERB|O_NONL,
					    "  False arrow to ");
					itree_pevent_brief(
					    O_ALTFP|O_VERB|O_NONL,
					    ap->arrowp->head->myevent);
					out(O_ALTFP|O_VERB|O_NONL, " ");
					ptree(O_ALTFP|O_VERB|O_NONL,
					    ctp->cnode, 1, 0);
					out(O_ALTFP|O_VERB, NULL);
					continue;
				}

				if (triggered(fmep, ap->arrowp->head->myevent,
				    mark))
					mark_arrows(fmep,
					    ap->arrowp->head->myevent, mark);
			}
		}
	}
}
2159 
2160 static enum fme_state
2161 effects_test(struct fme *fmep, struct event *fault_event)
2162 {
2163 	struct event *error_event;
2164 	enum fme_state return_value = FME_CREDIBLE;
2165 
2166 	stats_counter_bump(fmep->Ecallcount);
2167 	indent_push("  E");
2168 	indent();
2169 	out(O_ALTFP|O_VERB|O_NONL, "->");
2170 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
2171 	out(O_ALTFP|O_VERB, NULL);
2172 
2173 	mark_arrows(fmep, fault_event, 1);
2174 	for (error_event = fmep->observations;
2175 	    error_event; error_event = error_event->observations) {
2176 		indent();
2177 		out(O_ALTFP|O_VERB|O_NONL, " ");
2178 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
2179 		if (!triggered(fmep, error_event, 1)) {
2180 			return_value = FME_DISPROVED;
2181 			out(O_ALTFP|O_VERB, " NOT triggered");
2182 			break;
2183 		} else {
2184 			out(O_ALTFP|O_VERB, " triggered");
2185 		}
2186 	}
2187 	mark_arrows(fmep, fault_event, 0);
2188 
2189 	indent();
2190 	out(O_ALTFP|O_VERB|O_NONL, "<-%s ", fme_state2str(return_value));
2191 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
2192 	out(O_ALTFP|O_VERB, NULL);
2193 	indent_pop();
2194 	return (return_value);
2195 }
2196 
2197 static enum fme_state
2198 requirements_test(struct fme *fmep, struct event *ep,
2199     unsigned long long at_latest_by, unsigned long long *pdelay,
2200     struct arrow *arrowp)
2201 {
2202 	int waiting_events;
2203 	int credible_events;
2204 	enum fme_state return_value = FME_CREDIBLE;
2205 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
2206 	unsigned long long arrow_delay;
2207 	unsigned long long my_delay;
2208 	struct event *ep2;
2209 	struct bubble *bp;
2210 	struct arrowlist *ap;
2211 
2212 	stats_counter_bump(fmep->Rcallcount);
2213 	indent_push("  R");
2214 	indent();
2215 	out(O_ALTFP|O_VERB|O_NONL, "->");
2216 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
2217 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
2218 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
2219 	out(O_ALTFP|O_VERB, NULL);
2220 
2221 	if (ep->t == N_EREPORT) {
2222 		if (ep->count == 0) {
2223 			if (fmep->pull >= at_latest_by) {
2224 				return_value = FME_DISPROVED;
2225 			} else {
2226 				*pdelay = at_latest_by;
2227 				return_value = FME_WAIT;
2228 			}
2229 		} else if (arrowp != NULL) {
2230 			/*
2231 			 * evaluate constraints only for current observation
2232 			 */
2233 			struct constraintlist *ctp;
2234 			struct evalue value;
2235 
2236 			platform_set_payloadnvp(ep->nvp);
2237 			for (ctp = arrowp->constraints; ctp != NULL;
2238 				ctp = ctp->next) {
2239 				if (eval_expr(ctp->cnode, NULL, NULL,
2240 				    &fmep->globals, fmep->cfgdata->cooked,
2241 				    arrowp, 0, &value) == 0 ||
2242 				    value.t == UNDEFINED || value.v == 0) {
2243 					indent();
2244 					out(O_ALTFP|O_VERB|O_NONL,
2245 					    "  False constraint ");
2246 					out(O_ALTFP|O_VERB|O_NONL, " ");
2247 					ptree(O_ALTFP|O_VERB|O_NONL,
2248 					    ctp->cnode, 1, 0);
2249 					out(O_ALTFP|O_VERB, NULL);
2250 					return_value = FME_DISPROVED;
2251 					break;
2252 				}
2253 			}
2254 			platform_set_payloadnvp(NULL);
2255 		}
2256 
2257 		indent();
2258 		switch (return_value) {
2259 		case FME_CREDIBLE:
2260 			out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
2261 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
2262 			break;
2263 		case FME_DISPROVED:
2264 			out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
2265 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
2266 			break;
2267 		case FME_WAIT:
2268 			out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
2269 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
2270 			out(O_ALTFP|O_VERB|O_NONL, " to ");
2271 			ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
2272 			break;
2273 		default:
2274 			out(O_DIE, "requirements_test: unexpected fme_state");
2275 			break;
2276 		}
2277 		out(O_ALTFP|O_VERB, NULL);
2278 		indent_pop();
2279 
2280 		return (return_value);
2281 	}
2282 
2283 	/* this event is not a report, descend the tree */
2284 	for (bp = itree_next_bubble(ep, NULL); bp;
2285 	    bp = itree_next_bubble(ep, bp)) {
2286 		if (bp->t != B_FROM)
2287 			continue;
2288 		if (bp->mark == 0) {
2289 			int n = bp->nork;
2290 
2291 			bp->mark = 1;
2292 			credible_events = 0;
2293 			waiting_events = 0;
2294 			arrow_delay = TIMEVAL_EVENTUALLY;
2295 			/*
2296 			 * n is -1 for 'A' so adjust it.
2297 			 * XXX just count up the arrows for now.
2298 			 */
2299 			if (n < 0) {
2300 				n = 0;
2301 				for (ap = itree_next_arrow(bp, NULL); ap;
2302 				    ap = itree_next_arrow(bp, ap))
2303 					n++;
2304 				indent();
2305 				out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n);
2306 			} else {
2307 				indent();
2308 				out(O_ALTFP|O_VERB, " Bubble N=%d", n);
2309 			}
2310 
2311 			for (ap = itree_next_arrow(bp, NULL); ap;
2312 			    ap = itree_next_arrow(bp, ap)) {
2313 				ep2 = ap->arrowp->head->myevent;
2314 				if (n <= credible_events)
2315 					break;
2316 
2317 				if (triggered(fmep, ep2, 1))
2318 					/* XXX adding max timevals! */
2319 					switch (requirements_test(fmep, ep2,
2320 					    at_latest_by + ap->arrowp->maxdelay,
2321 					    &my_delay, ap->arrowp)) {
2322 					case FME_CREDIBLE:
2323 						credible_events++;
2324 						break;
2325 					case FME_DISPROVED:
2326 						break;
2327 					case FME_WAIT:
2328 						if (my_delay < arrow_delay)
2329 							arrow_delay = my_delay;
2330 						waiting_events++;
2331 						break;
2332 					default:
2333 						out(O_DIE,
2334 						"Bug in requirements_test.");
2335 					}
2336 				else
2337 					credible_events++;
2338 			}
2339 			indent();
2340 			out(O_ALTFP|O_VERB, " Credible: %d Waiting %d",
2341 			    credible_events, waiting_events);
2342 			if (credible_events + waiting_events < n) {
2343 				/* Can never meet requirements */
2344 				indent();
2345 				out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
2346 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
2347 				out(O_ALTFP|O_VERB, NULL);
2348 				indent_pop();
2349 				return (FME_DISPROVED);
2350 			}
2351 			if (credible_events < n) { /* will have to wait */
2352 				/* wait time is shortest known */
2353 				if (arrow_delay < overall_delay)
2354 					overall_delay = arrow_delay;
2355 				return_value = FME_WAIT;
2356 			}
2357 		} else {
2358 			indent();
2359 			out(O_ALTFP|O_VERB|O_NONL, " Mark was set: ");
2360 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
2361 			out(O_ALTFP|O_VERB|O_NONL, " to");
2362 			for (ap = itree_next_arrow(bp, NULL); ap;
2363 			    ap = itree_next_arrow(bp, ap)) {
2364 				out(O_ALTFP|O_VERB|O_NONL, " ");
2365 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
2366 				    ap->arrowp->head->myevent);
2367 			}
2368 			out(O_ALTFP|O_VERB, NULL);
2369 		}
2370 	}
2371 
2372 	/*
2373 	 * evaluate constraints for ctlist, which is the list of
2374 	 * constraints for the arrow pointing into this node of the tree
2375 	 */
2376 	if (return_value == FME_CREDIBLE && arrowp != NULL) {
2377 		struct constraintlist *ctp;
2378 		struct evalue value;
2379 
2380 		platform_set_payloadnvp(ep->nvp);
2381 		for (ctp = arrowp->constraints; ctp != NULL;
2382 			ctp = ctp->next) {
2383 			if (eval_expr(ctp->cnode, NULL,	NULL, &fmep->globals,
2384 			    fmep->cfgdata->cooked, arrowp, 0, &value) == 0 ||
2385 			    value.t == UNDEFINED || value.v == 0) {
2386 				indent();
2387 				out(O_ALTFP|O_VERB|O_NONL,
2388 				    "  False constraint ");
2389 				out(O_ALTFP|O_VERB|O_NONL, " ");
2390 				ptree(O_ALTFP|O_VERB|O_NONL,
2391 				    ctp->cnode, 1, 0);
2392 				out(O_ALTFP|O_VERB, NULL);
2393 				return_value = FME_DISPROVED;
2394 				break;
2395 			}
2396 		}
2397 		platform_set_payloadnvp(NULL);
2398 	}
2399 
2400 	if (return_value == FME_WAIT)
2401 		*pdelay = overall_delay;
2402 	indent();
2403 	out(O_ALTFP|O_VERB|O_NONL, "<-%s ", fme_state2str(return_value));
2404 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
2405 	out(O_ALTFP|O_VERB, NULL);
2406 	indent_pop();
2407 	return (return_value);
2408 }
2409 
2410 static enum fme_state
2411 causes_test(struct fme *fmep, struct event *ep,
2412     unsigned long long at_latest_by, unsigned long long *pdelay)
2413 {
2414 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
2415 	unsigned long long my_delay;
2416 	int credible_results = 0;
2417 	int waiting_results = 0;
2418 	enum fme_state fstate;
2419 	struct event *tail_event;
2420 	struct bubble *bp;
2421 	struct arrowlist *ap;
2422 	int k = 1;
2423 
2424 	stats_counter_bump(fmep->Ccallcount);
2425 	indent_push("  C");
2426 	indent();
2427 	out(O_ALTFP|O_VERB|O_NONL, "->");
2428 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
2429 	out(O_ALTFP|O_VERB, NULL);
2430 
2431 	for (bp = itree_next_bubble(ep, NULL); bp;
2432 	    bp = itree_next_bubble(ep, bp)) {
2433 		if (bp->t != B_TO)
2434 			continue;
2435 		k = bp->nork;	/* remember the K value */
2436 		for (ap = itree_next_arrow(bp, NULL); ap;
2437 		    ap = itree_next_arrow(bp, ap)) {
2438 			struct constraintlist *ctp;
2439 			struct evalue value;
2440 			int do_not_follow = 0;
2441 			/*
2442 			 * see if false constraint prevents us
2443 			 * from traversing this arrow
2444 			 */
2445 			platform_set_payloadnvp(ep->nvp);
2446 			for (ctp = ap->arrowp->constraints;
2447 			    ctp != NULL; ctp = ctp->next) {
2448 				if (eval_expr(ctp->cnode, NULL, NULL,
2449 				    &fmep->globals,
2450 				    fmep->cfgdata->cooked,
2451 				    ap->arrowp, 0,
2452 				    &value) == 0 ||
2453 				    value.t == UNDEFINED ||
2454 				    value.v == 0) {
2455 					do_not_follow = 1;
2456 					break;
2457 				}
2458 			}
2459 			platform_set_payloadnvp(NULL);
2460 			if (do_not_follow) {
2461 				indent();
2462 				out(O_ALTFP|O_VERB|O_NONL,
2463 				    "  False arrow from ");
2464 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
2465 				    ap->arrowp->tail->myevent);
2466 				out(O_ALTFP|O_VERB|O_NONL, " ");
2467 				ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
2468 				out(O_ALTFP|O_VERB, NULL);
2469 				continue;
2470 			}
2471 
2472 			if (ap->arrowp->causes_tested++ > 0) {
2473 				/*
2474 				 * get to this point if this is not the
2475 				 * first time we're going through this
2476 				 * arrow in the causes test.  consider this
2477 				 * branch to be credible and let the
2478 				 * credible/noncredible outcome depend on
2479 				 * the other branches in this cycle.
2480 				 */
2481 				fstate = FME_CREDIBLE;
2482 			} else {
2483 				/*
2484 				 * get to this point if this is the first
2485 				 * time we're going through this arrow.
2486 				 */
2487 				tail_event = ap->arrowp->tail->myevent;
2488 				fstate = hypothesise(fmep, tail_event,
2489 						    at_latest_by,
2490 						    &my_delay, ap->arrowp);
2491 			}
2492 
2493 			switch (fstate) {
2494 			case FME_WAIT:
2495 				if (my_delay < overall_delay)
2496 					overall_delay = my_delay;
2497 				waiting_results++;
2498 				break;
2499 			case FME_CREDIBLE:
2500 				credible_results++;
2501 				break;
2502 			case FME_DISPROVED:
2503 				break;
2504 			default:
2505 				out(O_DIE, "Bug in causes_test");
2506 			}
2507 
2508 			ap->arrowp->causes_tested--;
2509 			ASSERT(ap->arrowp->causes_tested >= 0);
2510 		}
2511 	}
2512 	/* compare against K */
2513 	if (credible_results + waiting_results < k) {
2514 		indent();
2515 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
2516 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
2517 		out(O_ALTFP|O_VERB, NULL);
2518 		indent_pop();
2519 		return (FME_DISPROVED);
2520 	}
2521 	if (waiting_results != 0) {
2522 		*pdelay = overall_delay;
2523 		indent();
2524 		out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
2525 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
2526 		out(O_ALTFP|O_VERB|O_NONL, " to ");
2527 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
2528 		out(O_ALTFP|O_VERB, NULL);
2529 		indent_pop();
2530 		return (FME_WAIT);
2531 	}
2532 	indent();
2533 	out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
2534 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
2535 	out(O_ALTFP|O_VERB, NULL);
2536 	indent_pop();
2537 	return (FME_CREDIBLE);
2538 }
2539 
2540 static enum fme_state
2541 hypothesise(struct fme *fmep, struct event *ep,
2542 	unsigned long long at_latest_by, unsigned long long *pdelay,
2543 	struct arrow *arrowp)
2544 {
2545 	enum fme_state rtr, otr;
2546 	unsigned long long my_delay;
2547 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
2548 
2549 	stats_counter_bump(fmep->Hcallcount);
2550 	indent_push("  H");
2551 	indent();
2552 	out(O_ALTFP|O_VERB|O_NONL, "->");
2553 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
2554 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
2555 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
2556 	out(O_ALTFP|O_VERB, NULL);
2557 
2558 	rtr = requirements_test(fmep, ep, at_latest_by, &my_delay, arrowp);
2559 	mark_arrows(fmep, ep, 0); /* clean up after requirements test */
2560 	if ((rtr == FME_WAIT) && (my_delay < overall_delay))
2561 		overall_delay = my_delay;
2562 	if (rtr != FME_DISPROVED) {
2563 		if (is_problem(ep->t)) {
2564 			otr = effects_test(fmep, ep);
2565 			if (otr != FME_DISPROVED) {
2566 				if (fmep->peek == 0 && ep->is_suspect++ == 0) {
2567 					ep->suspects = fmep->suspects;
2568 					fmep->suspects = ep;
2569 					fmep->nsuspects++;
2570 					if (!is_fault(ep->t))
2571 						fmep->nonfault++;
2572 				}
2573 			}
2574 		} else
2575 			otr = causes_test(fmep, ep, at_latest_by, &my_delay);
2576 		if ((otr == FME_WAIT) && (my_delay < overall_delay))
2577 			overall_delay = my_delay;
2578 		if ((otr != FME_DISPROVED) &&
2579 		    ((rtr == FME_WAIT) || (otr == FME_WAIT)))
2580 			*pdelay = overall_delay;
2581 	}
2582 	if (rtr == FME_DISPROVED) {
2583 		indent();
2584 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
2585 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
2586 		out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
2587 		indent_pop();
2588 		return (FME_DISPROVED);
2589 	}
2590 	if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
2591 		indent();
2592 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
2593 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
2594 		out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
2595 		indent_pop();
2596 		return (FME_DISPROVED);
2597 	}
2598 	if (otr == FME_DISPROVED) {
2599 		indent();
2600 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
2601 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
2602 		out(O_ALTFP|O_VERB, " (causes are not credible)");
2603 		indent_pop();
2604 		return (FME_DISPROVED);
2605 	}
2606 	if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
2607 		indent();
2608 		out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
2609 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
2610 		out(O_ALTFP|O_VERB|O_NONL, " to ");
2611 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
2612 		out(O_ALTFP|O_VERB, NULL);
2613 		indent_pop();
2614 		return (FME_WAIT);
2615 	}
2616 	indent();
2617 	out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
2618 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
2619 	out(O_ALTFP|O_VERB, NULL);
2620 	indent_pop();
2621 	return (FME_CREDIBLE);
2622 }
2623