xref: /titanic_41/usr/src/cmd/fm/modules/common/eversholt/fme.c (revision 33f5ff17089e3a43e6e730bf80384c233123dbd9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2012 Milan Jurik. All rights reserved.
25  *
26  * fme.c -- fault management exercise module
27  *
28  * this module provides the simulated fault management exercise.
29  */
30 
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <strings.h>
35 #include <ctype.h>
36 #include <alloca.h>
37 #include <libnvpair.h>
38 #include <sys/fm/protocol.h>
39 #include <fm/fmd_api.h>
40 #include "alloc.h"
41 #include "out.h"
42 #include "stats.h"
43 #include "stable.h"
44 #include "literals.h"
45 #include "lut.h"
46 #include "tree.h"
47 #include "ptree.h"
48 #include "itree.h"
49 #include "ipath.h"
50 #include "fme.h"
51 #include "evnv.h"
52 #include "eval.h"
53 #include "config.h"
54 #include "platform.h"
55 #include "esclex.h"
56 
57 /* imported from eft.c... */
58 extern hrtime_t Hesitate;
59 extern char *Serd_Override;
60 extern nv_alloc_t Eft_nv_hdl;
61 extern int Max_fme;
62 extern fmd_hdl_t *Hdl;
63 
64 static int Istat_need_save;
65 static int Serd_need_save;
66 void istat_save(void);
67 void serd_save(void);
68 
69 /* fme under construction is global so we can free it on module abort */
70 static struct fme *Nfmep;
71 
72 static int Undiag_reason = UD_VAL_UNKNOWN;
73 
74 static int Nextid = 0;
75 
76 static int Open_fme_count = 0;	/* Count of open FMEs */
77 
78 /* list of fault management exercises underway */
79 static struct fme {
80 	struct fme *next;		/* next exercise */
81 	unsigned long long ull;		/* time when fme was created */
82 	int id;				/* FME id */
83 	struct config *config;		/* cooked configuration data */
84 	struct lut *eventtree;		/* propagation tree for this FME */
85 	/*
86 	 * The initial error report that created this FME is kept in
87 	 * two forms.  e0 points to the instance tree node and is used
88 	 * by fme_eval() as the starting point for the inference
89 	 * algorithm.  e0r is the event handle FMD passed to us when
90 	 * the ereport first arrived and is used when setting timers,
91 	 * which are always relative to the time of this initial
92 	 * report.
93 	 */
94 	struct event *e0;
95 	fmd_event_t *e0r;
96 
97 	id_t    timer;			/* for setting an fmd time-out */
98 
99 	struct event *ecurrent;		/* ereport under consideration */
100 	struct event *suspects;		/* current suspect list */
101 	struct event *psuspects;	/* previous suspect list */
102 	int nsuspects;			/* count of suspects */
103 	int posted_suspects;		/* true if we've posted a diagnosis */
104 	int uniqobs;			/* number of unique events observed */
105 	int peek;			/* just peeking, don't track suspects */
106 	int overflow;			/* true if overflow FME */
107 	enum fme_state {
108 		FME_NOTHING = 5000,	/* not evaluated yet */
109 		FME_WAIT,		/* need to wait for more info */
110 		FME_CREDIBLE,		/* suspect list is credible */
111 		FME_DISPROVED,		/* no valid suspects found */
112 		FME_DEFERRED		/* don't know yet (k-count not met) */
113 	} state;
114 
115 	unsigned long long pull;	/* time passed since created */
116 	unsigned long long wull;	/* wait until this time for re-eval */
117 	struct event *observations;	/* observation list */
118 	struct lut *globals;		/* values of global variables */
119 	/* fmd interfacing */
120 	fmd_hdl_t *hdl;			/* handle for talking with fmd */
121 	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
122 	/* stats */
123 	struct stats *Rcount;
124 	struct stats *Hcallcount;
125 	struct stats *Rcallcount;
126 	struct stats *Ccallcount;
127 	struct stats *Ecallcount;
128 	struct stats *Tcallcount;
129 	struct stats *Marrowcount;
130 	struct stats *diags;
131 } *FMElist, *EFMElist, *ClosedFMEs;
132 
133 static struct case_list {
134 	fmd_case_t *fmcase;
135 	struct case_list *next;
136 } *Undiagablecaselist;
137 
138 static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
139 static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
140 	unsigned long long at_latest_by, unsigned long long *pdelay);
141 static struct node *eventprop_lookup(struct event *ep, const char *propname);
142 static struct node *pathstring2epnamenp(char *path);
143 static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
144 	fmd_case_t *fmcase, nvlist_t *detector, char *arg);
145 static char *undiag_2reason_str(int ud, char *arg);
146 static const char *undiag_2defect_str(int ud);
147 static void restore_suspects(struct fme *fmep);
148 static void save_suspects(struct fme *fmep);
149 static void destroy_fme(struct fme *f);
150 static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
151     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
152 static void istat_counter_reset_cb(struct istat_entry *entp,
153     struct stats *statp, const struct ipath *ipp);
154 static void istat_counter_topo_chg_cb(struct istat_entry *entp,
155     struct stats *statp, void *unused);
156 static void serd_reset_cb(struct serd_entry *entp, void *unused,
157     const struct ipath *ipp);
158 static void serd_topo_chg_cb(struct serd_entry *entp, void *unused,
159     void *unused2);
160 static void destroy_fme_bufs(struct fme *fp);
161 
162 static struct fme *
alloc_fme(void)163 alloc_fme(void)
164 {
165 	struct fme *fmep;
166 
167 	fmep = MALLOC(sizeof (*fmep));
168 	bzero(fmep, sizeof (*fmep));
169 	return (fmep);
170 }
171 
172 /*
173  * fme_ready -- called when all initialization of the FME (except for
174  *	stats) has completed successfully.  Adds the fme to global lists
175  *	and establishes its stats.
176  */
177 static struct fme *
fme_ready(struct fme * fmep)178 fme_ready(struct fme *fmep)
179 {
180 	char nbuf[100];
181 
182 	Nfmep = NULL;	/* don't need to free this on module abort now */
183 
184 	if (EFMElist) {
185 		EFMElist->next = fmep;
186 		EFMElist = fmep;
187 	} else
188 		FMElist = EFMElist = fmep;
189 
190 	(void) sprintf(nbuf, "fme%d.Rcount", fmep->id);
191 	fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
192 	(void) sprintf(nbuf, "fme%d.Hcall", fmep->id);
193 	fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1);
194 	(void) sprintf(nbuf, "fme%d.Rcall", fmep->id);
195 	fmep->Rcallcount = stats_new_counter(nbuf,
196 	    "calls to requirements_test()", 1);
197 	(void) sprintf(nbuf, "fme%d.Ccall", fmep->id);
198 	fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1);
199 	(void) sprintf(nbuf, "fme%d.Ecall", fmep->id);
200 	fmep->Ecallcount =
201 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
202 	(void) sprintf(nbuf, "fme%d.Tcall", fmep->id);
203 	fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
204 	(void) sprintf(nbuf, "fme%d.Marrow", fmep->id);
205 	fmep->Marrowcount = stats_new_counter(nbuf,
206 	    "arrows marked by mark_arrows()", 1);
207 	(void) sprintf(nbuf, "fme%d.diags", fmep->id);
208 	fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
209 
210 	out(O_ALTFP|O_VERB2, "newfme: config snapshot contains...");
211 	config_print(O_ALTFP|O_VERB2, fmep->config);
212 
213 	return (fmep);
214 }
215 
/* helpers for the dummy-config pruning pass, declared here directly */
extern struct lut *itree_create_dummy(const char *e0class,
    const struct ipath *e0ipp);
extern void ipath_dummy_lut(struct arrow *arrowp);
218 
219 /* ARGSUSED */
220 static void
set_needed_arrows(struct event * ep,struct event * ep2,struct fme * fmep)221 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
222 {
223 	struct bubble *bp;
224 	struct arrowlist *ap;
225 
226 	for (bp = itree_next_bubble(ep, NULL); bp;
227 	    bp = itree_next_bubble(ep, bp)) {
228 		if (bp->t != B_FROM)
229 			continue;
230 		for (ap = itree_next_arrow(bp, NULL); ap;
231 		    ap = itree_next_arrow(bp, ap)) {
232 			ap->arrowp->pnode->u.arrow.needed = 1;
233 			ipath_dummy_lut(ap->arrowp);
234 		}
235 	}
236 }
237 
238 /* ARGSUSED */
239 static void
unset_needed_arrows(struct event * ep,struct event * ep2,struct fme * fmep)240 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
241 {
242 	struct bubble *bp;
243 	struct arrowlist *ap;
244 
245 	for (bp = itree_next_bubble(ep, NULL); bp;
246 	    bp = itree_next_bubble(ep, bp)) {
247 		if (bp->t != B_FROM)
248 			continue;
249 		for (ap = itree_next_arrow(bp, NULL); ap;
250 		    ap = itree_next_arrow(bp, ap))
251 			ap->arrowp->pnode->u.arrow.needed = 0;
252 	}
253 }
254 
/* used by prune_propagations() ahead of their definitions */
static void clear_arrows(struct event *ep, struct event *ep2,
    struct fme *fmep);
static void globals_destructor(void *left, void *right, void *arg);
257 
258 static boolean_t
prune_propagations(const char * e0class,const struct ipath * e0ipp)259 prune_propagations(const char *e0class, const struct ipath *e0ipp)
260 {
261 	char nbuf[100];
262 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
263 	extern struct lut *Usednames;
264 
265 	Nfmep = alloc_fme();
266 	Nfmep->id = Nextid;
267 	Nfmep->state = FME_NOTHING;
268 	Nfmep->eventtree = itree_create_dummy(e0class, e0ipp);
269 	if ((Nfmep->e0 =
270 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
271 		itree_free(Nfmep->eventtree);
272 		FREE(Nfmep);
273 		Nfmep = NULL;
274 		return (B_FALSE);
275 	}
276 	Nfmep->ecurrent = Nfmep->observations = Nfmep->e0;
277 	Nfmep->e0->count++;
278 
279 	(void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id);
280 	Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
281 	(void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id);
282 	Nfmep->Hcallcount =
283 	    stats_new_counter(nbuf, "calls to hypothesise()", 1);
284 	(void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id);
285 	Nfmep->Rcallcount = stats_new_counter(nbuf,
286 	    "calls to requirements_test()", 1);
287 	(void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id);
288 	Nfmep->Ccallcount =
289 	    stats_new_counter(nbuf, "calls to causes_test()", 1);
290 	(void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id);
291 	Nfmep->Ecallcount =
292 	    stats_new_counter(nbuf, "calls to effects_test()", 1);
293 	(void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id);
294 	Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
295 	(void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id);
296 	Nfmep->Marrowcount = stats_new_counter(nbuf,
297 	    "arrows marked by mark_arrows()", 1);
298 	(void) sprintf(nbuf, "fme%d.diags", Nfmep->id);
299 	Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
300 
301 	Nfmep->peek = 1;
302 	lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep);
303 	lut_free(Usednames, NULL, NULL);
304 	Usednames = NULL;
305 	lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep);
306 	(void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay);
307 	itree_prune(Nfmep->eventtree);
308 	lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep);
309 
310 	stats_delete(Nfmep->Rcount);
311 	stats_delete(Nfmep->Hcallcount);
312 	stats_delete(Nfmep->Rcallcount);
313 	stats_delete(Nfmep->Ccallcount);
314 	stats_delete(Nfmep->Ecallcount);
315 	stats_delete(Nfmep->Tcallcount);
316 	stats_delete(Nfmep->Marrowcount);
317 	stats_delete(Nfmep->diags);
318 	itree_free(Nfmep->eventtree);
319 	lut_free(Nfmep->globals, globals_destructor, NULL);
320 	FREE(Nfmep);
321 	return (B_TRUE);
322 }
323 
324 static struct fme *
newfme(const char * e0class,const struct ipath * e0ipp,fmd_hdl_t * hdl,fmd_case_t * fmcase,fmd_event_t * ffep,nvlist_t * nvl)325 newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl,
326 	fmd_case_t *fmcase, fmd_event_t *ffep, nvlist_t *nvl)
327 {
328 	struct cfgdata *cfgdata;
329 	int init_size;
330 	extern int alloc_total();
331 	nvlist_t *detector = NULL;
332 	char *pathstr;
333 	char *arg;
334 
335 	/*
336 	 * First check if e0ipp is actually in the topology so we can give a
337 	 * more useful error message.
338 	 */
339 	ipathlastcomp(e0ipp);
340 	pathstr = ipath2str(NULL, e0ipp);
341 	cfgdata = config_snapshot();
342 	platform_units_translate(0, cfgdata->cooked, NULL, NULL,
343 	    &detector, pathstr);
344 	FREE(pathstr);
345 	structconfig_free(cfgdata->cooked);
346 	config_free(cfgdata);
347 	if (detector == NULL) {
348 		/* See if class permits silent discard on unknown component. */
349 		if (lut_lookup(Ereportenames_discard, (void *)e0class, NULL)) {
350 			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
351 			    "to component path, but silent discard allowed.",
352 			    e0class);
353 		} else {
354 			Undiag_reason = UD_VAL_BADEVENTPATH;
355 			(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
356 			    &detector);
357 			arg = ipath2str(e0class, e0ipp);
358 			publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
359 			FREE(arg);
360 		}
361 		return (NULL);
362 	}
363 
364 	/*
365 	 * Next run a quick first pass of the rules with a dummy config. This
366 	 * allows us to prune those rules which can't possibly cause this
367 	 * ereport.
368 	 */
369 	if (!prune_propagations(e0class, e0ipp)) {
370 		/*
371 		 * The fault class must have been in the rules or we would
372 		 * not have registered for it (and got a "nosub"), and the
373 		 * pathname must be in the topology or we would have failed the
374 		 * previous test. So to get here means the combination of
375 		 * class and pathname in the ereport must be invalid.
376 		 */
377 		Undiag_reason = UD_VAL_BADEVENTCLASS;
378 		arg = ipath2str(e0class, e0ipp);
379 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
380 		nvlist_free(detector);
381 		FREE(arg);
382 		return (NULL);
383 	}
384 
385 	/*
386 	 * Now go ahead and create the real fme using the pruned rules.
387 	 */
388 	init_size = alloc_total();
389 	out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size);
390 	nvlist_free(detector);
391 	pathstr = ipath2str(NULL, e0ipp);
392 	cfgdata = config_snapshot();
393 	platform_units_translate(0, cfgdata->cooked, NULL, NULL,
394 	    &detector, pathstr);
395 	FREE(pathstr);
396 	platform_save_config(hdl, fmcase);
397 	out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes",
398 	    alloc_total() - init_size);
399 
400 	Nfmep = alloc_fme();
401 
402 	Nfmep->id = Nextid++;
403 	Nfmep->config = cfgdata->cooked;
404 	config_free(cfgdata);
405 	Nfmep->posted_suspects = 0;
406 	Nfmep->uniqobs = 0;
407 	Nfmep->state = FME_NOTHING;
408 	Nfmep->pull = 0ULL;
409 	Nfmep->overflow = 0;
410 
411 	Nfmep->fmcase = fmcase;
412 	Nfmep->hdl = hdl;
413 
414 	if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) {
415 		Undiag_reason = UD_VAL_INSTFAIL;
416 		arg = ipath2str(e0class, e0ipp);
417 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
418 		nvlist_free(detector);
419 		FREE(arg);
420 		structconfig_free(Nfmep->config);
421 		destroy_fme_bufs(Nfmep);
422 		FREE(Nfmep);
423 		Nfmep = NULL;
424 		return (NULL);
425 	}
426 
427 	itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);
428 
429 	if ((Nfmep->e0 =
430 	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
431 		Undiag_reason = UD_VAL_BADEVENTI;
432 		arg = ipath2str(e0class, e0ipp);
433 		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
434 		nvlist_free(detector);
435 		FREE(arg);
436 		itree_free(Nfmep->eventtree);
437 		structconfig_free(Nfmep->config);
438 		destroy_fme_bufs(Nfmep);
439 		FREE(Nfmep);
440 		Nfmep = NULL;
441 		return (NULL);
442 	}
443 
444 	nvlist_free(detector);
445 	return (fme_ready(Nfmep));
446 }
447 
448 void
fme_fini(void)449 fme_fini(void)
450 {
451 	struct fme *sfp, *fp;
452 	struct case_list *ucasep, *nextcasep;
453 
454 	ucasep = Undiagablecaselist;
455 	while (ucasep != NULL) {
456 		nextcasep = ucasep->next;
457 		FREE(ucasep);
458 		ucasep = nextcasep;
459 	}
460 	Undiagablecaselist = NULL;
461 
462 	/* clean up closed fmes */
463 	fp = ClosedFMEs;
464 	while (fp != NULL) {
465 		sfp = fp->next;
466 		destroy_fme(fp);
467 		fp = sfp;
468 	}
469 	ClosedFMEs = NULL;
470 
471 	fp = FMElist;
472 	while (fp != NULL) {
473 		sfp = fp->next;
474 		destroy_fme(fp);
475 		fp = sfp;
476 	}
477 	FMElist = EFMElist = NULL;
478 
479 	/* if we were in the middle of creating an fme, free it now */
480 	if (Nfmep) {
481 		destroy_fme(Nfmep);
482 		Nfmep = NULL;
483 	}
484 }
485 
/*
 * Size of the scratch buffer used to build observation buffer names.
 * The longest name is "observed<N>.nvp", so 20 bytes (including the
 * terminating NUL) allows for a ridiculous 9,999,999 unique observations.
 */
490 #define	OBBUFNMSZ 20
491 
492 /*
493  *  serialize_observation
494  *
495  *  Create a recoverable version of the current observation
496  *  (f->ecurrent).  We keep a serialized version of each unique
497  *  observation in order that we may resume correctly the fme in the
498  *  correct state if eft or fmd crashes and we're restarted.
499  */
500 static void
serialize_observation(struct fme * fp,const char * cls,const struct ipath * ipp)501 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
502 {
503 	size_t pkdlen;
504 	char tmpbuf[OBBUFNMSZ];
505 	char *pkd = NULL;
506 	char *estr;
507 
508 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
509 	estr = ipath2str(cls, ipp);
510 	fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
511 	fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
512 	    strlen(estr) + 1);
513 	FREE(estr);
514 
515 	if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
516 		(void) snprintf(tmpbuf,
517 		    OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
518 		if (nvlist_xpack(fp->ecurrent->nvp,
519 		    &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
520 			out(O_DIE|O_SYS, "pack of observed nvl failed");
521 		fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
522 		fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
523 		FREE(pkd);
524 	}
525 
526 	fp->uniqobs++;
527 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
528 	    sizeof (fp->uniqobs));
529 }
530 
531 /*
532  *  init_fme_bufs -- We keep several bits of state about an fme for
533  *	use if eft or fmd crashes and we're restarted.
534  */
535 static void
init_fme_bufs(struct fme * fp)536 init_fme_bufs(struct fme *fp)
537 {
538 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
539 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
540 	    sizeof (fp->pull));
541 
542 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
543 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
544 	    sizeof (fp->id));
545 
546 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
547 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
548 	    sizeof (fp->uniqobs));
549 
550 	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
551 	    sizeof (fp->posted_suspects));
552 	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
553 	    (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
554 }
555 
556 static void
destroy_fme_bufs(struct fme * fp)557 destroy_fme_bufs(struct fme *fp)
558 {
559 	char tmpbuf[OBBUFNMSZ];
560 	int o;
561 
562 	platform_restore_config(fp->hdl, fp->fmcase);
563 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
564 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
565 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
566 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
567 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
568 	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);
569 
570 	for (o = 0; o < fp->uniqobs; o++) {
571 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
572 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
573 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
574 		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
575 	}
576 }
577 
578 /*
579  * reconstitute_observations -- convert a case's serialized observations
580  *	back into struct events.  Returns zero if all observations are
581  *	successfully reconstituted.
582  */
583 static int
reconstitute_observations(struct fme * fmep)584 reconstitute_observations(struct fme *fmep)
585 {
586 	struct event *ep;
587 	struct node *epnamenp = NULL;
588 	size_t pkdlen;
589 	char *pkd = NULL;
590 	char *tmpbuf = alloca(OBBUFNMSZ);
591 	char *sepptr;
592 	char *estr;
593 	int ocnt;
594 	int elen;
595 
596 	for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
597 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
598 		elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
599 		if (elen == 0) {
600 			out(O_ALTFP,
601 			    "reconstitute_observation: no %s buffer found.",
602 			    tmpbuf);
603 			Undiag_reason = UD_VAL_MISSINGOBS;
604 			break;
605 		}
606 
607 		estr = MALLOC(elen);
608 		fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
609 		sepptr = strchr(estr, '@');
610 		if (sepptr == NULL) {
611 			out(O_ALTFP,
612 			    "reconstitute_observation: %s: "
613 			    "missing @ separator in %s.",
614 			    tmpbuf, estr);
615 			Undiag_reason = UD_VAL_MISSINGPATH;
616 			FREE(estr);
617 			break;
618 		}
619 
620 		*sepptr = '\0';
621 		if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
622 			out(O_ALTFP,
623 			    "reconstitute_observation: %s: "
624 			    "trouble converting path string \"%s\" "
625 			    "to internal representation.",
626 			    tmpbuf, sepptr + 1);
627 			Undiag_reason = UD_VAL_MISSINGPATH;
628 			FREE(estr);
629 			break;
630 		}
631 
632 		/* construct the event */
633 		ep = itree_lookup(fmep->eventtree,
634 		    stable(estr), ipath(epnamenp));
635 		if (ep == NULL) {
636 			out(O_ALTFP,
637 			    "reconstitute_observation: %s: "
638 			    "lookup of  \"%s\" in itree failed.",
639 			    tmpbuf, ipath2str(estr, ipath(epnamenp)));
640 			Undiag_reason = UD_VAL_BADOBS;
641 			tree_free(epnamenp);
642 			FREE(estr);
643 			break;
644 		}
645 		tree_free(epnamenp);
646 
647 		/*
648 		 * We may or may not have a saved nvlist for the observation
649 		 */
650 		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
651 		pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
652 		if (pkdlen != 0) {
653 			pkd = MALLOC(pkdlen);
654 			fmd_buf_read(fmep->hdl,
655 			    fmep->fmcase, tmpbuf, pkd, pkdlen);
656 			ASSERT(ep->nvp == NULL);
657 			if (nvlist_xunpack(pkd,
658 			    pkdlen, &ep->nvp, &Eft_nv_hdl) != 0)
659 				out(O_DIE|O_SYS, "pack of observed nvl failed");
660 			FREE(pkd);
661 		}
662 
663 		if (ocnt == 0)
664 			fmep->e0 = ep;
665 
666 		FREE(estr);
667 		fmep->ecurrent = ep;
668 		ep->count++;
669 
670 		/* link it into list of observations seen */
671 		ep->observations = fmep->observations;
672 		fmep->observations = ep;
673 	}
674 
675 	if (ocnt == fmep->uniqobs) {
676 		(void) fme_ready(fmep);
677 		return (0);
678 	}
679 
680 	return (1);
681 }
682 
683 /*
684  * restart_fme -- called during eft initialization.  Reconstitutes
685  *	an in-progress fme.
686  */
687 void
fme_restart(fmd_hdl_t * hdl,fmd_case_t * inprogress)688 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress)
689 {
690 	nvlist_t *defect;
691 	struct case_list *bad;
692 	struct fme *fmep;
693 	struct cfgdata *cfgdata;
694 	size_t rawsz;
695 	struct event *ep;
696 	char *tmpbuf = alloca(OBBUFNMSZ);
697 	char *sepptr;
698 	char *estr;
699 	int elen;
700 	struct node *epnamenp = NULL;
701 	int init_size;
702 	extern int alloc_total();
703 	char *reason;
704 
705 	/*
706 	 * ignore solved or closed cases
707 	 */
708 	if (fmd_case_solved(hdl, inprogress) ||
709 	    fmd_case_closed(hdl, inprogress))
710 		return;
711 
712 	fmep = alloc_fme();
713 	fmep->fmcase = inprogress;
714 	fmep->hdl = hdl;
715 
716 	if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) {
717 		out(O_ALTFP, "restart_fme: no saved posted status");
718 		Undiag_reason = UD_VAL_MISSINGINFO;
719 		goto badcase;
720 	} else {
721 		fmd_buf_read(hdl, inprogress, WOBUF_POSTD,
722 		    (void *)&fmep->posted_suspects,
723 		    sizeof (fmep->posted_suspects));
724 	}
725 
726 	if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) {
727 		out(O_ALTFP, "restart_fme: no saved id");
728 		Undiag_reason = UD_VAL_MISSINGINFO;
729 		goto badcase;
730 	} else {
731 		fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id,
732 		    sizeof (fmep->id));
733 	}
734 	if (Nextid <= fmep->id)
735 		Nextid = fmep->id + 1;
736 
737 	out(O_ALTFP, "Replay FME %d", fmep->id);
738 
739 	if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) {
740 		out(O_ALTFP, "restart_fme: No config data");
741 		Undiag_reason = UD_VAL_MISSINGINFO;
742 		goto badcase;
743 	}
744 	fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz,
745 	    sizeof (size_t));
746 
747 	if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) {
748 		out(O_ALTFP, "restart_fme: No event zero");
749 		Undiag_reason = UD_VAL_MISSINGZERO;
750 		goto badcase;
751 	}
752 
753 	if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
754 		out(O_ALTFP, "restart_fme: no saved wait time");
755 		Undiag_reason = UD_VAL_MISSINGINFO;
756 		goto badcase;
757 	} else {
758 		fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
759 		    sizeof (fmep->pull));
760 	}
761 
762 	if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
763 		out(O_ALTFP, "restart_fme: no count of observations");
764 		Undiag_reason = UD_VAL_MISSINGINFO;
765 		goto badcase;
766 	} else {
767 		fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
768 		    (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
769 	}
770 
771 	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed0");
772 	elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
773 	if (elen == 0) {
774 		out(O_ALTFP, "reconstitute_observation: no %s buffer found.",
775 		    tmpbuf);
776 		Undiag_reason = UD_VAL_MISSINGOBS;
777 		goto badcase;
778 	}
779 	estr = MALLOC(elen);
780 	fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
781 	sepptr = strchr(estr, '@');
782 	if (sepptr == NULL) {
783 		out(O_ALTFP, "reconstitute_observation: %s: "
784 		    "missing @ separator in %s.",
785 		    tmpbuf, estr);
786 		Undiag_reason = UD_VAL_MISSINGPATH;
787 		FREE(estr);
788 		goto badcase;
789 	}
790 	*sepptr = '\0';
791 	if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
792 		out(O_ALTFP, "reconstitute_observation: %s: "
793 		    "trouble converting path string \"%s\" "
794 		    "to internal representation.", tmpbuf, sepptr + 1);
795 		Undiag_reason = UD_VAL_MISSINGPATH;
796 		FREE(estr);
797 		goto badcase;
798 	}
799 	(void) prune_propagations(stable(estr), ipath(epnamenp));
800 	tree_free(epnamenp);
801 	FREE(estr);
802 
803 	init_size = alloc_total();
804 	out(O_ALTFP|O_STAMP, "start config_restore using %d bytes", init_size);
805 	cfgdata = MALLOC(sizeof (struct cfgdata));
806 	cfgdata->cooked = NULL;
807 	cfgdata->devcache = NULL;
808 	cfgdata->devidcache = NULL;
809 	cfgdata->tpcache = NULL;
810 	cfgdata->cpucache = NULL;
811 	cfgdata->raw_refcnt = 1;
812 
813 	if (rawsz > 0) {
814 		if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) {
815 			out(O_ALTFP, "restart_fme: Config data size mismatch");
816 			Undiag_reason = UD_VAL_CFGMISMATCH;
817 			goto badcase;
818 		}
819 		cfgdata->begin = MALLOC(rawsz);
820 		cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz;
821 		fmd_buf_read(hdl,
822 		    inprogress, WOBUF_CFG, cfgdata->begin, rawsz);
823 	} else {
824 		cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL;
825 	}
826 
827 	config_cook(cfgdata);
828 	fmep->config = cfgdata->cooked;
829 	config_free(cfgdata);
830 	out(O_ALTFP|O_STAMP, "config_restore added %d bytes",
831 	    alloc_total() - init_size);
832 
833 	if ((fmep->eventtree = itree_create(fmep->config)) == NULL) {
834 		/* case not properly saved or irretrievable */
835 		out(O_ALTFP, "restart_fme: NULL instance tree");
836 		Undiag_reason = UD_VAL_INSTFAIL;
837 		goto badcase;
838 	}
839 
840 	itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree);
841 
842 	if (reconstitute_observations(fmep) != 0)
843 		goto badcase;
844 
845 	out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id);
846 	for (ep = fmep->observations; ep; ep = ep->observations) {
847 		out(O_ALTFP|O_NONL, " ");
848 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
849 	}
850 	out(O_ALTFP, NULL);
851 
852 	Open_fme_count++;
853 
854 	/* give the diagnosis algorithm a shot at the new FME state */
855 	fme_eval(fmep, fmep->e0r);
856 	return;
857 
858 badcase:
859 	if (fmep->eventtree != NULL)
860 		itree_free(fmep->eventtree);
861 	if (fmep->config)
862 		structconfig_free(fmep->config);
863 	destroy_fme_bufs(fmep);
864 	FREE(fmep);
865 
866 	/*
867 	 * Since we're unable to restart the case, add it to the undiagable
868 	 * list and solve and close it as appropriate.
869 	 */
870 	bad = MALLOC(sizeof (struct case_list));
871 	bad->next = NULL;
872 
873 	if (Undiagablecaselist != NULL)
874 		bad->next = Undiagablecaselist;
875 	Undiagablecaselist = bad;
876 	bad->fmcase = inprogress;
877 
878 	out(O_ALTFP|O_NONL, "[case %s (unable to restart), ",
879 	    fmd_case_uuid(hdl, bad->fmcase));
880 
881 	if (fmd_case_solved(hdl, bad->fmcase)) {
882 		out(O_ALTFP|O_NONL, "already solved, ");
883 	} else {
884 		out(O_ALTFP|O_NONL, "solving, ");
885 		defect = fmd_nvl_create_fault(hdl,
886 		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
887 		reason = undiag_2reason_str(Undiag_reason, NULL);
888 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
889 		FREE(reason);
890 		fmd_case_add_suspect(hdl, bad->fmcase, defect);
891 		fmd_case_solve(hdl, bad->fmcase);
892 		Undiag_reason = UD_VAL_UNKNOWN;
893 	}
894 
895 	if (fmd_case_closed(hdl, bad->fmcase)) {
896 		out(O_ALTFP, "already closed ]");
897 	} else {
898 		out(O_ALTFP, "closing ]");
899 		fmd_case_close(hdl, bad->fmcase);
900 	}
901 }
902 
903 /*ARGSUSED*/
904 static void
globals_destructor(void * left,void * right,void * arg)905 globals_destructor(void *left, void *right, void *arg)
906 {
907 	struct evalue *evp = (struct evalue *)right;
908 	if (evp->t == NODEPTR)
909 		tree_free((struct node *)(uintptr_t)evp->v);
910 	evp->v = (uintptr_t)NULL;
911 	FREE(evp);
912 }
913 
914 void
destroy_fme(struct fme * f)915 destroy_fme(struct fme *f)
916 {
917 	stats_delete(f->Rcount);
918 	stats_delete(f->Hcallcount);
919 	stats_delete(f->Rcallcount);
920 	stats_delete(f->Ccallcount);
921 	stats_delete(f->Ecallcount);
922 	stats_delete(f->Tcallcount);
923 	stats_delete(f->Marrowcount);
924 	stats_delete(f->diags);
925 
926 	if (f->eventtree != NULL)
927 		itree_free(f->eventtree);
928 	if (f->config)
929 		structconfig_free(f->config);
930 	lut_free(f->globals, globals_destructor, NULL);
931 	FREE(f);
932 }
933 
934 static const char *
fme_state2str(enum fme_state s)935 fme_state2str(enum fme_state s)
936 {
937 	switch (s) {
938 	case FME_NOTHING:	return ("NOTHING");
939 	case FME_WAIT:		return ("WAIT");
940 	case FME_CREDIBLE:	return ("CREDIBLE");
941 	case FME_DISPROVED:	return ("DISPROVED");
942 	case FME_DEFERRED:	return ("DEFERRED");
943 	default:		return ("UNKNOWN");
944 	}
945 }
946 
947 static int
is_problem(enum nametype t)948 is_problem(enum nametype t)
949 {
950 	return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
951 }
952 
953 static int
is_defect(enum nametype t)954 is_defect(enum nametype t)
955 {
956 	return (t == N_DEFECT);
957 }
958 
959 static int
is_upset(enum nametype t)960 is_upset(enum nametype t)
961 {
962 	return (t == N_UPSET);
963 }
964 
965 static void
fme_print(int flags,struct fme * fmep)966 fme_print(int flags, struct fme *fmep)
967 {
968 	struct event *ep;
969 
970 	out(flags, "Fault Management Exercise %d", fmep->id);
971 	out(flags, "\t       State: %s", fme_state2str(fmep->state));
972 	out(flags|O_NONL, "\t  Start time: ");
973 	ptree_timeval(flags|O_NONL, &fmep->ull);
974 	out(flags, NULL);
975 	if (fmep->wull) {
976 		out(flags|O_NONL, "\t   Wait time: ");
977 		ptree_timeval(flags|O_NONL, &fmep->wull);
978 		out(flags, NULL);
979 	}
980 	out(flags|O_NONL, "\t          E0: ");
981 	if (fmep->e0)
982 		itree_pevent_brief(flags|O_NONL, fmep->e0);
983 	else
984 		out(flags|O_NONL, "NULL");
985 	out(flags, NULL);
986 	out(flags|O_NONL, "\tObservations:");
987 	for (ep = fmep->observations; ep; ep = ep->observations) {
988 		out(flags|O_NONL, " ");
989 		itree_pevent_brief(flags|O_NONL, ep);
990 	}
991 	out(flags, NULL);
992 	out(flags|O_NONL, "\tSuspect list:");
993 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
994 		out(flags|O_NONL, " ");
995 		itree_pevent_brief(flags|O_NONL, ep);
996 	}
997 	out(flags, NULL);
998 	if (fmep->eventtree != NULL) {
999 		out(flags|O_VERB2, "\t        Tree:");
1000 		itree_ptree(flags|O_VERB2, fmep->eventtree);
1001 	}
1002 }
1003 
1004 static struct node *
pathstring2epnamenp(char * path)1005 pathstring2epnamenp(char *path)
1006 {
1007 	char *sep = "/";
1008 	struct node *ret;
1009 	char *ptr;
1010 
1011 	if ((ptr = strtok(path, sep)) == NULL)
1012 		out(O_DIE, "pathstring2epnamenp: invalid empty class");
1013 
1014 	ret = tree_iname(stable(ptr), NULL, 0);
1015 
1016 	while ((ptr = strtok(NULL, sep)) != NULL)
1017 		ret = tree_name_append(ret,
1018 		    tree_iname(stable(ptr), NULL, 0));
1019 
1020 	return (ret);
1021 }
1022 
1023 /*
1024  * for a given upset sp, increment the corresponding SERD engine.  if the
1025  * SERD engine trips, return the ename and ipp of the resulting ereport.
1026  * returns true if engine tripped and *enamep and *ippp were filled in.
1027  */
1028 static int
serd_eval(struct fme * fmep,fmd_hdl_t * hdl,fmd_event_t * ffep,fmd_case_t * fmcase,struct event * sp,const char ** enamep,const struct ipath ** ippp)1029 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep,
1030     fmd_case_t *fmcase, struct event *sp, const char **enamep,
1031     const struct ipath **ippp)
1032 {
1033 	struct node *serdinst;
1034 	char *serdname;
1035 	char *serdresource;
1036 	char *serdclass;
1037 	struct node *nid;
1038 	struct serd_entry *newentp;
1039 	int i, serdn = -1, serdincrement = 1, len = 0;
1040 	char *serdsuffix = NULL, *serdt = NULL;
1041 	struct evalue *ep;
1042 
1043 	ASSERT(sp->t == N_UPSET);
1044 	ASSERT(ffep != NULL);
1045 
1046 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1047 	    (void *)"n", (lut_cmp)strcmp)) != NULL) {
1048 		ASSERT(ep->t == UINT64);
1049 		serdn = (int)ep->v;
1050 	}
1051 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1052 	    (void *)"t", (lut_cmp)strcmp)) != NULL) {
1053 		ASSERT(ep->t == STRING);
1054 		serdt = (char *)(uintptr_t)ep->v;
1055 	}
1056 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1057 	    (void *)"suffix", (lut_cmp)strcmp)) != NULL) {
1058 		ASSERT(ep->t == STRING);
1059 		serdsuffix = (char *)(uintptr_t)ep->v;
1060 	}
1061 	if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1062 	    (void *)"increment", (lut_cmp)strcmp)) != NULL) {
1063 		ASSERT(ep->t == UINT64);
1064 		serdincrement = (int)ep->v;
1065 	}
1066 
1067 	/*
1068 	 * obtain instanced SERD engine from the upset sp.  from this
1069 	 * derive serdname, the string used to identify the SERD engine.
1070 	 */
1071 	serdinst = eventprop_lookup(sp, L_engine);
1072 
1073 	if (serdinst == NULL)
1074 		return (-1);
1075 
1076 	len = strlen(serdinst->u.stmt.np->u.event.ename->u.name.s) + 1;
1077 	if (serdsuffix != NULL)
1078 		len += strlen(serdsuffix);
1079 	serdclass = MALLOC(len);
1080 	if (serdsuffix != NULL)
1081 		(void) snprintf(serdclass, len, "%s%s",
1082 		    serdinst->u.stmt.np->u.event.ename->u.name.s, serdsuffix);
1083 	else
1084 		(void) snprintf(serdclass, len, "%s",
1085 		    serdinst->u.stmt.np->u.event.ename->u.name.s);
1086 	serdresource = ipath2str(NULL,
1087 	    ipath(serdinst->u.stmt.np->u.event.epname));
1088 	len += strlen(serdresource) + 1;
1089 	serdname = MALLOC(len);
1090 	(void) snprintf(serdname, len, "%s@%s", serdclass, serdresource);
1091 	FREE(serdresource);
1092 
1093 	/* handle serd engine "id" property, if there is one */
1094 	if ((nid =
1095 	    lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) {
1096 		struct evalue *gval;
1097 		char suffixbuf[200];
1098 		char *suffix;
1099 		char *nserdname;
1100 		size_t nname;
1101 
1102 		out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname);
1103 		ptree_name_iter(O_ALTFP|O_NONL, nid);
1104 
1105 		ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t));
1106 
1107 		if ((gval = lut_lookup(fmep->globals,
1108 		    (void *)nid->u.globid.s, NULL)) == NULL) {
1109 			out(O_ALTFP, " undefined");
1110 		} else if (gval->t == UINT64) {
1111 			out(O_ALTFP, " %llu", gval->v);
1112 			(void) sprintf(suffixbuf, "%llu", gval->v);
1113 			suffix = suffixbuf;
1114 		} else {
1115 			out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v);
1116 			suffix = (char *)(uintptr_t)gval->v;
1117 		}
1118 
1119 		nname = strlen(serdname) + strlen(suffix) + 2;
1120 		nserdname = MALLOC(nname);
1121 		(void) snprintf(nserdname, nname, "%s:%s", serdname, suffix);
1122 		FREE(serdname);
1123 		serdname = nserdname;
1124 	}
1125 
1126 	/*
1127 	 * if the engine is empty, and we have an override for n/t then
1128 	 * destroy and recreate it.
1129 	 */
1130 	if ((serdn != -1 || serdt != NULL) && fmd_serd_exists(hdl, serdname) &&
1131 	    fmd_serd_empty(hdl, serdname))
1132 		fmd_serd_destroy(hdl, serdname);
1133 
1134 	if (!fmd_serd_exists(hdl, serdname)) {
1135 		struct node *nN, *nT;
1136 		const char *s;
1137 		struct node *nodep;
1138 		struct config *cp;
1139 		char *path;
1140 		uint_t nval;
1141 		hrtime_t tval;
1142 		int i;
1143 		char *ptr;
1144 		int got_n_override = 0, got_t_override = 0;
1145 
1146 		/* no SERD engine yet, so create it */
1147 		nodep = serdinst->u.stmt.np->u.event.epname;
1148 		path = ipath2str(NULL, ipath(nodep));
1149 		cp = config_lookup(fmep->config, path, 0);
1150 		FREE((void *)path);
1151 
1152 		/*
1153 		 * We allow serd paramaters to be overridden, either from
1154 		 * eft.conf file values (if Serd_Override is set) or from
1155 		 * driver properties (for "serd.io.device" engines).
1156 		 */
1157 		if (Serd_Override != NULL) {
1158 			char *save_ptr, *ptr1, *ptr2, *ptr3;
1159 			ptr3 = save_ptr = STRDUP(Serd_Override);
1160 			while (*ptr3 != '\0') {
1161 				ptr1 = strchr(ptr3, ',');
1162 				*ptr1 = '\0';
1163 				if (strcmp(ptr3, serdclass) == 0) {
1164 					ptr2 =  strchr(ptr1 + 1, ',');
1165 					*ptr2 = '\0';
1166 					nval = atoi(ptr1 + 1);
1167 					out(O_ALTFP, "serd override %s_n %d",
1168 					    serdclass, nval);
1169 					ptr3 =  strchr(ptr2 + 1, ' ');
1170 					if (ptr3)
1171 						*ptr3 = '\0';
1172 					ptr = STRDUP(ptr2 + 1);
1173 					out(O_ALTFP, "serd override %s_t %s",
1174 					    serdclass, ptr);
1175 					got_n_override = 1;
1176 					got_t_override = 1;
1177 					break;
1178 				} else {
1179 					ptr2 =  strchr(ptr1 + 1, ',');
1180 					ptr3 =  strchr(ptr2 + 1, ' ');
1181 					if (ptr3 == NULL)
1182 						break;
1183 				}
1184 				ptr3++;
1185 			}
1186 			FREE(save_ptr);
1187 		}
1188 
1189 		if (cp && got_n_override == 0) {
1190 			/*
1191 			 * convert serd engine class into property name
1192 			 */
1193 			char *prop_name = MALLOC(strlen(serdclass) + 3);
1194 			for (i = 0; i < strlen(serdclass); i++) {
1195 				if (serdclass[i] == '.')
1196 					prop_name[i] = '_';
1197 				else
1198 					prop_name[i] = serdclass[i];
1199 			}
1200 			prop_name[i++] = '_';
1201 			prop_name[i++] = 'n';
1202 			prop_name[i] = '\0';
1203 			if (s = config_getprop(cp, prop_name)) {
1204 				nval = atoi(s);
1205 				out(O_ALTFP, "serd override %s_n %s",
1206 				    serdclass, s);
1207 				got_n_override = 1;
1208 			}
1209 			prop_name[i - 1] = 't';
1210 			if (s = config_getprop(cp, prop_name)) {
1211 				ptr = STRDUP(s);
1212 				out(O_ALTFP, "serd override %s_t %s",
1213 				    serdclass, s);
1214 				got_t_override = 1;
1215 			}
1216 			FREE(prop_name);
1217 		}
1218 
1219 		if (serdn != -1 && got_n_override == 0) {
1220 			nval = serdn;
1221 			out(O_ALTFP, "serd override %s_n %d", serdclass, serdn);
1222 			got_n_override = 1;
1223 		}
1224 		if (serdt != NULL && got_t_override == 0) {
1225 			ptr = STRDUP(serdt);
1226 			out(O_ALTFP, "serd override %s_t %s", serdclass, serdt);
1227 			got_t_override = 1;
1228 		}
1229 
1230 		if (!got_n_override) {
1231 			nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N,
1232 			    NULL);
1233 			ASSERT(nN->t == T_NUM);
1234 			nval = (uint_t)nN->u.ull;
1235 		}
1236 		if (!got_t_override) {
1237 			nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T,
1238 			    NULL);
1239 			ASSERT(nT->t == T_TIMEVAL);
1240 			tval = (hrtime_t)nT->u.ull;
1241 		} else {
1242 			const unsigned long long *ullp;
1243 			const char *suffix;
1244 			int len;
1245 
1246 			len = strspn(ptr, "0123456789");
1247 			suffix = stable(&ptr[len]);
1248 			ullp = (unsigned long long *)lut_lookup(Timesuffixlut,
1249 			    (void *)suffix, NULL);
1250 			ptr[len] = '\0';
1251 			tval = strtoull(ptr, NULL, 0) * (ullp ? *ullp : 1ll);
1252 			FREE(ptr);
1253 		}
1254 		fmd_serd_create(hdl, serdname, nval, tval);
1255 	}
1256 
1257 	newentp = MALLOC(sizeof (*newentp));
1258 	newentp->ename = stable(serdclass);
1259 	FREE(serdclass);
1260 	newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname);
1261 	newentp->hdl = hdl;
1262 	if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) {
1263 		SerdEngines = lut_add(SerdEngines, (void *)newentp,
1264 		    (void *)newentp, (lut_cmp)serd_cmp);
1265 		Serd_need_save = 1;
1266 		serd_save();
1267 	} else {
1268 		FREE(newentp);
1269 	}
1270 
1271 
1272 	/*
1273 	 * increment SERD engine.  if engine fires, reset serd
1274 	 * engine and return trip_strcode if required.
1275 	 */
1276 	for (i = 0; i < serdincrement; i++) {
1277 		if (fmd_serd_record(hdl, serdname, ffep)) {
1278 			fmd_case_add_serd(hdl, fmcase, serdname);
1279 			fmd_serd_reset(hdl, serdname);
1280 
1281 			if (ippp) {
1282 				struct node *tripinst =
1283 				    lut_lookup(serdinst->u.stmt.lutp,
1284 				    (void *)L_trip, NULL);
1285 				ASSERT(tripinst != NULL);
1286 				*enamep = tripinst->u.event.ename->u.name.s;
1287 				*ippp = ipath(tripinst->u.event.epname);
1288 				out(O_ALTFP|O_NONL,
1289 				    "[engine fired: %s, sending: ", serdname);
1290 				ipath_print(O_ALTFP|O_NONL, *enamep, *ippp);
1291 				out(O_ALTFP, "]");
1292 			} else {
1293 				out(O_ALTFP, "[engine fired: %s, no trip]",
1294 				    serdname);
1295 			}
1296 			FREE(serdname);
1297 			return (1);
1298 		}
1299 	}
1300 
1301 	FREE(serdname);
1302 	return (0);
1303 }
1304 
1305 /*
1306  * search a suspect list for upsets.  feed each upset to serd_eval() and
1307  * build up tripped[], an array of ereports produced by the firing of
1308  * any SERD engines.  then feed each ereport back into
1309  * fme_receive_report().
1310  *
1311  * returns ntrip, the number of these ereports produced.
1312  */
1313 static int
upsets_eval(struct fme * fmep,fmd_event_t * ffep)1314 upsets_eval(struct fme *fmep, fmd_event_t *ffep)
1315 {
1316 	/* we build an array of tripped ereports that we send ourselves */
1317 	struct {
1318 		const char *ename;
1319 		const struct ipath *ipp;
1320 	} *tripped;
1321 	struct event *sp;
1322 	int ntrip, nupset, i;
1323 
1324 	/*
1325 	 * count the number of upsets to determine the upper limit on
1326 	 * expected trip ereport strings.  remember that one upset can
1327 	 * lead to at most one ereport.
1328 	 */
1329 	nupset = 0;
1330 	for (sp = fmep->suspects; sp; sp = sp->suspects) {
1331 		if (sp->t == N_UPSET)
1332 			nupset++;
1333 	}
1334 
1335 	if (nupset == 0)
1336 		return (0);
1337 
1338 	/*
1339 	 * get to this point if we have upsets and expect some trip
1340 	 * ereports
1341 	 */
1342 	tripped = alloca(sizeof (*tripped) * nupset);
1343 	bzero((void *)tripped, sizeof (*tripped) * nupset);
1344 
1345 	ntrip = 0;
1346 	for (sp = fmep->suspects; sp; sp = sp->suspects)
1347 		if (sp->t == N_UPSET &&
1348 		    serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp,
1349 		    &tripped[ntrip].ename, &tripped[ntrip].ipp) == 1)
1350 			ntrip++;
1351 
1352 	for (i = 0; i < ntrip; i++) {
1353 		struct event *ep, *nep;
1354 		struct fme *nfmep;
1355 		fmd_case_t *fmcase;
1356 		const struct ipath *ipp;
1357 		const char *eventstring;
1358 		int prev_verbose;
1359 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1360 		enum fme_state state;
1361 
1362 		/*
1363 		 * First try and evaluate a case with the trip ereport plus
1364 		 * all the other ereports that cause the trip. If that fails
1365 		 * to evaluate then try again with just this ereport on its own.
1366 		 */
1367 		out(O_ALTFP|O_NONL, "fme_receive_report_serd: ");
1368 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1369 		out(O_ALTFP|O_STAMP, NULL);
1370 		ep = fmep->e0;
1371 		eventstring = ep->enode->u.event.ename->u.name.s;
1372 		ipp = ep->ipp;
1373 
1374 		/*
1375 		 * create a duplicate fme and case
1376 		 */
1377 		fmcase = fmd_case_open(fmep->hdl, NULL);
1378 		out(O_ALTFP|O_NONL, "duplicate fme for event [");
1379 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1380 		out(O_ALTFP, " ]");
1381 
1382 		if ((nfmep = newfme(eventstring, ipp, fmep->hdl,
1383 		    fmcase, ffep, ep->nvp)) == NULL) {
1384 			out(O_ALTFP|O_NONL, "[");
1385 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1386 			out(O_ALTFP, " CANNOT DIAGNOSE]");
1387 			continue;
1388 		}
1389 
1390 		Open_fme_count++;
1391 		nfmep->pull = fmep->pull;
1392 		init_fme_bufs(nfmep);
1393 		out(O_ALTFP|O_NONL, "[");
1394 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1395 		out(O_ALTFP, " created FME%d, case %s]", nfmep->id,
1396 		    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
1397 		if (ffep) {
1398 			fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep);
1399 			fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep);
1400 			nfmep->e0r = ffep;
1401 		}
1402 
1403 		/*
1404 		 * add the original ereports
1405 		 */
1406 		for (ep = fmep->observations; ep; ep = ep->observations) {
1407 			eventstring = ep->enode->u.event.ename->u.name.s;
1408 			ipp = ep->ipp;
1409 			out(O_ALTFP|O_NONL, "adding event [");
1410 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1411 			out(O_ALTFP, " ]");
1412 			nep = itree_lookup(nfmep->eventtree, eventstring, ipp);
1413 			if (nep->count++ == 0) {
1414 				nep->observations = nfmep->observations;
1415 				nfmep->observations = nep;
1416 				serialize_observation(nfmep, eventstring, ipp);
1417 				nep->nvp = evnv_dupnvl(ep->nvp);
1418 			}
1419 			if (ep->ffep && ep->ffep != ffep)
1420 				fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase,
1421 				    ep->ffep);
1422 			stats_counter_bump(nfmep->Rcount);
1423 		}
1424 
1425 		/*
1426 		 * add the serd trigger ereport
1427 		 */
1428 		if ((ep = itree_lookup(nfmep->eventtree, tripped[i].ename,
1429 		    tripped[i].ipp)) == NULL) {
1430 			/*
1431 			 * The trigger ereport is not in the instance tree. It
1432 			 * was presumably removed by prune_propagations() as
1433 			 * this combination of events is not present in the
1434 			 * rules.
1435 			 */
1436 			out(O_ALTFP, "upsets_eval: e0 not in instance tree");
1437 			Undiag_reason = UD_VAL_BADEVENTI;
1438 			goto retry_lone_ereport;
1439 		}
1440 		out(O_ALTFP|O_NONL, "adding event [");
1441 		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
1442 		out(O_ALTFP, " ]");
1443 		nfmep->ecurrent = ep;
1444 		ep->nvp = NULL;
1445 		ep->count = 1;
1446 		ep->observations = nfmep->observations;
1447 		nfmep->observations = ep;
1448 
1449 		/*
1450 		 * just peek first.
1451 		 */
1452 		nfmep->peek = 1;
1453 		prev_verbose = Verbose;
1454 		if (Debug == 0)
1455 			Verbose = 0;
1456 		lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep);
1457 		state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay);
1458 		nfmep->peek = 0;
1459 		Verbose = prev_verbose;
1460 		if (state == FME_DISPROVED) {
1461 			out(O_ALTFP, "upsets_eval: hypothesis disproved");
1462 			Undiag_reason = UD_VAL_UNSOLVD;
1463 retry_lone_ereport:
1464 			/*
1465 			 * However the trigger ereport on its own might be
1466 			 * diagnosable, so check for that. Undo the new fme
1467 			 * and case we just created and call fme_receive_report.
1468 			 */
1469 			out(O_ALTFP|O_NONL, "[");
1470 			ipath_print(O_ALTFP|O_NONL, tripped[i].ename,
1471 			    tripped[i].ipp);
1472 			out(O_ALTFP, " retrying with just trigger ereport]");
1473 			itree_free(nfmep->eventtree);
1474 			nfmep->eventtree = NULL;
1475 			structconfig_free(nfmep->config);
1476 			nfmep->config = NULL;
1477 			destroy_fme_bufs(nfmep);
1478 			fmd_case_close(nfmep->hdl, nfmep->fmcase);
1479 			fme_receive_report(fmep->hdl, ffep,
1480 			    tripped[i].ename, tripped[i].ipp, NULL);
1481 			continue;
1482 		}
1483 
1484 		/*
1485 		 * and evaluate
1486 		 */
1487 		serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp);
1488 		fme_eval(nfmep, ffep);
1489 	}
1490 
1491 	return (ntrip);
1492 }
1493 
1494 /*
1495  * fme_receive_external_report -- call when an external ereport comes in
1496  *
1497  * this routine just converts the relevant information from the ereport
1498  * into a format used internally and passes it on to fme_receive_report().
1499  */
1500 void
fme_receive_external_report(fmd_hdl_t * hdl,fmd_event_t * ffep,nvlist_t * nvl,const char * class)1501 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1502     const char *class)
1503 {
1504 	struct node		*epnamenp;
1505 	fmd_case_t		*fmcase;
1506 	const struct ipath	*ipp;
1507 	nvlist_t		*detector = NULL;
1508 
1509 	class = stable(class);
1510 
1511 	/* Get the component path from the ereport */
1512 	epnamenp = platform_getpath(nvl);
1513 
1514 	/* See if we ended up without a path. */
1515 	if (epnamenp == NULL) {
1516 		/* See if class permits silent discard on unknown component. */
1517 		if (lut_lookup(Ereportenames_discard, (void *)class, NULL)) {
1518 			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
1519 			    "to component path, but silent discard allowed.",
1520 			    class);
1521 		} else {
1522 			/*
1523 			 * XFILE: Failure to find a component is bad unless
1524 			 * 'discard_if_config_unknown=1' was specified in the
1525 			 * ereport definition. Indicate undiagnosable.
1526 			 */
1527 			Undiag_reason = UD_VAL_NOPATH;
1528 			fmcase = fmd_case_open(hdl, NULL);
1529 
1530 			/*
1531 			 * We don't have a component path here (which means that
1532 			 * the detector was not in hc-scheme and couldn't be
1533 			 * converted to hc-scheme. Report the raw detector as
1534 			 * the suspect resource if there is one.
1535 			 */
1536 			(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
1537 			    &detector);
1538 			publish_undiagnosable(hdl, ffep, fmcase, detector,
1539 			    (char *)class);
1540 		}
1541 		return;
1542 	}
1543 
1544 	ipp = ipath(epnamenp);
1545 	tree_free(epnamenp);
1546 	fme_receive_report(hdl, ffep, class, ipp, nvl);
1547 }
1548 
1549 /*ARGSUSED*/
1550 void
fme_receive_repair_list(fmd_hdl_t * hdl,fmd_event_t * ffep,nvlist_t * nvl,const char * eventstring)1551 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1552     const char *eventstring)
1553 {
1554 	char *uuid;
1555 	nvlist_t **nva;
1556 	uint_t nvc;
1557 	const struct ipath *ipp;
1558 
1559 	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
1560 	    nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
1561 	    &nva, &nvc) != 0) {
1562 		out(O_ALTFP, "No uuid or fault list for list.repaired event");
1563 		return;
1564 	}
1565 
1566 	out(O_ALTFP, "Processing list.repaired from case %s", uuid);
1567 
1568 	while (nvc-- != 0) {
1569 		/*
1570 		 * Reset any istat or serd engine associated with this path.
1571 		 */
1572 		char *path;
1573 
1574 		if ((ipp = platform_fault2ipath(*nva++)) == NULL)
1575 			continue;
1576 
1577 		path = ipath2str(NULL, ipp);
1578 		out(O_ALTFP, "fme_receive_repair_list: resetting state for %s",
1579 		    path);
1580 		FREE(path);
1581 
1582 		lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp);
1583 		istat_save();
1584 
1585 		lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp);
1586 		serd_save();
1587 	}
1588 }
1589 
1590 /*ARGSUSED*/
1591 void
fme_receive_topology_change(void)1592 fme_receive_topology_change(void)
1593 {
1594 	lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL);
1595 	istat_save();
1596 
1597 	lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL);
1598 	serd_save();
1599 }
1600 
1601 static int mark_arrows(struct fme *fmep, struct event *ep, int mark,
1602     unsigned long long at_latest_by, unsigned long long *pdelay, int keep);
1603 
1604 /* ARGSUSED */
1605 static void
clear_arrows(struct event * ep,struct event * ep2,struct fme * fmep)1606 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
1607 {
1608 	struct bubble *bp;
1609 	struct arrowlist *ap;
1610 
1611 	ep->cached_state = 0;
1612 	ep->keep_in_tree = 0;
1613 	for (bp = itree_next_bubble(ep, NULL); bp;
1614 	    bp = itree_next_bubble(ep, bp)) {
1615 		if (bp->t != B_FROM)
1616 			continue;
1617 		bp->mark = 0;
1618 		for (ap = itree_next_arrow(bp, NULL); ap;
1619 		    ap = itree_next_arrow(bp, ap))
1620 			ap->arrowp->mark = 0;
1621 	}
1622 }
1623 
1624 static void
fme_receive_report(fmd_hdl_t * hdl,fmd_event_t * ffep,const char * eventstring,const struct ipath * ipp,nvlist_t * nvl)1625 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
1626     const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
1627 {
1628 	struct event *ep;
1629 	struct fme *fmep = NULL;
1630 	struct fme *ofmep = NULL;
1631 	struct fme *cfmep, *svfmep;
1632 	int matched = 0;
1633 	nvlist_t *defect;
1634 	fmd_case_t *fmcase;
1635 	char *reason;
1636 
1637 	out(O_ALTFP|O_NONL, "fme_receive_report: ");
1638 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1639 	out(O_ALTFP|O_STAMP, NULL);
1640 
1641 	/* decide which FME it goes to */
1642 	for (fmep = FMElist; fmep; fmep = fmep->next) {
1643 		int prev_verbose;
1644 		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
1645 		enum fme_state state;
1646 		nvlist_t *pre_peek_nvp = NULL;
1647 
1648 		if (fmep->overflow) {
1649 			if (!(fmd_case_closed(fmep->hdl, fmep->fmcase)))
1650 				ofmep = fmep;
1651 
1652 			continue;
1653 		}
1654 
1655 		/*
1656 		 * ignore solved or closed cases
1657 		 */
1658 		if (fmep->posted_suspects ||
1659 		    fmd_case_solved(fmep->hdl, fmep->fmcase) ||
1660 		    fmd_case_closed(fmep->hdl, fmep->fmcase))
1661 			continue;
1662 
1663 		/* look up event in event tree for this FME */
1664 		if ((ep = itree_lookup(fmep->eventtree,
1665 		    eventstring, ipp)) == NULL)
1666 			continue;
1667 
1668 		/* note observation */
1669 		fmep->ecurrent = ep;
1670 		if (ep->count++ == 0) {
1671 			/* link it into list of observations seen */
1672 			ep->observations = fmep->observations;
1673 			fmep->observations = ep;
1674 			ep->nvp = evnv_dupnvl(nvl);
1675 		} else {
1676 			/* use new payload values for peek */
1677 			pre_peek_nvp = ep->nvp;
1678 			ep->nvp = evnv_dupnvl(nvl);
1679 		}
1680 
1681 		/* tell hypothesise() not to mess with suspect list */
1682 		fmep->peek = 1;
1683 
1684 		/* don't want this to be verbose (unless Debug is set) */
1685 		prev_verbose = Verbose;
1686 		if (Debug == 0)
1687 			Verbose = 0;
1688 
1689 		lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
1690 		state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
1691 
1692 		fmep->peek = 0;
1693 
1694 		/* put verbose flag back */
1695 		Verbose = prev_verbose;
1696 
1697 		if (state != FME_DISPROVED) {
1698 			/* found an FME that explains the ereport */
1699 			matched++;
1700 			out(O_ALTFP|O_NONL, "[");
1701 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1702 			out(O_ALTFP, " explained by FME%d]", fmep->id);
1703 
1704 			if (pre_peek_nvp)
1705 				nvlist_free(pre_peek_nvp);
1706 
1707 			if (ep->count == 1)
1708 				serialize_observation(fmep, eventstring, ipp);
1709 
1710 			if (ffep) {
1711 				fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1712 				ep->ffep = ffep;
1713 			}
1714 
1715 			stats_counter_bump(fmep->Rcount);
1716 
1717 			/* re-eval FME */
1718 			fme_eval(fmep, ffep);
1719 		} else {
1720 
1721 			/* not a match, undo noting of observation */
1722 			fmep->ecurrent = NULL;
1723 			if (--ep->count == 0) {
1724 				/* unlink it from observations */
1725 				fmep->observations = ep->observations;
1726 				ep->observations = NULL;
1727 				nvlist_free(ep->nvp);
1728 				ep->nvp = NULL;
1729 			} else {
1730 				nvlist_free(ep->nvp);
1731 				ep->nvp = pre_peek_nvp;
1732 			}
1733 		}
1734 	}
1735 
1736 	if (matched)
1737 		return;	/* explained by at least one existing FME */
1738 
1739 	/* clean up closed fmes */
1740 	cfmep = ClosedFMEs;
1741 	while (cfmep != NULL) {
1742 		svfmep = cfmep->next;
1743 		destroy_fme(cfmep);
1744 		cfmep = svfmep;
1745 	}
1746 	ClosedFMEs = NULL;
1747 
1748 	if (ofmep) {
1749 		out(O_ALTFP|O_NONL, "[");
1750 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1751 		out(O_ALTFP, " ADDING TO OVERFLOW FME]");
1752 		if (ffep)
1753 			fmd_case_add_ereport(hdl, ofmep->fmcase, ffep);
1754 
1755 		return;
1756 
1757 	} else if (Max_fme && (Open_fme_count >= Max_fme)) {
1758 		out(O_ALTFP|O_NONL, "[");
1759 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1760 		out(O_ALTFP, " MAX OPEN FME REACHED]");
1761 
1762 		fmcase = fmd_case_open(hdl, NULL);
1763 
1764 		/* Create overflow fme */
1765 		if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep,
1766 		    nvl)) == NULL) {
1767 			out(O_ALTFP|O_NONL, "[");
1768 			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1769 			out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]");
1770 			return;
1771 		}
1772 
1773 		Open_fme_count++;
1774 
1775 		init_fme_bufs(fmep);
1776 		fmep->overflow = B_TRUE;
1777 
1778 		if (ffep)
1779 			fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1780 
1781 		Undiag_reason = UD_VAL_MAXFME;
1782 		defect = fmd_nvl_create_fault(hdl,
1783 		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
1784 		reason = undiag_2reason_str(Undiag_reason, NULL);
1785 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
1786 		FREE(reason);
1787 		fmd_case_add_suspect(hdl, fmep->fmcase, defect);
1788 		fmd_case_solve(hdl, fmep->fmcase);
1789 		Undiag_reason = UD_VAL_UNKNOWN;
1790 		return;
1791 	}
1792 
1793 	/* open a case */
1794 	fmcase = fmd_case_open(hdl, NULL);
1795 
1796 	/* start a new FME */
1797 	if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep, nvl)) == NULL) {
1798 		out(O_ALTFP|O_NONL, "[");
1799 		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1800 		out(O_ALTFP, " CANNOT DIAGNOSE]");
1801 		return;
1802 	}
1803 
1804 	Open_fme_count++;
1805 
1806 	init_fme_bufs(fmep);
1807 
1808 	out(O_ALTFP|O_NONL, "[");
1809 	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
1810 	out(O_ALTFP, " created FME%d, case %s]", fmep->id,
1811 	    fmd_case_uuid(hdl, fmep->fmcase));
1812 
1813 	ep = fmep->e0;
1814 	ASSERT(ep != NULL);
1815 
1816 	/* note observation */
1817 	fmep->ecurrent = ep;
1818 	if (ep->count++ == 0) {
1819 		/* link it into list of observations seen */
1820 		ep->observations = fmep->observations;
1821 		fmep->observations = ep;
1822 		ep->nvp = evnv_dupnvl(nvl);
1823 		serialize_observation(fmep, eventstring, ipp);
1824 	} else {
1825 		/* new payload overrides any previous */
1826 		nvlist_free(ep->nvp);
1827 		ep->nvp = evnv_dupnvl(nvl);
1828 	}
1829 
1830 	stats_counter_bump(fmep->Rcount);
1831 
1832 	if (ffep) {
1833 		fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
1834 		fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
1835 		fmep->e0r = ffep;
1836 		ep->ffep = ffep;
1837 	}
1838 
1839 	/* give the diagnosis algorithm a shot at the new FME state */
1840 	fme_eval(fmep, ffep);
1841 }
1842 
1843 void
fme_status(int flags)1844 fme_status(int flags)
1845 {
1846 	struct fme *fmep;
1847 
1848 	if (FMElist == NULL) {
1849 		out(flags, "No fault management exercises underway.");
1850 		return;
1851 	}
1852 
1853 	for (fmep = FMElist; fmep; fmep = fmep->next)
1854 		fme_print(flags, fmep);
1855 }
1856 
/*
 * "indent" routines used mostly for nicely formatted debug output, but also
 * for sanity checking for infinite recursion bugs.
 *
 * indent_s[] is a stack of prefix strings and current_indent the number
 * of entries in use; the stack holds at most MAX_INDENT entries.
 */

#define	MAX_INDENT 1024
static const char *indent_s[MAX_INDENT];
static int current_indent;
1865 
1866 static void
indent_push(const char * s)1867 indent_push(const char *s)
1868 {
1869 	if (current_indent < MAX_INDENT)
1870 		indent_s[current_indent++] = s;
1871 	else
1872 		out(O_DIE, "unexpected recursion depth (%d)", current_indent);
1873 }
1874 
1875 static void
indent_set(const char * s)1876 indent_set(const char *s)
1877 {
1878 	current_indent = 0;
1879 	indent_push(s);
1880 }
1881 
1882 static void
indent_pop(void)1883 indent_pop(void)
1884 {
1885 	if (current_indent > 0)
1886 		current_indent--;
1887 	else
1888 		out(O_DIE, "recursion underflow");
1889 }
1890 
1891 static void
indent(void)1892 indent(void)
1893 {
1894 	int i;
1895 	if (!Verbose)
1896 		return;
1897 	for (i = 0; i < current_indent; i++)
1898 		out(O_ALTFP|O_VERB|O_NONL, indent_s[i]);
1899 }
1900 
1901 #define	SLNEW		1
1902 #define	SLCHANGED	2
1903 #define	SLWAIT		3
1904 #define	SLDISPROVED	4
1905 
1906 static void
print_suspects(int circumstance,struct fme * fmep)1907 print_suspects(int circumstance, struct fme *fmep)
1908 {
1909 	struct event *ep;
1910 
1911 	out(O_ALTFP|O_NONL, "[");
1912 	if (circumstance == SLCHANGED) {
1913 		out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, "
1914 		    "suspect list:", fmep->id, fme_state2str(fmep->state));
1915 	} else if (circumstance == SLWAIT) {
1916 		out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id,
1917 		    fmep->timer);
1918 		ptree_timeval(O_ALTFP|O_NONL, &fmep->wull);
1919 	} else if (circumstance == SLDISPROVED) {
1920 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id);
1921 	} else {
1922 		out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id);
1923 	}
1924 
1925 	if (circumstance == SLWAIT || circumstance == SLDISPROVED) {
1926 		out(O_ALTFP, "]");
1927 		return;
1928 	}
1929 
1930 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
1931 		out(O_ALTFP|O_NONL, " ");
1932 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
1933 	}
1934 	out(O_ALTFP, "]");
1935 }
1936 
1937 static struct node *
eventprop_lookup(struct event * ep,const char * propname)1938 eventprop_lookup(struct event *ep, const char *propname)
1939 {
1940 	return (lut_lookup(ep->props, (void *)propname, NULL));
1941 }
1942 
/*
 * scratch buffer for number-to-string conversion in node2fmri(), which
 * puts the terminating NUL at index MAXDIGITIDX and passes a pointer to
 * that byte to ulltostr().
 */
#define	MAXDIGITIDX	23
static char numbuf[MAXDIGITIDX + 1];
1945 
1946 static int
node2uint(struct node * n,uint_t * valp)1947 node2uint(struct node *n, uint_t *valp)
1948 {
1949 	struct evalue value;
1950 	struct lut *globals = NULL;
1951 
1952 	if (n == NULL)
1953 		return (1);
1954 
1955 	/*
1956 	 * check value.v since we are being asked to convert an unsigned
1957 	 * long long int to an unsigned int
1958 	 */
1959 	if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) ||
1960 	    value.t != UINT64 || value.v > (1ULL << 32))
1961 		return (1);
1962 
1963 	*valp = (uint_t)value.v;
1964 
1965 	return (0);
1966 }
1967 
1968 static nvlist_t *
node2fmri(struct node * n)1969 node2fmri(struct node *n)
1970 {
1971 	nvlist_t **pa, *f, *p;
1972 	struct node *nc;
1973 	uint_t depth = 0;
1974 	char *numstr, *nullbyte;
1975 	char *failure;
1976 	int err, i;
1977 
1978 	/* XXX do we need to be able to handle a non-T_NAME node? */
1979 	if (n == NULL || n->t != T_NAME)
1980 		return (NULL);
1981 
1982 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
1983 		if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM)
1984 			break;
1985 		depth++;
1986 	}
1987 
1988 	if (nc != NULL) {
1989 		/* We bailed early, something went wrong */
1990 		return (NULL);
1991 	}
1992 
1993 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
1994 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
1995 	pa = alloca(depth * sizeof (nvlist_t *));
1996 	for (i = 0; i < depth; i++)
1997 		pa[i] = NULL;
1998 
1999 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
2000 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
2001 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
2002 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
2003 	if (err != 0) {
2004 		failure = "basic construction of FMRI failed";
2005 		goto boom;
2006 	}
2007 
2008 	numbuf[MAXDIGITIDX] = '\0';
2009 	nullbyte = &numbuf[MAXDIGITIDX];
2010 	i = 0;
2011 
2012 	for (nc = n; nc != NULL; nc = nc->u.name.next) {
2013 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2014 		if (err != 0) {
2015 			failure = "alloc of an hc-pair failed";
2016 			goto boom;
2017 		}
2018 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s);
2019 		numstr = ulltostr(nc->u.name.child->u.ull, nullbyte);
2020 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2021 		if (err != 0) {
2022 			failure = "construction of an hc-pair failed";
2023 			goto boom;
2024 		}
2025 		pa[i++] = p;
2026 	}
2027 
2028 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2029 	if (err == 0) {
2030 		for (i = 0; i < depth; i++)
2031 			if (pa[i] != NULL)
2032 				nvlist_free(pa[i]);
2033 		return (f);
2034 	}
2035 	failure = "addition of hc-pair array to FMRI failed";
2036 
2037 boom:
2038 	for (i = 0; i < depth; i++)
2039 		if (pa[i] != NULL)
2040 			nvlist_free(pa[i]);
2041 	nvlist_free(f);
2042 	out(O_DIE, "%s", failure);
2043 	/*NOTREACHED*/
2044 	return (NULL);
2045 }
2046 
/*
 * an ipath cache entry is an array of these, with s==NULL at the end.
 * ipath2fmri() below walks such an array up to that terminator and turns
 * each element into one hc name/id pair.
 */
struct ipath {
	const char *s;	/* component name (in stable) */
	int i;		/* instance number */
};
2052 
2053 static nvlist_t *
ipath2fmri(struct ipath * ipath)2054 ipath2fmri(struct ipath *ipath)
2055 {
2056 	nvlist_t **pa, *f, *p;
2057 	uint_t depth = 0;
2058 	char *numstr, *nullbyte;
2059 	char *failure;
2060 	int err, i;
2061 	struct ipath *ipp;
2062 
2063 	for (ipp = ipath; ipp->s != NULL; ipp++)
2064 		depth++;
2065 
2066 	if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
2067 		out(O_DIE|O_SYS, "alloc of fmri nvl failed");
2068 	pa = alloca(depth * sizeof (nvlist_t *));
2069 	for (i = 0; i < depth; i++)
2070 		pa[i] = NULL;
2071 
2072 	err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
2073 	err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
2074 	err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
2075 	err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
2076 	if (err != 0) {
2077 		failure = "basic construction of FMRI failed";
2078 		goto boom;
2079 	}
2080 
2081 	numbuf[MAXDIGITIDX] = '\0';
2082 	nullbyte = &numbuf[MAXDIGITIDX];
2083 	i = 0;
2084 
2085 	for (ipp = ipath; ipp->s != NULL; ipp++) {
2086 		err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2087 		if (err != 0) {
2088 			failure = "alloc of an hc-pair failed";
2089 			goto boom;
2090 		}
2091 		err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s);
2092 		numstr = ulltostr(ipp->i, nullbyte);
2093 		err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2094 		if (err != 0) {
2095 			failure = "construction of an hc-pair failed";
2096 			goto boom;
2097 		}
2098 		pa[i++] = p;
2099 	}
2100 
2101 	err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2102 	if (err == 0) {
2103 		for (i = 0; i < depth; i++)
2104 			if (pa[i] != NULL)
2105 				nvlist_free(pa[i]);
2106 		return (f);
2107 	}
2108 	failure = "addition of hc-pair array to FMRI failed";
2109 
2110 boom:
2111 	for (i = 0; i < depth; i++)
2112 		if (pa[i] != NULL)
2113 			nvlist_free(pa[i]);
2114 	nvlist_free(f);
2115 	out(O_DIE, "%s", failure);
2116 	/*NOTREACHED*/
2117 	return (NULL);
2118 }
2119 
/*
 * percentof -- return part as a percentage of whole, rounded to the
 *	nearest whole percent (a remainder of .5 or more rounds up).
 *
 *	Returns 0 when whole is 0 rather than dividing by zero.  The
 *	result is narrowed to uint8_t, as before.
 */
static uint8_t
percentof(uint_t part, uint_t whole)
{
	unsigned long long permille;

	if (whole == 0)
		return (0);

	/*
	 * Widen before scaling: part * 1000 evaluated in unsigned int
	 * wraps once part exceeds about 4.29 million.
	 */
	permille = ((unsigned long long)part * 1000) / whole;

	return ((uint8_t)(permille / 10 + ((permille % 10 >= 5) ? 1 : 0)));
}
2127 
/*
 * struct rsl -- one entry of a suspect list being prepared for publication:
 *	a suspect event plus the nvlists filled in for it by get_resources().
 */
struct rsl {
	struct event *suspect;	/* the suspect event itself */
	nvlist_t *asru;		/* released by rslfree() */
	nvlist_t *fru;		/* released by rslfree() */
	nvlist_t *rsrc;		/* rslfree() skips this if same ptr as asru */
};

/* defined further down, after the istat and serd helpers */
static void publish_suspects(struct fme *fmep, struct rsl *srl);
2136 
2137 /*
2138  *  rslfree -- free internal members of struct rsl not expected to be
2139  *	freed elsewhere.
2140  */
2141 static void
rslfree(struct rsl * freeme)2142 rslfree(struct rsl *freeme)
2143 {
2144 	if (freeme->asru != NULL)
2145 		nvlist_free(freeme->asru);
2146 	if (freeme->fru != NULL)
2147 		nvlist_free(freeme->fru);
2148 	if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru)
2149 		nvlist_free(freeme->rsrc);
2150 }
2151 
2152 /*
2153  *  rslcmp -- compare two rsl structures.  Use the following
2154  *	comparisons to establish cardinality:
2155  *
2156  *	1. Name of the suspect's class. (simple strcmp)
2157  *	2. Name of the suspect's ASRU. (trickier, since nvlist)
2158  *
2159  */
2160 static int
rslcmp(const void * a,const void * b)2161 rslcmp(const void *a, const void *b)
2162 {
2163 	struct rsl *r1 = (struct rsl *)a;
2164 	struct rsl *r2 = (struct rsl *)b;
2165 	int rv;
2166 
2167 	rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s,
2168 	    r2->suspect->enode->u.event.ename->u.name.s);
2169 	if (rv != 0)
2170 		return (rv);
2171 
2172 	if (r1->rsrc == NULL && r2->rsrc == NULL)
2173 		return (0);
2174 	if (r1->rsrc == NULL)
2175 		return (-1);
2176 	if (r2->rsrc == NULL)
2177 		return (1);
2178 	return (evnv_cmpnvl(r1->rsrc, r2->rsrc, 0));
2179 }
2180 
2181 /*
2182  * get_resources -- for a given suspect, determine what ASRU, FRU and
2183  *     RSRC nvlists should be advertised in the final suspect list.
2184  */
2185 void
get_resources(struct event * sp,struct rsl * rsrcs,struct config * croot)2186 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot)
2187 {
2188 	struct node *asrudef, *frudef;
2189 	nvlist_t *asru, *fru;
2190 	nvlist_t *rsrc = NULL;
2191 	char *pathstr;
2192 
2193 	/*
2194 	 * First find any ASRU and/or FRU defined in the
2195 	 * initial fault tree.
2196 	 */
2197 	asrudef = eventprop_lookup(sp, L_ASRU);
2198 	frudef = eventprop_lookup(sp, L_FRU);
2199 
2200 	/*
2201 	 * Create FMRIs based on those definitions
2202 	 */
2203 	asru = node2fmri(asrudef);
2204 	fru = node2fmri(frudef);
2205 	pathstr = ipath2str(NULL, sp->ipp);
2206 
2207 	/*
2208 	 *  Allow for platform translations of the FMRIs
2209 	 */
2210 	platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc,
2211 	    pathstr);
2212 
2213 	FREE(pathstr);
2214 	rsrcs->suspect = sp;
2215 	rsrcs->asru = asru;
2216 	rsrcs->fru = fru;
2217 	rsrcs->rsrc = rsrc;
2218 }
2219 
2220 /*
2221  * trim_suspects -- prior to publishing, we may need to remove some
2222  *    suspects from the list.  If we're auto-closing upsets, we don't
2223  *    want any of those in the published list.  If the ASRUs for multiple
2224  *    defects resolve to the same ASRU (driver) we only want to publish
2225  *    that as a single suspect.
2226  */
2227 static int
trim_suspects(struct fme * fmep,struct rsl * begin,struct rsl * begin2,fmd_event_t * ffep)2228 trim_suspects(struct fme *fmep, struct rsl *begin, struct rsl *begin2,
2229     fmd_event_t *ffep)
2230 {
2231 	struct event *ep;
2232 	struct rsl *rp = begin;
2233 	struct rsl *rp2 = begin2;
2234 	int mess_zero_count = 0;
2235 	int serd_rval;
2236 	uint_t messval;
2237 
2238 	/* remove any unwanted upsets and populate our array */
2239 	for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
2240 		if (is_upset(ep->t))
2241 			continue;
2242 		serd_rval = serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, ep,
2243 		    NULL, NULL);
2244 		if (serd_rval == 0)
2245 			continue;
2246 		if (node2uint(eventprop_lookup(ep, L_message),
2247 		    &messval) == 0 && messval == 0) {
2248 			get_resources(ep, rp2, fmep->config);
2249 			rp2++;
2250 			mess_zero_count++;
2251 		} else {
2252 			get_resources(ep, rp, fmep->config);
2253 			rp++;
2254 			fmep->nsuspects++;
2255 		}
2256 	}
2257 	return (mess_zero_count);
2258 }
2259 
2260 /*
2261  * addpayloadprop -- add a payload prop to a problem
2262  */
2263 static void
addpayloadprop(const char * lhs,struct evalue * rhs,nvlist_t * fault)2264 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault)
2265 {
2266 	nvlist_t *rsrc, *hcs;
2267 
2268 	ASSERT(fault != NULL);
2269 	ASSERT(lhs != NULL);
2270 	ASSERT(rhs != NULL);
2271 
2272 	if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, &rsrc) != 0)
2273 		out(O_DIE, "cannot add payloadprop \"%s\" to fault", lhs);
2274 
2275 	if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0) {
2276 		out(O_ALTFP|O_VERB2, "addpayloadprop: create hc_specific");
2277 		if (nvlist_xalloc(&hcs, NV_UNIQUE_NAME, &Eft_nv_hdl) != 0)
2278 			out(O_DIE,
2279 			    "cannot add payloadprop \"%s\" to fault", lhs);
2280 		if (nvlist_add_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, hcs) != 0)
2281 			out(O_DIE,
2282 			    "cannot add payloadprop \"%s\" to fault", lhs);
2283 		nvlist_free(hcs);
2284 		if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0)
2285 			out(O_DIE,
2286 			    "cannot add payloadprop \"%s\" to fault", lhs);
2287 	} else
2288 		out(O_ALTFP|O_VERB2, "addpayloadprop: reuse hc_specific");
2289 
2290 	if (rhs->t == UINT64) {
2291 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v);
2292 
2293 		if (nvlist_add_uint64(hcs, lhs, rhs->v) != 0)
2294 			out(O_DIE,
2295 			    "cannot add payloadprop \"%s\" to fault", lhs);
2296 	} else {
2297 		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"",
2298 		    lhs, (char *)(uintptr_t)rhs->v);
2299 
2300 		if (nvlist_add_string(hcs, lhs, (char *)(uintptr_t)rhs->v) != 0)
2301 			out(O_DIE,
2302 			    "cannot add payloadprop \"%s\" to fault", lhs);
2303 	}
2304 }
2305 
/*
 * State shared by istat_save() and its two lut_walk() callbacks while the
 * istats are being serialized.
 */
static char *Istatbuf;		/* buffer allocated by istat_save() */
static char *Istatbufptr;	/* next write position, moved by istat2str() */
static int Istatsz;		/* byte total summed up by istataddsize() */
2309 
2310 /*
2311  * istataddsize -- calculate size of istat and add it to Istatsz
2312  */
2313 /*ARGSUSED2*/
2314 static void
istataddsize(const struct istat_entry * lhs,struct stats * rhs,void * arg)2315 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2316 {
2317 	int val;
2318 
2319 	ASSERT(lhs != NULL);
2320 	ASSERT(rhs != NULL);
2321 
2322 	if ((val = stats_counter_value(rhs)) == 0)
2323 		return;	/* skip zero-valued stats */
2324 
2325 	/* count up the size of the stat name */
2326 	Istatsz += ipath2strlen(lhs->ename, lhs->ipath);
2327 	Istatsz++;	/* for the trailing NULL byte */
2328 
2329 	/* count up the size of the stat value */
2330 	Istatsz += snprintf(NULL, 0, "%d", val);
2331 	Istatsz++;	/* for the trailing NULL byte */
2332 }
2333 
2334 /*
2335  * istat2str -- serialize an istat, writing result to *Istatbufptr
2336  */
2337 /*ARGSUSED2*/
2338 static void
istat2str(const struct istat_entry * lhs,struct stats * rhs,void * arg)2339 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2340 {
2341 	char *str;
2342 	int len;
2343 	int val;
2344 
2345 	ASSERT(lhs != NULL);
2346 	ASSERT(rhs != NULL);
2347 
2348 	if ((val = stats_counter_value(rhs)) == 0)
2349 		return;	/* skip zero-valued stats */
2350 
2351 	/* serialize the stat name */
2352 	str = ipath2str(lhs->ename, lhs->ipath);
2353 	len = strlen(str);
2354 
2355 	ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]);
2356 	(void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr);
2357 	Istatbufptr += len;
2358 	FREE(str);
2359 	*Istatbufptr++ = '\0';
2360 
2361 	/* serialize the stat value */
2362 	Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr,
2363 	    "%d", val);
2364 	*Istatbufptr++ = '\0';
2365 
2366 	ASSERT(Istatbufptr <= &Istatbuf[Istatsz]);
2367 }
2368 
2369 void
istat_save()2370 istat_save()
2371 {
2372 	if (Istat_need_save == 0)
2373 		return;
2374 
2375 	/* figure out how big the serialzed info is */
2376 	Istatsz = 0;
2377 	lut_walk(Istats, (lut_cb)istataddsize, NULL);
2378 
2379 	if (Istatsz == 0) {
2380 		/* no stats to save */
2381 		fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2382 		return;
2383 	}
2384 
2385 	/* create the serialized buffer */
2386 	Istatbufptr = Istatbuf = MALLOC(Istatsz);
2387 	lut_walk(Istats, (lut_cb)istat2str, NULL);
2388 
2389 	/* clear out current saved stats */
2390 	fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2391 
2392 	/* write out the new version */
2393 	fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz);
2394 	FREE(Istatbuf);
2395 
2396 	Istat_need_save = 0;
2397 }
2398 
2399 int
istat_cmp(struct istat_entry * ent1,struct istat_entry * ent2)2400 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2)
2401 {
2402 	if (ent1->ename != ent2->ename)
2403 		return (ent2->ename - ent1->ename);
2404 	if (ent1->ipath != ent2->ipath)
2405 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2406 
2407 	return (0);
2408 }
2409 
2410 /*
2411  * istat-verify -- verify the component associated with a stat still exists
2412  *
2413  * if the component no longer exists, this routine resets the stat and
2414  * returns 0.  if the component still exists, it returns 1.
2415  */
2416 static int
istat_verify(struct node * snp,struct istat_entry * entp)2417 istat_verify(struct node *snp, struct istat_entry *entp)
2418 {
2419 	struct stats *statp;
2420 	nvlist_t *fmri;
2421 
2422 	fmri = node2fmri(snp->u.event.epname);
2423 	if (platform_path_exists(fmri)) {
2424 		nvlist_free(fmri);
2425 		return (1);
2426 	}
2427 	nvlist_free(fmri);
2428 
2429 	/* component no longer in system.  zero out the associated stats */
2430 	if ((statp = (struct stats *)
2431 	    lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
2432 	    stats_counter_value(statp) == 0)
2433 		return (0);	/* stat is already reset */
2434 
2435 	Istat_need_save = 1;
2436 	stats_counter_reset(statp);
2437 	return (0);
2438 }
2439 
2440 static void
istat_bump(struct node * snp,int n)2441 istat_bump(struct node *snp, int n)
2442 {
2443 	struct stats *statp;
2444 	struct istat_entry ent;
2445 
2446 	ASSERT(snp != NULL);
2447 	ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
2448 	ASSERT(snp->u.event.epname != NULL);
2449 
2450 	/* class name should be hoisted into a single stable entry */
2451 	ASSERT(snp->u.event.ename->u.name.next == NULL);
2452 	ent.ename = snp->u.event.ename->u.name.s;
2453 	ent.ipath = ipath(snp->u.event.epname);
2454 
2455 	if (!istat_verify(snp, &ent)) {
2456 		/* component no longer exists in system, nothing to do */
2457 		return;
2458 	}
2459 
2460 	if ((statp = (struct stats *)
2461 	    lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
2462 		/* need to create the counter */
2463 		int cnt = 0;
2464 		struct node *np;
2465 		char *sname;
2466 		char *snamep;
2467 		struct istat_entry *newentp;
2468 
2469 		/* count up the size of the stat name */
2470 		np = snp->u.event.ename;
2471 		while (np != NULL) {
2472 			cnt += strlen(np->u.name.s);
2473 			cnt++;	/* for the '.' or '@' */
2474 			np = np->u.name.next;
2475 		}
2476 		np = snp->u.event.epname;
2477 		while (np != NULL) {
2478 			cnt += snprintf(NULL, 0, "%s%llu",
2479 			    np->u.name.s, np->u.name.child->u.ull);
2480 			cnt++;	/* for the '/' or trailing NULL byte */
2481 			np = np->u.name.next;
2482 		}
2483 
2484 		/* build the stat name */
2485 		snamep = sname = alloca(cnt);
2486 		np = snp->u.event.ename;
2487 		while (np != NULL) {
2488 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2489 			    "%s", np->u.name.s);
2490 			np = np->u.name.next;
2491 			if (np)
2492 				*snamep++ = '.';
2493 		}
2494 		*snamep++ = '@';
2495 		np = snp->u.event.epname;
2496 		while (np != NULL) {
2497 			snamep += snprintf(snamep, &sname[cnt] - snamep,
2498 			    "%s%llu", np->u.name.s, np->u.name.child->u.ull);
2499 			np = np->u.name.next;
2500 			if (np)
2501 				*snamep++ = '/';
2502 		}
2503 		*snamep++ = '\0';
2504 
2505 		/* create the new stat & add it to our list */
2506 		newentp = MALLOC(sizeof (*newentp));
2507 		*newentp = ent;
2508 		statp = stats_new_counter(NULL, sname, 0);
2509 		Istats = lut_add(Istats, (void *)newentp, (void *)statp,
2510 		    (lut_cmp)istat_cmp);
2511 	}
2512 
2513 	/* if n is non-zero, set that value instead of bumping */
2514 	if (n) {
2515 		stats_counter_reset(statp);
2516 		stats_counter_add(statp, n);
2517 	} else
2518 		stats_counter_bump(statp);
2519 	Istat_need_save = 1;
2520 
2521 	ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
2522 	out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
2523 	    stats_counter_value(statp));
2524 }
2525 
/*
 * istat_destructor -- lut_free() callback for Istats: frees the entry
 *	used as the key and deletes the stat stored as the value.
 */
/*ARGSUSED*/
static void
istat_destructor(void *left, void *right, void *arg)
{
	FREE((struct istat_entry *)left);
	stats_delete((struct stats *)right);
}
2535 
2536 /*
2537  * Callback used in a walk of the Istats to reset matching stat counters.
2538  */
2539 static void
istat_counter_reset_cb(struct istat_entry * entp,struct stats * statp,const struct ipath * ipp)2540 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
2541     const struct ipath *ipp)
2542 {
2543 	char *path;
2544 
2545 	if (entp->ipath == ipp) {
2546 		path = ipath2str(entp->ename, ipp);
2547 		out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
2548 		FREE(path);
2549 		stats_counter_reset(statp);
2550 		Istat_need_save = 1;
2551 	}
2552 }
2553 
2554 /*ARGSUSED*/
2555 static void
istat_counter_topo_chg_cb(struct istat_entry * entp,struct stats * statp,void * unused)2556 istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp,
2557     void *unused)
2558 {
2559 	char *path;
2560 	nvlist_t *fmri;
2561 
2562 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2563 	if (!platform_path_exists(fmri)) {
2564 		path = ipath2str(entp->ename, entp->ipath);
2565 		out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path);
2566 		FREE(path);
2567 		stats_counter_reset(statp);
2568 		Istat_need_save = 1;
2569 	}
2570 	nvlist_free(fmri);
2571 }
2572 
/*
 * istat_fini -- free the Istats table, with istat_destructor() as the
 *	callback handed to lut_free().
 */
void
istat_fini(void)
{
	lut_free(Istats, istat_destructor, NULL);
}
2578 
/*
 * State shared by serd_save() and its two lut_walk() callbacks while the
 * serd engine names are being serialized.
 */
static char *Serdbuf;		/* buffer allocated by serd_save() */
static char *Serdbufptr;	/* next write position, moved by serd2str() */
static int Serdsz;		/* byte total summed up by serdaddsize() */
2582 
2583 /*
2584  * serdaddsize -- calculate size of serd and add it to Serdsz
2585  */
2586 /*ARGSUSED*/
2587 static void
serdaddsize(const struct serd_entry * lhs,struct stats * rhs,void * arg)2588 serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2589 {
2590 	ASSERT(lhs != NULL);
2591 
2592 	/* count up the size of the stat name */
2593 	Serdsz += ipath2strlen(lhs->ename, lhs->ipath);
2594 	Serdsz++;	/* for the trailing NULL byte */
2595 }
2596 
2597 /*
2598  * serd2str -- serialize a serd engine, writing result to *Serdbufptr
2599  */
2600 /*ARGSUSED*/
2601 static void
serd2str(const struct serd_entry * lhs,struct stats * rhs,void * arg)2602 serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2603 {
2604 	char *str;
2605 	int len;
2606 
2607 	ASSERT(lhs != NULL);
2608 
2609 	/* serialize the serd engine name */
2610 	str = ipath2str(lhs->ename, lhs->ipath);
2611 	len = strlen(str);
2612 
2613 	ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]);
2614 	(void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr);
2615 	Serdbufptr += len;
2616 	FREE(str);
2617 	*Serdbufptr++ = '\0';
2618 	ASSERT(Serdbufptr <= &Serdbuf[Serdsz]);
2619 }
2620 
2621 void
serd_save()2622 serd_save()
2623 {
2624 	if (Serd_need_save == 0)
2625 		return;
2626 
2627 	/* figure out how big the serialzed info is */
2628 	Serdsz = 0;
2629 	lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL);
2630 
2631 	if (Serdsz == 0) {
2632 		/* no serd engines to save */
2633 		fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2634 		return;
2635 	}
2636 
2637 	/* create the serialized buffer */
2638 	Serdbufptr = Serdbuf = MALLOC(Serdsz);
2639 	lut_walk(SerdEngines, (lut_cb)serd2str, NULL);
2640 
2641 	/* clear out current saved stats */
2642 	fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2643 
2644 	/* write out the new version */
2645 	fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz);
2646 	FREE(Serdbuf);
2647 	Serd_need_save = 0;
2648 }
2649 
2650 int
serd_cmp(struct serd_entry * ent1,struct serd_entry * ent2)2651 serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2)
2652 {
2653 	if (ent1->ename != ent2->ename)
2654 		return (ent2->ename - ent1->ename);
2655 	if (ent1->ipath != ent2->ipath)
2656 		return ((char *)ent2->ipath - (char *)ent1->ipath);
2657 
2658 	return (0);
2659 }
2660 
2661 void
fme_serd_load(fmd_hdl_t * hdl)2662 fme_serd_load(fmd_hdl_t *hdl)
2663 {
2664 	int sz;
2665 	char *sbuf;
2666 	char *sepptr;
2667 	char *ptr;
2668 	struct serd_entry *newentp;
2669 	struct node *epname;
2670 	nvlist_t *fmri;
2671 	char *namestring;
2672 
2673 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0)
2674 		return;
2675 	sbuf = alloca(sz);
2676 	fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz);
2677 	ptr = sbuf;
2678 	while (ptr < &sbuf[sz]) {
2679 		sepptr = strchr(ptr, '@');
2680 		*sepptr = '\0';
2681 		namestring = ptr;
2682 		sepptr++;
2683 		ptr = sepptr;
2684 		ptr += strlen(ptr);
2685 		ptr++;	/* move past the '\0' separating paths */
2686 		epname = pathstring2epnamenp(sepptr);
2687 		fmri = node2fmri(epname);
2688 		if (platform_path_exists(fmri)) {
2689 			newentp = MALLOC(sizeof (*newentp));
2690 			newentp->hdl = hdl;
2691 			newentp->ipath = ipath(epname);
2692 			newentp->ename = stable(namestring);
2693 			SerdEngines = lut_add(SerdEngines, (void *)newentp,
2694 			    (void *)newentp, (lut_cmp)serd_cmp);
2695 		} else
2696 			Serd_need_save = 1;
2697 		tree_free(epname);
2698 		nvlist_free(fmri);
2699 	}
2700 	/* save it back again in case some of the paths no longer exist */
2701 	serd_save();
2702 }
2703 
/*
 * serd_destructor -- lut_free() callback for SerdEngines: frees the entry
 *	passed as the key.  The value is not touched.
 */
/*ARGSUSED*/
static void
serd_destructor(void *left, void *right, void *arg)
{
	FREE((struct serd_entry *)left);
}
2711 
2712 /*
2713  * Callback used in a walk of the SerdEngines to reset matching serd engines.
2714  */
2715 /*ARGSUSED*/
2716 static void
serd_reset_cb(struct serd_entry * entp,void * unused,const struct ipath * ipp)2717 serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp)
2718 {
2719 	char *path;
2720 
2721 	if (entp->ipath == ipp) {
2722 		path = ipath2str(entp->ename, ipp);
2723 		out(O_ALTFP, "serd_reset_cb: resetting %s", path);
2724 		fmd_serd_reset(entp->hdl, path);
2725 		FREE(path);
2726 		Serd_need_save = 1;
2727 	}
2728 }
2729 
2730 /*ARGSUSED*/
2731 static void
serd_topo_chg_cb(struct serd_entry * entp,void * unused,void * unused2)2732 serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2)
2733 {
2734 	char *path;
2735 	nvlist_t *fmri;
2736 
2737 	fmri = ipath2fmri((struct ipath *)(entp->ipath));
2738 	if (!platform_path_exists(fmri)) {
2739 		path = ipath2str(entp->ename, entp->ipath);
2740 		out(O_ALTFP, "serd_topo_chg_cb: not present %s", path);
2741 		fmd_serd_reset(entp->hdl, path);
2742 		FREE(path);
2743 		Serd_need_save = 1;
2744 	}
2745 	nvlist_free(fmri);
2746 }
2747 
/*
 * serd_fini -- free the SerdEngines table, with serd_destructor() as the
 *	callback handed to lut_free().
 */
void
serd_fini(void)
{
	lut_free(SerdEngines, serd_destructor, NULL);
}
2753 
2754 static void
publish_suspects(struct fme * fmep,struct rsl * srl)2755 publish_suspects(struct fme *fmep, struct rsl *srl)
2756 {
2757 	struct rsl *rp;
2758 	nvlist_t *fault;
2759 	uint8_t cert;
2760 	uint_t *frs;
2761 	uint_t frsum, fr;
2762 	uint_t messval;
2763 	uint_t retireval;
2764 	uint_t responseval;
2765 	struct node *snp;
2766 	int frcnt, fridx;
2767 	boolean_t allfaulty = B_TRUE;
2768 	struct rsl *erl = srl + fmep->nsuspects - 1;
2769 
2770 	/*
2771 	 * sort the array
2772 	 */
2773 	qsort(srl, fmep->nsuspects, sizeof (struct rsl), rslcmp);
2774 
2775 	/* sum the fitrates */
2776 	frs = alloca(fmep->nsuspects * sizeof (uint_t));
2777 	fridx = frcnt = frsum = 0;
2778 
2779 	for (rp = srl; rp <= erl; rp++) {
2780 		struct node *n;
2781 
2782 		n = eventprop_lookup(rp->suspect, L_FITrate);
2783 		if (node2uint(n, &fr) != 0) {
2784 			out(O_DEBUG|O_NONL, "event ");
2785 			ipath_print(O_DEBUG|O_NONL,
2786 			    rp->suspect->enode->u.event.ename->u.name.s,
2787 			    rp->suspect->ipp);
2788 			out(O_VERB, " has no FITrate (using 1)");
2789 			fr = 1;
2790 		} else if (fr == 0) {
2791 			out(O_DEBUG|O_NONL, "event ");
2792 			ipath_print(O_DEBUG|O_NONL,
2793 			    rp->suspect->enode->u.event.ename->u.name.s,
2794 			    rp->suspect->ipp);
2795 			out(O_VERB, " has zero FITrate (using 1)");
2796 			fr = 1;
2797 		}
2798 
2799 		frs[fridx++] = fr;
2800 		frsum += fr;
2801 		frcnt++;
2802 	}
2803 
2804 	/* Add them in reverse order of our sort, as fmd reverses order */
2805 	for (rp = erl; rp >= srl; rp--) {
2806 		cert = percentof(frs[--fridx], frsum);
2807 		fault = fmd_nvl_create_fault(fmep->hdl,
2808 		    rp->suspect->enode->u.event.ename->u.name.s,
2809 		    cert,
2810 		    rp->asru,
2811 		    rp->fru,
2812 		    rp->rsrc);
2813 		if (fault == NULL)
2814 			out(O_DIE, "fault creation failed");
2815 		/* if "message" property exists, add it to the fault */
2816 		if (node2uint(eventprop_lookup(rp->suspect, L_message),
2817 		    &messval) == 0) {
2818 
2819 			out(O_ALTFP,
2820 			    "[FME%d, %s adds message=%d to suspect list]",
2821 			    fmep->id,
2822 			    rp->suspect->enode->u.event.ename->u.name.s,
2823 			    messval);
2824 			if (nvlist_add_boolean_value(fault,
2825 			    FM_SUSPECT_MESSAGE,
2826 			    (messval) ? B_TRUE : B_FALSE) != 0) {
2827 				out(O_DIE, "cannot add no-message to fault");
2828 			}
2829 		}
2830 
2831 		/* if "retire" property exists, add it to the fault */
2832 		if (node2uint(eventprop_lookup(rp->suspect, L_retire),
2833 		    &retireval) == 0) {
2834 
2835 			out(O_ALTFP,
2836 			    "[FME%d, %s adds retire=%d to suspect list]",
2837 			    fmep->id,
2838 			    rp->suspect->enode->u.event.ename->u.name.s,
2839 			    retireval);
2840 			if (nvlist_add_boolean_value(fault,
2841 			    FM_SUSPECT_RETIRE,
2842 			    (retireval) ? B_TRUE : B_FALSE) != 0) {
2843 				out(O_DIE, "cannot add no-retire to fault");
2844 			}
2845 		}
2846 
2847 		/* if "response" property exists, add it to the fault */
2848 		if (node2uint(eventprop_lookup(rp->suspect, L_response),
2849 		    &responseval) == 0) {
2850 
2851 			out(O_ALTFP,
2852 			    "[FME%d, %s adds response=%d to suspect list]",
2853 			    fmep->id,
2854 			    rp->suspect->enode->u.event.ename->u.name.s,
2855 			    responseval);
2856 			if (nvlist_add_boolean_value(fault,
2857 			    FM_SUSPECT_RESPONSE,
2858 			    (responseval) ? B_TRUE : B_FALSE) != 0) {
2859 				out(O_DIE, "cannot add no-response to fault");
2860 			}
2861 		}
2862 
2863 		/* add any payload properties */
2864 		lut_walk(rp->suspect->payloadprops,
2865 		    (lut_cb)addpayloadprop, (void *)fault);
2866 		rslfree(rp);
2867 
2868 		/*
2869 		 * If "action" property exists, evaluate it;  this must be done
2870 		 * before the allfaulty check below since some actions may
2871 		 * modify the asru to be used in fmd_nvl_fmri_has_fault.  This
2872 		 * needs to be restructured if any new actions are introduced
2873 		 * that have effects that we do not want to be visible if
2874 		 * we decide not to publish in the dupclose check below.
2875 		 */
2876 		if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
2877 			struct evalue evalue;
2878 
2879 			out(O_ALTFP|O_NONL,
2880 			    "[FME%d, %s action ", fmep->id,
2881 			    rp->suspect->enode->u.event.ename->u.name.s);
2882 			ptree_name_iter(O_ALTFP|O_NONL, snp);
2883 			out(O_ALTFP, "]");
2884 			Action_nvl = fault;
2885 			(void) eval_expr(snp, NULL, NULL, NULL, NULL,
2886 			    NULL, 0, &evalue);
2887 		}
2888 
2889 		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);
2890 
2891 		/*
2892 		 * check if the asru is already marked as "faulty".
2893 		 */
2894 		if (allfaulty) {
2895 			nvlist_t *asru;
2896 
2897 			out(O_ALTFP|O_VERB, "FME%d dup check ", fmep->id);
2898 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
2899 			out(O_ALTFP|O_VERB|O_NONL, " ");
2900 			if (nvlist_lookup_nvlist(fault,
2901 			    FM_FAULT_ASRU, &asru) != 0) {
2902 				out(O_ALTFP|O_VERB, "NULL asru");
2903 				allfaulty = B_FALSE;
2904 			} else if (fmd_nvl_fmri_has_fault(fmep->hdl, asru,
2905 			    FMD_HAS_FAULT_ASRU, NULL)) {
2906 				out(O_ALTFP|O_VERB, "faulty");
2907 			} else {
2908 				out(O_ALTFP|O_VERB, "not faulty");
2909 				allfaulty = B_FALSE;
2910 			}
2911 		}
2912 
2913 	}
2914 
2915 	if (!allfaulty) {
2916 		/*
2917 		 * don't update the count stat if all asrus are already
2918 		 * present and unrepaired in the asru cache
2919 		 */
2920 		for (rp = erl; rp >= srl; rp--) {
2921 			struct event *suspect = rp->suspect;
2922 
2923 			if (suspect == NULL)
2924 				continue;
2925 
2926 			/* if "count" exists, increment the appropriate stat */
2927 			if ((snp = eventprop_lookup(suspect,
2928 			    L_count)) != NULL) {
2929 				out(O_ALTFP|O_NONL,
2930 				    "[FME%d, %s count ", fmep->id,
2931 				    suspect->enode->u.event.ename->u.name.s);
2932 				ptree_name_iter(O_ALTFP|O_NONL, snp);
2933 				out(O_ALTFP, "]");
2934 				istat_bump(snp, 0);
2935 
2936 			}
2937 		}
2938 		istat_save();	/* write out any istat changes */
2939 	}
2940 }
2941 
2942 static const char *
undiag_2defect_str(int ud)2943 undiag_2defect_str(int ud)
2944 {
2945 	switch (ud) {
2946 	case UD_VAL_MISSINGINFO:
2947 	case UD_VAL_MISSINGOBS:
2948 	case UD_VAL_MISSINGPATH:
2949 	case UD_VAL_MISSINGZERO:
2950 	case UD_VAL_BADOBS:
2951 	case UD_VAL_CFGMISMATCH:
2952 		return (UNDIAG_DEFECT_CHKPT);
2953 
2954 	case UD_VAL_BADEVENTI:
2955 	case UD_VAL_BADEVENTPATH:
2956 	case UD_VAL_BADEVENTCLASS:
2957 	case UD_VAL_INSTFAIL:
2958 	case UD_VAL_NOPATH:
2959 	case UD_VAL_UNSOLVD:
2960 		return (UNDIAG_DEFECT_FME);
2961 
2962 	case UD_VAL_MAXFME:
2963 		return (UNDIAG_DEFECT_LIMIT);
2964 
2965 	case UD_VAL_UNKNOWN:
2966 	default:
2967 		return (UNDIAG_DEFECT_UNKNOWN);
2968 	}
2969 }
2970 
2971 static const char *
undiag_2fault_str(int ud)2972 undiag_2fault_str(int ud)
2973 {
2974 	switch (ud) {
2975 	case UD_VAL_BADEVENTI:
2976 	case UD_VAL_BADEVENTPATH:
2977 	case UD_VAL_BADEVENTCLASS:
2978 	case UD_VAL_INSTFAIL:
2979 	case UD_VAL_NOPATH:
2980 	case UD_VAL_UNSOLVD:
2981 		return (UNDIAG_FAULT_FME);
2982 	default:
2983 		return (NULL);
2984 	}
2985 }
2986 
2987 static char *
undiag_2reason_str(int ud,char * arg)2988 undiag_2reason_str(int ud, char *arg)
2989 {
2990 	const char *ptr;
2991 	char *buf;
2992 	int with_arg = 0;
2993 
2994 	switch (ud) {
2995 	case UD_VAL_BADEVENTPATH:
2996 		ptr = UD_STR_BADEVENTPATH;
2997 		with_arg = 1;
2998 		break;
2999 	case UD_VAL_BADEVENTCLASS:
3000 		ptr = UD_STR_BADEVENTCLASS;
3001 		with_arg = 1;
3002 		break;
3003 	case UD_VAL_BADEVENTI:
3004 		ptr = UD_STR_BADEVENTI;
3005 		with_arg = 1;
3006 		break;
3007 	case UD_VAL_BADOBS:
3008 		ptr = UD_STR_BADOBS;
3009 		break;
3010 	case UD_VAL_CFGMISMATCH:
3011 		ptr = UD_STR_CFGMISMATCH;
3012 		break;
3013 	case UD_VAL_INSTFAIL:
3014 		ptr = UD_STR_INSTFAIL;
3015 		with_arg = 1;
3016 		break;
3017 	case UD_VAL_MAXFME:
3018 		ptr = UD_STR_MAXFME;
3019 		break;
3020 	case UD_VAL_MISSINGINFO:
3021 		ptr = UD_STR_MISSINGINFO;
3022 		break;
3023 	case UD_VAL_MISSINGOBS:
3024 		ptr = UD_STR_MISSINGOBS;
3025 		break;
3026 	case UD_VAL_MISSINGPATH:
3027 		ptr = UD_STR_MISSINGPATH;
3028 		break;
3029 	case UD_VAL_MISSINGZERO:
3030 		ptr = UD_STR_MISSINGZERO;
3031 		break;
3032 	case UD_VAL_NOPATH:
3033 		ptr = UD_STR_NOPATH;
3034 		with_arg = 1;
3035 		break;
3036 	case UD_VAL_UNSOLVD:
3037 		ptr = UD_STR_UNSOLVD;
3038 		break;
3039 	case UD_VAL_UNKNOWN:
3040 	default:
3041 		ptr = UD_STR_UNKNOWN;
3042 		break;
3043 	}
3044 	if (with_arg) {
3045 		buf = MALLOC(strlen(ptr) + strlen(arg) - 1);
3046 		(void) sprintf(buf, ptr, arg);
3047 	} else {
3048 		buf = MALLOC(strlen(ptr) + 1);
3049 		(void) sprintf(buf, ptr);
3050 	}
3051 	return (buf);
3052 }
3053 
3054 static void
publish_undiagnosable(fmd_hdl_t * hdl,fmd_event_t * ffep,fmd_case_t * fmcase,nvlist_t * detector,char * arg)3055 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase,
3056     nvlist_t *detector, char *arg)
3057 {
3058 	struct case_list *newcase;
3059 	nvlist_t *defect, *fault;
3060 	const char *faultstr;
3061 	char *reason = undiag_2reason_str(Undiag_reason, arg);
3062 
3063 	out(O_ALTFP,
3064 	    "[undiagnosable ereport received, "
3065 	    "creating and closing a new case (%s)]", reason);
3066 
3067 	newcase = MALLOC(sizeof (struct case_list));
3068 	newcase->next = NULL;
3069 	newcase->fmcase = fmcase;
3070 	if (Undiagablecaselist != NULL)
3071 		newcase->next = Undiagablecaselist;
3072 	Undiagablecaselist = newcase;
3073 
3074 	if (ffep != NULL)
3075 		fmd_case_add_ereport(hdl, newcase->fmcase, ffep);
3076 
3077 	/* add defect */
3078 	defect = fmd_nvl_create_fault(hdl,
3079 	    undiag_2defect_str(Undiag_reason), 50, NULL, NULL, detector);
3080 	(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
3081 	(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE, B_FALSE);
3082 	(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE, B_FALSE);
3083 	fmd_case_add_suspect(hdl, newcase->fmcase, defect);
3084 
3085 	/* add fault if appropriate */
3086 	faultstr = undiag_2fault_str(Undiag_reason);
3087 	if (faultstr != NULL) {
3088 		fault = fmd_nvl_create_fault(hdl, faultstr, 50, NULL, NULL,
3089 		    detector);
3090 		(void) nvlist_add_string(fault, UNDIAG_REASON, reason);
3091 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
3092 		    B_FALSE);
3093 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
3094 		    B_FALSE);
3095 		fmd_case_add_suspect(hdl, newcase->fmcase, fault);
3096 	}
3097 	FREE(reason);
3098 
3099 	/* solve and close case */
3100 	fmd_case_solve(hdl, newcase->fmcase);
3101 	fmd_case_close(hdl, newcase->fmcase);
3102 	Undiag_reason = UD_VAL_UNKNOWN;
3103 }
3104 
3105 static void
fme_undiagnosable(struct fme * f)3106 fme_undiagnosable(struct fme *f)
3107 {
3108 	nvlist_t *defect, *fault, *detector = NULL;
3109 	struct event *ep;
3110 	char *pathstr;
3111 	const char *faultstr;
3112 	char *reason = undiag_2reason_str(Undiag_reason, NULL);
3113 
3114 	out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
3115 	    f->id, fmd_case_uuid(f->hdl, f->fmcase), reason);
3116 
3117 	for (ep = f->observations; ep; ep = ep->observations) {
3118 
3119 		if (ep->ffep != f->e0r)
3120 			fmd_case_add_ereport(f->hdl, f->fmcase, ep->ffep);
3121 
3122 		pathstr = ipath2str(NULL, ipath(platform_getpath(ep->nvp)));
3123 		platform_units_translate(0, f->config, NULL, NULL, &detector,
3124 		    pathstr);
3125 		FREE(pathstr);
3126 
3127 		/* add defect */
3128 		defect = fmd_nvl_create_fault(f->hdl,
3129 		    undiag_2defect_str(Undiag_reason), 50 / f->uniqobs,
3130 		    NULL, NULL, detector);
3131 		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
3132 		(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE,
3133 		    B_FALSE);
3134 		(void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE,
3135 		    B_FALSE);
3136 		fmd_case_add_suspect(f->hdl, f->fmcase, defect);
3137 
3138 		/* add fault if appropriate */
3139 		faultstr = undiag_2fault_str(Undiag_reason);
3140 		if (faultstr == NULL)
3141 			continue;
3142 		fault = fmd_nvl_create_fault(f->hdl, faultstr, 50 / f->uniqobs,
3143 		    NULL, NULL, detector);
3144 		(void) nvlist_add_string(fault, UNDIAG_REASON, reason);
3145 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
3146 		    B_FALSE);
3147 		(void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
3148 		    B_FALSE);
3149 		fmd_case_add_suspect(f->hdl, f->fmcase, fault);
3150 		nvlist_free(detector);
3151 	}
3152 	FREE(reason);
3153 	fmd_case_solve(f->hdl, f->fmcase);
3154 	fmd_case_close(f->hdl, f->fmcase);
3155 	Undiag_reason = UD_VAL_UNKNOWN;
3156 }
3157 
3158 /*
3159  * fme_close_case
3160  *
3161  *	Find the requested case amongst our fmes and close it.  Free up
3162  *	the related fme.
3163  */
3164 void
fme_close_case(fmd_hdl_t * hdl,fmd_case_t * fmcase)3165 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase)
3166 {
3167 	struct case_list *ucasep, *prevcasep = NULL;
3168 	struct fme *prev = NULL;
3169 	struct fme *fmep;
3170 
3171 	for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) {
3172 		if (fmcase != ucasep->fmcase) {
3173 			prevcasep = ucasep;
3174 			continue;
3175 		}
3176 
3177 		if (prevcasep == NULL)
3178 			Undiagablecaselist = Undiagablecaselist->next;
3179 		else
3180 			prevcasep->next = ucasep->next;
3181 
3182 		FREE(ucasep);
3183 		return;
3184 	}
3185 
3186 	for (fmep = FMElist; fmep; fmep = fmep->next) {
3187 		if (fmep->hdl == hdl && fmep->fmcase == fmcase)
3188 			break;
3189 		prev = fmep;
3190 	}
3191 
3192 	if (fmep == NULL) {
3193 		out(O_WARN, "Eft asked to close unrecognized case [%s].",
3194 		    fmd_case_uuid(hdl, fmcase));
3195 		return;
3196 	}
3197 
3198 	if (EFMElist == fmep)
3199 		EFMElist = prev;
3200 
3201 	if (prev == NULL)
3202 		FMElist = FMElist->next;
3203 	else
3204 		prev->next = fmep->next;
3205 
3206 	fmep->next = NULL;
3207 
3208 	/* Get rid of any timer this fme has set */
3209 	if (fmep->wull != 0)
3210 		fmd_timer_remove(fmep->hdl, fmep->timer);
3211 
3212 	if (ClosedFMEs == NULL) {
3213 		ClosedFMEs = fmep;
3214 	} else {
3215 		fmep->next = ClosedFMEs;
3216 		ClosedFMEs = fmep;
3217 	}
3218 
3219 	Open_fme_count--;
3220 
3221 	/* See if we can close the overflow FME */
3222 	if (Open_fme_count <= Max_fme) {
3223 		for (fmep = FMElist; fmep; fmep = fmep->next) {
3224 			if (fmep->overflow && !(fmd_case_closed(fmep->hdl,
3225 			    fmep->fmcase)))
3226 				break;
3227 		}
3228 
3229 		if (fmep != NULL)
3230 			fmd_case_close(fmep->hdl, fmep->fmcase);
3231 	}
3232 }
3233 
3234 /*
3235  * fme_set_timer()
3236  *	If the time we need to wait for the given FME is less than the
3237  *	current timer, kick that old timer out and establish a new one.
3238  */
3239 static int
fme_set_timer(struct fme * fmep,unsigned long long wull)3240 fme_set_timer(struct fme *fmep, unsigned long long wull)
3241 {
3242 	out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait ");
3243 	ptree_timeval(O_ALTFP|O_VERB, &wull);
3244 
3245 	if (wull <= fmep->pull) {
3246 		out(O_ALTFP|O_VERB|O_NONL, "already have waited at least ");
3247 		ptree_timeval(O_ALTFP|O_VERB, &fmep->pull);
3248 		out(O_ALTFP|O_VERB, NULL);
3249 		/* we've waited at least wull already, don't need timer */
3250 		return (0);
3251 	}
3252 
3253 	out(O_ALTFP|O_VERB|O_NONL, " currently ");
3254 	if (fmep->wull != 0) {
3255 		out(O_ALTFP|O_VERB|O_NONL, "waiting ");
3256 		ptree_timeval(O_ALTFP|O_VERB, &fmep->wull);
3257 		out(O_ALTFP|O_VERB, NULL);
3258 	} else {
3259 		out(O_ALTFP|O_VERB|O_NONL, "not waiting");
3260 		out(O_ALTFP|O_VERB, NULL);
3261 	}
3262 
3263 	if (fmep->wull != 0)
3264 		if (wull >= fmep->wull)
3265 			/* New timer would fire later than established timer */
3266 			return (0);
3267 
3268 	if (fmep->wull != 0) {
3269 		fmd_timer_remove(fmep->hdl, fmep->timer);
3270 	}
3271 
3272 	fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep,
3273 	    fmep->e0r, wull);
3274 	out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer);
3275 	fmep->wull = wull;
3276 	return (1);
3277 }
3278 
3279 void
fme_timer_fired(struct fme * fmep,id_t tid)3280 fme_timer_fired(struct fme *fmep, id_t tid)
3281 {
3282 	struct fme *ffmep = NULL;
3283 
3284 	for (ffmep = FMElist; ffmep; ffmep = ffmep->next)
3285 		if (ffmep == fmep)
3286 			break;
3287 
3288 	if (ffmep == NULL) {
3289 		out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.",
3290 		    (void *)fmep);
3291 		return;
3292 	}
3293 
3294 	out(O_ALTFP|O_VERB, "Timer fired %lx", tid);
3295 	fmep->pull = fmep->wull;
3296 	fmep->wull = 0;
3297 	fmd_buf_write(fmep->hdl, fmep->fmcase,
3298 	    WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull));
3299 
3300 	fme_eval(fmep, fmep->e0r);
3301 }
3302 
3303 /*
3304  * Preserve the fme's suspect list in its psuspects list, NULLing the
3305  * suspects list in the meantime.
3306  */
3307 static void
save_suspects(struct fme * fmep)3308 save_suspects(struct fme *fmep)
3309 {
3310 	struct event *ep;
3311 	struct event *nextep;
3312 
3313 	/* zero out the previous suspect list */
3314 	for (ep = fmep->psuspects; ep; ep = nextep) {
3315 		nextep = ep->psuspects;
3316 		ep->psuspects = NULL;
3317 	}
3318 	fmep->psuspects = NULL;
3319 
3320 	/* zero out the suspect list, copying it to previous suspect list */
3321 	fmep->psuspects = fmep->suspects;
3322 	for (ep = fmep->suspects; ep; ep = nextep) {
3323 		nextep = ep->suspects;
3324 		ep->psuspects = ep->suspects;
3325 		ep->suspects = NULL;
3326 		ep->is_suspect = 0;
3327 	}
3328 	fmep->suspects = NULL;
3329 	fmep->nsuspects = 0;
3330 }
3331 
3332 /*
3333  * Retrieve the fme's suspect list from its psuspects list.
3334  */
3335 static void
restore_suspects(struct fme * fmep)3336 restore_suspects(struct fme *fmep)
3337 {
3338 	struct event *ep;
3339 	struct event *nextep;
3340 
3341 	fmep->nsuspects = 0;
3342 	fmep->suspects = fmep->psuspects;
3343 	for (ep = fmep->psuspects; ep; ep = nextep) {
3344 		fmep->nsuspects++;
3345 		nextep = ep->psuspects;
3346 		ep->suspects = ep->psuspects;
3347 	}
3348 }
3349 
3350 /*
3351  * this is what we use to call the Emrys prototype code instead of main()
3352  */
3353 static void
fme_eval(struct fme * fmep,fmd_event_t * ffep)3354 fme_eval(struct fme *fmep, fmd_event_t *ffep)
3355 {
3356 	struct event *ep;
3357 	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
3358 	struct rsl *srl = NULL;
3359 	struct rsl *srl2 = NULL;
3360 	int mess_zero_count;
3361 	int rpcnt;
3362 
3363 	save_suspects(fmep);
3364 
3365 	out(O_ALTFP, "Evaluate FME %d", fmep->id);
3366 	indent_set("  ");
3367 
3368 	lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
3369 	fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
3370 
3371 	out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
3372 	    fme_state2str(fmep->state));
3373 	for (ep = fmep->suspects; ep; ep = ep->suspects) {
3374 		out(O_ALTFP|O_NONL, " ");
3375 		itree_pevent_brief(O_ALTFP|O_NONL, ep);
3376 	}
3377 	out(O_ALTFP, NULL);
3378 
3379 	switch (fmep->state) {
3380 	case FME_CREDIBLE:
3381 		print_suspects(SLNEW, fmep);
3382 		(void) upsets_eval(fmep, ffep);
3383 
3384 		/*
3385 		 * we may have already posted suspects in upsets_eval() which
3386 		 * can recurse into fme_eval() again. If so then just return.
3387 		 */
3388 		if (fmep->posted_suspects)
3389 			return;
3390 
3391 		stats_counter_bump(fmep->diags);
3392 		rpcnt = fmep->nsuspects;
3393 		save_suspects(fmep);
3394 
3395 		/*
3396 		 * create two lists, one for "message=1" faults and one for
3397 		 * "message=0" faults. If we have a mixture we will generate
3398 		 * two separate suspect lists.
3399 		 */
3400 		srl = MALLOC(rpcnt * sizeof (struct rsl));
3401 		bzero(srl, rpcnt * sizeof (struct rsl));
3402 		srl2 = MALLOC(rpcnt * sizeof (struct rsl));
3403 		bzero(srl2, rpcnt * sizeof (struct rsl));
3404 		mess_zero_count = trim_suspects(fmep, srl, srl2, ffep);
3405 
3406 		/*
3407 		 * If the resulting suspect list has no members, we're
3408 		 * done so simply close the case. Otherwise sort and publish.
3409 		 */
3410 		if (fmep->nsuspects == 0 && mess_zero_count == 0) {
3411 			out(O_ALTFP,
3412 			    "[FME%d, case %s (all suspects are upsets)]",
3413 			    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
3414 			fmd_case_close(fmep->hdl, fmep->fmcase);
3415 		} else if (fmep->nsuspects != 0 && mess_zero_count == 0) {
3416 			publish_suspects(fmep, srl);
3417 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3418 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3419 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3420 		} else if (fmep->nsuspects == 0 && mess_zero_count != 0) {
3421 			fmep->nsuspects = mess_zero_count;
3422 			publish_suspects(fmep, srl2);
3423 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3424 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3425 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3426 		} else {
3427 			struct event *obsp;
3428 			struct fme *nfmep;
3429 
3430 			publish_suspects(fmep, srl);
3431 			out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3432 			    fmd_case_uuid(fmep->hdl, fmep->fmcase));
3433 			fmd_case_solve(fmep->hdl, fmep->fmcase);
3434 
3435 			/*
3436 			 * Got both message=0 and message=1 so create a
3437 			 * duplicate case. Also need a temporary duplicate fme
3438 			 * structure for use by publish_suspects().
3439 			 */
3440 			nfmep = alloc_fme();
3441 			nfmep->id =  Nextid++;
3442 			nfmep->hdl = fmep->hdl;
3443 			nfmep->nsuspects = mess_zero_count;
3444 			nfmep->fmcase = fmd_case_open(fmep->hdl, NULL);
3445 			out(O_ALTFP|O_STAMP,
3446 			    "[creating parallel FME%d, case %s]", nfmep->id,
3447 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3448 			Open_fme_count++;
3449 			if (ffep) {
3450 				fmd_case_setprincipal(nfmep->hdl,
3451 				    nfmep->fmcase, ffep);
3452 				fmd_case_add_ereport(nfmep->hdl,
3453 				    nfmep->fmcase, ffep);
3454 			}
3455 			for (obsp = fmep->observations; obsp;
3456 			    obsp = obsp->observations)
3457 				if (obsp->ffep && obsp->ffep != ffep)
3458 					fmd_case_add_ereport(nfmep->hdl,
3459 					    nfmep->fmcase, obsp->ffep);
3460 
3461 			publish_suspects(nfmep, srl2);
3462 			out(O_ALTFP, "[solving FME%d, case %s]", nfmep->id,
3463 			    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3464 			fmd_case_solve(nfmep->hdl, nfmep->fmcase);
3465 			FREE(nfmep);
3466 		}
3467 		FREE(srl);
3468 		FREE(srl2);
3469 		restore_suspects(fmep);
3470 
3471 		fmep->posted_suspects = 1;
3472 		fmd_buf_write(fmep->hdl, fmep->fmcase,
3473 		    WOBUF_POSTD,
3474 		    (void *)&fmep->posted_suspects,
3475 		    sizeof (fmep->posted_suspects));
3476 
3477 		/*
3478 		 * Now the suspects have been posted, we can clear up
3479 		 * the instance tree as we won't be looking at it again.
3480 		 * Also cancel the timer as the case is now solved.
3481 		 */
3482 		if (fmep->wull != 0) {
3483 			fmd_timer_remove(fmep->hdl, fmep->timer);
3484 			fmep->wull = 0;
3485 		}
3486 		break;
3487 
3488 	case FME_WAIT:
3489 		ASSERT(my_delay > fmep->ull);
3490 		(void) fme_set_timer(fmep, my_delay);
3491 		print_suspects(SLWAIT, fmep);
3492 		itree_prune(fmep->eventtree);
3493 		return;
3494 
3495 	case FME_DISPROVED:
3496 		print_suspects(SLDISPROVED, fmep);
3497 		Undiag_reason = UD_VAL_UNSOLVD;
3498 		fme_undiagnosable(fmep);
3499 		break;
3500 	}
3501 
3502 	itree_free(fmep->eventtree);
3503 	fmep->eventtree = NULL;
3504 	structconfig_free(fmep->config);
3505 	fmep->config = NULL;
3506 	destroy_fme_bufs(fmep);
3507 }
3508 
/*
 * Prototypes for routines that are called in this part of the file
 * before the point where they are declared again or defined.
 */
static void indent(void);
static int triggered(struct fme *fmep, struct event *ep, int mark);
static enum fme_state effects_test(struct fme *fmep,
    struct event *fault_event,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state requirements_test(struct fme *fmep,
    struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state causes_test(struct fme *fmep,
    struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
3518 
3519 static int
checkconstraints(struct fme * fmep,struct arrow * arrowp)3520 checkconstraints(struct fme *fmep, struct arrow *arrowp)
3521 {
3522 	struct constraintlist *ctp;
3523 	struct evalue value;
3524 	char *sep = "";
3525 
3526 	if (arrowp->forever_false) {
3527 		indent();
3528 		out(O_ALTFP|O_VERB|O_NONL, "  Forever false constraint: ");
3529 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3530 			out(O_ALTFP|O_VERB|O_NONL, sep);
3531 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3532 			sep = ", ";
3533 		}
3534 		out(O_ALTFP|O_VERB, NULL);
3535 		return (0);
3536 	}
3537 	if (arrowp->forever_true) {
3538 		indent();
3539 		out(O_ALTFP|O_VERB|O_NONL, "  Forever true constraint: ");
3540 		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3541 			out(O_ALTFP|O_VERB|O_NONL, sep);
3542 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3543 			sep = ", ";
3544 		}
3545 		out(O_ALTFP|O_VERB, NULL);
3546 		return (1);
3547 	}
3548 
3549 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3550 		if (eval_expr(ctp->cnode, NULL, NULL,
3551 		    &fmep->globals, fmep->config,
3552 		    arrowp, 0, &value)) {
3553 			/* evaluation successful */
3554 			if (value.t == UNDEFINED || value.v == 0) {
3555 				/* known false */
3556 				arrowp->forever_false = 1;
3557 				indent();
3558 				out(O_ALTFP|O_VERB|O_NONL,
3559 				    "  False constraint: ");
3560 				ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3561 				out(O_ALTFP|O_VERB, NULL);
3562 				return (0);
3563 			}
3564 		} else {
3565 			/* evaluation unsuccessful -- unknown value */
3566 			indent();
3567 			out(O_ALTFP|O_VERB|O_NONL,
3568 			    "  Deferred constraint: ");
3569 			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3570 			out(O_ALTFP|O_VERB, NULL);
3571 			return (1);
3572 		}
3573 	}
3574 	/* known true */
3575 	arrowp->forever_true = 1;
3576 	indent();
3577 	out(O_ALTFP|O_VERB|O_NONL, "  True constraint: ");
3578 	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3579 		out(O_ALTFP|O_VERB|O_NONL, sep);
3580 		ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3581 		sep = ", ";
3582 	}
3583 	out(O_ALTFP|O_VERB, NULL);
3584 	return (1);
3585 }
3586 
3587 static int
triggered(struct fme * fmep,struct event * ep,int mark)3588 triggered(struct fme *fmep, struct event *ep, int mark)
3589 {
3590 	struct bubble *bp;
3591 	struct arrowlist *ap;
3592 	int count = 0;
3593 
3594 	stats_counter_bump(fmep->Tcallcount);
3595 	for (bp = itree_next_bubble(ep, NULL); bp;
3596 	    bp = itree_next_bubble(ep, bp)) {
3597 		if (bp->t != B_TO)
3598 			continue;
3599 		for (ap = itree_next_arrow(bp, NULL); ap;
3600 		    ap = itree_next_arrow(bp, ap)) {
3601 			/* check count of marks against K in the bubble */
3602 			if ((ap->arrowp->mark & mark) &&
3603 			    ++count >= bp->nork)
3604 				return (1);
3605 		}
3606 	}
3607 	return (0);
3608 }
3609 
3610 static int
mark_arrows(struct fme * fmep,struct event * ep,int mark,unsigned long long at_latest_by,unsigned long long * pdelay,int keep)3611 mark_arrows(struct fme *fmep, struct event *ep, int mark,
3612     unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
3613 {
3614 	struct bubble *bp;
3615 	struct arrowlist *ap;
3616 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3617 	unsigned long long my_delay;
3618 	enum fme_state result;
3619 	int retval = 0;
3620 
3621 	for (bp = itree_next_bubble(ep, NULL); bp;
3622 	    bp = itree_next_bubble(ep, bp)) {
3623 		if (bp->t != B_FROM)
3624 			continue;
3625 		stats_counter_bump(fmep->Marrowcount);
3626 		for (ap = itree_next_arrow(bp, NULL); ap;
3627 		    ap = itree_next_arrow(bp, ap)) {
3628 			struct event *ep2 = ap->arrowp->head->myevent;
3629 			/*
3630 			 * if we're clearing marks, we can avoid doing
3631 			 * all that work evaluating constraints.
3632 			 */
3633 			if (mark == 0) {
3634 				if (ap->arrowp->arrow_marked == 0)
3635 					continue;
3636 				ap->arrowp->arrow_marked = 0;
3637 				ap->arrowp->mark &= ~EFFECTS_COUNTER;
3638 				if (keep && (ep2->cached_state &
3639 				    (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
3640 					ep2->keep_in_tree = 1;
3641 				ep2->cached_state &=
3642 				    ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
3643 				(void) mark_arrows(fmep, ep2, mark, 0, NULL,
3644 				    keep);
3645 				continue;
3646 			}
3647 			ap->arrowp->arrow_marked = 1;
3648 			if (ep2->cached_state & REQMNTS_DISPROVED) {
3649 				indent();
3650 				out(O_ALTFP|O_VERB|O_NONL,
3651 				    "  ALREADY DISPROVED ");
3652 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3653 				out(O_ALTFP|O_VERB, NULL);
3654 				continue;
3655 			}
3656 			if (ep2->cached_state & WAIT_EFFECT) {
3657 				indent();
3658 				out(O_ALTFP|O_VERB|O_NONL,
3659 				    "  ALREADY EFFECTS WAIT ");
3660 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3661 				out(O_ALTFP|O_VERB, NULL);
3662 				continue;
3663 			}
3664 			if (ep2->cached_state & CREDIBLE_EFFECT) {
3665 				indent();
3666 				out(O_ALTFP|O_VERB|O_NONL,
3667 				    "  ALREADY EFFECTS CREDIBLE ");
3668 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3669 				out(O_ALTFP|O_VERB, NULL);
3670 				continue;
3671 			}
3672 			if ((ep2->cached_state & PARENT_WAIT) &&
3673 			    (mark & PARENT_WAIT)) {
3674 				indent();
3675 				out(O_ALTFP|O_VERB|O_NONL,
3676 				    "  ALREADY PARENT EFFECTS WAIT ");
3677 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3678 				out(O_ALTFP|O_VERB, NULL);
3679 				continue;
3680 			}
3681 			platform_set_payloadnvp(ep2->nvp);
3682 			if (checkconstraints(fmep, ap->arrowp) == 0) {
3683 				platform_set_payloadnvp(NULL);
3684 				indent();
3685 				out(O_ALTFP|O_VERB|O_NONL,
3686 				    "  CONSTRAINTS FAIL ");
3687 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3688 				out(O_ALTFP|O_VERB, NULL);
3689 				continue;
3690 			}
3691 			platform_set_payloadnvp(NULL);
3692 			ap->arrowp->mark |= EFFECTS_COUNTER;
3693 			if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
3694 				indent();
3695 				out(O_ALTFP|O_VERB|O_NONL,
3696 				    "  K-COUNT NOT YET MET ");
3697 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3698 				out(O_ALTFP|O_VERB, NULL);
3699 				continue;
3700 			}
3701 			ep2->cached_state &= ~PARENT_WAIT;
3702 			/*
3703 			 * if we've reached an ereport and no propagation time
3704 			 * is specified, use the Hesitate value
3705 			 */
3706 			if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
3707 			    ap->arrowp->maxdelay == 0ULL) {
3708 				out(O_ALTFP|O_VERB|O_NONL, "  default wait ");
3709 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3710 				out(O_ALTFP|O_VERB, NULL);
3711 				result = requirements_test(fmep, ep2, Hesitate,
3712 				    &my_delay);
3713 			} else {
3714 				result = requirements_test(fmep, ep2,
3715 				    at_latest_by + ap->arrowp->maxdelay,
3716 				    &my_delay);
3717 			}
3718 			if (result == FME_WAIT) {
3719 				retval = WAIT_EFFECT;
3720 				if (overall_delay > my_delay)
3721 					overall_delay = my_delay;
3722 				ep2->cached_state |= WAIT_EFFECT;
3723 				indent();
3724 				out(O_ALTFP|O_VERB|O_NONL, "  EFFECTS WAIT ");
3725 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3726 				out(O_ALTFP|O_VERB, NULL);
3727 				indent_push("  E");
3728 				if (mark_arrows(fmep, ep2, PARENT_WAIT,
3729 				    at_latest_by, &my_delay, 0) ==
3730 				    WAIT_EFFECT) {
3731 					retval = WAIT_EFFECT;
3732 					if (overall_delay > my_delay)
3733 						overall_delay = my_delay;
3734 				}
3735 				indent_pop();
3736 			} else if (result == FME_DISPROVED) {
3737 				indent();
3738 				out(O_ALTFP|O_VERB|O_NONL,
3739 				    "  EFFECTS DISPROVED ");
3740 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3741 				out(O_ALTFP|O_VERB, NULL);
3742 			} else {
3743 				ep2->cached_state |= mark;
3744 				indent();
3745 				if (mark == CREDIBLE_EFFECT)
3746 					out(O_ALTFP|O_VERB|O_NONL,
3747 					    "  EFFECTS CREDIBLE ");
3748 				else
3749 					out(O_ALTFP|O_VERB|O_NONL,
3750 					    "  PARENT EFFECTS WAIT ");
3751 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3752 				out(O_ALTFP|O_VERB, NULL);
3753 				indent_push("  E");
3754 				if (mark_arrows(fmep, ep2, mark, at_latest_by,
3755 				    &my_delay, 0) == WAIT_EFFECT) {
3756 					retval = WAIT_EFFECT;
3757 					if (overall_delay > my_delay)
3758 						overall_delay = my_delay;
3759 				}
3760 				indent_pop();
3761 			}
3762 		}
3763 	}
3764 	if (retval == WAIT_EFFECT)
3765 		*pdelay = overall_delay;
3766 	return (retval);
3767 }
3768 
3769 static enum fme_state
effects_test(struct fme * fmep,struct event * fault_event,unsigned long long at_latest_by,unsigned long long * pdelay)3770 effects_test(struct fme *fmep, struct event *fault_event,
3771     unsigned long long at_latest_by, unsigned long long *pdelay)
3772 {
3773 	struct event *error_event;
3774 	enum fme_state return_value = FME_CREDIBLE;
3775 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3776 	unsigned long long my_delay;
3777 
3778 	stats_counter_bump(fmep->Ecallcount);
3779 	indent_push("  E");
3780 	indent();
3781 	out(O_ALTFP|O_VERB|O_NONL, "->");
3782 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3783 	out(O_ALTFP|O_VERB, NULL);
3784 
3785 	if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
3786 	    &my_delay, 0) == WAIT_EFFECT) {
3787 		return_value = FME_WAIT;
3788 		if (overall_delay > my_delay)
3789 			overall_delay = my_delay;
3790 	}
3791 	for (error_event = fmep->observations;
3792 	    error_event; error_event = error_event->observations) {
3793 		indent();
3794 		out(O_ALTFP|O_VERB|O_NONL, " ");
3795 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
3796 		if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
3797 			if (error_event->cached_state &
3798 			    (PARENT_WAIT|WAIT_EFFECT)) {
3799 				out(O_ALTFP|O_VERB, " NOT YET triggered");
3800 				continue;
3801 			}
3802 			return_value = FME_DISPROVED;
3803 			out(O_ALTFP|O_VERB, " NOT triggered");
3804 			break;
3805 		} else {
3806 			out(O_ALTFP|O_VERB, " triggered");
3807 		}
3808 	}
3809 	if (return_value == FME_DISPROVED) {
3810 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
3811 	} else {
3812 		fault_event->keep_in_tree = 1;
3813 		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
3814 	}
3815 
3816 	indent();
3817 	out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
3818 	    fme_state2str(return_value));
3819 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3820 	out(O_ALTFP|O_VERB, NULL);
3821 	indent_pop();
3822 	if (return_value == FME_WAIT)
3823 		*pdelay = overall_delay;
3824 	return (return_value);
3825 }
3826 
3827 static enum fme_state
requirements_test(struct fme * fmep,struct event * ep,unsigned long long at_latest_by,unsigned long long * pdelay)3828 requirements_test(struct fme *fmep, struct event *ep,
3829     unsigned long long at_latest_by, unsigned long long *pdelay)
3830 {
3831 	int waiting_events;
3832 	int credible_events;
3833 	int deferred_events;
3834 	enum fme_state return_value = FME_CREDIBLE;
3835 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3836 	unsigned long long arrow_delay;
3837 	unsigned long long my_delay;
3838 	struct event *ep2;
3839 	struct bubble *bp;
3840 	struct arrowlist *ap;
3841 
3842 	if (ep->cached_state & REQMNTS_CREDIBLE) {
3843 		indent();
3844 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY CREDIBLE ");
3845 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3846 		out(O_ALTFP|O_VERB, NULL);
3847 		return (FME_CREDIBLE);
3848 	}
3849 	if (ep->cached_state & REQMNTS_DISPROVED) {
3850 		indent();
3851 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY DISPROVED ");
3852 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3853 		out(O_ALTFP|O_VERB, NULL);
3854 		return (FME_DISPROVED);
3855 	}
3856 	if (ep->cached_state & REQMNTS_WAIT) {
3857 		indent();
3858 		*pdelay = ep->cached_delay;
3859 		out(O_ALTFP|O_VERB|O_NONL, "  REQMNTS ALREADY WAIT ");
3860 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3861 		out(O_ALTFP|O_VERB|O_NONL, ", wait for: ");
3862 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3863 		out(O_ALTFP|O_VERB, NULL);
3864 		return (FME_WAIT);
3865 	}
3866 	stats_counter_bump(fmep->Rcallcount);
3867 	indent_push("  R");
3868 	indent();
3869 	out(O_ALTFP|O_VERB|O_NONL, "->");
3870 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3871 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
3872 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3873 	out(O_ALTFP|O_VERB, NULL);
3874 
3875 	if (ep->t == N_EREPORT) {
3876 		if (ep->count == 0) {
3877 			if (fmep->pull >= at_latest_by) {
3878 				return_value = FME_DISPROVED;
3879 			} else {
3880 				ep->cached_delay = *pdelay = at_latest_by;
3881 				return_value = FME_WAIT;
3882 			}
3883 		}
3884 
3885 		indent();
3886 		switch (return_value) {
3887 		case FME_CREDIBLE:
3888 			ep->cached_state |= REQMNTS_CREDIBLE;
3889 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE ");
3890 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3891 			break;
3892 		case FME_DISPROVED:
3893 			ep->cached_state |= REQMNTS_DISPROVED;
3894 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
3895 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3896 			break;
3897 		case FME_WAIT:
3898 			ep->cached_state |= REQMNTS_WAIT;
3899 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT ");
3900 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3901 			out(O_ALTFP|O_VERB|O_NONL, " to ");
3902 			ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3903 			break;
3904 		default:
3905 			out(O_DIE, "requirements_test: unexpected fme_state");
3906 			break;
3907 		}
3908 		out(O_ALTFP|O_VERB, NULL);
3909 		indent_pop();
3910 
3911 		return (return_value);
3912 	}
3913 
3914 	/* this event is not a report, descend the tree */
3915 	for (bp = itree_next_bubble(ep, NULL); bp;
3916 	    bp = itree_next_bubble(ep, bp)) {
3917 		int n;
3918 
3919 		if (bp->t != B_FROM)
3920 			continue;
3921 
3922 		n = bp->nork;
3923 
3924 		credible_events = 0;
3925 		waiting_events = 0;
3926 		deferred_events = 0;
3927 		arrow_delay = TIMEVAL_EVENTUALLY;
3928 		/*
3929 		 * n is -1 for 'A' so adjust it.
3930 		 * XXX just count up the arrows for now.
3931 		 */
3932 		if (n < 0) {
3933 			n = 0;
3934 			for (ap = itree_next_arrow(bp, NULL); ap;
3935 			    ap = itree_next_arrow(bp, ap))
3936 				n++;
3937 			indent();
3938 			out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n);
3939 		} else {
3940 			indent();
3941 			out(O_ALTFP|O_VERB, " Bubble N=%d", n);
3942 		}
3943 
3944 		if (n == 0)
3945 			continue;
3946 		if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) {
3947 			for (ap = itree_next_arrow(bp, NULL); ap;
3948 			    ap = itree_next_arrow(bp, ap)) {
3949 				ep2 = ap->arrowp->head->myevent;
3950 				platform_set_payloadnvp(ep2->nvp);
3951 				(void) checkconstraints(fmep, ap->arrowp);
3952 				if (!ap->arrowp->forever_false) {
3953 					/*
3954 					 * if all arrows are invalidated by the
3955 					 * constraints, then we should elide the
3956 					 * whole bubble to be consistant with
3957 					 * the tree creation time behaviour
3958 					 */
3959 					bp->mark |= BUBBLE_OK;
3960 					platform_set_payloadnvp(NULL);
3961 					break;
3962 				}
3963 				platform_set_payloadnvp(NULL);
3964 			}
3965 		}
3966 		for (ap = itree_next_arrow(bp, NULL); ap;
3967 		    ap = itree_next_arrow(bp, ap)) {
3968 			ep2 = ap->arrowp->head->myevent;
3969 			if (n <= credible_events)
3970 				break;
3971 
3972 			ap->arrowp->mark |= REQMNTS_COUNTER;
3973 			if (triggered(fmep, ep2, REQMNTS_COUNTER))
3974 				/* XXX adding max timevals! */
3975 				switch (requirements_test(fmep, ep2,
3976 				    at_latest_by + ap->arrowp->maxdelay,
3977 				    &my_delay)) {
3978 				case FME_DEFERRED:
3979 					deferred_events++;
3980 					break;
3981 				case FME_CREDIBLE:
3982 					credible_events++;
3983 					break;
3984 				case FME_DISPROVED:
3985 					break;
3986 				case FME_WAIT:
3987 					if (my_delay < arrow_delay)
3988 						arrow_delay = my_delay;
3989 					waiting_events++;
3990 					break;
3991 				default:
3992 					out(O_DIE,
3993 					"Bug in requirements_test.");
3994 				}
3995 			else
3996 				deferred_events++;
3997 		}
3998 		if (!(bp->mark & BUBBLE_OK) && waiting_events == 0) {
3999 			bp->mark |= BUBBLE_ELIDED;
4000 			continue;
4001 		}
4002 		indent();
4003 		out(O_ALTFP|O_VERB, " Credible: %d Waiting %d",
4004 		    credible_events + deferred_events, waiting_events);
4005 		if (credible_events + deferred_events + waiting_events < n) {
4006 			/* Can never meet requirements */
4007 			ep->cached_state |= REQMNTS_DISPROVED;
4008 			indent();
4009 			out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
4010 			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4011 			out(O_ALTFP|O_VERB, NULL);
4012 			indent_pop();
4013 			return (FME_DISPROVED);
4014 		}
4015 		if (credible_events + deferred_events < n) {
4016 			/* will have to wait */
4017 			/* wait time is shortest known */
4018 			if (arrow_delay < overall_delay)
4019 				overall_delay = arrow_delay;
4020 			return_value = FME_WAIT;
4021 		} else if (credible_events < n) {
4022 			if (return_value != FME_WAIT)
4023 				return_value = FME_DEFERRED;
4024 		}
4025 	}
4026 
4027 	/*
4028 	 * don't mark as FME_DEFERRED. If this event isn't reached by another
4029 	 * path, then this will be considered FME_CREDIBLE. But if it is
4030 	 * reached by a different path so the K-count is met, then might
4031 	 * get overridden by FME_WAIT or FME_DISPROVED.
4032 	 */
4033 	if (return_value == FME_WAIT) {
4034 		ep->cached_state |= REQMNTS_WAIT;
4035 		ep->cached_delay = *pdelay = overall_delay;
4036 	} else if (return_value == FME_CREDIBLE) {
4037 		ep->cached_state |= REQMNTS_CREDIBLE;
4038 	}
4039 	indent();
4040 	out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ",
4041 	    fme_state2str(return_value));
4042 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4043 	out(O_ALTFP|O_VERB, NULL);
4044 	indent_pop();
4045 	return (return_value);
4046 }
4047 
4048 static enum fme_state
causes_test(struct fme * fmep,struct event * ep,unsigned long long at_latest_by,unsigned long long * pdelay)4049 causes_test(struct fme *fmep, struct event *ep,
4050     unsigned long long at_latest_by, unsigned long long *pdelay)
4051 {
4052 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4053 	unsigned long long my_delay;
4054 	int credible_results = 0;
4055 	int waiting_results = 0;
4056 	enum fme_state fstate;
4057 	struct event *tail_event;
4058 	struct bubble *bp;
4059 	struct arrowlist *ap;
4060 	int k = 1;
4061 
4062 	stats_counter_bump(fmep->Ccallcount);
4063 	indent_push("  C");
4064 	indent();
4065 	out(O_ALTFP|O_VERB|O_NONL, "->");
4066 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4067 	out(O_ALTFP|O_VERB, NULL);
4068 
4069 	for (bp = itree_next_bubble(ep, NULL); bp;
4070 	    bp = itree_next_bubble(ep, bp)) {
4071 		if (bp->t != B_TO)
4072 			continue;
4073 		k = bp->nork;	/* remember the K value */
4074 		for (ap = itree_next_arrow(bp, NULL); ap;
4075 		    ap = itree_next_arrow(bp, ap)) {
4076 			int do_not_follow = 0;
4077 
4078 			/*
4079 			 * if we get to the same event multiple times
4080 			 * only worry about the first one.
4081 			 */
4082 			if (ap->arrowp->tail->myevent->cached_state &
4083 			    CAUSES_TESTED) {
4084 				indent();
4085 				out(O_ALTFP|O_VERB|O_NONL,
4086 				    "  causes test already run for ");
4087 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4088 				    ap->arrowp->tail->myevent);
4089 				out(O_ALTFP|O_VERB, NULL);
4090 				continue;
4091 			}
4092 
4093 			/*
4094 			 * see if false constraint prevents us
4095 			 * from traversing this arrow
4096 			 */
4097 			platform_set_payloadnvp(ep->nvp);
4098 			if (checkconstraints(fmep, ap->arrowp) == 0)
4099 				do_not_follow = 1;
4100 			platform_set_payloadnvp(NULL);
4101 			if (do_not_follow) {
4102 				indent();
4103 				out(O_ALTFP|O_VERB|O_NONL,
4104 				    "  False arrow from ");
4105 				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4106 				    ap->arrowp->tail->myevent);
4107 				out(O_ALTFP|O_VERB, NULL);
4108 				continue;
4109 			}
4110 
4111 			ap->arrowp->tail->myevent->cached_state |=
4112 			    CAUSES_TESTED;
4113 			tail_event = ap->arrowp->tail->myevent;
4114 			fstate = hypothesise(fmep, tail_event, at_latest_by,
4115 			    &my_delay);
4116 
4117 			switch (fstate) {
4118 			case FME_WAIT:
4119 				if (my_delay < overall_delay)
4120 					overall_delay = my_delay;
4121 				waiting_results++;
4122 				break;
4123 			case FME_CREDIBLE:
4124 				credible_results++;
4125 				break;
4126 			case FME_DISPROVED:
4127 				break;
4128 			default:
4129 				out(O_DIE, "Bug in causes_test");
4130 			}
4131 		}
4132 	}
4133 	/* compare against K */
4134 	if (credible_results + waiting_results < k) {
4135 		indent();
4136 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED ");
4137 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4138 		out(O_ALTFP|O_VERB, NULL);
4139 		indent_pop();
4140 		return (FME_DISPROVED);
4141 	}
4142 	if (waiting_results != 0) {
4143 		*pdelay = overall_delay;
4144 		indent();
4145 		out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT ");
4146 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4147 		out(O_ALTFP|O_VERB|O_NONL, " to ");
4148 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4149 		out(O_ALTFP|O_VERB, NULL);
4150 		indent_pop();
4151 		return (FME_WAIT);
4152 	}
4153 	indent();
4154 	out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE ");
4155 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4156 	out(O_ALTFP|O_VERB, NULL);
4157 	indent_pop();
4158 	return (FME_CREDIBLE);
4159 }
4160 
4161 static enum fme_state
hypothesise(struct fme * fmep,struct event * ep,unsigned long long at_latest_by,unsigned long long * pdelay)4162 hypothesise(struct fme *fmep, struct event *ep,
4163 	unsigned long long at_latest_by, unsigned long long *pdelay)
4164 {
4165 	enum fme_state rtr, otr;
4166 	unsigned long long my_delay;
4167 	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4168 
4169 	stats_counter_bump(fmep->Hcallcount);
4170 	indent_push("  H");
4171 	indent();
4172 	out(O_ALTFP|O_VERB|O_NONL, "->");
4173 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4174 	out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
4175 	ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4176 	out(O_ALTFP|O_VERB, NULL);
4177 
4178 	rtr = requirements_test(fmep, ep, at_latest_by, &my_delay);
4179 	if ((rtr == FME_WAIT) && (my_delay < overall_delay))
4180 		overall_delay = my_delay;
4181 	if (rtr != FME_DISPROVED) {
4182 		if (is_problem(ep->t)) {
4183 			otr = effects_test(fmep, ep, at_latest_by, &my_delay);
4184 			if (otr != FME_DISPROVED) {
4185 				if (fmep->peek == 0 && ep->is_suspect == 0) {
4186 					ep->suspects = fmep->suspects;
4187 					ep->is_suspect = 1;
4188 					fmep->suspects = ep;
4189 					fmep->nsuspects++;
4190 				}
4191 			}
4192 		} else
4193 			otr = causes_test(fmep, ep, at_latest_by, &my_delay);
4194 		if ((otr == FME_WAIT) && (my_delay < overall_delay))
4195 			overall_delay = my_delay;
4196 		if ((otr != FME_DISPROVED) &&
4197 		    ((rtr == FME_WAIT) || (otr == FME_WAIT)))
4198 			*pdelay = overall_delay;
4199 	}
4200 	if (rtr == FME_DISPROVED) {
4201 		indent();
4202 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4203 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4204 		out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
4205 		indent_pop();
4206 		return (FME_DISPROVED);
4207 	}
4208 	if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
4209 		indent();
4210 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4211 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4212 		out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
4213 		indent_pop();
4214 		return (FME_DISPROVED);
4215 	}
4216 	if (otr == FME_DISPROVED) {
4217 		indent();
4218 		out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4219 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4220 		out(O_ALTFP|O_VERB, " (causes are not credible)");
4221 		indent_pop();
4222 		return (FME_DISPROVED);
4223 	}
4224 	if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
4225 		indent();
4226 		out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
4227 		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4228 		out(O_ALTFP|O_VERB|O_NONL, " to ");
4229 		ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
4230 		out(O_ALTFP|O_VERB, NULL);
4231 		indent_pop();
4232 		return (FME_WAIT);
4233 	}
4234 	indent();
4235 	out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
4236 	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4237 	out(O_ALTFP|O_VERB, NULL);
4238 	indent_pop();
4239 	return (FME_CREDIBLE);
4240 }
4241 
4242 /*
4243  * fme_istat_load -- reconstitute any persistent istats
4244  */
4245 void
fme_istat_load(fmd_hdl_t * hdl)4246 fme_istat_load(fmd_hdl_t *hdl)
4247 {
4248 	int sz;
4249 	char *sbuf;
4250 	char *ptr;
4251 
4252 	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) {
4253 		out(O_ALTFP, "fme_istat_load: No stats");
4254 		return;
4255 	}
4256 
4257 	sbuf = alloca(sz);
4258 
4259 	fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz);
4260 
4261 	/*
4262 	 * pick apart the serialized stats
4263 	 *
4264 	 * format is:
4265 	 *	<class-name>, '@', <path>, '\0', <value>, '\0'
4266 	 * for example:
4267 	 *	"stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
4268 	 *
4269 	 * since this is parsing our own serialized data, any parsing issues
4270 	 * are fatal, so we check for them all with ASSERT() below.
4271 	 */
4272 	ptr = sbuf;
4273 	while (ptr < &sbuf[sz]) {
4274 		char *sepptr;
4275 		struct node *np;
4276 		int val;
4277 
4278 		sepptr = strchr(ptr, '@');
4279 		ASSERT(sepptr != NULL);
4280 		*sepptr = '\0';
4281 
4282 		/* construct the event */
4283 		np = newnode(T_EVENT, NULL, 0);
4284 		np->u.event.ename = newnode(T_NAME, NULL, 0);
4285 		np->u.event.ename->u.name.t = N_STAT;
4286 		np->u.event.ename->u.name.s = stable(ptr);
4287 		np->u.event.ename->u.name.it = IT_ENAME;
4288 		np->u.event.ename->u.name.last = np->u.event.ename;
4289 
4290 		ptr = sepptr + 1;
4291 		ASSERT(ptr < &sbuf[sz]);
4292 		ptr += strlen(ptr);
4293 		ptr++;	/* move past the '\0' separating path from value */
4294 		ASSERT(ptr < &sbuf[sz]);
4295 		ASSERT(isdigit(*ptr));
4296 		val = atoi(ptr);
4297 		ASSERT(val > 0);
4298 		ptr += strlen(ptr);
4299 		ptr++;	/* move past the final '\0' for this entry */
4300 
4301 		np->u.event.epname = pathstring2epnamenp(sepptr + 1);
4302 		ASSERT(np->u.event.epname != NULL);
4303 
4304 		istat_bump(np, val);
4305 		tree_free(np);
4306 	}
4307 
4308 	istat_save();
4309 }
4310