1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2012 Milan Jurik. All rights reserved.
25 * Copyright (c) 2018, Joyent, Inc.
26 *
27 * fme.c -- fault management exercise module
28 *
29 * this module provides the simulated fault management exercise.
30 */
31
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <strings.h>
36 #include <ctype.h>
37 #include <alloca.h>
38 #include <libnvpair.h>
39 #include <sys/fm/protocol.h>
40 #include <fm/fmd_api.h>
41 #include <fm/libtopo.h>
42 #include "alloc.h"
43 #include "out.h"
44 #include "stats.h"
45 #include "stable.h"
46 #include "literals.h"
47 #include "lut.h"
48 #include "tree.h"
49 #include "ptree.h"
50 #include "itree.h"
51 #include "ipath.h"
52 #include "fme.h"
53 #include "evnv.h"
54 #include "eval.h"
55 #include "config.h"
56 #include "platform.h"
57 #include "esclex.h"
58
/*
 * Module-wide state.  NOTE(review): Istats, SerdEngines and Action_nvl
 * are not referenced in the portion of the file reviewed here; judging
 * by their names they hold the istat counters, the SERD engines and an
 * action nvlist -- confirm against their users.
 */
struct lut *Istats;
struct lut *SerdEngines;
nvlist_t *Action_nvl;

/* imported from eft.c... */
extern hrtime_t Hesitate;
extern char *Serd_Override;
extern nv_alloc_t Eft_nv_hdl;	/* allocator given to nvlist_x[un]pack() */
extern int Max_fme;
extern fmd_hdl_t *Hdl;

static int Istat_need_save;
static int Serd_need_save;
void istat_save(void);
void serd_save(void);

/* fme under construction is global so we can free it on module abort */
static struct fme *Nfmep;

/*
 * Reason code for the next "undiagnosable" result.  It is set just
 * before a failure is reported and put back to UD_VAL_UNKNOWN once
 * fme_restart() has solved the case with it.
 */
static int Undiag_reason = UD_VAL_UNKNOWN;

/*
 * Id handed to the next FME created by newfme(); fme_restart() raises
 * it above any id restored from a saved case.
 */
static int Nextid = 0;

static int Open_fme_count = 0;	/* Count of open FMEs */
83
/*
 * list of fault management exercises underway
 *
 * FMElist is the head and EFMElist the tail of the list that fme_ready()
 * appends to.  ClosedFMEs is a second list that fme_fini() tears down as
 * "closed fmes".
 */
static struct fme {
	struct fme *next;		/* next exercise */
	unsigned long long ull;		/* time when fme was created */
	int id;				/* FME id */
	struct config *config;		/* cooked configuration data */
	struct lut *eventtree;		/* propagation tree for this FME */
	/*
	 * The initial error report that created this FME is kept in
	 * two forms.  e0 points to the instance tree node and is used
	 * by fme_eval() as the starting point for the inference
	 * algorithm.  e0r is the event handle FMD passed to us when
	 * the ereport first arrived and is used when setting timers,
	 * which are always relative to the time of this initial
	 * report.
	 */
	struct event *e0;
	fmd_event_t *e0r;

	id_t timer;			/* for setting an fmd time-out */

	struct event *ecurrent;		/* ereport under consideration */
	struct event *suspects;		/* current suspect list */
	struct event *psuspects;	/* previous suspect list */
	int nsuspects;			/* count of suspects */
	int posted_suspects;		/* true if we've posted a diagnosis */
	int uniqobs;			/* number of unique events observed */
	int peek;			/* just peeking, don't track suspects */
	int overflow;			/* true if overflow FME */
	enum fme_state {
		FME_NOTHING = 5000,	/* not evaluated yet */
		FME_WAIT,		/* need to wait for more info */
		FME_CREDIBLE,		/* suspect list is credible */
		FME_DISPROVED,		/* no valid suspects found */
		FME_DEFERRED		/* don't know yet (k-count not met) */
	} state;

	unsigned long long pull;	/* time passed since created */
	unsigned long long wull;	/* wait until this time for re-eval */
	struct event *observations;	/* observation list */
	struct lut *globals;		/* values of global variables */
	/* fmd interfacing */
	fmd_hdl_t *hdl;			/* handle for talking with fmd */
	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
	/* stats -- one counter per FME, named "fme<id>.<suffix>" */
	struct stats *Rcount;		/* ereports received */
	struct stats *Hcallcount;	/* calls to hypothesise() */
	struct stats *Rcallcount;	/* calls to requirements_test() */
	struct stats *Ccallcount;	/* calls to causes_test() */
	struct stats *Ecallcount;	/* calls to effects_test() */
	struct stats *Tcallcount;	/* calls to triggered() */
	struct stats *Marrowcount;	/* arrows marked by mark_arrows() */
	struct stats *diags;		/* suspect lists diagnosed */
} *FMElist, *EFMElist, *ClosedFMEs;
138
/*
 * Singly linked list of fmd cases that fme_restart() was unable to
 * restart.  New entries are pushed on the front; fme_fini() frees the
 * list nodes.
 */
static struct case_list {
	fmd_case_t *fmcase;
	struct case_list *next;
} *Undiagablecaselist;
143
/* forward declarations of file-local functions */
static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static struct node *eventprop_lookup(struct event *ep, const char *propname);
static struct node *pathstring2epnamenp(char *path);
static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
    fmd_case_t *fmcase, nvlist_t *detector, char *arg);
static char *undiag_2reason_str(int ud, char *arg);
static const char *undiag_2defect_str(int ud);
static void restore_suspects(struct fme *fmep);
static void save_suspects(struct fme *fmep);
static void destroy_fme(struct fme *f);
static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
    const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
static void istat_counter_reset_cb(struct istat_entry *entp,
    struct stats *statp, const struct ipath *ipp);
static void istat_counter_topo_chg_cb(struct istat_entry *entp,
    struct stats *statp, void *unused);
static void serd_reset_cb(struct serd_entry *entp, void *unused,
    const struct ipath *ipp);
static void serd_topo_chg_cb(struct serd_entry *entp, void *unused,
    void *unused2);
static void destroy_fme_bufs(struct fme *fp);
167
168 static struct fme *
alloc_fme(void)169 alloc_fme(void)
170 {
171 struct fme *fmep;
172
173 fmep = MALLOC(sizeof (*fmep));
174 bzero(fmep, sizeof (*fmep));
175 return (fmep);
176 }
177
178 /*
179 * fme_ready -- called when all initialization of the FME (except for
180 * stats) has completed successfully. Adds the fme to global lists
181 * and establishes its stats.
182 */
183 static struct fme *
fme_ready(struct fme * fmep)184 fme_ready(struct fme *fmep)
185 {
186 char nbuf[100];
187
188 Nfmep = NULL; /* don't need to free this on module abort now */
189
190 if (EFMElist) {
191 EFMElist->next = fmep;
192 EFMElist = fmep;
193 } else
194 FMElist = EFMElist = fmep;
195
196 (void) sprintf(nbuf, "fme%d.Rcount", fmep->id);
197 fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
198 (void) sprintf(nbuf, "fme%d.Hcall", fmep->id);
199 fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1);
200 (void) sprintf(nbuf, "fme%d.Rcall", fmep->id);
201 fmep->Rcallcount = stats_new_counter(nbuf,
202 "calls to requirements_test()", 1);
203 (void) sprintf(nbuf, "fme%d.Ccall", fmep->id);
204 fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1);
205 (void) sprintf(nbuf, "fme%d.Ecall", fmep->id);
206 fmep->Ecallcount =
207 stats_new_counter(nbuf, "calls to effects_test()", 1);
208 (void) sprintf(nbuf, "fme%d.Tcall", fmep->id);
209 fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
210 (void) sprintf(nbuf, "fme%d.Marrow", fmep->id);
211 fmep->Marrowcount = stats_new_counter(nbuf,
212 "arrows marked by mark_arrows()", 1);
213 (void) sprintf(nbuf, "fme%d.diags", fmep->id);
214 fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
215
216 out(O_ALTFP|O_VERB2, "newfme: config snapshot contains...");
217 config_print(O_ALTFP|O_VERB2, fmep->config);
218
219 return (fmep);
220 }
221
/*
 * Helpers declared here rather than through a header.
 * set_needed_arrows() hands every needed arrow to ipath_dummy_lut();
 * prune_propagations() builds its throw-away instance tree with
 * itree_create_dummy().
 */
extern void ipath_dummy_lut(struct arrow *);
extern struct lut *itree_create_dummy(const char *, const struct ipath *);
224
225 /* ARGSUSED */
226 static void
set_needed_arrows(struct event * ep,struct event * ep2,struct fme * fmep)227 set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
228 {
229 struct bubble *bp;
230 struct arrowlist *ap;
231
232 for (bp = itree_next_bubble(ep, NULL); bp;
233 bp = itree_next_bubble(ep, bp)) {
234 if (bp->t != B_FROM)
235 continue;
236 for (ap = itree_next_arrow(bp, NULL); ap;
237 ap = itree_next_arrow(bp, ap)) {
238 ap->arrowp->pnode->u.arrow.needed = 1;
239 ipath_dummy_lut(ap->arrowp);
240 }
241 }
242 }
243
244 /* ARGSUSED */
245 static void
unset_needed_arrows(struct event * ep,struct event * ep2,struct fme * fmep)246 unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
247 {
248 struct bubble *bp;
249 struct arrowlist *ap;
250
251 for (bp = itree_next_bubble(ep, NULL); bp;
252 bp = itree_next_bubble(ep, bp)) {
253 if (bp->t != B_FROM)
254 continue;
255 for (ap = itree_next_arrow(bp, NULL); ap;
256 ap = itree_next_arrow(bp, ap))
257 ap->arrowp->pnode->u.arrow.needed = 0;
258 }
259 }
260
/* forward declarations needed by prune_propagations() below */
static void globals_destructor(void *left, void *right, void *arg);
static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep);
263
264 static boolean_t
prune_propagations(const char * e0class,const struct ipath * e0ipp)265 prune_propagations(const char *e0class, const struct ipath *e0ipp)
266 {
267 char nbuf[100];
268 unsigned long long my_delay = TIMEVAL_EVENTUALLY;
269 extern struct lut *Usednames;
270
271 Nfmep = alloc_fme();
272 Nfmep->id = Nextid;
273 Nfmep->state = FME_NOTHING;
274 Nfmep->eventtree = itree_create_dummy(e0class, e0ipp);
275 if ((Nfmep->e0 =
276 itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
277 itree_free(Nfmep->eventtree);
278 FREE(Nfmep);
279 Nfmep = NULL;
280 return (B_FALSE);
281 }
282 Nfmep->ecurrent = Nfmep->observations = Nfmep->e0;
283 Nfmep->e0->count++;
284
285 (void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id);
286 Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
287 (void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id);
288 Nfmep->Hcallcount =
289 stats_new_counter(nbuf, "calls to hypothesise()", 1);
290 (void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id);
291 Nfmep->Rcallcount = stats_new_counter(nbuf,
292 "calls to requirements_test()", 1);
293 (void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id);
294 Nfmep->Ccallcount =
295 stats_new_counter(nbuf, "calls to causes_test()", 1);
296 (void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id);
297 Nfmep->Ecallcount =
298 stats_new_counter(nbuf, "calls to effects_test()", 1);
299 (void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id);
300 Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
301 (void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id);
302 Nfmep->Marrowcount = stats_new_counter(nbuf,
303 "arrows marked by mark_arrows()", 1);
304 (void) sprintf(nbuf, "fme%d.diags", Nfmep->id);
305 Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);
306
307 Nfmep->peek = 1;
308 lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep);
309 lut_free(Usednames, NULL, NULL);
310 Usednames = NULL;
311 lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep);
312 (void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay);
313 itree_prune(Nfmep->eventtree);
314 lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep);
315
316 stats_delete(Nfmep->Rcount);
317 stats_delete(Nfmep->Hcallcount);
318 stats_delete(Nfmep->Rcallcount);
319 stats_delete(Nfmep->Ccallcount);
320 stats_delete(Nfmep->Ecallcount);
321 stats_delete(Nfmep->Tcallcount);
322 stats_delete(Nfmep->Marrowcount);
323 stats_delete(Nfmep->diags);
324 itree_free(Nfmep->eventtree);
325 lut_free(Nfmep->globals, globals_destructor, NULL);
326 FREE(Nfmep);
327 return (B_TRUE);
328 }
329
/*
 * newfme -- create the FME for an initial ereport e0class@e0ipp.
 *
 *	e0class, e0ipp	class name and instance path of the ereport
 *	hdl, fmcase	fmd handle and the case the FME is tied to
 *	ffep		fmd event, forwarded to publish_undiagnosable()
 *	nvl		ereport payload; only consulted for its
 *			FM_EREPORT_DETECTOR member on the bad-path failure
 *
 * Returns the new FME (already passed through fme_ready()) or NULL.
 * NULL means one of:
 *	- the path is not in the topology; the case is closed silently if
 *	  the class is in Ereportenames_discard, otherwise an
 *	  undiagnosable is published (UD_VAL_BADEVENTPATH)
 *	- prune_propagations() failed (UD_VAL_BADEVENTCLASS)
 *	- itree_create() failed (UD_VAL_INSTFAIL)
 *	- the event is not in the instance tree (UD_VAL_BADEVENTI)
 *
 * While the FME is being built it is held in the global Nfmep.
 */
static struct fme *
newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl,
    fmd_case_t *fmcase, fmd_event_t *ffep, nvlist_t *nvl)
{
	struct cfgdata *cfgdata;
	int init_size;
	extern int alloc_total();
	nvlist_t *detector = NULL;
	char *pathstr;
	char *arg;

	/*
	 * First check if e0ipp is actually in the topology so we can give a
	 * more useful error message.
	 */
	ipathlastcomp(e0ipp);
	pathstr = ipath2str(NULL, e0ipp);
	cfgdata = config_snapshot();
	platform_unit_translate(0, cfgdata->cooked, TOPO_PROP_RESOURCE,
	    &detector, pathstr);
	FREE(pathstr);
	/* this snapshot was only needed for the translation above */
	structconfig_free(cfgdata->cooked);
	config_free(cfgdata);
	if (detector == NULL) {
		/* See if class permits silent discard on unknown component. */
		if (lut_lookup(Ereportenames_discard, (void *)e0class, NULL)) {
			out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
			    "to component path, but silent discard allowed.",
			    e0class);
			fmd_case_close(hdl, fmcase);
		} else {
			Undiag_reason = UD_VAL_BADEVENTPATH;
			/*
			 * Fall back to the detector carried in the ereport.
			 * It is looked up in nvl and is not freed here.
			 */
			(void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
			    &detector);
			arg = ipath2str(e0class, e0ipp);
			publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
			FREE(arg);
		}
		return (NULL);
	}

	/*
	 * Next run a quick first pass of the rules with a dummy config. This
	 * allows us to prune those rules which can't possibly cause this
	 * ereport.
	 */
	if (!prune_propagations(e0class, e0ipp)) {
		/*
		 * The fault class must have been in the rules or we would
		 * not have registered for it (and got a "nosub"), and the
		 * pathname must be in the topology or we would have failed the
		 * previous test. So to get here means the combination of
		 * class and pathname in the ereport must be invalid.
		 */
		Undiag_reason = UD_VAL_BADEVENTCLASS;
		arg = ipath2str(e0class, e0ipp);
		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
		nvlist_free(detector);
		FREE(arg);
		return (NULL);
	}

	/*
	 * Now go ahead and create the real fme using the pruned rules.
	 */
	init_size = alloc_total();
	out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size);
	/* drop the first detector; it is translated again from the new snapshot */
	nvlist_free(detector);
	pathstr = ipath2str(NULL, e0ipp);
	cfgdata = config_snapshot();
	platform_unit_translate(0, cfgdata->cooked, TOPO_PROP_RESOURCE,
	    &detector, pathstr);
	FREE(pathstr);
	platform_save_config(hdl, fmcase);
	out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes",
	    alloc_total() - init_size);

	Nfmep = alloc_fme();

	Nfmep->id = Nextid++;
	/* the FME keeps the cooked config; the cfgdata wrapper is released */
	Nfmep->config = cfgdata->cooked;
	config_free(cfgdata);
	Nfmep->posted_suspects = 0;
	Nfmep->uniqobs = 0;
	Nfmep->state = FME_NOTHING;
	Nfmep->pull = 0ULL;
	Nfmep->overflow = 0;

	Nfmep->fmcase = fmcase;
	Nfmep->hdl = hdl;

	if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) {
		Undiag_reason = UD_VAL_INSTFAIL;
		arg = ipath2str(e0class, e0ipp);
		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
		nvlist_free(detector);
		FREE(arg);
		structconfig_free(Nfmep->config);
		destroy_fme_bufs(Nfmep);
		FREE(Nfmep);
		Nfmep = NULL;
		return (NULL);
	}

	itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);

	if ((Nfmep->e0 =
	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
		Undiag_reason = UD_VAL_BADEVENTI;
		arg = ipath2str(e0class, e0ipp);
		publish_undiagnosable(hdl, ffep, fmcase, detector, arg);
		nvlist_free(detector);
		FREE(arg);
		itree_free(Nfmep->eventtree);
		structconfig_free(Nfmep->config);
		destroy_fme_bufs(Nfmep);
		FREE(Nfmep);
		Nfmep = NULL;
		return (NULL);
	}

	/* detector was only needed for the failure reports above */
	nvlist_free(detector);
	return (fme_ready(Nfmep));
}
454
455 void
fme_fini(void)456 fme_fini(void)
457 {
458 struct fme *sfp, *fp;
459 struct case_list *ucasep, *nextcasep;
460
461 ucasep = Undiagablecaselist;
462 while (ucasep != NULL) {
463 nextcasep = ucasep->next;
464 FREE(ucasep);
465 ucasep = nextcasep;
466 }
467 Undiagablecaselist = NULL;
468
469 /* clean up closed fmes */
470 fp = ClosedFMEs;
471 while (fp != NULL) {
472 sfp = fp->next;
473 destroy_fme(fp);
474 fp = sfp;
475 }
476 ClosedFMEs = NULL;
477
478 fp = FMElist;
479 while (fp != NULL) {
480 sfp = fp->next;
481 destroy_fme(fp);
482 fp = sfp;
483 }
484 FMElist = EFMElist = NULL;
485
486 /* if we were in the middle of creating an fme, free it now */
487 if (Nfmep) {
488 destroy_fme(Nfmep);
489 Nfmep = NULL;
490 }
491 }
492
/*
 * Allocated space for a buffer name.  The longest name formatted into
 * it is "observed%d.nvp": 12 characters of fixed text plus the
 * terminating NUL leave 7 for the number, so 20 bytes allows for
 * a ridiculous 9,999,999 unique observations.
 */
#define	OBBUFNMSZ 20
498
499 /*
500 * serialize_observation
501 *
502 * Create a recoverable version of the current observation
503 * (f->ecurrent). We keep a serialized version of each unique
504 * observation in order that we may resume correctly the fme in the
505 * correct state if eft or fmd crashes and we're restarted.
506 */
507 static void
serialize_observation(struct fme * fp,const char * cls,const struct ipath * ipp)508 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
509 {
510 size_t pkdlen;
511 char tmpbuf[OBBUFNMSZ];
512 char *pkd = NULL;
513 char *estr;
514
515 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
516 estr = ipath2str(cls, ipp);
517 fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
518 fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
519 strlen(estr) + 1);
520 FREE(estr);
521
522 if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
523 (void) snprintf(tmpbuf,
524 OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
525 if (nvlist_xpack(fp->ecurrent->nvp,
526 &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
527 out(O_DIE|O_SYS, "pack of observed nvl failed");
528 fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
529 fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
530 FREE(pkd);
531 }
532
533 fp->uniqobs++;
534 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
535 sizeof (fp->uniqobs));
536 }
537
538 /*
539 * init_fme_bufs -- We keep several bits of state about an fme for
540 * use if eft or fmd crashes and we're restarted.
541 */
542 static void
init_fme_bufs(struct fme * fp)543 init_fme_bufs(struct fme *fp)
544 {
545 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
546 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
547 sizeof (fp->pull));
548
549 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
550 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
551 sizeof (fp->id));
552
553 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
554 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
555 sizeof (fp->uniqobs));
556
557 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
558 sizeof (fp->posted_suspects));
559 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
560 (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
561 }
562
563 static void
destroy_fme_bufs(struct fme * fp)564 destroy_fme_bufs(struct fme *fp)
565 {
566 char tmpbuf[OBBUFNMSZ];
567 int o;
568
569 platform_restore_config(fp->hdl, fp->fmcase);
570 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
571 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
572 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
573 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
574 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
575 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);
576
577 for (o = 0; o < fp->uniqobs; o++) {
578 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
579 fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
580 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
581 fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
582 }
583 }
584
585 /*
586 * reconstitute_observations -- convert a case's serialized observations
587 * back into struct events. Returns zero if all observations are
588 * successfully reconstituted.
589 */
590 static int
reconstitute_observations(struct fme * fmep)591 reconstitute_observations(struct fme *fmep)
592 {
593 struct event *ep;
594 struct node *epnamenp = NULL;
595 size_t pkdlen;
596 char *pkd = NULL;
597 char *tmpbuf = alloca(OBBUFNMSZ);
598 char *sepptr;
599 char *estr;
600 int ocnt;
601 int elen;
602
603 for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) {
604 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt);
605 elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
606 if (elen == 0) {
607 out(O_ALTFP,
608 "reconstitute_observation: no %s buffer found.",
609 tmpbuf);
610 Undiag_reason = UD_VAL_MISSINGOBS;
611 break;
612 }
613
614 estr = MALLOC(elen);
615 fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
616 sepptr = strchr(estr, '@');
617 if (sepptr == NULL) {
618 out(O_ALTFP,
619 "reconstitute_observation: %s: "
620 "missing @ separator in %s.",
621 tmpbuf, estr);
622 Undiag_reason = UD_VAL_MISSINGPATH;
623 FREE(estr);
624 break;
625 }
626
627 *sepptr = '\0';
628 if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
629 out(O_ALTFP,
630 "reconstitute_observation: %s: "
631 "trouble converting path string \"%s\" "
632 "to internal representation.",
633 tmpbuf, sepptr + 1);
634 Undiag_reason = UD_VAL_MISSINGPATH;
635 FREE(estr);
636 break;
637 }
638
639 /* construct the event */
640 ep = itree_lookup(fmep->eventtree,
641 stable(estr), ipath(epnamenp));
642 if (ep == NULL) {
643 out(O_ALTFP,
644 "reconstitute_observation: %s: "
645 "lookup of \"%s\" in itree failed.",
646 tmpbuf, ipath2str(estr, ipath(epnamenp)));
647 Undiag_reason = UD_VAL_BADOBS;
648 tree_free(epnamenp);
649 FREE(estr);
650 break;
651 }
652 tree_free(epnamenp);
653
654 /*
655 * We may or may not have a saved nvlist for the observation
656 */
657 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt);
658 pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
659 if (pkdlen != 0) {
660 pkd = MALLOC(pkdlen);
661 fmd_buf_read(fmep->hdl,
662 fmep->fmcase, tmpbuf, pkd, pkdlen);
663 ASSERT(ep->nvp == NULL);
664 if (nvlist_xunpack(pkd,
665 pkdlen, &ep->nvp, &Eft_nv_hdl) != 0)
666 out(O_DIE|O_SYS, "pack of observed nvl failed");
667 FREE(pkd);
668 }
669
670 if (ocnt == 0)
671 fmep->e0 = ep;
672
673 FREE(estr);
674 fmep->ecurrent = ep;
675 ep->count++;
676
677 /* link it into list of observations seen */
678 ep->observations = fmep->observations;
679 fmep->observations = ep;
680 }
681
682 if (ocnt == fmep->uniqobs) {
683 (void) fme_ready(fmep);
684 return (0);
685 }
686
687 return (1);
688 }
689
690 /*
691 * restart_fme -- called during eft initialization. Reconstitutes
692 * an in-progress fme.
693 */
694 void
fme_restart(fmd_hdl_t * hdl,fmd_case_t * inprogress)695 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress)
696 {
697 nvlist_t *defect;
698 struct case_list *bad;
699 struct fme *fmep;
700 struct cfgdata *cfgdata;
701 size_t rawsz;
702 struct event *ep;
703 char *tmpbuf = alloca(OBBUFNMSZ);
704 char *sepptr;
705 char *estr;
706 int elen;
707 struct node *epnamenp = NULL;
708 int init_size;
709 extern int alloc_total();
710 char *reason;
711
712 /*
713 * ignore solved or closed cases
714 */
715 if (fmd_case_solved(hdl, inprogress) ||
716 fmd_case_closed(hdl, inprogress))
717 return;
718
719 fmep = alloc_fme();
720 fmep->fmcase = inprogress;
721 fmep->hdl = hdl;
722
723 if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) {
724 out(O_ALTFP, "restart_fme: no saved posted status");
725 Undiag_reason = UD_VAL_MISSINGINFO;
726 goto badcase;
727 } else {
728 fmd_buf_read(hdl, inprogress, WOBUF_POSTD,
729 (void *)&fmep->posted_suspects,
730 sizeof (fmep->posted_suspects));
731 }
732
733 if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) {
734 out(O_ALTFP, "restart_fme: no saved id");
735 Undiag_reason = UD_VAL_MISSINGINFO;
736 goto badcase;
737 } else {
738 fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id,
739 sizeof (fmep->id));
740 }
741 if (Nextid <= fmep->id)
742 Nextid = fmep->id + 1;
743
744 out(O_ALTFP, "Replay FME %d", fmep->id);
745
746 if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) {
747 out(O_ALTFP, "restart_fme: No config data");
748 Undiag_reason = UD_VAL_MISSINGINFO;
749 goto badcase;
750 }
751 fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz,
752 sizeof (size_t));
753
754 if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) {
755 out(O_ALTFP, "restart_fme: No event zero");
756 Undiag_reason = UD_VAL_MISSINGZERO;
757 goto badcase;
758 }
759
760 if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
761 out(O_ALTFP, "restart_fme: no saved wait time");
762 Undiag_reason = UD_VAL_MISSINGINFO;
763 goto badcase;
764 } else {
765 fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
766 sizeof (fmep->pull));
767 }
768
769 if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
770 out(O_ALTFP, "restart_fme: no count of observations");
771 Undiag_reason = UD_VAL_MISSINGINFO;
772 goto badcase;
773 } else {
774 fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
775 (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
776 }
777
778 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed0");
779 elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf);
780 if (elen == 0) {
781 out(O_ALTFP, "reconstitute_observation: no %s buffer found.",
782 tmpbuf);
783 Undiag_reason = UD_VAL_MISSINGOBS;
784 goto badcase;
785 }
786 estr = MALLOC(elen);
787 fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen);
788 sepptr = strchr(estr, '@');
789 if (sepptr == NULL) {
790 out(O_ALTFP, "reconstitute_observation: %s: "
791 "missing @ separator in %s.",
792 tmpbuf, estr);
793 Undiag_reason = UD_VAL_MISSINGPATH;
794 FREE(estr);
795 goto badcase;
796 }
797 *sepptr = '\0';
798 if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) {
799 out(O_ALTFP, "reconstitute_observation: %s: "
800 "trouble converting path string \"%s\" "
801 "to internal representation.", tmpbuf, sepptr + 1);
802 Undiag_reason = UD_VAL_MISSINGPATH;
803 FREE(estr);
804 goto badcase;
805 }
806 (void) prune_propagations(stable(estr), ipath(epnamenp));
807 tree_free(epnamenp);
808 FREE(estr);
809
810 init_size = alloc_total();
811 out(O_ALTFP|O_STAMP, "start config_restore using %d bytes", init_size);
812 cfgdata = MALLOC(sizeof (struct cfgdata));
813 cfgdata->cooked = NULL;
814 cfgdata->devcache = NULL;
815 cfgdata->devidcache = NULL;
816 cfgdata->tpcache = NULL;
817 cfgdata->cpucache = NULL;
818 cfgdata->raw_refcnt = 1;
819
820 if (rawsz > 0) {
821 if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) {
822 out(O_ALTFP, "restart_fme: Config data size mismatch");
823 Undiag_reason = UD_VAL_CFGMISMATCH;
824 goto badcase;
825 }
826 cfgdata->begin = MALLOC(rawsz);
827 cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz;
828 fmd_buf_read(hdl,
829 inprogress, WOBUF_CFG, cfgdata->begin, rawsz);
830 } else {
831 cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL;
832 }
833
834 config_cook(cfgdata);
835 fmep->config = cfgdata->cooked;
836 config_free(cfgdata);
837 out(O_ALTFP|O_STAMP, "config_restore added %d bytes",
838 alloc_total() - init_size);
839
840 if ((fmep->eventtree = itree_create(fmep->config)) == NULL) {
841 /* case not properly saved or irretrievable */
842 out(O_ALTFP, "restart_fme: NULL instance tree");
843 Undiag_reason = UD_VAL_INSTFAIL;
844 goto badcase;
845 }
846
847 itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree);
848
849 if (reconstitute_observations(fmep) != 0)
850 goto badcase;
851
852 out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id);
853 for (ep = fmep->observations; ep; ep = ep->observations) {
854 out(O_ALTFP|O_NONL, " ");
855 itree_pevent_brief(O_ALTFP|O_NONL, ep);
856 }
857 out(O_ALTFP, NULL);
858
859 Open_fme_count++;
860
861 /* give the diagnosis algorithm a shot at the new FME state */
862 fme_eval(fmep, fmep->e0r);
863 return;
864
865 badcase:
866 if (fmep->eventtree != NULL)
867 itree_free(fmep->eventtree);
868 if (fmep->config)
869 structconfig_free(fmep->config);
870 destroy_fme_bufs(fmep);
871 FREE(fmep);
872
873 /*
874 * Since we're unable to restart the case, add it to the undiagable
875 * list and solve and close it as appropriate.
876 */
877 bad = MALLOC(sizeof (struct case_list));
878 bad->next = NULL;
879
880 if (Undiagablecaselist != NULL)
881 bad->next = Undiagablecaselist;
882 Undiagablecaselist = bad;
883 bad->fmcase = inprogress;
884
885 out(O_ALTFP|O_NONL, "[case %s (unable to restart), ",
886 fmd_case_uuid(hdl, bad->fmcase));
887
888 if (fmd_case_solved(hdl, bad->fmcase)) {
889 out(O_ALTFP|O_NONL, "already solved, ");
890 } else {
891 out(O_ALTFP|O_NONL, "solving, ");
892 defect = fmd_nvl_create_fault(hdl,
893 undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
894 reason = undiag_2reason_str(Undiag_reason, NULL);
895 (void) nvlist_add_string(defect, UNDIAG_REASON, reason);
896 FREE(reason);
897 fmd_case_add_suspect(hdl, bad->fmcase, defect);
898 fmd_case_solve(hdl, bad->fmcase);
899 Undiag_reason = UD_VAL_UNKNOWN;
900 }
901
902 if (fmd_case_closed(hdl, bad->fmcase)) {
903 out(O_ALTFP, "already closed ]");
904 } else {
905 out(O_ALTFP, "closing ]");
906 fmd_case_close(hdl, bad->fmcase);
907 }
908 }
909
910 /*ARGSUSED*/
911 static void
globals_destructor(void * left,void * right,void * arg)912 globals_destructor(void *left, void *right, void *arg)
913 {
914 struct evalue *evp = (struct evalue *)right;
915 if (evp->t == NODEPTR)
916 tree_free((struct node *)(uintptr_t)evp->v);
917 evp->v = (uintptr_t)NULL;
918 FREE(evp);
919 }
920
921 void
destroy_fme(struct fme * f)922 destroy_fme(struct fme *f)
923 {
924 stats_delete(f->Rcount);
925 stats_delete(f->Hcallcount);
926 stats_delete(f->Rcallcount);
927 stats_delete(f->Ccallcount);
928 stats_delete(f->Ecallcount);
929 stats_delete(f->Tcallcount);
930 stats_delete(f->Marrowcount);
931 stats_delete(f->diags);
932
933 if (f->eventtree != NULL)
934 itree_free(f->eventtree);
935 if (f->config)
936 structconfig_free(f->config);
937 lut_free(f->globals, globals_destructor, NULL);
938 FREE(f);
939 }
940
941 static const char *
fme_state2str(enum fme_state s)942 fme_state2str(enum fme_state s)
943 {
944 switch (s) {
945 case FME_NOTHING: return ("NOTHING");
946 case FME_WAIT: return ("WAIT");
947 case FME_CREDIBLE: return ("CREDIBLE");
948 case FME_DISPROVED: return ("DISPROVED");
949 case FME_DEFERRED: return ("DEFERRED");
950 default: return ("UNKNOWN");
951 }
952 }
953
954 static int
is_problem(enum nametype t)955 is_problem(enum nametype t)
956 {
957 return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
958 }
959
960 static int
is_defect(enum nametype t)961 is_defect(enum nametype t)
962 {
963 return (t == N_DEFECT);
964 }
965
966 static int
is_upset(enum nametype t)967 is_upset(enum nametype t)
968 {
969 return (t == N_UPSET);
970 }
971
972 static void
fme_print(int flags,struct fme * fmep)973 fme_print(int flags, struct fme *fmep)
974 {
975 struct event *ep;
976
977 out(flags, "Fault Management Exercise %d", fmep->id);
978 out(flags, "\t State: %s", fme_state2str(fmep->state));
979 out(flags|O_NONL, "\t Start time: ");
980 ptree_timeval(flags|O_NONL, &fmep->ull);
981 out(flags, NULL);
982 if (fmep->wull) {
983 out(flags|O_NONL, "\t Wait time: ");
984 ptree_timeval(flags|O_NONL, &fmep->wull);
985 out(flags, NULL);
986 }
987 out(flags|O_NONL, "\t E0: ");
988 if (fmep->e0)
989 itree_pevent_brief(flags|O_NONL, fmep->e0);
990 else
991 out(flags|O_NONL, "NULL");
992 out(flags, NULL);
993 out(flags|O_NONL, "\tObservations:");
994 for (ep = fmep->observations; ep; ep = ep->observations) {
995 out(flags|O_NONL, " ");
996 itree_pevent_brief(flags|O_NONL, ep);
997 }
998 out(flags, NULL);
999 out(flags|O_NONL, "\tSuspect list:");
1000 for (ep = fmep->suspects; ep; ep = ep->suspects) {
1001 out(flags|O_NONL, " ");
1002 itree_pevent_brief(flags|O_NONL, ep);
1003 }
1004 out(flags, NULL);
1005 if (fmep->eventtree != NULL) {
1006 out(flags|O_VERB2, "\t Tree:");
1007 itree_ptree(flags|O_VERB2, fmep->eventtree);
1008 }
1009 }
1010
1011 static struct node *
pathstring2epnamenp(char * path)1012 pathstring2epnamenp(char *path)
1013 {
1014 char *sep = "/";
1015 struct node *ret;
1016 char *ptr;
1017
1018 if ((ptr = strtok(path, sep)) == NULL)
1019 out(O_DIE, "pathstring2epnamenp: invalid empty class");
1020
1021 ret = tree_iname(stable(ptr), NULL, 0);
1022
1023 while ((ptr = strtok(NULL, sep)) != NULL)
1024 ret = tree_name_append(ret,
1025 tree_iname(stable(ptr), NULL, 0));
1026
1027 return (ret);
1028 }
1029
1030 /*
1031 * for a given upset sp, increment the corresponding SERD engine. if the
1032 * SERD engine trips, return the ename and ipp of the resulting ereport.
1033 * returns true if engine tripped and *enamep and *ippp were filled in.
1034 */
1035 static int
serd_eval(struct fme * fmep,fmd_hdl_t * hdl,fmd_event_t * ffep,fmd_case_t * fmcase,struct event * sp,const char ** enamep,const struct ipath ** ippp)1036 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep,
1037 fmd_case_t *fmcase, struct event *sp, const char **enamep,
1038 const struct ipath **ippp)
1039 {
1040 struct node *serdinst;
1041 char *serdname;
1042 char *serdresource;
1043 char *serdclass;
1044 struct node *nid;
1045 struct serd_entry *newentp;
1046 int i, serdn = -1, serdincrement = 1, len = 0;
1047 char *serdsuffix = NULL, *serdt = NULL;
1048 struct evalue *ep;
1049
1050 ASSERT(sp->t == N_UPSET);
1051 ASSERT(ffep != NULL);
1052
1053 if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1054 (void *)"n", (lut_cmp)strcmp)) != NULL) {
1055 ASSERT(ep->t == UINT64);
1056 serdn = (int)ep->v;
1057 }
1058 if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1059 (void *)"t", (lut_cmp)strcmp)) != NULL) {
1060 ASSERT(ep->t == STRING);
1061 serdt = (char *)(uintptr_t)ep->v;
1062 }
1063 if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1064 (void *)"suffix", (lut_cmp)strcmp)) != NULL) {
1065 ASSERT(ep->t == STRING);
1066 serdsuffix = (char *)(uintptr_t)ep->v;
1067 }
1068 if ((ep = (struct evalue *)lut_lookup(sp->serdprops,
1069 (void *)"increment", (lut_cmp)strcmp)) != NULL) {
1070 ASSERT(ep->t == UINT64);
1071 serdincrement = (int)ep->v;
1072 }
1073
1074 /*
1075 * obtain instanced SERD engine from the upset sp. from this
1076 * derive serdname, the string used to identify the SERD engine.
1077 */
1078 serdinst = eventprop_lookup(sp, L_engine);
1079
1080 if (serdinst == NULL)
1081 return (-1);
1082
1083 len = strlen(serdinst->u.stmt.np->u.event.ename->u.name.s) + 1;
1084 if (serdsuffix != NULL)
1085 len += strlen(serdsuffix);
1086 serdclass = MALLOC(len);
1087 if (serdsuffix != NULL)
1088 (void) snprintf(serdclass, len, "%s%s",
1089 serdinst->u.stmt.np->u.event.ename->u.name.s, serdsuffix);
1090 else
1091 (void) snprintf(serdclass, len, "%s",
1092 serdinst->u.stmt.np->u.event.ename->u.name.s);
1093 serdresource = ipath2str(NULL,
1094 ipath(serdinst->u.stmt.np->u.event.epname));
1095 len += strlen(serdresource) + 1;
1096 serdname = MALLOC(len);
1097 (void) snprintf(serdname, len, "%s@%s", serdclass, serdresource);
1098 FREE(serdresource);
1099
1100 /* handle serd engine "id" property, if there is one */
1101 if ((nid =
1102 lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) {
1103 struct evalue *gval;
1104 char suffixbuf[200];
1105 char *suffix;
1106 char *nserdname;
1107 size_t nname;
1108
1109 out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname);
1110 ptree_name_iter(O_ALTFP|O_NONL, nid);
1111
1112 ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t));
1113
1114 if ((gval = lut_lookup(fmep->globals,
1115 (void *)nid->u.globid.s, NULL)) == NULL) {
1116 out(O_ALTFP, " undefined");
1117 } else if (gval->t == UINT64) {
1118 out(O_ALTFP, " %llu", gval->v);
1119 (void) sprintf(suffixbuf, "%llu", gval->v);
1120 suffix = suffixbuf;
1121 } else {
1122 out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v);
1123 suffix = (char *)(uintptr_t)gval->v;
1124 }
1125
1126 nname = strlen(serdname) + strlen(suffix) + 2;
1127 nserdname = MALLOC(nname);
1128 (void) snprintf(nserdname, nname, "%s:%s", serdname, suffix);
1129 FREE(serdname);
1130 serdname = nserdname;
1131 }
1132
1133 /*
1134 * if the engine is empty, and we have an override for n/t then
1135 * destroy and recreate it.
1136 */
1137 if ((serdn != -1 || serdt != NULL) && fmd_serd_exists(hdl, serdname) &&
1138 fmd_serd_empty(hdl, serdname))
1139 fmd_serd_destroy(hdl, serdname);
1140
1141 if (!fmd_serd_exists(hdl, serdname)) {
1142 struct node *nN, *nT;
1143 const char *s;
1144 struct node *nodep;
1145 struct config *cp;
1146 char *path;
1147 uint_t nval;
1148 hrtime_t tval;
1149 int i;
1150 char *ptr;
1151 int got_n_override = 0, got_t_override = 0;
1152
1153 /* no SERD engine yet, so create it */
1154 nodep = serdinst->u.stmt.np->u.event.epname;
1155 path = ipath2str(NULL, ipath(nodep));
1156 cp = config_lookup(fmep->config, path, 0);
1157 FREE((void *)path);
1158
1159 /*
1160 * We allow serd paramaters to be overridden, either from
1161 * eft.conf file values (if Serd_Override is set) or from
1162 * driver properties (for "serd.io.device" engines).
1163 */
1164 if (Serd_Override != NULL) {
1165 char *save_ptr, *ptr1, *ptr2, *ptr3;
1166 ptr3 = save_ptr = STRDUP(Serd_Override);
1167 while (*ptr3 != '\0') {
1168 ptr1 = strchr(ptr3, ',');
1169 *ptr1 = '\0';
1170 if (strcmp(ptr3, serdclass) == 0) {
1171 ptr2 = strchr(ptr1 + 1, ',');
1172 *ptr2 = '\0';
1173 nval = atoi(ptr1 + 1);
1174 out(O_ALTFP, "serd override %s_n %d",
1175 serdclass, nval);
1176 ptr3 = strchr(ptr2 + 1, ' ');
1177 if (ptr3)
1178 *ptr3 = '\0';
1179 ptr = STRDUP(ptr2 + 1);
1180 out(O_ALTFP, "serd override %s_t %s",
1181 serdclass, ptr);
1182 got_n_override = 1;
1183 got_t_override = 1;
1184 break;
1185 } else {
1186 ptr2 = strchr(ptr1 + 1, ',');
1187 ptr3 = strchr(ptr2 + 1, ' ');
1188 if (ptr3 == NULL)
1189 break;
1190 }
1191 ptr3++;
1192 }
1193 FREE(save_ptr);
1194 }
1195
1196 if (cp && got_n_override == 0) {
1197 /*
1198 * convert serd engine class into property name
1199 */
1200 char *prop_name = MALLOC(strlen(serdclass) + 3);
1201 for (i = 0; i < strlen(serdclass); i++) {
1202 if (serdclass[i] == '.')
1203 prop_name[i] = '_';
1204 else
1205 prop_name[i] = serdclass[i];
1206 }
1207 prop_name[i++] = '_';
1208 prop_name[i++] = 'n';
1209 prop_name[i] = '\0';
1210 if (s = config_getprop(cp, prop_name)) {
1211 nval = atoi(s);
1212 out(O_ALTFP, "serd override %s_n %s",
1213 serdclass, s);
1214 got_n_override = 1;
1215 }
1216 prop_name[i - 1] = 't';
1217 if (s = config_getprop(cp, prop_name)) {
1218 ptr = STRDUP(s);
1219 out(O_ALTFP, "serd override %s_t %s",
1220 serdclass, s);
1221 got_t_override = 1;
1222 }
1223 FREE(prop_name);
1224 }
1225
1226 if (serdn != -1 && got_n_override == 0) {
1227 nval = serdn;
1228 out(O_ALTFP, "serd override %s_n %d", serdclass, serdn);
1229 got_n_override = 1;
1230 }
1231 if (serdt != NULL && got_t_override == 0) {
1232 ptr = STRDUP(serdt);
1233 out(O_ALTFP, "serd override %s_t %s", serdclass, serdt);
1234 got_t_override = 1;
1235 }
1236
1237 if (!got_n_override) {
1238 nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N,
1239 NULL);
1240 ASSERT(nN->t == T_NUM);
1241 nval = (uint_t)nN->u.ull;
1242 }
1243 if (!got_t_override) {
1244 nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T,
1245 NULL);
1246 ASSERT(nT->t == T_TIMEVAL);
1247 tval = (hrtime_t)nT->u.ull;
1248 } else {
1249 const unsigned long long *ullp;
1250 const char *suffix;
1251 int len;
1252
1253 len = strspn(ptr, "0123456789");
1254 suffix = stable(&ptr[len]);
1255 ullp = (unsigned long long *)lut_lookup(Timesuffixlut,
1256 (void *)suffix, NULL);
1257 ptr[len] = '\0';
1258 tval = strtoull(ptr, NULL, 0) * (ullp ? *ullp : 1ll);
1259 FREE(ptr);
1260 }
1261 fmd_serd_create(hdl, serdname, nval, tval);
1262 }
1263
1264 newentp = MALLOC(sizeof (*newentp));
1265 newentp->ename = stable(serdclass);
1266 FREE(serdclass);
1267 newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname);
1268 newentp->hdl = hdl;
1269 if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) {
1270 SerdEngines = lut_add(SerdEngines, (void *)newentp,
1271 (void *)newentp, (lut_cmp)serd_cmp);
1272 Serd_need_save = 1;
1273 serd_save();
1274 } else {
1275 FREE(newentp);
1276 }
1277
1278
1279 /*
1280 * increment SERD engine. if engine fires, reset serd
1281 * engine and return trip_strcode if required.
1282 */
1283 for (i = 0; i < serdincrement; i++) {
1284 if (fmd_serd_record(hdl, serdname, ffep)) {
1285 fmd_case_add_serd(hdl, fmcase, serdname);
1286 fmd_serd_reset(hdl, serdname);
1287
1288 if (ippp) {
1289 struct node *tripinst =
1290 lut_lookup(serdinst->u.stmt.lutp,
1291 (void *)L_trip, NULL);
1292 ASSERT(tripinst != NULL);
1293 *enamep = tripinst->u.event.ename->u.name.s;
1294 *ippp = ipath(tripinst->u.event.epname);
1295 out(O_ALTFP|O_NONL,
1296 "[engine fired: %s, sending: ", serdname);
1297 ipath_print(O_ALTFP|O_NONL, *enamep, *ippp);
1298 out(O_ALTFP, "]");
1299 } else {
1300 out(O_ALTFP, "[engine fired: %s, no trip]",
1301 serdname);
1302 }
1303 FREE(serdname);
1304 return (1);
1305 }
1306 }
1307
1308 FREE(serdname);
1309 return (0);
1310 }
1311
/*
 * search a suspect list for upsets.  feed each upset to serd_eval() and
 * build up tripped[], an array of ereports produced by the firing of
 * any SERD engines.  then feed each ereport back into
 * fme_receive_report().
 *
 * For every tripped ereport a duplicate FME and case are created for the
 * original e0, the original observations are replayed into it, and the
 * trip ereport is added.  If the trip ereport is missing from the new
 * instance tree, or a peek-mode hypothesise() disproves the combination,
 * the duplicate's tree, config and buffers are released, its case is
 * closed, and the trip ereport is handed to fme_receive_report() alone.
 *
 * returns ntrip, the number of these ereports produced.
 */
static int
upsets_eval(struct fme *fmep, fmd_event_t *ffep)
{
	/* we build an array of tripped ereports that we send ourselves */
	struct {
		const char *ename;
		const struct ipath *ipp;
	} *tripped;
	struct event *sp;
	int ntrip, nupset, i;

	/*
	 * count the number of upsets to determine the upper limit on
	 * expected trip ereport strings.  remember that one upset can
	 * lead to at most one ereport.
	 */
	nupset = 0;
	for (sp = fmep->suspects; sp; sp = sp->suspects) {
		if (sp->t == N_UPSET)
			nupset++;
	}

	if (nupset == 0)
		return (0);

	/*
	 * get to this point if we have upsets and expect some trip
	 * ereports
	 */
	tripped = alloca(sizeof (*tripped) * nupset);
	bzero((void *)tripped, sizeof (*tripped) * nupset);

	/*
	 * Only a return of exactly 1 from serd_eval() counts as a trip;
	 * the slot at tripped[ntrip] is reused until one does.
	 */
	ntrip = 0;
	for (sp = fmep->suspects; sp; sp = sp->suspects)
		if (sp->t == N_UPSET &&
		    serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp,
		    &tripped[ntrip].ename, &tripped[ntrip].ipp) == 1)
			ntrip++;

	for (i = 0; i < ntrip; i++) {
		struct event *ep, *nep;
		struct fme *nfmep;
		fmd_case_t *fmcase;
		const struct ipath *ipp;
		const char *eventstring;
		int prev_verbose;
		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
		enum fme_state state;

		/*
		 * First try and evaluate a case with the trip ereport plus
		 * all the other ereports that cause the trip. If that fails
		 * to evaluate then try again with just this ereport on its own.
		 */
		out(O_ALTFP|O_NONL, "fme_receive_report_serd: ");
		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
		out(O_ALTFP|O_STAMP, NULL);
		/* the duplicate FME is rooted at the original FME's e0 */
		ep = fmep->e0;
		eventstring = ep->enode->u.event.ename->u.name.s;
		ipp = ep->ipp;

		/*
		 * create a duplicate fme and case
		 */
		fmcase = fmd_case_open(fmep->hdl, NULL);
		out(O_ALTFP|O_NONL, "duplicate fme for event [");
		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
		out(O_ALTFP, " ]");

		if ((nfmep = newfme(eventstring, ipp, fmep->hdl,
		    fmcase, ffep, ep->nvp)) == NULL) {
			out(O_ALTFP|O_NONL, "[");
			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
			out(O_ALTFP, " CANNOT DIAGNOSE]");
			continue;
		}

		Open_fme_count++;
		nfmep->pull = fmep->pull;
		init_fme_bufs(nfmep);
		out(O_ALTFP|O_NONL, "[");
		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
		out(O_ALTFP, " created FME%d, case %s]", nfmep->id,
		    fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
		if (ffep) {
			fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep);
			fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep);
			nfmep->e0r = ffep;
		}

		/*
		 * add the original ereports
		 */
		for (ep = fmep->observations; ep; ep = ep->observations) {
			eventstring = ep->enode->u.event.ename->u.name.s;
			ipp = ep->ipp;
			out(O_ALTFP|O_NONL, "adding event [");
			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
			out(O_ALTFP, " ]");
			nep = itree_lookup(nfmep->eventtree, eventstring, ipp);
			/* first sighting in the new FME: link and copy payload */
			if (nep->count++ == 0) {
				nep->observations = nfmep->observations;
				nfmep->observations = nep;
				serialize_observation(nfmep, eventstring, ipp);
				nep->nvp = evnv_dupnvl(ep->nvp);
			}
			/* ffep itself was already added to the case above */
			if (ep->ffep && ep->ffep != ffep)
				fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase,
				    ep->ffep);
			stats_counter_bump(nfmep->Rcount);
		}

		/*
		 * add the serd trigger ereport
		 */
		if ((ep = itree_lookup(nfmep->eventtree, tripped[i].ename,
		    tripped[i].ipp)) == NULL) {
			/*
			 * The trigger ereport is not in the instance tree. It
			 * was presumably removed by prune_propagations() as
			 * this combination of events is not present in the
			 * rules.
			 */
			out(O_ALTFP, "upsets_eval: e0 not in instance tree");
			Undiag_reason = UD_VAL_BADEVENTI;
			goto retry_lone_ereport;
		}
		out(O_ALTFP|O_NONL, "adding event [");
		ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
		out(O_ALTFP, " ]");
		/* the trip ereport is linked in with no payload, count of 1 */
		nfmep->ecurrent = ep;
		ep->nvp = NULL;
		ep->count = 1;
		ep->observations = nfmep->observations;
		nfmep->observations = ep;

		/*
		 * just peek first.
		 */
		nfmep->peek = 1;
		/* silence verbose output during the peek unless Debug is set */
		prev_verbose = Verbose;
		if (Debug == 0)
			Verbose = 0;
		lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep);
		state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay);
		nfmep->peek = 0;
		Verbose = prev_verbose;
		if (state == FME_DISPROVED) {
			out(O_ALTFP, "upsets_eval: hypothesis disproved");
			Undiag_reason = UD_VAL_UNSOLVD;
retry_lone_ereport:
			/*
			 * However the trigger ereport on its own might be
			 * diagnosable, so check for that. Undo the new fme
			 * and case we just created and call fme_receive_report.
			 */
			out(O_ALTFP|O_NONL, "[");
			ipath_print(O_ALTFP|O_NONL, tripped[i].ename,
			    tripped[i].ipp);
			out(O_ALTFP, " retrying with just trigger ereport]");
			itree_free(nfmep->eventtree);
			nfmep->eventtree = NULL;
			structconfig_free(nfmep->config);
			nfmep->config = NULL;
			destroy_fme_bufs(nfmep);
			fmd_case_close(nfmep->hdl, nfmep->fmcase);
			fme_receive_report(fmep->hdl, ffep,
			    tripped[i].ename, tripped[i].ipp, NULL);
			continue;
		}

		/*
		 * and evaluate
		 */
		serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp);
		fme_eval(nfmep, ffep);
	}

	return (ntrip);
}
1500
1501 /*
1502 * fme_receive_external_report -- call when an external ereport comes in
1503 *
1504 * this routine just converts the relevant information from the ereport
1505 * into a format used internally and passes it on to fme_receive_report().
1506 */
1507 void
fme_receive_external_report(fmd_hdl_t * hdl,fmd_event_t * ffep,nvlist_t * nvl,const char * class)1508 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1509 const char *class)
1510 {
1511 struct node *epnamenp;
1512 fmd_case_t *fmcase;
1513 const struct ipath *ipp;
1514 nvlist_t *detector = NULL;
1515
1516 class = stable(class);
1517
1518 /* Get the component path from the ereport */
1519 epnamenp = platform_getpath(nvl);
1520
1521 /* See if we ended up without a path. */
1522 if (epnamenp == NULL) {
1523 /* See if class permits silent discard on unknown component. */
1524 if (lut_lookup(Ereportenames_discard, (void *)class, NULL)) {
1525 out(O_ALTFP|O_VERB2, "Unable to map \"%s\" ereport "
1526 "to component path, but silent discard allowed.",
1527 class);
1528 } else {
1529 /*
1530 * XFILE: Failure to find a component is bad unless
1531 * 'discard_if_config_unknown=1' was specified in the
1532 * ereport definition. Indicate undiagnosable.
1533 */
1534 Undiag_reason = UD_VAL_NOPATH;
1535 fmcase = fmd_case_open(hdl, NULL);
1536
1537 /*
1538 * We don't have a component path here (which means that
1539 * the detector was not in hc-scheme and couldn't be
1540 * converted to hc-scheme. Report the raw detector as
1541 * the suspect resource if there is one.
1542 */
1543 (void) nvlist_lookup_nvlist(nvl, FM_EREPORT_DETECTOR,
1544 &detector);
1545 publish_undiagnosable(hdl, ffep, fmcase, detector,
1546 (char *)class);
1547 }
1548 return;
1549 }
1550
1551 ipp = ipath(epnamenp);
1552 tree_free(epnamenp);
1553 fme_receive_report(hdl, ffep, class, ipp, nvl);
1554 }
1555
1556 /*ARGSUSED*/
1557 void
fme_receive_repair_list(fmd_hdl_t * hdl,fmd_event_t * ffep,nvlist_t * nvl,const char * eventstring)1558 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
1559 const char *eventstring)
1560 {
1561 char *uuid;
1562 nvlist_t **nva;
1563 uint_t nvc;
1564 const struct ipath *ipp;
1565
1566 if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
1567 nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
1568 &nva, &nvc) != 0) {
1569 out(O_ALTFP, "No uuid or fault list for list.repaired event");
1570 return;
1571 }
1572
1573 out(O_ALTFP, "Processing list.repaired from case %s", uuid);
1574
1575 while (nvc-- != 0) {
1576 /*
1577 * Reset any istat or serd engine associated with this path.
1578 */
1579 char *path;
1580
1581 if ((ipp = platform_fault2ipath(*nva++)) == NULL)
1582 continue;
1583
1584 path = ipath2str(NULL, ipp);
1585 out(O_ALTFP, "fme_receive_repair_list: resetting state for %s",
1586 path);
1587 FREE(path);
1588
1589 lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp);
1590 istat_save();
1591
1592 lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp);
1593 serd_save();
1594 }
1595 }
1596
1597 /*ARGSUSED*/
1598 void
fme_receive_topology_change(void)1599 fme_receive_topology_change(void)
1600 {
1601 lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL);
1602 istat_save();
1603
1604 lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL);
1605 serd_save();
1606 }
1607
1608 static int mark_arrows(struct fme *fmep, struct event *ep, int mark,
1609 unsigned long long at_latest_by, unsigned long long *pdelay, int keep);
1610
1611 /* ARGSUSED */
1612 static void
clear_arrows(struct event * ep,struct event * ep2,struct fme * fmep)1613 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
1614 {
1615 struct bubble *bp;
1616 struct arrowlist *ap;
1617
1618 ep->cached_state = 0;
1619 ep->keep_in_tree = 0;
1620 for (bp = itree_next_bubble(ep, NULL); bp;
1621 bp = itree_next_bubble(ep, bp)) {
1622 if (bp->t != B_FROM)
1623 continue;
1624 bp->mark = 0;
1625 for (ap = itree_next_arrow(bp, NULL); ap;
1626 ap = itree_next_arrow(bp, ap))
1627 ap->arrowp->mark = 0;
1628 }
1629 }
1630
/*
 * fme_receive_report -- route an ereport to the FME(s) that explain it
 *
 * Each FME on FMElist that is not an overflow FME, has not posted
 * suspects, and whose case is neither solved nor closed is tried in
 * turn: the event is looked up in that FME's tree, tentatively noted as
 * an observation, and hypothesise() is run in peek mode.  If the result
 * is anything other than FME_DISPROVED the ereport is added to that
 * FME's case and the FME is re-evaluated; otherwise the tentative
 * observation is undone.
 *
 * If no FME explained the ereport, the ClosedFMEs list is destroyed and
 * then one of the following happens:
 *   - an open overflow FME exists: the ereport is added to its case;
 *   - Max_fme is set and Open_fme_count has reached it: an overflow FME
 *     is created and its case is solved with an undiagnosable
 *     (UD_VAL_MAXFME) suspect;
 *   - otherwise a new case and FME are created with this event as e0 and
 *     fme_eval() is called on it.
 *
 * ffep may be NULL, in which case nothing is added to any fmd case.
 */
static void
fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
    const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
{
	struct event *ep;
	struct fme *fmep = NULL;
	struct fme *ofmep = NULL;
	struct fme *cfmep, *svfmep;
	int matched = 0;
	nvlist_t *defect;
	fmd_case_t *fmcase;
	char *reason;

	out(O_ALTFP|O_NONL, "fme_receive_report: ");
	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
	out(O_ALTFP|O_STAMP, NULL);

	/* decide which FME it goes to */
	for (fmep = FMElist; fmep; fmep = fmep->next) {
		int prev_verbose;
		unsigned long long my_delay = TIMEVAL_EVENTUALLY;
		enum fme_state state;
		nvlist_t *pre_peek_nvp = NULL;

		/*
		 * overflow FMEs never explain an ereport; just remember
		 * one whose case is still open for use further down
		 */
		if (fmep->overflow) {
			if (!(fmd_case_closed(fmep->hdl, fmep->fmcase)))
				ofmep = fmep;

			continue;
		}

		/*
		 * ignore solved or closed cases
		 */
		if (fmep->posted_suspects ||
		    fmd_case_solved(fmep->hdl, fmep->fmcase) ||
		    fmd_case_closed(fmep->hdl, fmep->fmcase))
			continue;

		/* look up event in event tree for this FME */
		if ((ep = itree_lookup(fmep->eventtree,
		    eventstring, ipp)) == NULL)
			continue;

		/* note observation */
		fmep->ecurrent = ep;
		if (ep->count++ == 0) {
			/* link it into list of observations seen */
			ep->observations = fmep->observations;
			fmep->observations = ep;
			ep->nvp = evnv_dupnvl(nvl);
		} else {
			/*
			 * use new payload values for peek; the old payload
			 * is kept so it can be restored if the peek fails
			 */
			pre_peek_nvp = ep->nvp;
			ep->nvp = evnv_dupnvl(nvl);
		}

		/* tell hypothesise() not to mess with suspect list */
		fmep->peek = 1;

		/* don't want this to be verbose (unless Debug is set) */
		prev_verbose = Verbose;
		if (Debug == 0)
			Verbose = 0;

		lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
		state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);

		fmep->peek = 0;

		/* put verbose flag back */
		Verbose = prev_verbose;

		if (state != FME_DISPROVED) {
			/* found an FME that explains the ereport */
			matched++;
			out(O_ALTFP|O_NONL, "[");
			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
			out(O_ALTFP, " explained by FME%d]", fmep->id);

			/* the new payload stays; drop the saved old one */
			nvlist_free(pre_peek_nvp);

			/* only the first sighting of an event is serialized */
			if (ep->count == 1)
				serialize_observation(fmep, eventstring, ipp);

			if (ffep) {
				fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
				ep->ffep = ffep;
			}

			stats_counter_bump(fmep->Rcount);

			/* re-eval FME */
			fme_eval(fmep, ffep);
		} else {

			/* not a match, undo noting of observation */
			fmep->ecurrent = NULL;
			if (--ep->count == 0) {
				/*
				 * unlink it from observations; ep was linked
				 * at the head of the list above
				 */
				fmep->observations = ep->observations;
				ep->observations = NULL;
				nvlist_free(ep->nvp);
				ep->nvp = NULL;
			} else {
				/* restore the payload saved before the peek */
				nvlist_free(ep->nvp);
				ep->nvp = pre_peek_nvp;
			}
		}
	}

	if (matched)
		return;	/* explained by at least one existing FME */

	/* clean up closed fmes */
	cfmep = ClosedFMEs;
	while (cfmep != NULL) {
		/* save the link before the FME is destroyed */
		svfmep = cfmep->next;
		destroy_fme(cfmep);
		cfmep = svfmep;
	}
	ClosedFMEs = NULL;

	if (ofmep) {
		out(O_ALTFP|O_NONL, "[");
		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
		out(O_ALTFP, " ADDING TO OVERFLOW FME]");
		if (ffep)
			fmd_case_add_ereport(hdl, ofmep->fmcase, ffep);

		return;

	} else if (Max_fme && (Open_fme_count >= Max_fme)) {
		out(O_ALTFP|O_NONL, "[");
		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
		out(O_ALTFP, " MAX OPEN FME REACHED]");

		fmcase = fmd_case_open(hdl, NULL);

		/* Create overflow fme */
		if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep,
		    nvl)) == NULL) {
			out(O_ALTFP|O_NONL, "[");
			ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
			out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]");
			return;
		}

		Open_fme_count++;

		init_fme_bufs(fmep);
		fmep->overflow = B_TRUE;

		if (ffep)
			fmd_case_add_ereport(hdl, fmep->fmcase, ffep);

		/*
		 * solve the overflow case straight away with a single
		 * undiagnosable suspect carrying the reason string, then
		 * put Undiag_reason back to UD_VAL_UNKNOWN
		 */
		Undiag_reason = UD_VAL_MAXFME;
		defect = fmd_nvl_create_fault(hdl,
		    undiag_2defect_str(Undiag_reason), 100, NULL, NULL, NULL);
		reason = undiag_2reason_str(Undiag_reason, NULL);
		(void) nvlist_add_string(defect, UNDIAG_REASON, reason);
		FREE(reason);
		fmd_case_add_suspect(hdl, fmep->fmcase, defect);
		fmd_case_solve(hdl, fmep->fmcase);
		Undiag_reason = UD_VAL_UNKNOWN;
		return;
	}

	/* open a case */
	fmcase = fmd_case_open(hdl, NULL);

	/* start a new FME */
	if ((fmep = newfme(eventstring, ipp, hdl, fmcase, ffep, nvl)) == NULL) {
		out(O_ALTFP|O_NONL, "[");
		ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
		out(O_ALTFP, " CANNOT DIAGNOSE]");
		return;
	}

	Open_fme_count++;

	init_fme_bufs(fmep);

	out(O_ALTFP|O_NONL, "[");
	ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
	out(O_ALTFP, " created FME%d, case %s]", fmep->id,
	    fmd_case_uuid(hdl, fmep->fmcase));

	ep = fmep->e0;
	ASSERT(ep != NULL);

	/* note observation */
	fmep->ecurrent = ep;
	if (ep->count++ == 0) {
		/* link it into list of observations seen */
		ep->observations = fmep->observations;
		fmep->observations = ep;
		ep->nvp = evnv_dupnvl(nvl);
		serialize_observation(fmep, eventstring, ipp);
	} else {
		/* new payload overrides any previous */
		nvlist_free(ep->nvp);
		ep->nvp = evnv_dupnvl(nvl);
	}

	stats_counter_bump(fmep->Rcount);

	/* the first ereport of a new FME becomes the case principal */
	if (ffep) {
		fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
		fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
		fmep->e0r = ffep;
		ep->ffep = ffep;
	}

	/* give the diagnosis algorithm a shot at the new FME state */
	fme_eval(fmep, ffep);
}
1848
1849 void
fme_status(int flags)1850 fme_status(int flags)
1851 {
1852 struct fme *fmep;
1853
1854 if (FMElist == NULL) {
1855 out(flags, "No fault management exercises underway.");
1856 return;
1857 }
1858
1859 for (fmep = FMElist; fmep; fmep = fmep->next)
1860 fme_print(flags, fmep);
1861 }
1862
1863 /*
1864 * "indent" routines used mostly for nicely formatted debug output, but also
1865 * for sanity checking for infinite recursion bugs.
1866 */
1867
1868 #define MAX_INDENT 1024
1869 static const char *indent_s[MAX_INDENT];
1870 static int current_indent;
1871
1872 static void
indent_push(const char * s)1873 indent_push(const char *s)
1874 {
1875 if (current_indent < MAX_INDENT)
1876 indent_s[current_indent++] = s;
1877 else
1878 out(O_DIE, "unexpected recursion depth (%d)", current_indent);
1879 }
1880
1881 static void
indent_set(const char * s)1882 indent_set(const char *s)
1883 {
1884 current_indent = 0;
1885 indent_push(s);
1886 }
1887
1888 static void
indent_pop(void)1889 indent_pop(void)
1890 {
1891 if (current_indent > 0)
1892 current_indent--;
1893 else
1894 out(O_DIE, "recursion underflow");
1895 }
1896
1897 static void
indent(void)1898 indent(void)
1899 {
1900 int i;
1901 if (!Verbose)
1902 return;
1903 for (i = 0; i < current_indent; i++)
1904 out(O_ALTFP|O_VERB|O_NONL, indent_s[i]);
1905 }
1906
1907 #define SLNEW 1
1908 #define SLCHANGED 2
1909 #define SLWAIT 3
1910 #define SLDISPROVED 4
1911
1912 static void
print_suspects(int circumstance,struct fme * fmep)1913 print_suspects(int circumstance, struct fme *fmep)
1914 {
1915 struct event *ep;
1916
1917 out(O_ALTFP|O_NONL, "[");
1918 if (circumstance == SLCHANGED) {
1919 out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, "
1920 "suspect list:", fmep->id, fme_state2str(fmep->state));
1921 } else if (circumstance == SLWAIT) {
1922 out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id,
1923 fmep->timer);
1924 ptree_timeval(O_ALTFP|O_NONL, &fmep->wull);
1925 } else if (circumstance == SLDISPROVED) {
1926 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id);
1927 } else {
1928 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id);
1929 }
1930
1931 if (circumstance == SLWAIT || circumstance == SLDISPROVED) {
1932 out(O_ALTFP, "]");
1933 return;
1934 }
1935
1936 for (ep = fmep->suspects; ep; ep = ep->suspects) {
1937 out(O_ALTFP|O_NONL, " ");
1938 itree_pevent_brief(O_ALTFP|O_NONL, ep);
1939 }
1940 out(O_ALTFP, "]");
1941 }
1942
1943 static struct node *
eventprop_lookup(struct event * ep,const char * propname)1944 eventprop_lookup(struct event *ep, const char *propname)
1945 {
1946 return (lut_lookup(ep->props, (void *)propname, NULL));
1947 }
1948
1949 #define MAXDIGITIDX 23
1950 static char numbuf[MAXDIGITIDX + 1];
1951
1952 static int
node2uint(struct node * n,uint_t * valp)1953 node2uint(struct node *n, uint_t *valp)
1954 {
1955 struct evalue value;
1956 struct lut *globals = NULL;
1957
1958 if (n == NULL)
1959 return (1);
1960
1961 /*
1962 * check value.v since we are being asked to convert an unsigned
1963 * long long int to an unsigned int
1964 */
1965 if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) ||
1966 value.t != UINT64 || value.v > (1ULL << 32))
1967 return (1);
1968
1969 *valp = (uint_t)value.v;
1970
1971 return (0);
1972 }
1973
1974 static nvlist_t *
node2fmri(struct node * n)1975 node2fmri(struct node *n)
1976 {
1977 nvlist_t **pa, *f, *p;
1978 struct node *nc;
1979 uint_t depth = 0;
1980 char *numstr, *nullbyte;
1981 char *failure;
1982 int err, i;
1983
1984 /* XXX do we need to be able to handle a non-T_NAME node? */
1985 if (n == NULL || n->t != T_NAME)
1986 return (NULL);
1987
1988 for (nc = n; nc != NULL; nc = nc->u.name.next) {
1989 if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM)
1990 break;
1991 depth++;
1992 }
1993
1994 if (nc != NULL) {
1995 /* We bailed early, something went wrong */
1996 return (NULL);
1997 }
1998
1999 if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
2000 out(O_DIE|O_SYS, "alloc of fmri nvl failed");
2001 pa = alloca(depth * sizeof (nvlist_t *));
2002 for (i = 0; i < depth; i++)
2003 pa[i] = NULL;
2004
2005 err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
2006 err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
2007 err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
2008 err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
2009 if (err != 0) {
2010 failure = "basic construction of FMRI failed";
2011 goto boom;
2012 }
2013
2014 numbuf[MAXDIGITIDX] = '\0';
2015 nullbyte = &numbuf[MAXDIGITIDX];
2016 i = 0;
2017
2018 for (nc = n; nc != NULL; nc = nc->u.name.next) {
2019 err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2020 if (err != 0) {
2021 failure = "alloc of an hc-pair failed";
2022 goto boom;
2023 }
2024 err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s);
2025 numstr = ulltostr(nc->u.name.child->u.ull, nullbyte);
2026 err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2027 if (err != 0) {
2028 failure = "construction of an hc-pair failed";
2029 goto boom;
2030 }
2031 pa[i++] = p;
2032 }
2033
2034 err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2035 if (err == 0) {
2036 for (i = 0; i < depth; i++)
2037 nvlist_free(pa[i]);
2038 return (f);
2039 }
2040 failure = "addition of hc-pair array to FMRI failed";
2041
2042 boom:
2043 for (i = 0; i < depth; i++)
2044 nvlist_free(pa[i]);
2045 nvlist_free(f);
2046 out(O_DIE, "%s", failure);
2047 /*NOTREACHED*/
2048 return (NULL);
2049 }
2050
/*
 * an ipath cache entry is an array of these, with s==NULL at the end.
 * ipath2fmri() below walks such an array up to that terminator, emitting
 * one hc-pair per element.
 */
struct ipath {
	const char *s;	/* component name (in stable) */
	int i;		/* instance number */
};
2056
2057 static nvlist_t *
ipath2fmri(struct ipath * ipath)2058 ipath2fmri(struct ipath *ipath)
2059 {
2060 nvlist_t **pa, *f, *p;
2061 uint_t depth = 0;
2062 char *numstr, *nullbyte;
2063 char *failure;
2064 int err, i;
2065 struct ipath *ipp;
2066
2067 for (ipp = ipath; ipp->s != NULL; ipp++)
2068 depth++;
2069
2070 if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0)
2071 out(O_DIE|O_SYS, "alloc of fmri nvl failed");
2072 pa = alloca(depth * sizeof (nvlist_t *));
2073 for (i = 0; i < depth; i++)
2074 pa[i] = NULL;
2075
2076 err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC);
2077 err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION);
2078 err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, "");
2079 err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth);
2080 if (err != 0) {
2081 failure = "basic construction of FMRI failed";
2082 goto boom;
2083 }
2084
2085 numbuf[MAXDIGITIDX] = '\0';
2086 nullbyte = &numbuf[MAXDIGITIDX];
2087 i = 0;
2088
2089 for (ipp = ipath; ipp->s != NULL; ipp++) {
2090 err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl);
2091 if (err != 0) {
2092 failure = "alloc of an hc-pair failed";
2093 goto boom;
2094 }
2095 err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s);
2096 numstr = ulltostr(ipp->i, nullbyte);
2097 err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr);
2098 if (err != 0) {
2099 failure = "construction of an hc-pair failed";
2100 goto boom;
2101 }
2102 pa[i++] = p;
2103 }
2104
2105 err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth);
2106 if (err == 0) {
2107 for (i = 0; i < depth; i++)
2108 nvlist_free(pa[i]);
2109 return (f);
2110 }
2111 failure = "addition of hc-pair array to FMRI failed";
2112
2113 boom:
2114 for (i = 0; i < depth; i++)
2115 nvlist_free(pa[i]);
2116 nvlist_free(f);
2117 out(O_DIE, "%s", failure);
2118 /*NOTREACHED*/
2119 return (NULL);
2120 }
2121
/*
 * percentof -- return part as a percentage of whole, rounded to nearest
 *
 * The ratio is first computed in tenths of a percent and then rounded
 * half-up to a whole percentage.  The multiplication is carried out in
 * 64 bits so that a large "part" cannot wrap around in uint_t arithmetic
 * before the division.  A zero "whole" yields 0 instead of dividing by
 * zero.
 */
static uint8_t
percentof(uint_t part, uint_t whole)
{
	unsigned long long tenths;

	if (whole == 0)
		return (0);

	tenths = ((unsigned long long)part * 1000) / whole;

	return ((tenths / 10) + (((tenths % 10) >= 5) ? 1 : 0));
}
2129
/*
 * struct rsl -- one suspect plus the nvlists that get_resources() fills
 * in for it.  rslfree() releases the three nvlists; the suspect event is
 * not freed there.
 */
struct rsl {
	struct event *suspect;	/* the suspect event */
	nvlist_t *asru;		/* TOPO_PROP_ASRU translation result */
	nvlist_t *fru;		/* TOPO_PROP_FRU translation result */
	nvlist_t *rsrc;		/* TOPO_PROP_RESOURCE translation result */
};

/* forward declaration; the definition appears later in this file */
static void publish_suspects(struct fme *fmep, struct rsl *srl);
2138
2139 /*
2140 * rslfree -- free internal members of struct rsl not expected to be
2141 * freed elsewhere.
2142 */
2143 static void
rslfree(struct rsl * freeme)2144 rslfree(struct rsl *freeme)
2145 {
2146 nvlist_free(freeme->asru);
2147 nvlist_free(freeme->fru);
2148 if (freeme->rsrc != freeme->asru)
2149 nvlist_free(freeme->rsrc);
2150 }
2151
2152 /*
2153 * rslcmp -- compare two rsl structures. Use the following
2154 * comparisons to establish cardinality:
2155 *
2156 * 1. Name of the suspect's class. (simple strcmp)
2157 * 2. Name of the suspect's ASRU. (trickier, since nvlist)
2158 *
2159 */
2160 static int
rslcmp(const void * a,const void * b)2161 rslcmp(const void *a, const void *b)
2162 {
2163 struct rsl *r1 = (struct rsl *)a;
2164 struct rsl *r2 = (struct rsl *)b;
2165 int rv;
2166
2167 rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s,
2168 r2->suspect->enode->u.event.ename->u.name.s);
2169 if (rv != 0)
2170 return (rv);
2171
2172 if (r1->rsrc == NULL && r2->rsrc == NULL)
2173 return (0);
2174 if (r1->rsrc == NULL)
2175 return (-1);
2176 if (r2->rsrc == NULL)
2177 return (1);
2178 return (evnv_cmpnvl(r1->rsrc, r2->rsrc, 0));
2179 }
2180
2181 /*
2182 * get_resources -- for a given suspect, determine what ASRU, FRU and
2183 * RSRC nvlists should be advertised in the final suspect list.
2184 */
2185 void
get_resources(struct event * sp,struct rsl * rsrcs,struct config * croot)2186 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot)
2187 {
2188 struct node *asrudef, *frudef;
2189 const struct ipath *asrupath, *frupath;
2190 nvlist_t *asru = NULL, *fru = NULL;
2191 nvlist_t *rsrc = NULL;
2192 char *pathstr;
2193
2194 /*
2195 * First find any ASRU and/or FRU defined in the
2196 * initial fault tree.
2197 */
2198 asrudef = eventprop_lookup(sp, L_ASRU);
2199 frudef = eventprop_lookup(sp, L_FRU);
2200
2201 /*
2202 * Create ipaths based on those definitions
2203 */
2204 asrupath = ipath(asrudef);
2205 frupath = ipath(frudef);
2206
2207 /*
2208 * Allow for platform translations of the FMRIs
2209 */
2210 pathstr = ipath2str(NULL, sp->ipp);
2211 platform_unit_translate(is_defect(sp->t), croot, TOPO_PROP_RESOURCE,
2212 &rsrc, pathstr);
2213 FREE(pathstr);
2214
2215 pathstr = ipath2str(NULL, asrupath);
2216 platform_unit_translate(is_defect(sp->t), croot, TOPO_PROP_ASRU,
2217 &asru, pathstr);
2218 FREE(pathstr);
2219
2220 pathstr = ipath2str(NULL, frupath);
2221 platform_unit_translate(is_defect(sp->t), croot, TOPO_PROP_FRU,
2222 &fru, pathstr);
2223 FREE(pathstr);
2224
2225 rsrcs->suspect = sp;
2226 rsrcs->asru = asru;
2227 rsrcs->fru = fru;
2228 rsrcs->rsrc = rsrc;
2229 }
2230
2231 /*
2232 * trim_suspects -- prior to publishing, we may need to remove some
2233 * suspects from the list. If we're auto-closing upsets, we don't
2234 * want any of those in the published list. If the ASRUs for multiple
2235 * defects resolve to the same ASRU (driver) we only want to publish
2236 * that as a single suspect.
2237 */
2238 static int
trim_suspects(struct fme * fmep,struct rsl * begin,struct rsl * begin2,fmd_event_t * ffep)2239 trim_suspects(struct fme *fmep, struct rsl *begin, struct rsl *begin2,
2240 fmd_event_t *ffep)
2241 {
2242 struct event *ep;
2243 struct rsl *rp = begin;
2244 struct rsl *rp2 = begin2;
2245 int mess_zero_count = 0;
2246 int serd_rval;
2247 uint_t messval;
2248
2249 /* remove any unwanted upsets and populate our array */
2250 for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
2251 if (is_upset(ep->t))
2252 continue;
2253 serd_rval = serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, ep,
2254 NULL, NULL);
2255 if (serd_rval == 0)
2256 continue;
2257 if (node2uint(eventprop_lookup(ep, L_message),
2258 &messval) == 0 && messval == 0) {
2259 get_resources(ep, rp2, fmep->config);
2260 rp2++;
2261 mess_zero_count++;
2262 } else {
2263 get_resources(ep, rp, fmep->config);
2264 rp++;
2265 fmep->nsuspects++;
2266 }
2267 }
2268 return (mess_zero_count);
2269 }
2270
2271 /*
2272 * addpayloadprop -- add a payload prop to a problem
2273 */
2274 static void
addpayloadprop(const char * lhs,struct evalue * rhs,nvlist_t * fault)2275 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault)
2276 {
2277 nvlist_t *rsrc, *hcs;
2278
2279 ASSERT(fault != NULL);
2280 ASSERT(lhs != NULL);
2281 ASSERT(rhs != NULL);
2282
2283 if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, &rsrc) != 0)
2284 out(O_DIE, "cannot add payloadprop \"%s\" to fault", lhs);
2285
2286 if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0) {
2287 out(O_ALTFP|O_VERB2, "addpayloadprop: create hc_specific");
2288 if (nvlist_xalloc(&hcs, NV_UNIQUE_NAME, &Eft_nv_hdl) != 0)
2289 out(O_DIE,
2290 "cannot add payloadprop \"%s\" to fault", lhs);
2291 if (nvlist_add_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, hcs) != 0)
2292 out(O_DIE,
2293 "cannot add payloadprop \"%s\" to fault", lhs);
2294 nvlist_free(hcs);
2295 if (nvlist_lookup_nvlist(rsrc, FM_FMRI_HC_SPECIFIC, &hcs) != 0)
2296 out(O_DIE,
2297 "cannot add payloadprop \"%s\" to fault", lhs);
2298 } else
2299 out(O_ALTFP|O_VERB2, "addpayloadprop: reuse hc_specific");
2300
2301 if (rhs->t == UINT64) {
2302 out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v);
2303
2304 if (nvlist_add_uint64(hcs, lhs, rhs->v) != 0)
2305 out(O_DIE,
2306 "cannot add payloadprop \"%s\" to fault", lhs);
2307 } else {
2308 out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"",
2309 lhs, (char *)(uintptr_t)rhs->v);
2310
2311 if (nvlist_add_string(hcs, lhs, (char *)(uintptr_t)rhs->v) != 0)
2312 out(O_DIE,
2313 "cannot add payloadprop \"%s\" to fault", lhs);
2314 }
2315 }
2316
/*
 * Scratch state shared by istat_save() and the lut_walk() callbacks it
 * uses, istataddsize() and istat2str().
 */
static char *Istatbuf;		/* buffer allocated by istat_save() */
static char *Istatbufptr;	/* next byte istat2str() will write */
static int Istatsz;		/* byte total summed by istataddsize() */
2320
2321 /*
2322 * istataddsize -- calculate size of istat and add it to Istatsz
2323 */
2324 /*ARGSUSED2*/
2325 static void
istataddsize(const struct istat_entry * lhs,struct stats * rhs,void * arg)2326 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2327 {
2328 int val;
2329
2330 ASSERT(lhs != NULL);
2331 ASSERT(rhs != NULL);
2332
2333 if ((val = stats_counter_value(rhs)) == 0)
2334 return; /* skip zero-valued stats */
2335
2336 /* count up the size of the stat name */
2337 Istatsz += ipath2strlen(lhs->ename, lhs->ipath);
2338 Istatsz++; /* for the trailing NULL byte */
2339
2340 /* count up the size of the stat value */
2341 Istatsz += snprintf(NULL, 0, "%d", val);
2342 Istatsz++; /* for the trailing NULL byte */
2343 }
2344
2345 /*
2346 * istat2str -- serialize an istat, writing result to *Istatbufptr
2347 */
2348 /*ARGSUSED2*/
2349 static void
istat2str(const struct istat_entry * lhs,struct stats * rhs,void * arg)2350 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg)
2351 {
2352 char *str;
2353 int len;
2354 int val;
2355
2356 ASSERT(lhs != NULL);
2357 ASSERT(rhs != NULL);
2358
2359 if ((val = stats_counter_value(rhs)) == 0)
2360 return; /* skip zero-valued stats */
2361
2362 /* serialize the stat name */
2363 str = ipath2str(lhs->ename, lhs->ipath);
2364 len = strlen(str);
2365
2366 ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]);
2367 (void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr);
2368 Istatbufptr += len;
2369 FREE(str);
2370 *Istatbufptr++ = '\0';
2371
2372 /* serialize the stat value */
2373 Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr,
2374 "%d", val);
2375 *Istatbufptr++ = '\0';
2376
2377 ASSERT(Istatbufptr <= &Istatbuf[Istatsz]);
2378 }
2379
2380 void
istat_save()2381 istat_save()
2382 {
2383 if (Istat_need_save == 0)
2384 return;
2385
2386 /* figure out how big the serialzed info is */
2387 Istatsz = 0;
2388 lut_walk(Istats, (lut_cb)istataddsize, NULL);
2389
2390 if (Istatsz == 0) {
2391 /* no stats to save */
2392 fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2393 return;
2394 }
2395
2396 /* create the serialized buffer */
2397 Istatbufptr = Istatbuf = MALLOC(Istatsz);
2398 lut_walk(Istats, (lut_cb)istat2str, NULL);
2399
2400 /* clear out current saved stats */
2401 fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
2402
2403 /* write out the new version */
2404 fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz);
2405 FREE(Istatbuf);
2406
2407 Istat_need_save = 0;
2408 }
2409
2410 int
istat_cmp(struct istat_entry * ent1,struct istat_entry * ent2)2411 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2)
2412 {
2413 if (ent1->ename != ent2->ename)
2414 return (ent2->ename - ent1->ename);
2415 if (ent1->ipath != ent2->ipath)
2416 return ((char *)ent2->ipath - (char *)ent1->ipath);
2417
2418 return (0);
2419 }
2420
2421 /*
2422 * istat-verify -- verify the component associated with a stat still exists
2423 *
2424 * if the component no longer exists, this routine resets the stat and
2425 * returns 0. if the component still exists, it returns 1.
2426 */
2427 static int
istat_verify(struct node * snp,struct istat_entry * entp)2428 istat_verify(struct node *snp, struct istat_entry *entp)
2429 {
2430 struct stats *statp;
2431 nvlist_t *fmri;
2432
2433 fmri = node2fmri(snp->u.event.epname);
2434 if (platform_path_exists(fmri)) {
2435 nvlist_free(fmri);
2436 return (1);
2437 }
2438 nvlist_free(fmri);
2439
2440 /* component no longer in system. zero out the associated stats */
2441 if ((statp = (struct stats *)
2442 lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
2443 stats_counter_value(statp) == 0)
2444 return (0); /* stat is already reset */
2445
2446 Istat_need_save = 1;
2447 stats_counter_reset(statp);
2448 return (0);
2449 }
2450
2451 static void
istat_bump(struct node * snp,int n)2452 istat_bump(struct node *snp, int n)
2453 {
2454 struct stats *statp;
2455 struct istat_entry ent;
2456
2457 ASSERT(snp != NULL);
2458 ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
2459 ASSERT(snp->u.event.epname != NULL);
2460
2461 /* class name should be hoisted into a single stable entry */
2462 ASSERT(snp->u.event.ename->u.name.next == NULL);
2463 ent.ename = snp->u.event.ename->u.name.s;
2464 ent.ipath = ipath(snp->u.event.epname);
2465
2466 if (!istat_verify(snp, &ent)) {
2467 /* component no longer exists in system, nothing to do */
2468 return;
2469 }
2470
2471 if ((statp = (struct stats *)
2472 lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
2473 /* need to create the counter */
2474 int cnt = 0;
2475 struct node *np;
2476 char *sname;
2477 char *snamep;
2478 struct istat_entry *newentp;
2479
2480 /* count up the size of the stat name */
2481 np = snp->u.event.ename;
2482 while (np != NULL) {
2483 cnt += strlen(np->u.name.s);
2484 cnt++; /* for the '.' or '@' */
2485 np = np->u.name.next;
2486 }
2487 np = snp->u.event.epname;
2488 while (np != NULL) {
2489 cnt += snprintf(NULL, 0, "%s%llu",
2490 np->u.name.s, np->u.name.child->u.ull);
2491 cnt++; /* for the '/' or trailing NULL byte */
2492 np = np->u.name.next;
2493 }
2494
2495 /* build the stat name */
2496 snamep = sname = alloca(cnt);
2497 np = snp->u.event.ename;
2498 while (np != NULL) {
2499 snamep += snprintf(snamep, &sname[cnt] - snamep,
2500 "%s", np->u.name.s);
2501 np = np->u.name.next;
2502 if (np)
2503 *snamep++ = '.';
2504 }
2505 *snamep++ = '@';
2506 np = snp->u.event.epname;
2507 while (np != NULL) {
2508 snamep += snprintf(snamep, &sname[cnt] - snamep,
2509 "%s%llu", np->u.name.s, np->u.name.child->u.ull);
2510 np = np->u.name.next;
2511 if (np)
2512 *snamep++ = '/';
2513 }
2514 *snamep++ = '\0';
2515
2516 /* create the new stat & add it to our list */
2517 newentp = MALLOC(sizeof (*newentp));
2518 *newentp = ent;
2519 statp = stats_new_counter(NULL, sname, 0);
2520 Istats = lut_add(Istats, (void *)newentp, (void *)statp,
2521 (lut_cmp)istat_cmp);
2522 }
2523
2524 /* if n is non-zero, set that value instead of bumping */
2525 if (n) {
2526 stats_counter_reset(statp);
2527 stats_counter_add(statp, n);
2528 } else
2529 stats_counter_bump(statp);
2530 Istat_need_save = 1;
2531
2532 ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
2533 out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
2534 stats_counter_value(statp));
2535 }
2536
/*
 * istat_destructor -- lut_free() callback for the Istats table; frees
 * the entry used as the key and deletes the stat stored as the value.
 */
/*ARGSUSED*/
static void
istat_destructor(void *left, void *right, void *arg)
{
	struct istat_entry *key = left;

	FREE(key);
	stats_delete((struct stats *)right);
}
2546
2547 /*
2548 * Callback used in a walk of the Istats to reset matching stat counters.
2549 */
2550 static void
istat_counter_reset_cb(struct istat_entry * entp,struct stats * statp,const struct ipath * ipp)2551 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
2552 const struct ipath *ipp)
2553 {
2554 char *path;
2555
2556 if (entp->ipath == ipp) {
2557 path = ipath2str(entp->ename, ipp);
2558 out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
2559 FREE(path);
2560 stats_counter_reset(statp);
2561 Istat_need_save = 1;
2562 }
2563 }
2564
2565 /*ARGSUSED*/
2566 static void
istat_counter_topo_chg_cb(struct istat_entry * entp,struct stats * statp,void * unused)2567 istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp,
2568 void *unused)
2569 {
2570 char *path;
2571 nvlist_t *fmri;
2572
2573 fmri = ipath2fmri((struct ipath *)(entp->ipath));
2574 if (!platform_path_exists(fmri)) {
2575 path = ipath2str(entp->ename, entp->ipath);
2576 out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path);
2577 FREE(path);
2578 stats_counter_reset(statp);
2579 Istat_need_save = 1;
2580 }
2581 nvlist_free(fmri);
2582 }
2583
/*
 * istat_fini -- tear down the Istats table
 *
 * Frees the table with lut_free(), passing istat_destructor() as the
 * per-entry destructor.
 */
void
istat_fini(void)
{
	lut_free(Istats, istat_destructor, NULL);
}
2589
/*
 * Scratch state shared by serd_save() and the lut_walk() callbacks it
 * uses, serdaddsize() and serd2str().
 */
static char *Serdbuf;		/* buffer allocated by serd_save() */
static char *Serdbufptr;	/* next byte serd2str() will write */
static int Serdsz;		/* byte total summed by serdaddsize() */
2593
2594 /*
2595 * serdaddsize -- calculate size of serd and add it to Serdsz
2596 */
2597 /*ARGSUSED*/
2598 static void
serdaddsize(const struct serd_entry * lhs,struct stats * rhs,void * arg)2599 serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2600 {
2601 ASSERT(lhs != NULL);
2602
2603 /* count up the size of the stat name */
2604 Serdsz += ipath2strlen(lhs->ename, lhs->ipath);
2605 Serdsz++; /* for the trailing NULL byte */
2606 }
2607
2608 /*
2609 * serd2str -- serialize a serd engine, writing result to *Serdbufptr
2610 */
2611 /*ARGSUSED*/
2612 static void
serd2str(const struct serd_entry * lhs,struct stats * rhs,void * arg)2613 serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg)
2614 {
2615 char *str;
2616 int len;
2617
2618 ASSERT(lhs != NULL);
2619
2620 /* serialize the serd engine name */
2621 str = ipath2str(lhs->ename, lhs->ipath);
2622 len = strlen(str);
2623
2624 ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]);
2625 (void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr);
2626 Serdbufptr += len;
2627 FREE(str);
2628 *Serdbufptr++ = '\0';
2629 ASSERT(Serdbufptr <= &Serdbuf[Serdsz]);
2630 }
2631
2632 void
serd_save()2633 serd_save()
2634 {
2635 if (Serd_need_save == 0)
2636 return;
2637
2638 /* figure out how big the serialzed info is */
2639 Serdsz = 0;
2640 lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL);
2641
2642 if (Serdsz == 0) {
2643 /* no serd engines to save */
2644 fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2645 return;
2646 }
2647
2648 /* create the serialized buffer */
2649 Serdbufptr = Serdbuf = MALLOC(Serdsz);
2650 lut_walk(SerdEngines, (lut_cb)serd2str, NULL);
2651
2652 /* clear out current saved stats */
2653 fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
2654
2655 /* write out the new version */
2656 fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz);
2657 FREE(Serdbuf);
2658 Serd_need_save = 0;
2659 }
2660
2661 int
serd_cmp(struct serd_entry * ent1,struct serd_entry * ent2)2662 serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2)
2663 {
2664 if (ent1->ename != ent2->ename)
2665 return (ent2->ename - ent1->ename);
2666 if (ent1->ipath != ent2->ipath)
2667 return ((char *)ent2->ipath - (char *)ent1->ipath);
2668
2669 return (0);
2670 }
2671
2672 void
fme_serd_load(fmd_hdl_t * hdl)2673 fme_serd_load(fmd_hdl_t *hdl)
2674 {
2675 int sz;
2676 char *sbuf;
2677 char *sepptr;
2678 char *ptr;
2679 struct serd_entry *newentp;
2680 struct node *epname;
2681 nvlist_t *fmri;
2682 char *namestring;
2683
2684 if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0)
2685 return;
2686 sbuf = alloca(sz);
2687 fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz);
2688 ptr = sbuf;
2689 while (ptr < &sbuf[sz]) {
2690 sepptr = strchr(ptr, '@');
2691 *sepptr = '\0';
2692 namestring = ptr;
2693 sepptr++;
2694 ptr = sepptr;
2695 ptr += strlen(ptr);
2696 ptr++; /* move past the '\0' separating paths */
2697 epname = pathstring2epnamenp(sepptr);
2698 fmri = node2fmri(epname);
2699 if (platform_path_exists(fmri)) {
2700 newentp = MALLOC(sizeof (*newentp));
2701 newentp->hdl = hdl;
2702 newentp->ipath = ipath(epname);
2703 newentp->ename = stable(namestring);
2704 SerdEngines = lut_add(SerdEngines, (void *)newentp,
2705 (void *)newentp, (lut_cmp)serd_cmp);
2706 } else
2707 Serd_need_save = 1;
2708 tree_free(epname);
2709 nvlist_free(fmri);
2710 }
2711 /* save it back again in case some of the paths no longer exist */
2712 serd_save();
2713 }
2714
/*
 * serd_destructor -- lut_free() callback for the SerdEngines table;
 * frees the entry used as the key.  The value is not touched here.
 */
/*ARGSUSED*/
static void
serd_destructor(void *left, void *right, void *arg)
{
	struct serd_entry *key = left;

	FREE(key);
}
2722
2723 /*
2724 * Callback used in a walk of the SerdEngines to reset matching serd engines.
2725 */
2726 /*ARGSUSED*/
2727 static void
serd_reset_cb(struct serd_entry * entp,void * unused,const struct ipath * ipp)2728 serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp)
2729 {
2730 char *path;
2731
2732 if (entp->ipath == ipp) {
2733 path = ipath2str(entp->ename, ipp);
2734 out(O_ALTFP, "serd_reset_cb: resetting %s", path);
2735 fmd_serd_reset(entp->hdl, path);
2736 FREE(path);
2737 Serd_need_save = 1;
2738 }
2739 }
2740
2741 /*ARGSUSED*/
2742 static void
serd_topo_chg_cb(struct serd_entry * entp,void * unused,void * unused2)2743 serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2)
2744 {
2745 char *path;
2746 nvlist_t *fmri;
2747
2748 fmri = ipath2fmri((struct ipath *)(entp->ipath));
2749 if (!platform_path_exists(fmri)) {
2750 path = ipath2str(entp->ename, entp->ipath);
2751 out(O_ALTFP, "serd_topo_chg_cb: not present %s", path);
2752 fmd_serd_reset(entp->hdl, path);
2753 FREE(path);
2754 Serd_need_save = 1;
2755 }
2756 nvlist_free(fmri);
2757 }
2758
/*
 * serd_fini -- tear down the SerdEngines table
 *
 * Frees the table with lut_free(), passing serd_destructor() as the
 * per-entry destructor.
 */
void
serd_fini(void)
{
	lut_free(SerdEngines, serd_destructor, NULL);
}
2764
/*
 * publish_suspects -- add the suspects in srl to the FME's case
 *
 * srl holds fmep->nsuspects entries.  The array is sorted with rslcmp(),
 * the FITrate of every suspect is collected (1 is used when the property
 * is missing or zero), and then, walking the sorted array backwards, one
 * fault nvlist per suspect is created with a certainty of
 * percentof(its FITrate, sum of all FITrates).  The optional "message",
 * "retire" and "response" properties and any payload properties are
 * added to the fault, the entry's nvlists are released with rslfree(),
 * an "action" property is evaluated if present, and the fault is added
 * to the case.
 *
 * While doing so, each fault's ASRU is checked with
 * fmd_nvl_fmri_has_fault().  Unless every suspect had an ASRU that was
 * already faulty, the "count" property of each suspect is applied with
 * istat_bump() and the istats are saved.
 */
static void
publish_suspects(struct fme *fmep, struct rsl *srl)
{
	struct rsl *rp;
	nvlist_t *fault;
	uint8_t cert;
	uint_t *frs;
	uint_t frsum, fr;
	uint_t messval;
	uint_t retireval;
	uint_t responseval;
	struct node *snp;
	int frcnt, fridx;
	boolean_t allfaulty = B_TRUE;
	struct rsl *erl = srl + fmep->nsuspects - 1;	/* last entry */

	/*
	 * sort the array
	 */
	qsort(srl, fmep->nsuspects, sizeof (struct rsl), rslcmp);

	/* sum the fitrates */
	frs = alloca(fmep->nsuspects * sizeof (uint_t));
	fridx = frcnt = frsum = 0;

	for (rp = srl; rp <= erl; rp++) {
		struct node *n;

		n = eventprop_lookup(rp->suspect, L_FITrate);
		if (node2uint(n, &fr) != 0) {
			out(O_DEBUG|O_NONL, "event ");
			ipath_print(O_DEBUG|O_NONL,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    rp->suspect->ipp);
			out(O_VERB, " has no FITrate (using 1)");
			fr = 1;
		} else if (fr == 0) {
			out(O_DEBUG|O_NONL, "event ");
			ipath_print(O_DEBUG|O_NONL,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    rp->suspect->ipp);
			out(O_VERB, " has zero FITrate (using 1)");
			fr = 1;
		}

		/* frs[] is filled in the same order as the sorted srl[] */
		frs[fridx++] = fr;
		frsum += fr;
		frcnt++;	/* incremented but not read in this function */
	}

	/* Add them in reverse order of our sort, as fmd reverses order */
	for (rp = erl; rp >= srl; rp--) {
		/* fridx steps back through frs[] in step with rp */
		cert = percentof(frs[--fridx], frsum);
		fault = fmd_nvl_create_fault(fmep->hdl,
		    rp->suspect->enode->u.event.ename->u.name.s,
		    cert,
		    rp->asru,
		    rp->fru,
		    rp->rsrc);
		if (fault == NULL)
			out(O_DIE, "fault creation failed");
		/* if "message" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_message),
		    &messval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds message=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    messval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_MESSAGE,
			    (messval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-message to fault");
			}
		}

		/* if "retire" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_retire),
		    &retireval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds retire=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    retireval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_RETIRE,
			    (retireval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-retire to fault");
			}
		}

		/* if "response" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_response),
		    &responseval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds response=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    responseval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_RESPONSE,
			    (responseval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-response to fault");
			}
		}

		/* add any payload properties */
		lut_walk(rp->suspect->payloadprops,
		    (lut_cb)addpayloadprop, (void *)fault);
		/*
		 * rslfree() releases only the nvlists; rp->suspect is
		 * still used below and in the "count" loop
		 */
		rslfree(rp);

		/*
		 * If "action" property exists, evaluate it;  this must be done
		 * before the allfaulty check below since some actions may
		 * modify the asru to be used in fmd_nvl_fmri_has_fault.  This
		 * needs to be restructured if any new actions are introduced
		 * that have effects that we do not want to be visible if
		 * we decide not to publish in the dupclose check below.
		 */
		if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
			struct evalue evalue;

			out(O_ALTFP|O_NONL,
			    "[FME%d, %s action ", fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s);
			ptree_name_iter(O_ALTFP|O_NONL, snp);
			out(O_ALTFP, "]");
			/* Action_nvl is set to this fault for the evaluation */
			Action_nvl = fault;
			(void) eval_expr(snp, NULL, NULL, NULL, NULL,
			    NULL, 0, &evalue);
		}

		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);

		/*
		 * check if the asru is already marked as "faulty".
		 */
		if (allfaulty) {
			nvlist_t *asru;

			out(O_ALTFP|O_VERB, "FME%d dup check ", fmep->id);
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
			out(O_ALTFP|O_VERB|O_NONL, " ");
			if (nvlist_lookup_nvlist(fault,
			    FM_FAULT_ASRU, &asru) != 0) {
				out(O_ALTFP|O_VERB, "NULL asru");
				allfaulty = B_FALSE;
			} else if (fmd_nvl_fmri_has_fault(fmep->hdl, asru,
			    FMD_HAS_FAULT_ASRU, NULL)) {
				out(O_ALTFP|O_VERB, "faulty");
			} else {
				out(O_ALTFP|O_VERB, "not faulty");
				allfaulty = B_FALSE;
			}
		}

	}

	if (!allfaulty) {
		/*
		 * don't update the count stat if all asrus are already
		 * present and unrepaired in the asru cache
		 */
		for (rp = erl; rp >= srl; rp--) {
			struct event *suspect = rp->suspect;

			if (suspect == NULL)
				continue;

			/* if "count" exists, increment the appropriate stat */
			if ((snp = eventprop_lookup(suspect,
			    L_count)) != NULL) {
				out(O_ALTFP|O_NONL,
				    "[FME%d, %s count ", fmep->id,
				    suspect->enode->u.event.ename->u.name.s);
				ptree_name_iter(O_ALTFP|O_NONL, snp);
				out(O_ALTFP, "]");
				istat_bump(snp, 0);

			}
		}
		istat_save();	/* write out any istat changes */
	}
}
2952
2953 static const char *
undiag_2defect_str(int ud)2954 undiag_2defect_str(int ud)
2955 {
2956 switch (ud) {
2957 case UD_VAL_MISSINGINFO:
2958 case UD_VAL_MISSINGOBS:
2959 case UD_VAL_MISSINGPATH:
2960 case UD_VAL_MISSINGZERO:
2961 case UD_VAL_BADOBS:
2962 case UD_VAL_CFGMISMATCH:
2963 return (UNDIAG_DEFECT_CHKPT);
2964
2965 case UD_VAL_BADEVENTI:
2966 case UD_VAL_BADEVENTPATH:
2967 case UD_VAL_BADEVENTCLASS:
2968 case UD_VAL_INSTFAIL:
2969 case UD_VAL_NOPATH:
2970 case UD_VAL_UNSOLVD:
2971 return (UNDIAG_DEFECT_FME);
2972
2973 case UD_VAL_MAXFME:
2974 return (UNDIAG_DEFECT_LIMIT);
2975
2976 case UD_VAL_UNKNOWN:
2977 default:
2978 return (UNDIAG_DEFECT_UNKNOWN);
2979 }
2980 }
2981
2982 static const char *
undiag_2fault_str(int ud)2983 undiag_2fault_str(int ud)
2984 {
2985 switch (ud) {
2986 case UD_VAL_BADEVENTI:
2987 case UD_VAL_BADEVENTPATH:
2988 case UD_VAL_BADEVENTCLASS:
2989 case UD_VAL_INSTFAIL:
2990 case UD_VAL_NOPATH:
2991 case UD_VAL_UNSOLVD:
2992 return (UNDIAG_FAULT_FME);
2993 default:
2994 return (NULL);
2995 }
2996 }
2997
2998 static char *
undiag_2reason_str(int ud,char * arg)2999 undiag_2reason_str(int ud, char *arg)
3000 {
3001 const char *ptr;
3002 char *buf;
3003 int with_arg = 0;
3004
3005 switch (ud) {
3006 case UD_VAL_BADEVENTPATH:
3007 ptr = UD_STR_BADEVENTPATH;
3008 with_arg = 1;
3009 break;
3010 case UD_VAL_BADEVENTCLASS:
3011 ptr = UD_STR_BADEVENTCLASS;
3012 with_arg = 1;
3013 break;
3014 case UD_VAL_BADEVENTI:
3015 ptr = UD_STR_BADEVENTI;
3016 with_arg = 1;
3017 break;
3018 case UD_VAL_BADOBS:
3019 ptr = UD_STR_BADOBS;
3020 break;
3021 case UD_VAL_CFGMISMATCH:
3022 ptr = UD_STR_CFGMISMATCH;
3023 break;
3024 case UD_VAL_INSTFAIL:
3025 ptr = UD_STR_INSTFAIL;
3026 with_arg = 1;
3027 break;
3028 case UD_VAL_MAXFME:
3029 ptr = UD_STR_MAXFME;
3030 break;
3031 case UD_VAL_MISSINGINFO:
3032 ptr = UD_STR_MISSINGINFO;
3033 break;
3034 case UD_VAL_MISSINGOBS:
3035 ptr = UD_STR_MISSINGOBS;
3036 break;
3037 case UD_VAL_MISSINGPATH:
3038 ptr = UD_STR_MISSINGPATH;
3039 break;
3040 case UD_VAL_MISSINGZERO:
3041 ptr = UD_STR_MISSINGZERO;
3042 break;
3043 case UD_VAL_NOPATH:
3044 ptr = UD_STR_NOPATH;
3045 with_arg = 1;
3046 break;
3047 case UD_VAL_UNSOLVD:
3048 ptr = UD_STR_UNSOLVD;
3049 break;
3050 case UD_VAL_UNKNOWN:
3051 default:
3052 ptr = UD_STR_UNKNOWN;
3053 break;
3054 }
3055 if (with_arg) {
3056 buf = MALLOC(strlen(ptr) + strlen(arg) - 1);
3057 (void) sprintf(buf, ptr, arg);
3058 } else {
3059 buf = MALLOC(strlen(ptr) + 1);
3060 (void) sprintf(buf, ptr);
3061 }
3062 return (buf);
3063 }
3064
3065 static void
publish_undiagnosable(fmd_hdl_t * hdl,fmd_event_t * ffep,fmd_case_t * fmcase,nvlist_t * detector,char * arg)3066 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase,
3067 nvlist_t *detector, char *arg)
3068 {
3069 struct case_list *newcase;
3070 nvlist_t *defect, *fault;
3071 const char *faultstr;
3072 char *reason = undiag_2reason_str(Undiag_reason, arg);
3073
3074 out(O_ALTFP,
3075 "[undiagnosable ereport received, "
3076 "creating and closing a new case (%s)]", reason);
3077
3078 newcase = MALLOC(sizeof (struct case_list));
3079 newcase->next = NULL;
3080 newcase->fmcase = fmcase;
3081 if (Undiagablecaselist != NULL)
3082 newcase->next = Undiagablecaselist;
3083 Undiagablecaselist = newcase;
3084
3085 if (ffep != NULL)
3086 fmd_case_add_ereport(hdl, newcase->fmcase, ffep);
3087
3088 /* add defect */
3089 defect = fmd_nvl_create_fault(hdl,
3090 undiag_2defect_str(Undiag_reason), 50, NULL, NULL, detector);
3091 (void) nvlist_add_string(defect, UNDIAG_REASON, reason);
3092 (void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE, B_FALSE);
3093 (void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE, B_FALSE);
3094 fmd_case_add_suspect(hdl, newcase->fmcase, defect);
3095
3096 /* add fault if appropriate */
3097 faultstr = undiag_2fault_str(Undiag_reason);
3098 if (faultstr != NULL) {
3099 fault = fmd_nvl_create_fault(hdl, faultstr, 50, NULL, NULL,
3100 detector);
3101 (void) nvlist_add_string(fault, UNDIAG_REASON, reason);
3102 (void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
3103 B_FALSE);
3104 (void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
3105 B_FALSE);
3106 fmd_case_add_suspect(hdl, newcase->fmcase, fault);
3107 }
3108 FREE(reason);
3109
3110 /* solve and close case */
3111 fmd_case_solve(hdl, newcase->fmcase);
3112 fmd_case_close(hdl, newcase->fmcase);
3113 Undiag_reason = UD_VAL_UNKNOWN;
3114 }
3115
3116 static void
fme_undiagnosable(struct fme * f)3117 fme_undiagnosable(struct fme *f)
3118 {
3119 nvlist_t *defect, *fault, *detector = NULL;
3120 struct event *ep;
3121 char *pathstr;
3122 const char *faultstr;
3123 char *reason = undiag_2reason_str(Undiag_reason, NULL);
3124
3125 out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
3126 f->id, fmd_case_uuid(f->hdl, f->fmcase), reason);
3127
3128 for (ep = f->observations; ep; ep = ep->observations) {
3129
3130 if (ep->ffep != f->e0r)
3131 fmd_case_add_ereport(f->hdl, f->fmcase, ep->ffep);
3132
3133 pathstr = ipath2str(NULL, ipath(platform_getpath(ep->nvp)));
3134 platform_unit_translate(0, f->config, TOPO_PROP_RESOURCE,
3135 &detector, pathstr);
3136 FREE(pathstr);
3137
3138 /* add defect */
3139 defect = fmd_nvl_create_fault(f->hdl,
3140 undiag_2defect_str(Undiag_reason), 50 / f->uniqobs,
3141 NULL, NULL, detector);
3142 (void) nvlist_add_string(defect, UNDIAG_REASON, reason);
3143 (void) nvlist_add_boolean_value(defect, FM_SUSPECT_RETIRE,
3144 B_FALSE);
3145 (void) nvlist_add_boolean_value(defect, FM_SUSPECT_RESPONSE,
3146 B_FALSE);
3147 fmd_case_add_suspect(f->hdl, f->fmcase, defect);
3148
3149 /* add fault if appropriate */
3150 faultstr = undiag_2fault_str(Undiag_reason);
3151 if (faultstr == NULL)
3152 continue;
3153 fault = fmd_nvl_create_fault(f->hdl, faultstr, 50 / f->uniqobs,
3154 NULL, NULL, detector);
3155 (void) nvlist_add_string(fault, UNDIAG_REASON, reason);
3156 (void) nvlist_add_boolean_value(fault, FM_SUSPECT_RETIRE,
3157 B_FALSE);
3158 (void) nvlist_add_boolean_value(fault, FM_SUSPECT_RESPONSE,
3159 B_FALSE);
3160 fmd_case_add_suspect(f->hdl, f->fmcase, fault);
3161 nvlist_free(detector);
3162 }
3163 FREE(reason);
3164 fmd_case_solve(f->hdl, f->fmcase);
3165 fmd_case_close(f->hdl, f->fmcase);
3166 Undiag_reason = UD_VAL_UNKNOWN;
3167 }
3168
3169 /*
3170 * fme_close_case
3171 *
3172 * Find the requested case amongst our fmes and close it. Free up
3173 * the related fme.
3174 */
3175 void
fme_close_case(fmd_hdl_t * hdl,fmd_case_t * fmcase)3176 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase)
3177 {
3178 struct case_list *ucasep, *prevcasep = NULL;
3179 struct fme *prev = NULL;
3180 struct fme *fmep;
3181
3182 for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) {
3183 if (fmcase != ucasep->fmcase) {
3184 prevcasep = ucasep;
3185 continue;
3186 }
3187
3188 if (prevcasep == NULL)
3189 Undiagablecaselist = Undiagablecaselist->next;
3190 else
3191 prevcasep->next = ucasep->next;
3192
3193 FREE(ucasep);
3194 return;
3195 }
3196
3197 for (fmep = FMElist; fmep; fmep = fmep->next) {
3198 if (fmep->hdl == hdl && fmep->fmcase == fmcase)
3199 break;
3200 prev = fmep;
3201 }
3202
3203 if (fmep == NULL) {
3204 out(O_WARN, "Eft asked to close unrecognized case [%s].",
3205 fmd_case_uuid(hdl, fmcase));
3206 return;
3207 }
3208
3209 if (EFMElist == fmep)
3210 EFMElist = prev;
3211
3212 if (prev == NULL)
3213 FMElist = FMElist->next;
3214 else
3215 prev->next = fmep->next;
3216
3217 fmep->next = NULL;
3218
3219 /* Get rid of any timer this fme has set */
3220 if (fmep->wull != 0)
3221 fmd_timer_remove(fmep->hdl, fmep->timer);
3222
3223 if (ClosedFMEs == NULL) {
3224 ClosedFMEs = fmep;
3225 } else {
3226 fmep->next = ClosedFMEs;
3227 ClosedFMEs = fmep;
3228 }
3229
3230 Open_fme_count--;
3231
3232 /* See if we can close the overflow FME */
3233 if (Open_fme_count <= Max_fme) {
3234 for (fmep = FMElist; fmep; fmep = fmep->next) {
3235 if (fmep->overflow && !(fmd_case_closed(fmep->hdl,
3236 fmep->fmcase)))
3237 break;
3238 }
3239
3240 if (fmep != NULL)
3241 fmd_case_close(fmep->hdl, fmep->fmcase);
3242 }
3243 }
3244
3245 /*
3246 * fme_set_timer()
3247 * If the time we need to wait for the given FME is less than the
3248 * current timer, kick that old timer out and establish a new one.
3249 */
3250 static int
fme_set_timer(struct fme * fmep,unsigned long long wull)3251 fme_set_timer(struct fme *fmep, unsigned long long wull)
3252 {
3253 out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait ");
3254 ptree_timeval(O_ALTFP|O_VERB, &wull);
3255
3256 if (wull <= fmep->pull) {
3257 out(O_ALTFP|O_VERB|O_NONL, "already have waited at least ");
3258 ptree_timeval(O_ALTFP|O_VERB, &fmep->pull);
3259 out(O_ALTFP|O_VERB, NULL);
3260 /* we've waited at least wull already, don't need timer */
3261 return (0);
3262 }
3263
3264 out(O_ALTFP|O_VERB|O_NONL, " currently ");
3265 if (fmep->wull != 0) {
3266 out(O_ALTFP|O_VERB|O_NONL, "waiting ");
3267 ptree_timeval(O_ALTFP|O_VERB, &fmep->wull);
3268 out(O_ALTFP|O_VERB, NULL);
3269 } else {
3270 out(O_ALTFP|O_VERB|O_NONL, "not waiting");
3271 out(O_ALTFP|O_VERB, NULL);
3272 }
3273
3274 if (fmep->wull != 0)
3275 if (wull >= fmep->wull)
3276 /* New timer would fire later than established timer */
3277 return (0);
3278
3279 if (fmep->wull != 0) {
3280 fmd_timer_remove(fmep->hdl, fmep->timer);
3281 }
3282
3283 fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep,
3284 fmep->e0r, wull);
3285 out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer);
3286 fmep->wull = wull;
3287 return (1);
3288 }
3289
3290 void
fme_timer_fired(struct fme * fmep,id_t tid)3291 fme_timer_fired(struct fme *fmep, id_t tid)
3292 {
3293 struct fme *ffmep = NULL;
3294
3295 for (ffmep = FMElist; ffmep; ffmep = ffmep->next)
3296 if (ffmep == fmep)
3297 break;
3298
3299 if (ffmep == NULL) {
3300 out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.",
3301 (void *)fmep);
3302 return;
3303 }
3304
3305 out(O_ALTFP|O_VERB, "Timer fired %lx", tid);
3306 fmep->pull = fmep->wull;
3307 fmep->wull = 0;
3308 fmd_buf_write(fmep->hdl, fmep->fmcase,
3309 WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull));
3310
3311 fme_eval(fmep, fmep->e0r);
3312 }
3313
3314 /*
3315 * Preserve the fme's suspect list in its psuspects list, NULLing the
3316 * suspects list in the meantime.
3317 */
3318 static void
save_suspects(struct fme * fmep)3319 save_suspects(struct fme *fmep)
3320 {
3321 struct event *ep;
3322 struct event *nextep;
3323
3324 /* zero out the previous suspect list */
3325 for (ep = fmep->psuspects; ep; ep = nextep) {
3326 nextep = ep->psuspects;
3327 ep->psuspects = NULL;
3328 }
3329 fmep->psuspects = NULL;
3330
3331 /* zero out the suspect list, copying it to previous suspect list */
3332 fmep->psuspects = fmep->suspects;
3333 for (ep = fmep->suspects; ep; ep = nextep) {
3334 nextep = ep->suspects;
3335 ep->psuspects = ep->suspects;
3336 ep->suspects = NULL;
3337 ep->is_suspect = 0;
3338 }
3339 fmep->suspects = NULL;
3340 fmep->nsuspects = 0;
3341 }
3342
3343 /*
3344 * Retrieve the fme's suspect list from its psuspects list.
3345 */
3346 static void
restore_suspects(struct fme * fmep)3347 restore_suspects(struct fme *fmep)
3348 {
3349 struct event *ep;
3350 struct event *nextep;
3351
3352 fmep->nsuspects = 0;
3353 fmep->suspects = fmep->psuspects;
3354 for (ep = fmep->psuspects; ep; ep = nextep) {
3355 fmep->nsuspects++;
3356 nextep = ep->psuspects;
3357 ep->suspects = ep->psuspects;
3358 }
3359 }
3360
3361 /*
3362 * this is what we use to call the Emrys prototype code instead of main()
3363 */
3364 static void
fme_eval(struct fme * fmep,fmd_event_t * ffep)3365 fme_eval(struct fme *fmep, fmd_event_t *ffep)
3366 {
3367 struct event *ep;
3368 unsigned long long my_delay = TIMEVAL_EVENTUALLY;
3369 struct rsl *srl = NULL;
3370 struct rsl *srl2 = NULL;
3371 int mess_zero_count;
3372 int rpcnt;
3373
3374 save_suspects(fmep);
3375
3376 out(O_ALTFP, "Evaluate FME %d", fmep->id);
3377 indent_set(" ");
3378
3379 lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
3380 fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);
3381
3382 out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
3383 fme_state2str(fmep->state));
3384 for (ep = fmep->suspects; ep; ep = ep->suspects) {
3385 out(O_ALTFP|O_NONL, " ");
3386 itree_pevent_brief(O_ALTFP|O_NONL, ep);
3387 }
3388 out(O_ALTFP, NULL);
3389
3390 switch (fmep->state) {
3391 case FME_CREDIBLE:
3392 print_suspects(SLNEW, fmep);
3393 (void) upsets_eval(fmep, ffep);
3394
3395 /*
3396 * we may have already posted suspects in upsets_eval() which
3397 * can recurse into fme_eval() again. If so then just return.
3398 */
3399 if (fmep->posted_suspects)
3400 return;
3401
3402 stats_counter_bump(fmep->diags);
3403 rpcnt = fmep->nsuspects;
3404 save_suspects(fmep);
3405
3406 /*
3407 * create two lists, one for "message=1" faults and one for
3408 * "message=0" faults. If we have a mixture we will generate
3409 * two separate suspect lists.
3410 */
3411 srl = MALLOC(rpcnt * sizeof (struct rsl));
3412 bzero(srl, rpcnt * sizeof (struct rsl));
3413 srl2 = MALLOC(rpcnt * sizeof (struct rsl));
3414 bzero(srl2, rpcnt * sizeof (struct rsl));
3415 mess_zero_count = trim_suspects(fmep, srl, srl2, ffep);
3416
3417 /*
3418 * If the resulting suspect list has no members, we're
3419 * done so simply close the case. Otherwise sort and publish.
3420 */
3421 if (fmep->nsuspects == 0 && mess_zero_count == 0) {
3422 out(O_ALTFP,
3423 "[FME%d, case %s (all suspects are upsets)]",
3424 fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
3425 fmd_case_close(fmep->hdl, fmep->fmcase);
3426 } else if (fmep->nsuspects != 0 && mess_zero_count == 0) {
3427 publish_suspects(fmep, srl);
3428 out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3429 fmd_case_uuid(fmep->hdl, fmep->fmcase));
3430 fmd_case_solve(fmep->hdl, fmep->fmcase);
3431 } else if (fmep->nsuspects == 0 && mess_zero_count != 0) {
3432 fmep->nsuspects = mess_zero_count;
3433 publish_suspects(fmep, srl2);
3434 out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3435 fmd_case_uuid(fmep->hdl, fmep->fmcase));
3436 fmd_case_solve(fmep->hdl, fmep->fmcase);
3437 } else {
3438 struct event *obsp;
3439 struct fme *nfmep;
3440
3441 publish_suspects(fmep, srl);
3442 out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
3443 fmd_case_uuid(fmep->hdl, fmep->fmcase));
3444 fmd_case_solve(fmep->hdl, fmep->fmcase);
3445
3446 /*
3447 * Got both message=0 and message=1 so create a
3448 * duplicate case. Also need a temporary duplicate fme
3449 * structure for use by publish_suspects().
3450 */
3451 nfmep = alloc_fme();
3452 nfmep->id = Nextid++;
3453 nfmep->hdl = fmep->hdl;
3454 nfmep->nsuspects = mess_zero_count;
3455 nfmep->fmcase = fmd_case_open(fmep->hdl, NULL);
3456 out(O_ALTFP|O_STAMP,
3457 "[creating parallel FME%d, case %s]", nfmep->id,
3458 fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3459 Open_fme_count++;
3460 if (ffep) {
3461 fmd_case_setprincipal(nfmep->hdl,
3462 nfmep->fmcase, ffep);
3463 fmd_case_add_ereport(nfmep->hdl,
3464 nfmep->fmcase, ffep);
3465 }
3466 for (obsp = fmep->observations; obsp;
3467 obsp = obsp->observations)
3468 if (obsp->ffep && obsp->ffep != ffep)
3469 fmd_case_add_ereport(nfmep->hdl,
3470 nfmep->fmcase, obsp->ffep);
3471
3472 publish_suspects(nfmep, srl2);
3473 out(O_ALTFP, "[solving FME%d, case %s]", nfmep->id,
3474 fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
3475 fmd_case_solve(nfmep->hdl, nfmep->fmcase);
3476 FREE(nfmep);
3477 }
3478 FREE(srl);
3479 FREE(srl2);
3480 restore_suspects(fmep);
3481
3482 fmep->posted_suspects = 1;
3483 fmd_buf_write(fmep->hdl, fmep->fmcase,
3484 WOBUF_POSTD,
3485 (void *)&fmep->posted_suspects,
3486 sizeof (fmep->posted_suspects));
3487
3488 /*
3489 * Now the suspects have been posted, we can clear up
3490 * the instance tree as we won't be looking at it again.
3491 * Also cancel the timer as the case is now solved.
3492 */
3493 if (fmep->wull != 0) {
3494 fmd_timer_remove(fmep->hdl, fmep->timer);
3495 fmep->wull = 0;
3496 }
3497 break;
3498
3499 case FME_WAIT:
3500 ASSERT(my_delay > fmep->ull);
3501 (void) fme_set_timer(fmep, my_delay);
3502 print_suspects(SLWAIT, fmep);
3503 itree_prune(fmep->eventtree);
3504 return;
3505
3506 case FME_DISPROVED:
3507 print_suspects(SLDISPROVED, fmep);
3508 Undiag_reason = UD_VAL_UNSOLVD;
3509 fme_undiagnosable(fmep);
3510 break;
3511 }
3512
3513 itree_free(fmep->eventtree);
3514 fmep->eventtree = NULL;
3515 structconfig_free(fmep->config);
3516 fmep->config = NULL;
3517 destroy_fme_bufs(fmep);
3518 }
3519
/*
 * Prototypes for routines that are called below ahead of the point at
 * which they are defined.
 */
static void indent(void);
static int triggered(struct fme *fmep, struct event *ep, int mark);
static enum fme_state requirements_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state effects_test(struct fme *fmep,
    struct event *fault_event, unsigned long long at_latest_by,
    unsigned long long *pdelay);
static enum fme_state causes_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
3529
3530 static int
checkconstraints(struct fme * fmep,struct arrow * arrowp)3531 checkconstraints(struct fme *fmep, struct arrow *arrowp)
3532 {
3533 struct constraintlist *ctp;
3534 struct evalue value;
3535 char *sep = "";
3536
3537 if (arrowp->forever_false) {
3538 indent();
3539 out(O_ALTFP|O_VERB|O_NONL, " Forever false constraint: ");
3540 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3541 out(O_ALTFP|O_VERB|O_NONL, sep);
3542 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3543 sep = ", ";
3544 }
3545 out(O_ALTFP|O_VERB, NULL);
3546 return (0);
3547 }
3548 if (arrowp->forever_true) {
3549 indent();
3550 out(O_ALTFP|O_VERB|O_NONL, " Forever true constraint: ");
3551 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3552 out(O_ALTFP|O_VERB|O_NONL, sep);
3553 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3554 sep = ", ";
3555 }
3556 out(O_ALTFP|O_VERB, NULL);
3557 return (1);
3558 }
3559
3560 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3561 if (eval_expr(ctp->cnode, NULL, NULL,
3562 &fmep->globals, fmep->config,
3563 arrowp, 0, &value)) {
3564 /* evaluation successful */
3565 if (value.t == UNDEFINED || value.v == 0) {
3566 /* known false */
3567 arrowp->forever_false = 1;
3568 indent();
3569 out(O_ALTFP|O_VERB|O_NONL,
3570 " False constraint: ");
3571 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3572 out(O_ALTFP|O_VERB, NULL);
3573 return (0);
3574 }
3575 } else {
3576 /* evaluation unsuccessful -- unknown value */
3577 indent();
3578 out(O_ALTFP|O_VERB|O_NONL,
3579 " Deferred constraint: ");
3580 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3581 out(O_ALTFP|O_VERB, NULL);
3582 return (1);
3583 }
3584 }
3585 /* known true */
3586 arrowp->forever_true = 1;
3587 indent();
3588 out(O_ALTFP|O_VERB|O_NONL, " True constraint: ");
3589 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
3590 out(O_ALTFP|O_VERB|O_NONL, sep);
3591 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
3592 sep = ", ";
3593 }
3594 out(O_ALTFP|O_VERB, NULL);
3595 return (1);
3596 }
3597
3598 static int
triggered(struct fme * fmep,struct event * ep,int mark)3599 triggered(struct fme *fmep, struct event *ep, int mark)
3600 {
3601 struct bubble *bp;
3602 struct arrowlist *ap;
3603 int count = 0;
3604
3605 stats_counter_bump(fmep->Tcallcount);
3606 for (bp = itree_next_bubble(ep, NULL); bp;
3607 bp = itree_next_bubble(ep, bp)) {
3608 if (bp->t != B_TO)
3609 continue;
3610 for (ap = itree_next_arrow(bp, NULL); ap;
3611 ap = itree_next_arrow(bp, ap)) {
3612 /* check count of marks against K in the bubble */
3613 if ((ap->arrowp->mark & mark) &&
3614 ++count >= bp->nork)
3615 return (1);
3616 }
3617 }
3618 return (0);
3619 }
3620
3621 static int
mark_arrows(struct fme * fmep,struct event * ep,int mark,unsigned long long at_latest_by,unsigned long long * pdelay,int keep)3622 mark_arrows(struct fme *fmep, struct event *ep, int mark,
3623 unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
3624 {
3625 struct bubble *bp;
3626 struct arrowlist *ap;
3627 unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3628 unsigned long long my_delay;
3629 enum fme_state result;
3630 int retval = 0;
3631
3632 for (bp = itree_next_bubble(ep, NULL); bp;
3633 bp = itree_next_bubble(ep, bp)) {
3634 if (bp->t != B_FROM)
3635 continue;
3636 stats_counter_bump(fmep->Marrowcount);
3637 for (ap = itree_next_arrow(bp, NULL); ap;
3638 ap = itree_next_arrow(bp, ap)) {
3639 struct event *ep2 = ap->arrowp->head->myevent;
3640 /*
3641 * if we're clearing marks, we can avoid doing
3642 * all that work evaluating constraints.
3643 */
3644 if (mark == 0) {
3645 if (ap->arrowp->arrow_marked == 0)
3646 continue;
3647 ap->arrowp->arrow_marked = 0;
3648 ap->arrowp->mark &= ~EFFECTS_COUNTER;
3649 if (keep && (ep2->cached_state &
3650 (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
3651 ep2->keep_in_tree = 1;
3652 ep2->cached_state &=
3653 ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
3654 (void) mark_arrows(fmep, ep2, mark, 0, NULL,
3655 keep);
3656 continue;
3657 }
3658 ap->arrowp->arrow_marked = 1;
3659 if (ep2->cached_state & REQMNTS_DISPROVED) {
3660 indent();
3661 out(O_ALTFP|O_VERB|O_NONL,
3662 " ALREADY DISPROVED ");
3663 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3664 out(O_ALTFP|O_VERB, NULL);
3665 continue;
3666 }
3667 if (ep2->cached_state & WAIT_EFFECT) {
3668 indent();
3669 out(O_ALTFP|O_VERB|O_NONL,
3670 " ALREADY EFFECTS WAIT ");
3671 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3672 out(O_ALTFP|O_VERB, NULL);
3673 continue;
3674 }
3675 if (ep2->cached_state & CREDIBLE_EFFECT) {
3676 indent();
3677 out(O_ALTFP|O_VERB|O_NONL,
3678 " ALREADY EFFECTS CREDIBLE ");
3679 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3680 out(O_ALTFP|O_VERB, NULL);
3681 continue;
3682 }
3683 if ((ep2->cached_state & PARENT_WAIT) &&
3684 (mark & PARENT_WAIT)) {
3685 indent();
3686 out(O_ALTFP|O_VERB|O_NONL,
3687 " ALREADY PARENT EFFECTS WAIT ");
3688 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3689 out(O_ALTFP|O_VERB, NULL);
3690 continue;
3691 }
3692 platform_set_payloadnvp(ep2->nvp);
3693 if (checkconstraints(fmep, ap->arrowp) == 0) {
3694 platform_set_payloadnvp(NULL);
3695 indent();
3696 out(O_ALTFP|O_VERB|O_NONL,
3697 " CONSTRAINTS FAIL ");
3698 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3699 out(O_ALTFP|O_VERB, NULL);
3700 continue;
3701 }
3702 platform_set_payloadnvp(NULL);
3703 ap->arrowp->mark |= EFFECTS_COUNTER;
3704 if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
3705 indent();
3706 out(O_ALTFP|O_VERB|O_NONL,
3707 " K-COUNT NOT YET MET ");
3708 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3709 out(O_ALTFP|O_VERB, NULL);
3710 continue;
3711 }
3712 ep2->cached_state &= ~PARENT_WAIT;
3713 /*
3714 * if we've reached an ereport and no propagation time
3715 * is specified, use the Hesitate value
3716 */
3717 if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
3718 ap->arrowp->maxdelay == 0ULL) {
3719 out(O_ALTFP|O_VERB|O_NONL, " default wait ");
3720 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3721 out(O_ALTFP|O_VERB, NULL);
3722 result = requirements_test(fmep, ep2, Hesitate,
3723 &my_delay);
3724 } else {
3725 result = requirements_test(fmep, ep2,
3726 at_latest_by + ap->arrowp->maxdelay,
3727 &my_delay);
3728 }
3729 if (result == FME_WAIT) {
3730 retval = WAIT_EFFECT;
3731 if (overall_delay > my_delay)
3732 overall_delay = my_delay;
3733 ep2->cached_state |= WAIT_EFFECT;
3734 indent();
3735 out(O_ALTFP|O_VERB|O_NONL, " EFFECTS WAIT ");
3736 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3737 out(O_ALTFP|O_VERB, NULL);
3738 indent_push(" E");
3739 if (mark_arrows(fmep, ep2, PARENT_WAIT,
3740 at_latest_by, &my_delay, 0) ==
3741 WAIT_EFFECT) {
3742 retval = WAIT_EFFECT;
3743 if (overall_delay > my_delay)
3744 overall_delay = my_delay;
3745 }
3746 indent_pop();
3747 } else if (result == FME_DISPROVED) {
3748 indent();
3749 out(O_ALTFP|O_VERB|O_NONL,
3750 " EFFECTS DISPROVED ");
3751 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3752 out(O_ALTFP|O_VERB, NULL);
3753 } else {
3754 ep2->cached_state |= mark;
3755 indent();
3756 if (mark == CREDIBLE_EFFECT)
3757 out(O_ALTFP|O_VERB|O_NONL,
3758 " EFFECTS CREDIBLE ");
3759 else
3760 out(O_ALTFP|O_VERB|O_NONL,
3761 " PARENT EFFECTS WAIT ");
3762 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
3763 out(O_ALTFP|O_VERB, NULL);
3764 indent_push(" E");
3765 if (mark_arrows(fmep, ep2, mark, at_latest_by,
3766 &my_delay, 0) == WAIT_EFFECT) {
3767 retval = WAIT_EFFECT;
3768 if (overall_delay > my_delay)
3769 overall_delay = my_delay;
3770 }
3771 indent_pop();
3772 }
3773 }
3774 }
3775 if (retval == WAIT_EFFECT)
3776 *pdelay = overall_delay;
3777 return (retval);
3778 }
3779
3780 static enum fme_state
effects_test(struct fme * fmep,struct event * fault_event,unsigned long long at_latest_by,unsigned long long * pdelay)3781 effects_test(struct fme *fmep, struct event *fault_event,
3782 unsigned long long at_latest_by, unsigned long long *pdelay)
3783 {
3784 struct event *error_event;
3785 enum fme_state return_value = FME_CREDIBLE;
3786 unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3787 unsigned long long my_delay;
3788
3789 stats_counter_bump(fmep->Ecallcount);
3790 indent_push(" E");
3791 indent();
3792 out(O_ALTFP|O_VERB|O_NONL, "->");
3793 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3794 out(O_ALTFP|O_VERB, NULL);
3795
3796 if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
3797 &my_delay, 0) == WAIT_EFFECT) {
3798 return_value = FME_WAIT;
3799 if (overall_delay > my_delay)
3800 overall_delay = my_delay;
3801 }
3802 for (error_event = fmep->observations;
3803 error_event; error_event = error_event->observations) {
3804 indent();
3805 out(O_ALTFP|O_VERB|O_NONL, " ");
3806 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
3807 if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
3808 if (error_event->cached_state &
3809 (PARENT_WAIT|WAIT_EFFECT)) {
3810 out(O_ALTFP|O_VERB, " NOT YET triggered");
3811 continue;
3812 }
3813 return_value = FME_DISPROVED;
3814 out(O_ALTFP|O_VERB, " NOT triggered");
3815 break;
3816 } else {
3817 out(O_ALTFP|O_VERB, " triggered");
3818 }
3819 }
3820 if (return_value == FME_DISPROVED) {
3821 (void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
3822 } else {
3823 fault_event->keep_in_tree = 1;
3824 (void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
3825 }
3826
3827 indent();
3828 out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
3829 fme_state2str(return_value));
3830 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
3831 out(O_ALTFP|O_VERB, NULL);
3832 indent_pop();
3833 if (return_value == FME_WAIT)
3834 *pdelay = overall_delay;
3835 return (return_value);
3836 }
3837
3838 static enum fme_state
requirements_test(struct fme * fmep,struct event * ep,unsigned long long at_latest_by,unsigned long long * pdelay)3839 requirements_test(struct fme *fmep, struct event *ep,
3840 unsigned long long at_latest_by, unsigned long long *pdelay)
3841 {
3842 int waiting_events;
3843 int credible_events;
3844 int deferred_events;
3845 enum fme_state return_value = FME_CREDIBLE;
3846 unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
3847 unsigned long long arrow_delay;
3848 unsigned long long my_delay;
3849 struct event *ep2;
3850 struct bubble *bp;
3851 struct arrowlist *ap;
3852
3853 if (ep->cached_state & REQMNTS_CREDIBLE) {
3854 indent();
3855 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY CREDIBLE ");
3856 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3857 out(O_ALTFP|O_VERB, NULL);
3858 return (FME_CREDIBLE);
3859 }
3860 if (ep->cached_state & REQMNTS_DISPROVED) {
3861 indent();
3862 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY DISPROVED ");
3863 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3864 out(O_ALTFP|O_VERB, NULL);
3865 return (FME_DISPROVED);
3866 }
3867 if (ep->cached_state & REQMNTS_WAIT) {
3868 indent();
3869 *pdelay = ep->cached_delay;
3870 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY WAIT ");
3871 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3872 out(O_ALTFP|O_VERB|O_NONL, ", wait for: ");
3873 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3874 out(O_ALTFP|O_VERB, NULL);
3875 return (FME_WAIT);
3876 }
3877 stats_counter_bump(fmep->Rcallcount);
3878 indent_push(" R");
3879 indent();
3880 out(O_ALTFP|O_VERB|O_NONL, "->");
3881 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3882 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
3883 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3884 out(O_ALTFP|O_VERB, NULL);
3885
3886 if (ep->t == N_EREPORT) {
3887 if (ep->count == 0) {
3888 if (fmep->pull >= at_latest_by) {
3889 return_value = FME_DISPROVED;
3890 } else {
3891 ep->cached_delay = *pdelay = at_latest_by;
3892 return_value = FME_WAIT;
3893 }
3894 }
3895
3896 indent();
3897 switch (return_value) {
3898 case FME_CREDIBLE:
3899 ep->cached_state |= REQMNTS_CREDIBLE;
3900 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE ");
3901 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3902 break;
3903 case FME_DISPROVED:
3904 ep->cached_state |= REQMNTS_DISPROVED;
3905 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
3906 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3907 break;
3908 case FME_WAIT:
3909 ep->cached_state |= REQMNTS_WAIT;
3910 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT ");
3911 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
3912 out(O_ALTFP|O_VERB|O_NONL, " to ");
3913 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
3914 break;
3915 default:
3916 out(O_DIE, "requirements_test: unexpected fme_state");
3917 break;
3918 }
3919 out(O_ALTFP|O_VERB, NULL);
3920 indent_pop();
3921
3922 return (return_value);
3923 }
3924
3925 /* this event is not a report, descend the tree */
3926 for (bp = itree_next_bubble(ep, NULL); bp;
3927 bp = itree_next_bubble(ep, bp)) {
3928 int n;
3929
3930 if (bp->t != B_FROM)
3931 continue;
3932
3933 n = bp->nork;
3934
3935 credible_events = 0;
3936 waiting_events = 0;
3937 deferred_events = 0;
3938 arrow_delay = TIMEVAL_EVENTUALLY;
3939 /*
3940 * n is -1 for 'A' so adjust it.
3941 * XXX just count up the arrows for now.
3942 */
3943 if (n < 0) {
3944 n = 0;
3945 for (ap = itree_next_arrow(bp, NULL); ap;
3946 ap = itree_next_arrow(bp, ap))
3947 n++;
3948 indent();
3949 out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n);
3950 } else {
3951 indent();
3952 out(O_ALTFP|O_VERB, " Bubble N=%d", n);
3953 }
3954
3955 if (n == 0)
3956 continue;
3957 if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) {
3958 for (ap = itree_next_arrow(bp, NULL); ap;
3959 ap = itree_next_arrow(bp, ap)) {
3960 ep2 = ap->arrowp->head->myevent;
3961 platform_set_payloadnvp(ep2->nvp);
3962 (void) checkconstraints(fmep, ap->arrowp);
3963 if (!ap->arrowp->forever_false) {
3964 /*
3965 * if all arrows are invalidated by the
3966 * constraints, then we should elide the
3967 * whole bubble to be consistant with
3968 * the tree creation time behaviour
3969 */
3970 bp->mark |= BUBBLE_OK;
3971 platform_set_payloadnvp(NULL);
3972 break;
3973 }
3974 platform_set_payloadnvp(NULL);
3975 }
3976 }
3977 for (ap = itree_next_arrow(bp, NULL); ap;
3978 ap = itree_next_arrow(bp, ap)) {
3979 ep2 = ap->arrowp->head->myevent;
3980 if (n <= credible_events)
3981 break;
3982
3983 ap->arrowp->mark |= REQMNTS_COUNTER;
3984 if (triggered(fmep, ep2, REQMNTS_COUNTER))
3985 /* XXX adding max timevals! */
3986 switch (requirements_test(fmep, ep2,
3987 at_latest_by + ap->arrowp->maxdelay,
3988 &my_delay)) {
3989 case FME_DEFERRED:
3990 deferred_events++;
3991 break;
3992 case FME_CREDIBLE:
3993 credible_events++;
3994 break;
3995 case FME_DISPROVED:
3996 break;
3997 case FME_WAIT:
3998 if (my_delay < arrow_delay)
3999 arrow_delay = my_delay;
4000 waiting_events++;
4001 break;
4002 default:
4003 out(O_DIE,
4004 "Bug in requirements_test.");
4005 }
4006 else
4007 deferred_events++;
4008 }
4009 if (!(bp->mark & BUBBLE_OK) && waiting_events == 0) {
4010 bp->mark |= BUBBLE_ELIDED;
4011 continue;
4012 }
4013 indent();
4014 out(O_ALTFP|O_VERB, " Credible: %d Waiting %d",
4015 credible_events + deferred_events, waiting_events);
4016 if (credible_events + deferred_events + waiting_events < n) {
4017 /* Can never meet requirements */
4018 ep->cached_state |= REQMNTS_DISPROVED;
4019 indent();
4020 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED ");
4021 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4022 out(O_ALTFP|O_VERB, NULL);
4023 indent_pop();
4024 return (FME_DISPROVED);
4025 }
4026 if (credible_events + deferred_events < n) {
4027 /* will have to wait */
4028 /* wait time is shortest known */
4029 if (arrow_delay < overall_delay)
4030 overall_delay = arrow_delay;
4031 return_value = FME_WAIT;
4032 } else if (credible_events < n) {
4033 if (return_value != FME_WAIT)
4034 return_value = FME_DEFERRED;
4035 }
4036 }
4037
4038 /*
4039 * don't mark as FME_DEFERRED. If this event isn't reached by another
4040 * path, then this will be considered FME_CREDIBLE. But if it is
4041 * reached by a different path so the K-count is met, then might
4042 * get overridden by FME_WAIT or FME_DISPROVED.
4043 */
4044 if (return_value == FME_WAIT) {
4045 ep->cached_state |= REQMNTS_WAIT;
4046 ep->cached_delay = *pdelay = overall_delay;
4047 } else if (return_value == FME_CREDIBLE) {
4048 ep->cached_state |= REQMNTS_CREDIBLE;
4049 }
4050 indent();
4051 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ",
4052 fme_state2str(return_value));
4053 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4054 out(O_ALTFP|O_VERB, NULL);
4055 indent_pop();
4056 return (return_value);
4057 }
4058
4059 static enum fme_state
causes_test(struct fme * fmep,struct event * ep,unsigned long long at_latest_by,unsigned long long * pdelay)4060 causes_test(struct fme *fmep, struct event *ep,
4061 unsigned long long at_latest_by, unsigned long long *pdelay)
4062 {
4063 unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4064 unsigned long long my_delay;
4065 int credible_results = 0;
4066 int waiting_results = 0;
4067 enum fme_state fstate;
4068 struct event *tail_event;
4069 struct bubble *bp;
4070 struct arrowlist *ap;
4071 int k = 1;
4072
4073 stats_counter_bump(fmep->Ccallcount);
4074 indent_push(" C");
4075 indent();
4076 out(O_ALTFP|O_VERB|O_NONL, "->");
4077 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4078 out(O_ALTFP|O_VERB, NULL);
4079
4080 for (bp = itree_next_bubble(ep, NULL); bp;
4081 bp = itree_next_bubble(ep, bp)) {
4082 if (bp->t != B_TO)
4083 continue;
4084 k = bp->nork; /* remember the K value */
4085 for (ap = itree_next_arrow(bp, NULL); ap;
4086 ap = itree_next_arrow(bp, ap)) {
4087 int do_not_follow = 0;
4088
4089 /*
4090 * if we get to the same event multiple times
4091 * only worry about the first one.
4092 */
4093 if (ap->arrowp->tail->myevent->cached_state &
4094 CAUSES_TESTED) {
4095 indent();
4096 out(O_ALTFP|O_VERB|O_NONL,
4097 " causes test already run for ");
4098 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4099 ap->arrowp->tail->myevent);
4100 out(O_ALTFP|O_VERB, NULL);
4101 continue;
4102 }
4103
4104 /*
4105 * see if false constraint prevents us
4106 * from traversing this arrow
4107 */
4108 platform_set_payloadnvp(ep->nvp);
4109 if (checkconstraints(fmep, ap->arrowp) == 0)
4110 do_not_follow = 1;
4111 platform_set_payloadnvp(NULL);
4112 if (do_not_follow) {
4113 indent();
4114 out(O_ALTFP|O_VERB|O_NONL,
4115 " False arrow from ");
4116 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL,
4117 ap->arrowp->tail->myevent);
4118 out(O_ALTFP|O_VERB, NULL);
4119 continue;
4120 }
4121
4122 ap->arrowp->tail->myevent->cached_state |=
4123 CAUSES_TESTED;
4124 tail_event = ap->arrowp->tail->myevent;
4125 fstate = hypothesise(fmep, tail_event, at_latest_by,
4126 &my_delay);
4127
4128 switch (fstate) {
4129 case FME_WAIT:
4130 if (my_delay < overall_delay)
4131 overall_delay = my_delay;
4132 waiting_results++;
4133 break;
4134 case FME_CREDIBLE:
4135 credible_results++;
4136 break;
4137 case FME_DISPROVED:
4138 break;
4139 default:
4140 out(O_DIE, "Bug in causes_test");
4141 }
4142 }
4143 }
4144 /* compare against K */
4145 if (credible_results + waiting_results < k) {
4146 indent();
4147 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED ");
4148 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4149 out(O_ALTFP|O_VERB, NULL);
4150 indent_pop();
4151 return (FME_DISPROVED);
4152 }
4153 if (waiting_results != 0) {
4154 *pdelay = overall_delay;
4155 indent();
4156 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT ");
4157 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4158 out(O_ALTFP|O_VERB|O_NONL, " to ");
4159 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4160 out(O_ALTFP|O_VERB, NULL);
4161 indent_pop();
4162 return (FME_WAIT);
4163 }
4164 indent();
4165 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE ");
4166 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4167 out(O_ALTFP|O_VERB, NULL);
4168 indent_pop();
4169 return (FME_CREDIBLE);
4170 }
4171
4172 static enum fme_state
hypothesise(struct fme * fmep,struct event * ep,unsigned long long at_latest_by,unsigned long long * pdelay)4173 hypothesise(struct fme *fmep, struct event *ep,
4174 unsigned long long at_latest_by, unsigned long long *pdelay)
4175 {
4176 enum fme_state rtr, otr;
4177 unsigned long long my_delay;
4178 unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
4179
4180 stats_counter_bump(fmep->Hcallcount);
4181 indent_push(" H");
4182 indent();
4183 out(O_ALTFP|O_VERB|O_NONL, "->");
4184 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4185 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: ");
4186 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by);
4187 out(O_ALTFP|O_VERB, NULL);
4188
4189 rtr = requirements_test(fmep, ep, at_latest_by, &my_delay);
4190 if ((rtr == FME_WAIT) && (my_delay < overall_delay))
4191 overall_delay = my_delay;
4192 if (rtr != FME_DISPROVED) {
4193 if (is_problem(ep->t)) {
4194 otr = effects_test(fmep, ep, at_latest_by, &my_delay);
4195 if (otr != FME_DISPROVED) {
4196 if (fmep->peek == 0 && ep->is_suspect == 0) {
4197 ep->suspects = fmep->suspects;
4198 ep->is_suspect = 1;
4199 fmep->suspects = ep;
4200 fmep->nsuspects++;
4201 }
4202 }
4203 } else
4204 otr = causes_test(fmep, ep, at_latest_by, &my_delay);
4205 if ((otr == FME_WAIT) && (my_delay < overall_delay))
4206 overall_delay = my_delay;
4207 if ((otr != FME_DISPROVED) &&
4208 ((rtr == FME_WAIT) || (otr == FME_WAIT)))
4209 *pdelay = overall_delay;
4210 }
4211 if (rtr == FME_DISPROVED) {
4212 indent();
4213 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4214 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4215 out(O_ALTFP|O_VERB, " (doesn't meet requirements)");
4216 indent_pop();
4217 return (FME_DISPROVED);
4218 }
4219 if ((otr == FME_DISPROVED) && is_problem(ep->t)) {
4220 indent();
4221 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4222 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4223 out(O_ALTFP|O_VERB, " (doesn't explain all reports)");
4224 indent_pop();
4225 return (FME_DISPROVED);
4226 }
4227 if (otr == FME_DISPROVED) {
4228 indent();
4229 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED ");
4230 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4231 out(O_ALTFP|O_VERB, " (causes are not credible)");
4232 indent_pop();
4233 return (FME_DISPROVED);
4234 }
4235 if ((rtr == FME_WAIT) || (otr == FME_WAIT)) {
4236 indent();
4237 out(O_ALTFP|O_VERB|O_NONL, "<-WAIT ");
4238 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4239 out(O_ALTFP|O_VERB|O_NONL, " to ");
4240 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay);
4241 out(O_ALTFP|O_VERB, NULL);
4242 indent_pop();
4243 return (FME_WAIT);
4244 }
4245 indent();
4246 out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE ");
4247 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep);
4248 out(O_ALTFP|O_VERB, NULL);
4249 indent_pop();
4250 return (FME_CREDIBLE);
4251 }
4252
4253 /*
4254 * fme_istat_load -- reconstitute any persistent istats
4255 */
4256 void
fme_istat_load(fmd_hdl_t * hdl)4257 fme_istat_load(fmd_hdl_t *hdl)
4258 {
4259 int sz;
4260 char *sbuf;
4261 char *ptr;
4262
4263 if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) {
4264 out(O_ALTFP, "fme_istat_load: No stats");
4265 return;
4266 }
4267
4268 sbuf = alloca(sz);
4269
4270 fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz);
4271
4272 /*
4273 * pick apart the serialized stats
4274 *
4275 * format is:
4276 * <class-name>, '@', <path>, '\0', <value>, '\0'
4277 * for example:
4278 * "stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0"
4279 *
4280 * since this is parsing our own serialized data, any parsing issues
4281 * are fatal, so we check for them all with ASSERT() below.
4282 */
4283 ptr = sbuf;
4284 while (ptr < &sbuf[sz]) {
4285 char *sepptr;
4286 struct node *np;
4287 int val;
4288
4289 sepptr = strchr(ptr, '@');
4290 ASSERT(sepptr != NULL);
4291 *sepptr = '\0';
4292
4293 /* construct the event */
4294 np = newnode(T_EVENT, NULL, 0);
4295 np->u.event.ename = newnode(T_NAME, NULL, 0);
4296 np->u.event.ename->u.name.t = N_STAT;
4297 np->u.event.ename->u.name.s = stable(ptr);
4298 np->u.event.ename->u.name.it = IT_ENAME;
4299 np->u.event.ename->u.name.last = np->u.event.ename;
4300
4301 ptr = sepptr + 1;
4302 ASSERT(ptr < &sbuf[sz]);
4303 ptr += strlen(ptr);
4304 ptr++; /* move past the '\0' separating path from value */
4305 ASSERT(ptr < &sbuf[sz]);
4306 ASSERT(isdigit(*ptr));
4307 val = atoi(ptr);
4308 ASSERT(val > 0);
4309 ptr += strlen(ptr);
4310 ptr++; /* move past the final '\0' for this entry */
4311
4312 np->u.event.epname = pathstring2epnamenp(sepptr + 1);
4313 ASSERT(np->u.event.epname != NULL);
4314
4315 istat_bump(np, val);
4316 tree_free(np);
4317 }
4318
4319 istat_save();
4320 }
4321