1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * fme.c -- fault management exercise module 27 * 28 * this module provides the simulated fault management exercise. 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <string.h> 36 #include <strings.h> 37 #include <ctype.h> 38 #include <alloca.h> 39 #include <libnvpair.h> 40 #include <sys/fm/protocol.h> 41 #include <fm/fmd_api.h> 42 #include "alloc.h" 43 #include "out.h" 44 #include "stats.h" 45 #include "stable.h" 46 #include "literals.h" 47 #include "lut.h" 48 #include "tree.h" 49 #include "ptree.h" 50 #include "itree.h" 51 #include "ipath.h" 52 #include "fme.h" 53 #include "evnv.h" 54 #include "eval.h" 55 #include "config.h" 56 #include "platform.h" 57 #include "esclex.h" 58 59 /* imported from eft.c... 
 */
/* tunables and shared handles imported from eft.c */
extern char *Autoclose;		/* auto-close policy string (from eft.conf) */
extern hrtime_t Hesitate;
extern char *Serd_Override;	/* optional SERD N/T overrides from eft.conf */
extern nv_alloc_t Eft_nv_hdl;	/* nvlist allocator used for (un)packing */
extern int Max_fme;
extern fmd_hdl_t *Hdl;

/* dirty flags: nonzero when istat/serd state must be written back */
/* NOTE(review): consumers istat_save()/serd_save() not visible here */
static int Istat_need_save;
static int Serd_need_save;
void istat_save(void);
void serd_save(void);

/* fme under construction is global so we can free it on module abort */
static struct fme *Nfmep;

/* reason string attached to undiagnosable-defect cases */
static const char *Undiag_reason;

/* next FME id to hand out; bumped in newfme(), resynced in fme_restart() */
static int Nextid = 0;

static int Open_fme_count = 0;	/* Count of open FMEs */

/* list of fault management exercises underway */
static struct fme {
	struct fme *next;		/* next exercise */
	unsigned long long ull;		/* time when fme was created */
	int id;				/* FME id */
	struct config *config;		/* cooked configuration data */
	struct lut *eventtree;		/* propagation tree for this FME */
	/*
	 * The initial error report that created this FME is kept in
	 * two forms.  e0 points to the instance tree node and is used
	 * by fme_eval() as the starting point for the inference
	 * algorithm.  e0r is the event handle FMD passed to us when
	 * the ereport first arrived and is used when setting timers,
	 * which are always relative to the time of this initial
	 * report.
	 */
	struct event *e0;
	fmd_event_t *e0r;

	id_t timer;			/* for setting an fmd time-out */

	struct event *ecurrent;		/* ereport under consideration */
	struct event *suspects;		/* current suspect list */
	struct event *psuspects;	/* previous suspect list */
	int nsuspects;			/* count of suspects */
	int nonfault;			/* zero if all suspects T_FAULT */
	int posted_suspects;		/* true if we've posted a diagnosis */
	int uniqobs;			/* number of unique events observed */
	int peek;			/* just peeking, don't track suspects */
	int overflow;			/* true if overflow FME */
	enum fme_state {
		FME_NOTHING = 5000,	/* not evaluated yet */
		FME_WAIT,		/* need to wait for more info */
		FME_CREDIBLE,		/* suspect list is credible */
		FME_DISPROVED,		/* no valid suspects found */
		FME_DEFERRED		/* don't know yet (k-count not met) */
	} state;

	unsigned long long pull;	/* time passed since created */
	unsigned long long wull;	/* wait until this time for re-eval */
	struct event *observations;	/* observation list */
	struct lut *globals;		/* values of global variables */
	/* fmd interfacing */
	fmd_hdl_t *hdl;			/* handle for talking with fmd */
	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
	/* stats */
	struct stats *Rcount;
	struct stats *Hcallcount;
	struct stats *Rcallcount;
	struct stats *Ccallcount;
	struct stats *Ecallcount;
	struct stats *Tcallcount;
	struct stats *Marrowcount;
	struct stats *diags;
} *FMElist, *EFMElist, *ClosedFMEs;

/* cases we could not restart; kept so fme_fini() can release them */
static struct case_list {
	fmd_case_t *fmcase;
	struct case_list *next;
} *Undiagablecaselist;

static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static struct node *eventprop_lookup(struct event *ep, const char *propname);
static struct node *pathstring2epnamenp(char *path);
static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep,
    fmd_case_t *fmcase);
static void restore_suspects(struct fme *fmep);
static void save_suspects(struct fme *fmep);
static void destroy_fme(struct fme *f);
static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
    const char *eventstring, const struct ipath *ipp, nvlist_t *nvl);
static void istat_counter_reset_cb(struct istat_entry *entp,
    struct stats *statp, const struct ipath *ipp);
static void istat_counter_topo_chg_cb(struct istat_entry *entp,
    struct stats *statp, void *unused);
static void serd_reset_cb(struct serd_entry *entp, void *unused,
    const struct ipath *ipp);
static void serd_topo_chg_cb(struct serd_entry *entp, void *unused,
    void *unused2);
static void destroy_fme_bufs(struct fme *fp);

/*
 * alloc_fme -- allocate and zero a new struct fme.  Caller owns the
 * result; on success paths it is handed to fme_ready(), on failure
 * paths it is released with FREE()/destroy_fme().
 */
static struct fme *
alloc_fme(void)
{
	struct fme *fmep;

	fmep = MALLOC(sizeof (*fmep));
	bzero(fmep, sizeof (*fmep));
	return (fmep);
}

/*
 * fme_ready -- called when all initialization of the FME (except for
 *	stats) has completed successfully.  Adds the fme to global lists
 *	and establishes its stats.
 */
static struct fme *
fme_ready(struct fme *fmep)
{
	char nbuf[100];

	Nfmep = NULL;	/* don't need to free this on module abort now */

	/* append to the global FME list (FMElist head, EFMElist tail) */
	if (EFMElist) {
		EFMElist->next = fmep;
		EFMElist = fmep;
	} else
		FMElist = EFMElist = fmep;

	/* per-FME named counters, keyed by FME id */
	(void) sprintf(nbuf, "fme%d.Rcount", fmep->id);
	fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
	(void) sprintf(nbuf, "fme%d.Hcall", fmep->id);
	fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1);
	(void) sprintf(nbuf, "fme%d.Rcall", fmep->id);
	fmep->Rcallcount = stats_new_counter(nbuf,
	    "calls to requirements_test()", 1);
	(void) sprintf(nbuf, "fme%d.Ccall", fmep->id);
	fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1);
	(void) sprintf(nbuf, "fme%d.Ecall", fmep->id);
	fmep->Ecallcount =
	    stats_new_counter(nbuf, "calls to effects_test()", 1);
	(void) sprintf(nbuf, "fme%d.Tcall", fmep->id);
	fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
	(void) sprintf(nbuf, "fme%d.Marrow", fmep->id);
	fmep->Marrowcount = stats_new_counter(nbuf,
	    "arrows marked by mark_arrows()", 1);
	(void) sprintf(nbuf, "fme%d.diags", fmep->id);
	fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);

	out(O_ALTFP|O_VERB2, "newfme: config snapshot contains...");
	config_print(O_ALTFP|O_VERB2, fmep->config);

	return (fmep);
}

extern void ipath_dummy_lut(struct arrow *);
extern struct lut *itree_create_dummy(const char *, const struct ipath *);

/*
 * set_needed_arrows -- lut_walk callback: mark every outgoing (B_FROM)
 * arrow of ep as "needed" and register it in the dummy ipath lut.
 */
/* ARGSUSED */
static void
set_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
{
	struct bubble *bp;
	struct arrowlist *ap;

	for (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		if (bp->t != B_FROM)
			continue;
		for (ap = itree_next_arrow(bp, NULL); ap;
		    ap = itree_next_arrow(bp, ap)) {
			ap->arrowp->pnode->u.arrow.needed = 1;
			ipath_dummy_lut(ap->arrowp);
		}
	}
}

/*
 * unset_needed_arrows -- lut_walk callback: clear the "needed" flag on
 * every outgoing (B_FROM) arrow of ep.
 */
/* ARGSUSED */
static void
unset_needed_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
{
	struct bubble *bp;
	struct arrowlist *ap;

	for (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		if (bp->t != B_FROM)
			continue;
		for (ap = itree_next_arrow(bp, NULL); ap;
		    ap = itree_next_arrow(bp, ap))
			ap->arrowp->pnode->u.arrow.needed = 0;
	}
}

static void globals_destructor(void *left, void *right, void *arg);
static void clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep);

/*
 * prune_propagations -- run a throwaway "peek" evaluation on a dummy
 * instance tree rooted at e0 so that itree_prune() can discard
 * propagations that can never matter for this initial event.  Uses the
 * global Nfmep while working so module abort can clean up; all state
 * (stats included) is torn down again before returning.
 */
static void
prune_propagations(const char *e0class, const struct ipath *e0ipp)
{
	char nbuf[100];
	unsigned long long my_delay = TIMEVAL_EVENTUALLY;
	extern struct lut *Usednames;

	Nfmep = alloc_fme();
	Nfmep->id = Nextid;
	Nfmep->state = FME_NOTHING;
	Nfmep->eventtree = itree_create_dummy(e0class, e0ipp);
	if ((Nfmep->e0 =
	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
		out(O_ALTFP, "prune_propagations: e0 not in instance tree");
		itree_free(Nfmep->eventtree);
		FREE(Nfmep);
		Nfmep = NULL;
		return;
	}
	Nfmep->ecurrent = Nfmep->observations = Nfmep->e0;
	Nfmep->e0->count++;

	/* temporary stats so hypothesise() and friends can count calls */
	(void) sprintf(nbuf, "fme%d.Rcount", Nfmep->id);
	Nfmep->Rcount = stats_new_counter(nbuf, "ereports received", 0);
	(void) sprintf(nbuf, "fme%d.Hcall", Nfmep->id);
	Nfmep->Hcallcount =
	    stats_new_counter(nbuf, "calls to hypothesise()", 1);
	(void) sprintf(nbuf, "fme%d.Rcall", Nfmep->id);
	Nfmep->Rcallcount = stats_new_counter(nbuf,
	    "calls to requirements_test()", 1);
	(void) sprintf(nbuf, "fme%d.Ccall", Nfmep->id);
	Nfmep->Ccallcount =
	    stats_new_counter(nbuf, "calls to causes_test()", 1);
	(void) sprintf(nbuf, "fme%d.Ecall", Nfmep->id);
	Nfmep->Ecallcount =
	    stats_new_counter(nbuf, "calls to effects_test()", 1);
	(void) sprintf(nbuf, "fme%d.Tcall", Nfmep->id);
	Nfmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1);
	(void) sprintf(nbuf, "fme%d.Marrow", Nfmep->id);
	Nfmep->Marrowcount = stats_new_counter(nbuf,
	    "arrows marked by mark_arrows()", 1);
	(void) sprintf(nbuf, "fme%d.diags", Nfmep->id);
	Nfmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0);

	Nfmep->peek = 1;
	lut_walk(Nfmep->eventtree, (lut_cb)unset_needed_arrows, (void *)Nfmep);
	lut_free(Usednames, NULL, NULL);
	Usednames = NULL;
	lut_walk(Nfmep->eventtree, (lut_cb)clear_arrows, (void *)Nfmep);
	(void) hypothesise(Nfmep, Nfmep->e0, Nfmep->ull, &my_delay);
	itree_prune(Nfmep->eventtree);
	lut_walk(Nfmep->eventtree, (lut_cb)set_needed_arrows, (void *)Nfmep);

	/* tear the throwaway FME back down */
	stats_delete(Nfmep->Rcount);
	stats_delete(Nfmep->Hcallcount);
	stats_delete(Nfmep->Rcallcount);
	stats_delete(Nfmep->Ccallcount);
	stats_delete(Nfmep->Ecallcount);
	stats_delete(Nfmep->Tcallcount);
	stats_delete(Nfmep->Marrowcount);
	stats_delete(Nfmep->diags);
	itree_free(Nfmep->eventtree);
	lut_free(Nfmep->globals, globals_destructor, NULL);
	FREE(Nfmep);
}

/*
 * newfme -- create a new FME for the initial event e0class@e0ipp.
 * Takes a configuration snapshot, builds the instance tree and looks
 * up e0 in it.  Returns the ready FME, or NULL with Undiag_reason set
 * on failure.
 */
static struct fme *
newfme(const char *e0class, const struct ipath *e0ipp, fmd_hdl_t *hdl,
    fmd_case_t *fmcase)
{
	struct cfgdata *cfgdata;
	int init_size;
	extern int alloc_total();

	init_size = alloc_total();
	out(O_ALTFP|O_STAMP, "start config_snapshot using %d bytes", init_size);
	if ((cfgdata = config_snapshot()) == NULL) {
		out(O_ALTFP, "newfme: NULL configuration");
		Undiag_reason = UD_NOCONF;
		return (NULL);
	}
	platform_save_config(hdl, fmcase);
	out(O_ALTFP|O_STAMP, "config_snapshot added %d bytes",
	    alloc_total() - init_size);

	Nfmep = alloc_fme();

	Nfmep->id = Nextid++;
	Nfmep->config = cfgdata->cooked;
	config_free(cfgdata);
	Nfmep->posted_suspects = 0;
	Nfmep->uniqobs = 0;
	Nfmep->state = FME_NOTHING;
	Nfmep->pull = 0ULL;
	Nfmep->overflow = 0;

	Nfmep->fmcase = fmcase;
	Nfmep->hdl = hdl;

	if ((Nfmep->eventtree = itree_create(Nfmep->config)) == NULL) {
		out(O_ALTFP, "newfme: NULL instance tree");
		Undiag_reason = UD_INSTFAIL;
		structconfig_free(Nfmep->config);
		destroy_fme_bufs(Nfmep);
		FREE(Nfmep);
		Nfmep = NULL;
		return (NULL);
	}

	itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree);

	if ((Nfmep->e0 =
	    itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) {
		out(O_ALTFP, "newfme: e0 not in instance tree");
		Undiag_reason = UD_BADEVENTI;
		itree_free(Nfmep->eventtree);
		structconfig_free(Nfmep->config);
		destroy_fme_bufs(Nfmep);
		FREE(Nfmep);
		Nfmep = NULL;
		return (NULL);
	}

	return (fme_ready(Nfmep));
}

/*
 * fme_fini -- module teardown: release the undiagnosable-case list,
 * all closed FMEs, all live FMEs, and any FME still under
 * construction.
 */
void
fme_fini(void)
{
	struct fme *sfp, *fp;
	struct case_list *ucasep, *nextcasep;

	ucasep = Undiagablecaselist;
	while (ucasep != NULL) {
		nextcasep = ucasep->next;
		FREE(ucasep);
		ucasep = nextcasep;
	}
	Undiagablecaselist = NULL;

	/* clean up closed fmes */
	fp = ClosedFMEs;
	while (fp != NULL) {
		sfp = fp->next;
		destroy_fme(fp);
		fp = sfp;
	}
	ClosedFMEs = NULL;

	fp = FMElist;
	while (fp != NULL) {
		sfp = fp->next;
		destroy_fme(fp);
		fp = sfp;
	}
	FMElist = EFMElist = NULL;

	/* if we were in the middle of creating an fme, free it now */
	if (Nfmep) {
		destroy_fme(Nfmep);
		Nfmep = NULL;
	}
}

/*
 * Allocated space for a buffer name.  20 bytes allows for
 * a ridiculous 9,999,999 unique observations.
 */
#define	OBBUFNMSZ 20

/*
 * serialize_observation
 *
 * Create a recoverable version of the current observation
 * (f->ecurrent).
 * We keep a serialized version of each unique
 * observation in order that we may resume correctly the fme in the
 * correct state if eft or fmd crashes and we're restarted.
 */
static void
serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp)
{
	size_t pkdlen;
	char tmpbuf[OBBUFNMSZ];
	char *pkd = NULL;
	char *estr;

	/* "observed<n>" buffer holds the class@path string of the event */
	(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs);
	estr = ipath2str(cls, ipp);
	fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1);
	fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr,
	    strlen(estr) + 1);
	FREE(estr);

	/* "observed<n>.nvp" holds the XDR-packed ereport nvlist, if any */
	if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) {
		(void) snprintf(tmpbuf,
		    OBBUFNMSZ, "observed%d.nvp", fp->uniqobs);
		if (nvlist_xpack(fp->ecurrent->nvp,
		    &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0)
			out(O_DIE|O_SYS, "pack of observed nvl failed");
		fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen);
		fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen);
		FREE(pkd);
	}

	/* persist the new observation count */
	fp->uniqobs++;
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
	    sizeof (fp->uniqobs));
}

/*
 * init_fme_bufs -- We keep several bits of state about an fme for
 *	use if eft or fmd crashes and we're restarted.
 */
static void
init_fme_bufs(struct fme *fp)
{
	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull));
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull,
	    sizeof (fp->pull));

	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id));
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id,
	    sizeof (fp->id));

	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs));
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs,
	    sizeof (fp->uniqobs));

	fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD,
	    sizeof (fp->posted_suspects));
	fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD,
	    (void *)&fp->posted_suspects, sizeof (fp->posted_suspects));
}

/*
 * destroy_fme_bufs -- remove all persistent case buffers created by
 * init_fme_bufs()/serialize_observation(), and restore the platform
 * configuration saved with the case.
 */
static void
destroy_fme_bufs(struct fme *fp)
{
	char tmpbuf[OBBUFNMSZ];
	int o;

	platform_restore_config(fp->hdl, fp->fmcase);
	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN);
	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG);
	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL);
	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID);
	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD);
	fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS);

	for (o = 0; o < fp->uniqobs; o++) {
		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o);
		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
		(void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o);
		fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf);
	}
}

/*
 * reconstitute_observations -- convert a case's serialized observations
 *	back into struct events.  Returns zero if all observations are
 *	successfully reconstituted.
520 */ 521 static int 522 reconstitute_observations(struct fme *fmep) 523 { 524 struct event *ep; 525 struct node *epnamenp = NULL; 526 size_t pkdlen; 527 char *pkd = NULL; 528 char *tmpbuf = alloca(OBBUFNMSZ); 529 char *sepptr; 530 char *estr; 531 int ocnt; 532 int elen; 533 534 for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) { 535 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt); 536 elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf); 537 if (elen == 0) { 538 out(O_ALTFP, 539 "reconstitute_observation: no %s buffer found.", 540 tmpbuf); 541 Undiag_reason = UD_MISSINGOBS; 542 break; 543 } 544 545 estr = MALLOC(elen); 546 fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen); 547 sepptr = strchr(estr, '@'); 548 if (sepptr == NULL) { 549 out(O_ALTFP, 550 "reconstitute_observation: %s: " 551 "missing @ separator in %s.", 552 tmpbuf, estr); 553 Undiag_reason = UD_MISSINGPATH; 554 FREE(estr); 555 break; 556 } 557 558 *sepptr = '\0'; 559 if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) { 560 out(O_ALTFP, 561 "reconstitute_observation: %s: " 562 "trouble converting path string \"%s\" " 563 "to internal representation.", 564 tmpbuf, sepptr + 1); 565 Undiag_reason = UD_MISSINGPATH; 566 FREE(estr); 567 break; 568 } 569 570 /* construct the event */ 571 ep = itree_lookup(fmep->eventtree, 572 stable(estr), ipath(epnamenp)); 573 if (ep == NULL) { 574 out(O_ALTFP, 575 "reconstitute_observation: %s: " 576 "lookup of \"%s\" in itree failed.", 577 tmpbuf, ipath2str(estr, ipath(epnamenp))); 578 Undiag_reason = UD_BADOBS; 579 tree_free(epnamenp); 580 FREE(estr); 581 break; 582 } 583 tree_free(epnamenp); 584 585 /* 586 * We may or may not have a saved nvlist for the observation 587 */ 588 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt); 589 pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf); 590 if (pkdlen != 0) { 591 pkd = MALLOC(pkdlen); 592 fmd_buf_read(fmep->hdl, 593 fmep->fmcase, tmpbuf, pkd, pkdlen); 594 ASSERT(ep->nvp == NULL); 595 if 
(nvlist_xunpack(pkd, 596 pkdlen, &ep->nvp, &Eft_nv_hdl) != 0) 597 out(O_DIE|O_SYS, "pack of observed nvl failed"); 598 FREE(pkd); 599 } 600 601 if (ocnt == 0) 602 fmep->e0 = ep; 603 604 FREE(estr); 605 fmep->ecurrent = ep; 606 ep->count++; 607 608 /* link it into list of observations seen */ 609 ep->observations = fmep->observations; 610 fmep->observations = ep; 611 } 612 613 if (ocnt == fmep->uniqobs) { 614 (void) fme_ready(fmep); 615 return (0); 616 } 617 618 return (1); 619 } 620 621 /* 622 * restart_fme -- called during eft initialization. Reconstitutes 623 * an in-progress fme. 624 */ 625 void 626 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress) 627 { 628 nvlist_t *defect; 629 struct case_list *bad; 630 struct fme *fmep; 631 struct cfgdata *cfgdata; 632 size_t rawsz; 633 struct event *ep; 634 char *tmpbuf = alloca(OBBUFNMSZ); 635 char *sepptr; 636 char *estr; 637 int elen; 638 struct node *epnamenp = NULL; 639 int init_size; 640 extern int alloc_total(); 641 642 /* 643 * ignore solved or closed cases 644 */ 645 if (fmd_case_solved(hdl, inprogress) || 646 fmd_case_closed(hdl, inprogress)) 647 return; 648 649 fmep = alloc_fme(); 650 fmep->fmcase = inprogress; 651 fmep->hdl = hdl; 652 653 if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) { 654 out(O_ALTFP, "restart_fme: no saved posted status"); 655 Undiag_reason = UD_MISSINGINFO; 656 goto badcase; 657 } else { 658 fmd_buf_read(hdl, inprogress, WOBUF_POSTD, 659 (void *)&fmep->posted_suspects, 660 sizeof (fmep->posted_suspects)); 661 } 662 663 if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) { 664 out(O_ALTFP, "restart_fme: no saved id"); 665 Undiag_reason = UD_MISSINGINFO; 666 goto badcase; 667 } else { 668 fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id, 669 sizeof (fmep->id)); 670 } 671 if (Nextid <= fmep->id) 672 Nextid = fmep->id + 1; 673 674 out(O_ALTFP, "Replay FME %d", fmep->id); 675 676 if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) { 677 out(O_ALTFP, "restart_fme: No 
config data"); 678 Undiag_reason = UD_MISSINGINFO; 679 goto badcase; 680 } 681 fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz, 682 sizeof (size_t)); 683 684 if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) { 685 out(O_ALTFP, "restart_fme: No event zero"); 686 Undiag_reason = UD_MISSINGZERO; 687 goto badcase; 688 } 689 690 if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) { 691 out(O_ALTFP, "restart_fme: no saved wait time"); 692 Undiag_reason = UD_MISSINGINFO; 693 goto badcase; 694 } else { 695 fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull, 696 sizeof (fmep->pull)); 697 } 698 699 if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) { 700 out(O_ALTFP, "restart_fme: no count of observations"); 701 Undiag_reason = UD_MISSINGINFO; 702 goto badcase; 703 } else { 704 fmd_buf_read(hdl, inprogress, WOBUF_NOBS, 705 (void *)&fmep->uniqobs, sizeof (fmep->uniqobs)); 706 } 707 708 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed0"); 709 elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf); 710 if (elen == 0) { 711 out(O_ALTFP, "reconstitute_observation: no %s buffer found.", 712 tmpbuf); 713 Undiag_reason = UD_MISSINGOBS; 714 goto badcase; 715 } 716 estr = MALLOC(elen); 717 fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen); 718 sepptr = strchr(estr, '@'); 719 if (sepptr == NULL) { 720 out(O_ALTFP, "reconstitute_observation: %s: " 721 "missing @ separator in %s.", 722 tmpbuf, estr); 723 Undiag_reason = UD_MISSINGPATH; 724 FREE(estr); 725 goto badcase; 726 } 727 *sepptr = '\0'; 728 if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) { 729 out(O_ALTFP, "reconstitute_observation: %s: " 730 "trouble converting path string \"%s\" " 731 "to internal representation.", tmpbuf, sepptr + 1); 732 Undiag_reason = UD_MISSINGPATH; 733 FREE(estr); 734 goto badcase; 735 } 736 prune_propagations(stable(estr), ipath(epnamenp)); 737 tree_free(epnamenp); 738 FREE(estr); 739 740 init_size = alloc_total(); 741 out(O_ALTFP|O_STAMP, "start 
config_restore using %d bytes", init_size); 742 cfgdata = MALLOC(sizeof (struct cfgdata)); 743 cfgdata->cooked = NULL; 744 cfgdata->devcache = NULL; 745 cfgdata->cpucache = NULL; 746 cfgdata->raw_refcnt = 1; 747 748 if (rawsz > 0) { 749 if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) { 750 out(O_ALTFP, "restart_fme: Config data size mismatch"); 751 Undiag_reason = UD_CFGMISMATCH; 752 goto badcase; 753 } 754 cfgdata->begin = MALLOC(rawsz); 755 cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz; 756 fmd_buf_read(hdl, 757 inprogress, WOBUF_CFG, cfgdata->begin, rawsz); 758 } else { 759 cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL; 760 } 761 762 config_cook(cfgdata); 763 fmep->config = cfgdata->cooked; 764 config_free(cfgdata); 765 out(O_ALTFP|O_STAMP, "config_restore added %d bytes", 766 alloc_total() - init_size); 767 768 if ((fmep->eventtree = itree_create(fmep->config)) == NULL) { 769 /* case not properly saved or irretrievable */ 770 out(O_ALTFP, "restart_fme: NULL instance tree"); 771 Undiag_reason = UD_INSTFAIL; 772 goto badcase; 773 } 774 775 itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree); 776 777 if (reconstitute_observations(fmep) != 0) 778 goto badcase; 779 780 out(O_ALTFP|O_NONL, "FME %d replay observations: ", fmep->id); 781 for (ep = fmep->observations; ep; ep = ep->observations) { 782 out(O_ALTFP|O_NONL, " "); 783 itree_pevent_brief(O_ALTFP|O_NONL, ep); 784 } 785 out(O_ALTFP, NULL); 786 787 Open_fme_count++; 788 789 /* give the diagnosis algorithm a shot at the new FME state */ 790 fme_eval(fmep, fmep->e0r); 791 return; 792 793 badcase: 794 if (fmep->eventtree != NULL) 795 itree_free(fmep->eventtree); 796 if (fmep->config) 797 structconfig_free(fmep->config); 798 destroy_fme_bufs(fmep); 799 FREE(fmep); 800 801 /* 802 * Since we're unable to restart the case, add it to the undiagable 803 * list and solve and close it as appropriate. 
804 */ 805 bad = MALLOC(sizeof (struct case_list)); 806 bad->next = NULL; 807 808 if (Undiagablecaselist != NULL) 809 bad->next = Undiagablecaselist; 810 Undiagablecaselist = bad; 811 bad->fmcase = inprogress; 812 813 out(O_ALTFP|O_NONL, "[case %s (unable to restart), ", 814 fmd_case_uuid(hdl, bad->fmcase)); 815 816 if (fmd_case_solved(hdl, bad->fmcase)) { 817 out(O_ALTFP|O_NONL, "already solved, "); 818 } else { 819 out(O_ALTFP|O_NONL, "solving, "); 820 defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100, 821 NULL, NULL, NULL); 822 if (Undiag_reason != NULL) 823 (void) nvlist_add_string(defect, 824 UNDIAG_REASON, Undiag_reason); 825 fmd_case_add_suspect(hdl, bad->fmcase, defect); 826 fmd_case_solve(hdl, bad->fmcase); 827 } 828 829 if (fmd_case_closed(hdl, bad->fmcase)) { 830 out(O_ALTFP, "already closed ]"); 831 } else { 832 out(O_ALTFP, "closing ]"); 833 fmd_case_close(hdl, bad->fmcase); 834 } 835 } 836 837 /*ARGSUSED*/ 838 static void 839 globals_destructor(void *left, void *right, void *arg) 840 { 841 struct evalue *evp = (struct evalue *)right; 842 if (evp->t == NODEPTR) 843 tree_free((struct node *)(uintptr_t)evp->v); 844 evp->v = NULL; 845 FREE(evp); 846 } 847 848 void 849 destroy_fme(struct fme *f) 850 { 851 stats_delete(f->Rcount); 852 stats_delete(f->Hcallcount); 853 stats_delete(f->Rcallcount); 854 stats_delete(f->Ccallcount); 855 stats_delete(f->Ecallcount); 856 stats_delete(f->Tcallcount); 857 stats_delete(f->Marrowcount); 858 stats_delete(f->diags); 859 860 if (f->eventtree != NULL) 861 itree_free(f->eventtree); 862 if (f->config) 863 structconfig_free(f->config); 864 lut_free(f->globals, globals_destructor, NULL); 865 FREE(f); 866 } 867 868 static const char * 869 fme_state2str(enum fme_state s) 870 { 871 switch (s) { 872 case FME_NOTHING: return ("NOTHING"); 873 case FME_WAIT: return ("WAIT"); 874 case FME_CREDIBLE: return ("CREDIBLE"); 875 case FME_DISPROVED: return ("DISPROVED"); 876 case FME_DEFERRED: return ("DEFERRED"); 877 default: 
return ("UNKNOWN"); 878 } 879 } 880 881 static int 882 is_problem(enum nametype t) 883 { 884 return (t == N_FAULT || t == N_DEFECT || t == N_UPSET); 885 } 886 887 static int 888 is_fault(enum nametype t) 889 { 890 return (t == N_FAULT); 891 } 892 893 static int 894 is_defect(enum nametype t) 895 { 896 return (t == N_DEFECT); 897 } 898 899 static int 900 is_upset(enum nametype t) 901 { 902 return (t == N_UPSET); 903 } 904 905 static void 906 fme_print(int flags, struct fme *fmep) 907 { 908 struct event *ep; 909 910 out(flags, "Fault Management Exercise %d", fmep->id); 911 out(flags, "\t State: %s", fme_state2str(fmep->state)); 912 out(flags|O_NONL, "\t Start time: "); 913 ptree_timeval(flags|O_NONL, &fmep->ull); 914 out(flags, NULL); 915 if (fmep->wull) { 916 out(flags|O_NONL, "\t Wait time: "); 917 ptree_timeval(flags|O_NONL, &fmep->wull); 918 out(flags, NULL); 919 } 920 out(flags|O_NONL, "\t E0: "); 921 if (fmep->e0) 922 itree_pevent_brief(flags|O_NONL, fmep->e0); 923 else 924 out(flags|O_NONL, "NULL"); 925 out(flags, NULL); 926 out(flags|O_NONL, "\tObservations:"); 927 for (ep = fmep->observations; ep; ep = ep->observations) { 928 out(flags|O_NONL, " "); 929 itree_pevent_brief(flags|O_NONL, ep); 930 } 931 out(flags, NULL); 932 out(flags|O_NONL, "\tSuspect list:"); 933 for (ep = fmep->suspects; ep; ep = ep->suspects) { 934 out(flags|O_NONL, " "); 935 itree_pevent_brief(flags|O_NONL, ep); 936 } 937 out(flags, NULL); 938 if (fmep->eventtree != NULL) { 939 out(flags|O_VERB2, "\t Tree:"); 940 itree_ptree(flags|O_VERB2, fmep->eventtree); 941 } 942 } 943 944 static struct node * 945 pathstring2epnamenp(char *path) 946 { 947 char *sep = "/"; 948 struct node *ret; 949 char *ptr; 950 951 if ((ptr = strtok(path, sep)) == NULL) 952 out(O_DIE, "pathstring2epnamenp: invalid empty class"); 953 954 ret = tree_iname(stable(ptr), NULL, 0); 955 956 while ((ptr = strtok(NULL, sep)) != NULL) 957 ret = tree_name_append(ret, 958 tree_iname(stable(ptr), NULL, 0)); 959 960 return (ret); 
961 } 962 963 /* 964 * for a given upset sp, increment the corresponding SERD engine. if the 965 * SERD engine trips, return the ename and ipp of the resulting ereport. 966 * returns true if engine tripped and *enamep and *ippp were filled in. 967 */ 968 static int 969 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep, 970 fmd_case_t *fmcase, struct event *sp, const char **enamep, 971 const struct ipath **ippp) 972 { 973 struct node *serdinst; 974 char *serdname; 975 struct node *nid; 976 struct serd_entry *newentp; 977 978 ASSERT(sp->t == N_UPSET); 979 ASSERT(ffep != NULL); 980 981 /* 982 * obtain instanced SERD engine from the upset sp. from this 983 * derive serdname, the string used to identify the SERD engine. 984 */ 985 serdinst = eventprop_lookup(sp, L_engine); 986 987 if (serdinst == NULL) 988 return (NULL); 989 990 serdname = ipath2str(serdinst->u.stmt.np->u.event.ename->u.name.s, 991 ipath(serdinst->u.stmt.np->u.event.epname)); 992 993 /* handle serd engine "id" property, if there is one */ 994 if ((nid = 995 lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) { 996 struct evalue *gval; 997 char suffixbuf[200]; 998 char *suffix; 999 char *nserdname; 1000 size_t nname; 1001 1002 out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname); 1003 ptree_name_iter(O_ALTFP|O_NONL, nid); 1004 1005 ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t)); 1006 1007 if ((gval = lut_lookup(fmep->globals, 1008 (void *)nid->u.globid.s, NULL)) == NULL) { 1009 out(O_ALTFP, " undefined"); 1010 } else if (gval->t == UINT64) { 1011 out(O_ALTFP, " %llu", gval->v); 1012 (void) sprintf(suffixbuf, "%llu", gval->v); 1013 suffix = suffixbuf; 1014 } else { 1015 out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v); 1016 suffix = (char *)(uintptr_t)gval->v; 1017 } 1018 1019 nname = strlen(serdname) + strlen(suffix) + 2; 1020 nserdname = MALLOC(nname); 1021 (void) snprintf(nserdname, nname, "%s:%s", serdname, suffix); 1022 FREE(serdname); 1023 serdname = nserdname; 
    }

    if (!fmd_serd_exists(hdl, serdname)) {
        struct node *nN, *nT;
        const char *s;
        struct node *nodep;
        struct config *cp;
        char *path;
        uint_t nval;
        hrtime_t tval;
        const char *name;
        char *serd_name;
        int i;
        char *ptr;
        int got_n_override = 0, got_t_override = 0;

        /* no SERD engine yet, so create it */
        nodep = serdinst->u.stmt.np->u.event.epname;
        name = serdinst->u.stmt.np->u.event.ename->u.name.s;
        path = ipath2str(NULL, ipath(nodep));
        cp = config_lookup(fmep->config, path, 0);
        FREE((void *)path);

        /*
         * We allow serd parameters to be overridden, either from
         * eft.conf file values (if Serd_Override is set) or from
         * driver properties (for "serd.io.device" engines).
         */
        if (Serd_Override != NULL) {
            char *save_ptr, *ptr1, *ptr2, *ptr3;

            /*
             * Serd_Override is a space-separated list of
             * "name,N,T" triplets; scan for an entry whose
             * engine name matches ours.
             *
             * NOTE(review): the strchr() results (ptr1, ptr2)
             * are dereferenced without NULL checks, so a
             * malformed override string would crash --
             * presumably the eft.conf parser guarantees the
             * format; confirm.
             */
            ptr3 = save_ptr = STRDUP(Serd_Override);
            while (*ptr3 != '\0') {
                ptr1 = strchr(ptr3, ',');
                *ptr1 = '\0';
                if (strcmp(ptr3, name) == 0) {
                    /* matched; pull out the N and T values */
                    ptr2 = strchr(ptr1 + 1, ',');
                    *ptr2 = '\0';
                    nval = atoi(ptr1 + 1);
                    out(O_ALTFP, "serd override %s_n %d",
                        name, nval);
                    ptr3 = strchr(ptr2 + 1, ' ');
                    if (ptr3)
                        *ptr3 = '\0';
                    ptr = STRDUP(ptr2 + 1);
                    out(O_ALTFP, "serd override %s_t %s",
                        name, ptr);
                    got_n_override = 1;
                    got_t_override = 1;
                    break;
                } else {
                    /* no match; advance to the next triplet */
                    ptr2 = strchr(ptr1 + 1, ',');
                    ptr3 = strchr(ptr2 + 1, ' ');
                    if (ptr3 == NULL)
                        break;
                }
                ptr3++;
            }
            FREE(save_ptr);
        }

        if (cp && got_n_override == 0) {
            /*
             * convert serd engine name into property name
             * ('.' becomes '_', then "_n" / "_t" is appended)
             */
            serd_name = MALLOC(strlen(name) + 3);
            for (i = 0; i < strlen(name); i++) {
                if (name[i] == '.')
                    serd_name[i] = '_';
                else
                    serd_name[i] = name[i];
            }
            serd_name[i++] = '_';
            serd_name[i++] = 'n';
            serd_name[i] = '\0';
            if (s = config_getprop(cp, serd_name)) {
                nval = atoi(s);
                out(O_ALTFP, "serd override %s_n %s", name, s);
                got_n_override = 1;
            }
            serd_name[i - 1] = 't';
            if (s = config_getprop(cp, serd_name)) {
                ptr = STRDUP(s);
                out(O_ALTFP, "serd override %s_t %s", name, s);
                got_t_override = 1;
            }
            FREE(serd_name);
        }

        /* no override: use the N and T values from the eversholt rules */
        if (!got_n_override) {
            nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N,
                NULL);
            ASSERT(nN->t == T_NUM);
            nval = (uint_t)nN->u.ull;
        }
        if (!got_t_override) {
            nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T,
                NULL);
            ASSERT(nT->t == T_TIMEVAL);
            tval = (hrtime_t)nT->u.ull;
        } else {
            /*
             * Parse an overridden T value of the form
             * "<digits><suffix>", using Timesuffixlut to map the
             * time suffix to a multiplier (default 1 if unknown).
             */
            const unsigned long long *ullp;
            const char *suffix;
            int len;

            len = strspn(ptr, "0123456789");
            suffix = stable(&ptr[len]);
            ullp = (unsigned long long *)lut_lookup(Timesuffixlut,
                (void *)suffix, NULL);
            ptr[len] = '\0';
            tval = (unsigned long long)strtoul(ptr, NULL, 0) *
                (ullp ? *ullp : 1ll);
            FREE(ptr);
        }
        fmd_serd_create(hdl, serdname, nval, tval);
    }

    /* remember this engine in SerdEngines so its state can be saved */
    newentp = MALLOC(sizeof (*newentp));
    newentp->ename = stable(serdinst->u.stmt.np->u.event.ename->u.name.s);
    newentp->ipath = ipath(serdinst->u.stmt.np->u.event.epname);
    newentp->hdl = hdl;
    if (lut_lookup(SerdEngines, newentp, (lut_cmp)serd_cmp) == NULL) {
        SerdEngines = lut_add(SerdEngines, (void *)newentp,
            (void *)newentp, (lut_cmp)serd_cmp);
        Serd_need_save = 1;
        serd_save();
    } else {
        FREE(newentp);
    }


    /*
     * increment SERD engine.  if engine fires, reset serd
     * engine and return trip_strcode
     */
    if (fmd_serd_record(hdl, serdname, ffep)) {
        struct node *tripinst = lut_lookup(serdinst->u.stmt.lutp,
            (void *)L_trip, NULL);

        ASSERT(tripinst != NULL);

        /* hand the trip event's name and path back to the caller */
        *enamep = tripinst->u.event.ename->u.name.s;
        *ippp = ipath(tripinst->u.event.epname);

        fmd_case_add_serd(hdl, fmcase, serdname);
        fmd_serd_reset(hdl, serdname);
        out(O_ALTFP|O_NONL, "[engine fired: %s, sending: ", serdname);
        ipath_print(O_ALTFP|O_NONL, *enamep, *ippp);
        out(O_ALTFP, "]");

        FREE(serdname);
        return (1);
    }

    FREE(serdname);
    return (0);
}

/*
 * search a suspect list for upsets. feed each upset to serd_eval() and
 * build up tripped[], an array of ereports produced by the firing of
 * any SERD engines. then feed each ereport back into
 * fme_receive_report().
 *
 * returns ntrip, the number of these ereports produced.
 */
static int
upsets_eval(struct fme *fmep, fmd_event_t *ffep)
{
    /* we build an array of tripped ereports that we send ourselves */
    struct {
        const char *ename;
        const struct ipath *ipp;
    } *tripped;
    struct event *sp;
    int ntrip, nupset, i;

    /*
     * count the number of upsets to determine the upper limit on
     * expected trip ereport strings. remember that one upset can
     * lead to at most one ereport.
     */
    nupset = 0;
    for (sp = fmep->suspects; sp; sp = sp->suspects) {
        if (sp->t == N_UPSET)
            nupset++;
    }

    if (nupset == 0)
        return (0);

    /*
     * get to this point if we have upsets and expect some trip
     * ereports
     */
    tripped = alloca(sizeof (*tripped) * nupset);
    bzero((void *)tripped, sizeof (*tripped) * nupset);

    /* evaluate each upset's SERD engine, recording any that fire */
    ntrip = 0;
    for (sp = fmep->suspects; sp; sp = sp->suspects)
        if (sp->t == N_UPSET &&
            serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp,
            &tripped[ntrip].ename, &tripped[ntrip].ipp))
            ntrip++;

    for (i = 0; i < ntrip; i++) {
        struct event *ep, *nep;
        struct fme *nfmep;
        fmd_case_t *fmcase;
        const struct ipath *ipp;
        const char *eventstring;
        int prev_verbose;
        unsigned long long my_delay = TIMEVAL_EVENTUALLY;
        enum fme_state state;

        /*
         * First try and evaluate a case with the trip ereport plus
         * all the other ereports that cause the trip. If that fails
         * to evaluate then try again with just this ereport on its own.
         */
        out(O_ALTFP|O_NONL, "fme_receive_report_serd: ");
        ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
        out(O_ALTFP|O_STAMP, NULL);
        ep = fmep->e0;
        eventstring = ep->enode->u.event.ename->u.name.s;
        ipp = ep->ipp;
        prune_propagations(eventstring, ipp);

        /*
         * create a duplicate fme and case
         */
        fmcase = fmd_case_open(fmep->hdl, NULL);
        out(O_ALTFP|O_NONL, "duplicate fme for event [");
        ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
        out(O_ALTFP, " ]");
        if ((nfmep = newfme(eventstring, ipp, fmep->hdl,
            fmcase)) == NULL) {
            out(O_ALTFP|O_NONL, "[");
            ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
            out(O_ALTFP, " CANNOT DIAGNOSE]");
            publish_undiagnosable(fmep->hdl, ffep, fmcase);
            continue;
        }
        Open_fme_count++;
        nfmep->pull = fmep->pull;
        init_fme_bufs(nfmep);
        out(O_ALTFP|O_NONL, "[");
        ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
        out(O_ALTFP, " created FME%d, case %s]", nfmep->id,
            fmd_case_uuid(nfmep->hdl, nfmep->fmcase));
        if (ffep) {
            fmd_case_setprincipal(nfmep->hdl, nfmep->fmcase, ffep);
            nfmep->e0r = ffep;
        }

        /*
         * add the original ereports
         */
        for (ep = fmep->observations; ep; ep = ep->observations) {
            eventstring = ep->enode->u.event.ename->u.name.s;
            ipp = ep->ipp;
            out(O_ALTFP|O_NONL, "adding event [");
            ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
            out(O_ALTFP, " ]");
            nep = itree_lookup(nfmep->eventtree, eventstring, ipp);
            if (nep->count++ == 0) {
                /* first sighting: link into the observation list */
                nep->observations = nfmep->observations;
                nfmep->observations = nep;
                serialize_observation(nfmep, eventstring, ipp);
                nep->nvp = evnv_dupnvl(ep->nvp);
            }
            if (ffep)
                fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase,
                    ffep);
            stats_counter_bump(nfmep->Rcount);
        }

        /*
         * add the serd trigger ereport
         */
        if ((ep = itree_lookup(nfmep->eventtree, tripped[i].ename,
            tripped[i].ipp)) == NULL) {
            /*
             * The trigger ereport is not in the instance tree. It
             * was presumably removed by prune_propagations() as
             * this combination of events is not present in the
             * rules.
             */
            out(O_ALTFP, "upsets_eval: e0 not in instance tree");
            Undiag_reason = UD_BADEVENTI;
            goto retry_lone_ereport;
        }
        out(O_ALTFP|O_NONL, "adding event [");
        ipath_print(O_ALTFP|O_NONL, tripped[i].ename, tripped[i].ipp);
        out(O_ALTFP, " ]");
        nfmep->ecurrent = ep;
        ep->nvp = NULL;
        ep->count = 1;
        ep->observations = nfmep->observations;
        nfmep->observations = ep;

        /*
         * just peek first.
         */
        nfmep->peek = 1;
        prev_verbose = Verbose;
        if (Debug == 0)
            Verbose = 0;
        lut_walk(nfmep->eventtree, (lut_cb)clear_arrows, (void *)nfmep);
        state = hypothesise(nfmep, nfmep->e0, nfmep->ull, &my_delay);
        nfmep->peek = 0;
        Verbose = prev_verbose;
        if (state == FME_DISPROVED) {
            out(O_ALTFP, "upsets_eval: hypothesis disproved");
            Undiag_reason = UD_UNSOLVD;
retry_lone_ereport:
            /*
             * However the trigger ereport on its own might be
             * diagnosable, so check for that. Undo the new fme
             * and case we just created and call fme_receive_report.
             */
            out(O_ALTFP|O_NONL, "[");
            ipath_print(O_ALTFP|O_NONL, tripped[i].ename,
                tripped[i].ipp);
            out(O_ALTFP, " retrying with just trigger ereport]");
            itree_free(nfmep->eventtree);
            nfmep->eventtree = NULL;
            structconfig_free(nfmep->config);
            nfmep->config = NULL;
            destroy_fme_bufs(nfmep);
            fmd_case_close(nfmep->hdl, nfmep->fmcase);
            fme_receive_report(fmep->hdl, ffep,
                tripped[i].ename, tripped[i].ipp, NULL);
            continue;
        }

        /*
         * and evaluate
         */
        serialize_observation(nfmep, tripped[i].ename, tripped[i].ipp);
        if (ffep)
            fmd_case_add_ereport(nfmep->hdl, nfmep->fmcase, ffep);
        stats_counter_bump(nfmep->Rcount);
        fme_eval(nfmep, ffep);
    }

    return (ntrip);
}

/*
 * fme_receive_external_report -- call when an external ereport comes in
 *
 * this routine just converts the relevant information from the ereport
 * into a format used internally and passes it on to fme_receive_report().
 */
void
fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
    const char *eventstring)
{
    struct node *epnamenp = platform_getpath(nvl);
    const struct ipath *ipp;

    /*
     * XFILE: If we ended up without a path, it's an X-file.
     * For now, use our undiagnosable interface.
     */
    if (epnamenp == NULL) {
        fmd_case_t *fmcase;

        out(O_ALTFP, "XFILE: Unable to get path from ereport");
        Undiag_reason = UD_NOPATH;
        fmcase = fmd_case_open(hdl, NULL);
        publish_undiagnosable(hdl, ffep, fmcase);
        return;
    }

    ipp = ipath(epnamenp);
    tree_free(epnamenp);
    fme_receive_report(hdl, ffep, stable(eventstring), ipp, nvl);
}

/*
 * fme_receive_repair_list -- process a list.repaired event
 *
 * for every fault in the event's suspect list, reset the istat
 * counters and SERD engines associated with the repaired component's
 * path, then persist the new state.
 */
/*ARGSUSED*/
void
fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl,
    const char *eventstring)
{
    char *uuid;
    nvlist_t **nva;
    uint_t nvc;
    const struct ipath *ipp;

    if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 ||
        nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
        &nva, &nvc) != 0) {
        out(O_ALTFP, "No uuid or fault list for list.repaired event");
        return;
    }

    out(O_ALTFP, "Processing list.repaired from case %s", uuid);

    while (nvc-- != 0) {
        /*
         * Reset any istat or serd engine associated with this path.
         */
        char *path;

        if ((ipp = platform_fault2ipath(*nva++)) == NULL)
            continue;

        path = ipath2str(NULL, ipp);
        out(O_ALTFP, "fme_receive_repair_list: resetting state for %s",
            path);
        FREE(path);

        lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp);
        istat_save();

        lut_walk(SerdEngines, (lut_cb)serd_reset_cb, (void *)ipp);
        serd_save();
    }
}

/*
 * fme_receive_topology_change -- revalidate istats and SERD engines
 * against the new topology and persist the result.
 */
/*ARGSUSED*/
void
fme_receive_topology_change(void)
{
    lut_walk(Istats, (lut_cb)istat_counter_topo_chg_cb, NULL);
    istat_save();

    lut_walk(SerdEngines, (lut_cb)serd_topo_chg_cb, NULL);
    serd_save();
}

static int mark_arrows(struct fme *fmep, struct event *ep, int mark,
    unsigned long long at_latest_by, unsigned long long *pdelay, int keep);

/*
 * clear_arrows -- lut_walk callback that clears the cached hypothesise()
 * state on an event and the mark on every arrow in its FROM bubbles.
 */
/* ARGSUSED */
static void
clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep)
{
    struct bubble *bp;
    struct arrowlist *ap;

    ep->cached_state = 0;
    ep->keep_in_tree = 0;
    for (bp = itree_next_bubble(ep, NULL); bp;
        bp = itree_next_bubble(ep, bp)) {
        if (bp->t != B_FROM)
            continue;
        bp->mark = 0;
        for (ap = itree_next_arrow(bp, NULL); ap;
            ap = itree_next_arrow(bp, ap))
            ap->arrowp->mark = 0;
    }
}

/*
 * fme_receive_report -- route an incoming ereport to an FME
 *
 * each open FME is "peeked" at (hypothesise() with fmep->peek set, so the
 * suspect list is untouched) to see whether it can explain the ereport.
 * the observation is noted before the peek and undone if the FME is
 * disproved.  if no existing FME explains it, a new FME and fmd case are
 * created for it, subject to the Max_fme limit -- once the limit is hit,
 * further ereports are parked on a single "overflow" FME whose case is
 * solved as an undiagnosable defect.
 */
static void
fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep,
    const char *eventstring, const struct ipath *ipp, nvlist_t *nvl)
{
    struct event *ep;
    struct fme *fmep = NULL;
    struct fme *ofmep = NULL;
    struct fme *cfmep, *svfmep;
    int matched = 0;
    nvlist_t *defect;
    fmd_case_t *fmcase;

    out(O_ALTFP|O_NONL, "fme_receive_report: ");
    ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
    out(O_ALTFP|O_STAMP, NULL);

    /* decide which FME it goes to */
    for (fmep = FMElist; fmep; fmep = fmep->next) {
        int prev_verbose;
        unsigned long long my_delay = TIMEVAL_EVENTUALLY;
        enum fme_state state;
        nvlist_t *pre_peek_nvp = NULL;

        if (fmep->overflow) {
            /* remember the open overflow FME for later */
            if (!(fmd_case_closed(fmep->hdl, fmep->fmcase)))
                ofmep = fmep;

            continue;
        }

        /*
         * ignore solved or closed cases
         */
        if (fmep->posted_suspects ||
            fmd_case_solved(fmep->hdl, fmep->fmcase) ||
            fmd_case_closed(fmep->hdl, fmep->fmcase))
            continue;

        /* look up event in event tree for this FME */
        if ((ep = itree_lookup(fmep->eventtree,
            eventstring, ipp)) == NULL)
            continue;

        /* note observation */
        fmep->ecurrent = ep;
        if (ep->count++ == 0) {
            /* link it into list of observations seen */
            ep->observations = fmep->observations;
            fmep->observations = ep;
            ep->nvp = evnv_dupnvl(nvl);
        } else {
            /* use new payload values for peek */
            pre_peek_nvp = ep->nvp;
            ep->nvp = evnv_dupnvl(nvl);
        }

        /* tell hypothesise() not to mess with suspect list */
        fmep->peek = 1;

        /* don't want this to be verbose (unless Debug is set) */
        prev_verbose = Verbose;
        if (Debug == 0)
            Verbose = 0;

        lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
        state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);

        fmep->peek = 0;

        /* put verbose flag back */
        Verbose = prev_verbose;

        if (state != FME_DISPROVED) {
            /* found an FME that explains the ereport */
            matched++;
            out(O_ALTFP|O_NONL, "[");
            ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
            out(O_ALTFP, " explained by FME%d]", fmep->id);

            /* the peeked payload is now the real one */
            if (pre_peek_nvp)
                nvlist_free(pre_peek_nvp);

            if (ep->count == 1)
                serialize_observation(fmep, eventstring, ipp);

            if (ffep)
                fmd_case_add_ereport(hdl, fmep->fmcase, ffep);

            stats_counter_bump(fmep->Rcount);

            /* re-eval FME */
            fme_eval(fmep, ffep);
        } else {

            /* not a match, undo noting of observation */
            fmep->ecurrent = NULL;
            if (--ep->count == 0) {
                /* unlink it from observations */
                fmep->observations = ep->observations;
                ep->observations = NULL;
                nvlist_free(ep->nvp);
                ep->nvp = NULL;
            } else {
                /* restore the pre-peek payload */
                nvlist_free(ep->nvp);
                ep->nvp = pre_peek_nvp;
            }
        }
    }

    if (matched)
        return;	/* explained by at least one existing FME */

    /* clean up closed fmes */
    cfmep = ClosedFMEs;
    while (cfmep != NULL) {
        svfmep = cfmep->next;
        destroy_fme(cfmep);
        cfmep = svfmep;
    }
    ClosedFMEs = NULL;
    prune_propagations(eventstring, ipp);

    if (ofmep) {
        /* max FME limit previously hit; park this on the overflow FME */
        out(O_ALTFP|O_NONL, "[");
        ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
        out(O_ALTFP, " ADDING TO OVERFLOW FME]");
        if (ffep)
            fmd_case_add_ereport(hdl, ofmep->fmcase, ffep);

        return;

    } else if (Max_fme && (Open_fme_count >= Max_fme)) {
        out(O_ALTFP|O_NONL, "[");
        ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
        out(O_ALTFP, " MAX OPEN FME REACHED]");

        fmcase = fmd_case_open(hdl, NULL);

        /* Create overflow fme */
        if ((fmep = newfme(eventstring, ipp, hdl, fmcase)) == NULL) {
            out(O_ALTFP|O_NONL, "[");
            ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
            out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]");
            publish_undiagnosable(hdl, ffep, fmcase);
            return;
        }

        Open_fme_count++;

        init_fme_bufs(fmep);
        fmep->overflow = B_TRUE;

        if (ffep)
            fmd_case_add_ereport(hdl, fmep->fmcase, ffep);

        /* solve the overflow case immediately as an undiagnosable defect */
        defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
            NULL, NULL, NULL);
        (void) nvlist_add_string(defect, UNDIAG_REASON, UD_MAXFME);
        fmd_case_add_suspect(hdl, fmep->fmcase, defect);
        fmd_case_solve(hdl, fmep->fmcase);
        return;
    }

    /* open a case */
    fmcase = fmd_case_open(hdl, NULL);

    /* start a new FME */
    if ((fmep = newfme(eventstring, ipp, hdl, fmcase)) == NULL) {
        out(O_ALTFP|O_NONL, "[");
        ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
        out(O_ALTFP, " CANNOT DIAGNOSE]");
        publish_undiagnosable(hdl, ffep, fmcase);
        return;
    }

    Open_fme_count++;

    init_fme_bufs(fmep);

    out(O_ALTFP|O_NONL, "[");
    ipath_print(O_ALTFP|O_NONL, eventstring, ipp);
    out(O_ALTFP, " created FME%d, case %s]", fmep->id,
        fmd_case_uuid(hdl, fmep->fmcase));

    ep = fmep->e0;
    ASSERT(ep != NULL);

    /* note observation */
    fmep->ecurrent = ep;
    if (ep->count++ == 0) {
        /* link it into list of observations seen */
        ep->observations = fmep->observations;
        fmep->observations = ep;
        ep->nvp = evnv_dupnvl(nvl);
        serialize_observation(fmep, eventstring, ipp);
    } else {
        /* new payload overrides any previous */
        nvlist_free(ep->nvp);
        ep->nvp = evnv_dupnvl(nvl);
    }

    stats_counter_bump(fmep->Rcount);

    if (ffep) {
        fmd_case_add_ereport(hdl, fmep->fmcase, ffep);
        fmd_case_setprincipal(hdl, fmep->fmcase, ffep);
        fmep->e0r = ffep;
    }

    /* give the diagnosis algorithm a shot at the new FME state */
    fme_eval(fmep, ffep);
}

/*
 * fme_status -- print all FMEs currently underway at the given
 * output level, or a message if there are none.
 */
void
fme_status(int flags)
{
    struct fme *fmep;

    if (FMElist == NULL) {
        out(flags, "No fault management exercises underway.");
        return;
    }

    for (fmep = FMElist; fmep; fmep = fmep->next)
        fme_print(flags, fmep);
}

/*
 * "indent" routines used mostly for nicely formatted debug output, but also
 * for sanity checking for infinite recursion bugs.
1709 */ 1710 1711 #define MAX_INDENT 1024 1712 static const char *indent_s[MAX_INDENT]; 1713 static int current_indent; 1714 1715 static void 1716 indent_push(const char *s) 1717 { 1718 if (current_indent < MAX_INDENT) 1719 indent_s[current_indent++] = s; 1720 else 1721 out(O_DIE, "unexpected recursion depth (%d)", current_indent); 1722 } 1723 1724 static void 1725 indent_set(const char *s) 1726 { 1727 current_indent = 0; 1728 indent_push(s); 1729 } 1730 1731 static void 1732 indent_pop(void) 1733 { 1734 if (current_indent > 0) 1735 current_indent--; 1736 else 1737 out(O_DIE, "recursion underflow"); 1738 } 1739 1740 static void 1741 indent(void) 1742 { 1743 int i; 1744 if (!Verbose) 1745 return; 1746 for (i = 0; i < current_indent; i++) 1747 out(O_ALTFP|O_VERB|O_NONL, indent_s[i]); 1748 } 1749 1750 #define SLNEW 1 1751 #define SLCHANGED 2 1752 #define SLWAIT 3 1753 #define SLDISPROVED 4 1754 1755 static void 1756 print_suspects(int circumstance, struct fme *fmep) 1757 { 1758 struct event *ep; 1759 1760 out(O_ALTFP|O_NONL, "["); 1761 if (circumstance == SLCHANGED) { 1762 out(O_ALTFP|O_NONL, "FME%d diagnosis changed. 
state: %s, " 1763 "suspect list:", fmep->id, fme_state2str(fmep->state)); 1764 } else if (circumstance == SLWAIT) { 1765 out(O_ALTFP|O_NONL, "FME%d set wait timer %ld ", fmep->id, 1766 fmep->timer); 1767 ptree_timeval(O_ALTFP|O_NONL, &fmep->wull); 1768 } else if (circumstance == SLDISPROVED) { 1769 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id); 1770 } else { 1771 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id); 1772 } 1773 1774 if (circumstance == SLWAIT || circumstance == SLDISPROVED) { 1775 out(O_ALTFP, "]"); 1776 return; 1777 } 1778 1779 for (ep = fmep->suspects; ep; ep = ep->suspects) { 1780 out(O_ALTFP|O_NONL, " "); 1781 itree_pevent_brief(O_ALTFP|O_NONL, ep); 1782 } 1783 out(O_ALTFP, "]"); 1784 } 1785 1786 static struct node * 1787 eventprop_lookup(struct event *ep, const char *propname) 1788 { 1789 return (lut_lookup(ep->props, (void *)propname, NULL)); 1790 } 1791 1792 #define MAXDIGITIDX 23 1793 static char numbuf[MAXDIGITIDX + 1]; 1794 1795 static int 1796 node2uint(struct node *n, uint_t *valp) 1797 { 1798 struct evalue value; 1799 struct lut *globals = NULL; 1800 1801 if (n == NULL) 1802 return (1); 1803 1804 /* 1805 * check value.v since we are being asked to convert an unsigned 1806 * long long int to an unsigned int 1807 */ 1808 if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) || 1809 value.t != UINT64 || value.v > (1ULL << 32)) 1810 return (1); 1811 1812 *valp = (uint_t)value.v; 1813 1814 return (0); 1815 } 1816 1817 static nvlist_t * 1818 node2fmri(struct node *n) 1819 { 1820 nvlist_t **pa, *f, *p; 1821 struct node *nc; 1822 uint_t depth = 0; 1823 char *numstr, *nullbyte; 1824 char *failure; 1825 int err, i; 1826 1827 /* XXX do we need to be able to handle a non-T_NAME node? 
*/ 1828 if (n == NULL || n->t != T_NAME) 1829 return (NULL); 1830 1831 for (nc = n; nc != NULL; nc = nc->u.name.next) { 1832 if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM) 1833 break; 1834 depth++; 1835 } 1836 1837 if (nc != NULL) { 1838 /* We bailed early, something went wrong */ 1839 return (NULL); 1840 } 1841 1842 if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0) 1843 out(O_DIE|O_SYS, "alloc of fmri nvl failed"); 1844 pa = alloca(depth * sizeof (nvlist_t *)); 1845 for (i = 0; i < depth; i++) 1846 pa[i] = NULL; 1847 1848 err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC); 1849 err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION); 1850 err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, ""); 1851 err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth); 1852 if (err != 0) { 1853 failure = "basic construction of FMRI failed"; 1854 goto boom; 1855 } 1856 1857 numbuf[MAXDIGITIDX] = '\0'; 1858 nullbyte = &numbuf[MAXDIGITIDX]; 1859 i = 0; 1860 1861 for (nc = n; nc != NULL; nc = nc->u.name.next) { 1862 err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl); 1863 if (err != 0) { 1864 failure = "alloc of an hc-pair failed"; 1865 goto boom; 1866 } 1867 err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s); 1868 numstr = ulltostr(nc->u.name.child->u.ull, nullbyte); 1869 err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr); 1870 if (err != 0) { 1871 failure = "construction of an hc-pair failed"; 1872 goto boom; 1873 } 1874 pa[i++] = p; 1875 } 1876 1877 err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth); 1878 if (err == 0) { 1879 for (i = 0; i < depth; i++) 1880 if (pa[i] != NULL) 1881 nvlist_free(pa[i]); 1882 return (f); 1883 } 1884 failure = "addition of hc-pair array to FMRI failed"; 1885 1886 boom: 1887 for (i = 0; i < depth; i++) 1888 if (pa[i] != NULL) 1889 nvlist_free(pa[i]); 1890 nvlist_free(f); 1891 out(O_DIE, "%s", failure); 1892 /*NOTREACHED*/ 1893 return (NULL); 1894 } 1895 1896 /* an ipath cache entry is an array 
of these, with s==NULL at the end */ 1897 struct ipath { 1898 const char *s; /* component name (in stable) */ 1899 int i; /* instance number */ 1900 }; 1901 1902 static nvlist_t * 1903 ipath2fmri(struct ipath *ipath) 1904 { 1905 nvlist_t **pa, *f, *p; 1906 uint_t depth = 0; 1907 char *numstr, *nullbyte; 1908 char *failure; 1909 int err, i; 1910 struct ipath *ipp; 1911 1912 for (ipp = ipath; ipp->s != NULL; ipp++) 1913 depth++; 1914 1915 if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0) 1916 out(O_DIE|O_SYS, "alloc of fmri nvl failed"); 1917 pa = alloca(depth * sizeof (nvlist_t *)); 1918 for (i = 0; i < depth; i++) 1919 pa[i] = NULL; 1920 1921 err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC); 1922 err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION); 1923 err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, ""); 1924 err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth); 1925 if (err != 0) { 1926 failure = "basic construction of FMRI failed"; 1927 goto boom; 1928 } 1929 1930 numbuf[MAXDIGITIDX] = '\0'; 1931 nullbyte = &numbuf[MAXDIGITIDX]; 1932 i = 0; 1933 1934 for (ipp = ipath; ipp->s != NULL; ipp++) { 1935 err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl); 1936 if (err != 0) { 1937 failure = "alloc of an hc-pair failed"; 1938 goto boom; 1939 } 1940 err = nvlist_add_string(p, FM_FMRI_HC_NAME, ipp->s); 1941 numstr = ulltostr(ipp->i, nullbyte); 1942 err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr); 1943 if (err != 0) { 1944 failure = "construction of an hc-pair failed"; 1945 goto boom; 1946 } 1947 pa[i++] = p; 1948 } 1949 1950 err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth); 1951 if (err == 0) { 1952 for (i = 0; i < depth; i++) 1953 if (pa[i] != NULL) 1954 nvlist_free(pa[i]); 1955 return (f); 1956 } 1957 failure = "addition of hc-pair array to FMRI failed"; 1958 1959 boom: 1960 for (i = 0; i < depth; i++) 1961 if (pa[i] != NULL) 1962 nvlist_free(pa[i]); 1963 nvlist_free(f); 1964 out(O_DIE, "%s", failure); 1965 
/*NOTREACHED*/ 1966 return (NULL); 1967 } 1968 1969 static uint_t 1970 avg(uint_t sum, uint_t cnt) 1971 { 1972 unsigned long long s = sum * 10; 1973 1974 return ((s / cnt / 10) + (((s / cnt % 10) >= 5) ? 1 : 0)); 1975 } 1976 1977 static uint8_t 1978 percentof(uint_t part, uint_t whole) 1979 { 1980 unsigned long long p = part * 1000; 1981 1982 return ((p / whole / 10) + (((p / whole % 10) >= 5) ? 1 : 0)); 1983 } 1984 1985 struct rsl { 1986 struct event *suspect; 1987 nvlist_t *asru; 1988 nvlist_t *fru; 1989 nvlist_t *rsrc; 1990 }; 1991 1992 /* 1993 * rslfree -- free internal members of struct rsl not expected to be 1994 * freed elsewhere. 1995 */ 1996 static void 1997 rslfree(struct rsl *freeme) 1998 { 1999 if (freeme->asru != NULL) 2000 nvlist_free(freeme->asru); 2001 if (freeme->fru != NULL) 2002 nvlist_free(freeme->fru); 2003 if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru) 2004 nvlist_free(freeme->rsrc); 2005 } 2006 2007 /* 2008 * rslcmp -- compare two rsl structures. Use the following 2009 * comparisons to establish cardinality: 2010 * 2011 * 1. Name of the suspect's class. (simple strcmp) 2012 * 2. Name of the suspect's ASRU. (trickier, since nvlist) 2013 * 2014 */ 2015 static int 2016 rslcmp(const void *a, const void *b) 2017 { 2018 struct rsl *r1 = (struct rsl *)a; 2019 struct rsl *r2 = (struct rsl *)b; 2020 int rv; 2021 2022 rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s, 2023 r2->suspect->enode->u.event.ename->u.name.s); 2024 if (rv != 0) 2025 return (rv); 2026 2027 if (r1->asru == NULL && r2->asru == NULL) 2028 return (0); 2029 if (r1->asru == NULL) 2030 return (-1); 2031 if (r2->asru == NULL) 2032 return (1); 2033 return (evnv_cmpnvl(r1->asru, r2->asru, 0)); 2034 } 2035 2036 /* 2037 * rsluniq -- given an array of rsl structures, seek out and "remove" 2038 * any duplicates. Dups are "remove"d by NULLing the suspect pointer 2039 * of the array element. 
Removal also means updating the number of 2040 * problems and the number of problems which are not faults. User 2041 * provides the first and last element pointers. 2042 */ 2043 static void 2044 rsluniq(struct rsl *first, struct rsl *last, int *nprobs, int *nnonf) 2045 { 2046 struct rsl *cr; 2047 2048 if (*nprobs == 1) 2049 return; 2050 2051 /* 2052 * At this point, we only expect duplicate defects. 2053 * Eversholt's diagnosis algorithm prevents duplicate 2054 * suspects, but we rewrite defects in the platform code after 2055 * the diagnosis is made, and that can introduce new 2056 * duplicates. 2057 */ 2058 while (first <= last) { 2059 if (first->suspect == NULL || !is_defect(first->suspect->t)) { 2060 first++; 2061 continue; 2062 } 2063 cr = first + 1; 2064 while (cr <= last) { 2065 if (is_defect(first->suspect->t)) { 2066 if (rslcmp(first, cr) == 0) { 2067 cr->suspect = NULL; 2068 rslfree(cr); 2069 (*nprobs)--; 2070 (*nnonf)--; 2071 } 2072 } 2073 /* 2074 * assume all defects are in order after our 2075 * sort and short circuit here with "else break" ? 2076 */ 2077 cr++; 2078 } 2079 first++; 2080 } 2081 } 2082 2083 /* 2084 * get_resources -- for a given suspect, determine what ASRU, FRU and 2085 * RSRC nvlists should be advertised in the final suspect list. 2086 */ 2087 void 2088 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot) 2089 { 2090 struct node *asrudef, *frudef; 2091 nvlist_t *asru, *fru; 2092 nvlist_t *rsrc = NULL; 2093 char *pathstr; 2094 2095 /* 2096 * First find any ASRU and/or FRU defined in the 2097 * initial fault tree. 
2098 */ 2099 asrudef = eventprop_lookup(sp, L_ASRU); 2100 frudef = eventprop_lookup(sp, L_FRU); 2101 2102 /* 2103 * Create FMRIs based on those definitions 2104 */ 2105 asru = node2fmri(asrudef); 2106 fru = node2fmri(frudef); 2107 pathstr = ipath2str(NULL, sp->ipp); 2108 2109 /* 2110 * Allow for platform translations of the FMRIs 2111 */ 2112 platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc, 2113 pathstr); 2114 2115 FREE(pathstr); 2116 rsrcs->suspect = sp; 2117 rsrcs->asru = asru; 2118 rsrcs->fru = fru; 2119 rsrcs->rsrc = rsrc; 2120 } 2121 2122 /* 2123 * trim_suspects -- prior to publishing, we may need to remove some 2124 * suspects from the list. If we're auto-closing upsets, we don't 2125 * want any of those in the published list. If the ASRUs for multiple 2126 * defects resolve to the same ASRU (driver) we only want to publish 2127 * that as a single suspect. 2128 */ 2129 static void 2130 trim_suspects(struct fme *fmep, boolean_t no_upsets, struct rsl **begin, 2131 struct rsl **end) 2132 { 2133 struct event *ep; 2134 struct rsl *rp; 2135 int rpcnt; 2136 2137 /* 2138 * First save the suspects in the psuspects, then copy back 2139 * only the ones we wish to retain. This resets nsuspects to 2140 * zero. 2141 */ 2142 rpcnt = fmep->nsuspects; 2143 save_suspects(fmep); 2144 2145 /* 2146 * allocate an array of resource pointers for the suspects. 2147 * We may end up using less than the full allocation, but this 2148 * is a very short-lived array. publish_suspects() will free 2149 * this array when it's done using it. 
2150 */ 2151 rp = *begin = MALLOC(rpcnt * sizeof (struct rsl)); 2152 bzero(rp, rpcnt * sizeof (struct rsl)); 2153 2154 /* first pass, remove any unwanted upsets and populate our array */ 2155 for (ep = fmep->psuspects; ep; ep = ep->psuspects) { 2156 if (no_upsets && is_upset(ep->t)) 2157 continue; 2158 get_resources(ep, rp, fmep->config); 2159 rp++; 2160 fmep->nsuspects++; 2161 if (!is_fault(ep->t)) 2162 fmep->nonfault++; 2163 } 2164 2165 /* if all we had was unwanted upsets, we're done */ 2166 if (fmep->nsuspects == 0) 2167 return; 2168 2169 *end = rp - 1; 2170 2171 /* sort the array */ 2172 qsort(*begin, fmep->nsuspects, sizeof (struct rsl), rslcmp); 2173 rsluniq(*begin, *end, &fmep->nsuspects, &fmep->nonfault); 2174 } 2175 2176 /* 2177 * addpayloadprop -- add a payload prop to a problem 2178 */ 2179 static void 2180 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault) 2181 { 2182 ASSERT(fault != NULL); 2183 ASSERT(lhs != NULL); 2184 ASSERT(rhs != NULL); 2185 2186 if (rhs->t == UINT64) { 2187 out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v); 2188 2189 if (nvlist_add_uint64(fault, lhs, rhs->v) != 0) 2190 out(O_DIE, 2191 "cannot add payloadprop \"%s\" to fault", lhs); 2192 } else { 2193 out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"", 2194 lhs, (char *)(uintptr_t)rhs->v); 2195 2196 if (nvlist_add_string(fault, lhs, (char *)(uintptr_t)rhs->v) != 2197 0) 2198 out(O_DIE, 2199 "cannot add payloadprop \"%s\" to fault", lhs); 2200 } 2201 } 2202 2203 static char *Istatbuf; 2204 static char *Istatbufptr; 2205 static int Istatsz; 2206 2207 /* 2208 * istataddsize -- calculate size of istat and add it to Istatsz 2209 */ 2210 /*ARGSUSED2*/ 2211 static void 2212 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg) 2213 { 2214 int val; 2215 2216 ASSERT(lhs != NULL); 2217 ASSERT(rhs != NULL); 2218 2219 if ((val = stats_counter_value(rhs)) == 0) 2220 return; /* skip zero-valued stats */ 2221 2222 /* count up the size of the stat 
name */ 2223 Istatsz += ipath2strlen(lhs->ename, lhs->ipath); 2224 Istatsz++; /* for the trailing NULL byte */ 2225 2226 /* count up the size of the stat value */ 2227 Istatsz += snprintf(NULL, 0, "%d", val); 2228 Istatsz++; /* for the trailing NULL byte */ 2229 } 2230 2231 /* 2232 * istat2str -- serialize an istat, writing result to *Istatbufptr 2233 */ 2234 /*ARGSUSED2*/ 2235 static void 2236 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg) 2237 { 2238 char *str; 2239 int len; 2240 int val; 2241 2242 ASSERT(lhs != NULL); 2243 ASSERT(rhs != NULL); 2244 2245 if ((val = stats_counter_value(rhs)) == 0) 2246 return; /* skip zero-valued stats */ 2247 2248 /* serialize the stat name */ 2249 str = ipath2str(lhs->ename, lhs->ipath); 2250 len = strlen(str); 2251 2252 ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]); 2253 (void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr); 2254 Istatbufptr += len; 2255 FREE(str); 2256 *Istatbufptr++ = '\0'; 2257 2258 /* serialize the stat value */ 2259 Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr, 2260 "%d", val); 2261 *Istatbufptr++ = '\0'; 2262 2263 ASSERT(Istatbufptr <= &Istatbuf[Istatsz]); 2264 } 2265 2266 void 2267 istat_save() 2268 { 2269 if (Istat_need_save == 0) 2270 return; 2271 2272 /* figure out how big the serialzed info is */ 2273 Istatsz = 0; 2274 lut_walk(Istats, (lut_cb)istataddsize, NULL); 2275 2276 if (Istatsz == 0) { 2277 /* no stats to save */ 2278 fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS); 2279 return; 2280 } 2281 2282 /* create the serialized buffer */ 2283 Istatbufptr = Istatbuf = MALLOC(Istatsz); 2284 lut_walk(Istats, (lut_cb)istat2str, NULL); 2285 2286 /* clear out current saved stats */ 2287 fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS); 2288 2289 /* write out the new version */ 2290 fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz); 2291 FREE(Istatbuf); 2292 2293 Istat_need_save = 0; 2294 } 2295 2296 int 2297 istat_cmp(struct istat_entry *ent1, 
    struct istat_entry *ent2)
{
	/* pointer differences give a stable (if arbitrary) total order */
	if (ent1->ename != ent2->ename)
		return (ent2->ename - ent1->ename);
	if (ent1->ipath != ent2->ipath)
		return ((char *)ent2->ipath - (char *)ent1->ipath);

	return (0);
}

/*
 * istat-verify -- verify the component associated with a stat still exists
 *
 * if the component no longer exists, this routine resets the stat and
 * returns 0. if the component still exists, it returns 1.
 */
static int
istat_verify(struct node *snp, struct istat_entry *entp)
{
	struct stats *statp;
	nvlist_t *fmri;

	fmri = node2fmri(snp->u.event.epname);
	if (platform_path_exists(fmri)) {
		nvlist_free(fmri);
		return (1);
	}
	nvlist_free(fmri);

	/* component no longer in system.  zero out the associated stats */
	if ((statp = (struct stats *)
	    lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
	    stats_counter_value(statp) == 0)
		return (0);	/* stat is already reset */

	Istat_need_save = 1;
	stats_counter_reset(statp);
	return (0);
}

/*
 * istat_bump -- increment (or, if n != 0, set to n) the istat counter
 * for the event snp, creating the counter on first use.
 */
static void
istat_bump(struct node *snp, int n)
{
	struct stats *statp;
	struct istat_entry ent;

	ASSERT(snp != NULL);
	ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
	ASSERT(snp->u.event.epname != NULL);

	/* class name should be hoisted into a single stable entry */
	ASSERT(snp->u.event.ename->u.name.next == NULL);
	ent.ename = snp->u.event.ename->u.name.s;
	ent.ipath = ipath(snp->u.event.epname);

	if (!istat_verify(snp, &ent)) {
		/* component no longer exists in system, nothing to do */
		return;
	}

	if ((statp = (struct stats *)
	    lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
		/* need to create the counter */
		int cnt = 0;
		struct node *np;
		char *sname;
		char *snamep;
		struct istat_entry *newentp;

		/* count up the
	   size of the stat name */
		np = snp->u.event.ename;
		while (np != NULL) {
			cnt += strlen(np->u.name.s);
			cnt++;	/* for the '.' or '@' */
			np = np->u.name.next;
		}
		np = snp->u.event.epname;
		while (np != NULL) {
			cnt += snprintf(NULL, 0, "%s%llu",
			    np->u.name.s, np->u.name.child->u.ull);
			cnt++;	/* for the '/' or trailing NULL byte */
			np = np->u.name.next;
		}

		/* build the stat name: "<ename>@<path/to0/comp1...>" */
		snamep = sname = alloca(cnt);
		np = snp->u.event.ename;
		while (np != NULL) {
			snamep += snprintf(snamep, &sname[cnt] - snamep,
			    "%s", np->u.name.s);
			np = np->u.name.next;
			if (np)
				*snamep++ = '.';
		}
		*snamep++ = '@';
		np = snp->u.event.epname;
		while (np != NULL) {
			snamep += snprintf(snamep, &sname[cnt] - snamep,
			    "%s%llu", np->u.name.s, np->u.name.child->u.ull);
			np = np->u.name.next;
			if (np)
				*snamep++ = '/';
		}
		*snamep++ = '\0';

		/* create the new stat & add it to our list */
		newentp = MALLOC(sizeof (*newentp));
		*newentp = ent;
		statp = stats_new_counter(NULL, sname, 0);
		Istats = lut_add(Istats, (void *)newentp, (void *)statp,
		    (lut_cmp)istat_cmp);
	}

	/* if n is non-zero, set that value instead of bumping */
	if (n) {
		stats_counter_reset(statp);
		stats_counter_add(statp, n);
	} else
		stats_counter_bump(statp);
	Istat_need_save = 1;	/* istat_save() will persist this change */

	ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
	out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
	    stats_counter_value(statp));
}

/*
 * istat_destructor -- lut_free() callback; frees both the istat entry
 * (key) and its stats counter (value).
 */
/*ARGSUSED*/
static void
istat_destructor(void *left, void *right, void *arg)
{
	struct istat_entry *entp = (struct istat_entry *)left;
	struct stats *statp = (struct stats *)right;
	FREE(entp);
	stats_delete(statp);
}

/*
 * Callback used in a walk of the Istats to reset matching stat counters.
 */
static void
istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
    const struct ipath *ipp)
{
	char *path;

	/* ipaths are interned, so pointer equality is path equality */
	if (entp->ipath == ipp) {
		path = ipath2str(entp->ename, ipp);
		out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
		FREE(path);
		stats_counter_reset(statp);
		Istat_need_save = 1;
	}
}

/*
 * istat_counter_topo_chg_cb -- Istats walk callback run on topology
 * change; resets counters whose component is no longer present.
 */
/*ARGSUSED*/
static void
istat_counter_topo_chg_cb(struct istat_entry *entp, struct stats *statp,
    void *unused)
{
	char *path;
	nvlist_t *fmri;

	fmri = ipath2fmri((struct ipath *)(entp->ipath));
	if (!platform_path_exists(fmri)) {
		path = ipath2str(entp->ename, entp->ipath);
		out(O_ALTFP, "istat_counter_topo_chg_cb: not present %s", path);
		FREE(path);
		stats_counter_reset(statp);
		Istat_need_save = 1;
	}
	nvlist_free(fmri);
}

void
istat_fini(void)
{
	lut_free(Istats, istat_destructor, NULL);
}

/*
 * Scratch state shared by serdaddsize()/serd2str() while serializing
 * SerdEngines into a single flat buffer (see serd_save()).
 */
static char *Serdbuf;
static char *Serdbufptr;
static int Serdsz;

/*
 * serdaddsize -- calculate size of serd and add it to Serdsz
 */
/*ARGSUSED*/
static void
serdaddsize(const struct serd_entry *lhs, struct stats *rhs, void *arg)
{
	ASSERT(lhs != NULL);

	/* count up the size of the stat name */
	Serdsz += ipath2strlen(lhs->ename, lhs->ipath);
	Serdsz++;	/* for the trailing NULL byte */
}

/*
 * serd2str -- serialize a serd engine, writing result to *Serdbufptr
 *
 * Emits "<name>\0" and advances Serdbufptr.
 */
/*ARGSUSED*/
static void
serd2str(const struct serd_entry *lhs, struct stats *rhs, void *arg)
{
	char *str;
	int len;

	ASSERT(lhs != NULL);

	/* serialize the serd engine name */
	str = ipath2str(lhs->ename, lhs->ipath);
	len = strlen(str);

	ASSERT(Serdbufptr + len + 1 <= &Serdbuf[Serdsz]);
	(void) strlcpy(Serdbufptr, str, &Serdbuf[Serdsz] - Serdbufptr);
	Serdbufptr += len;
	FREE(str);
	*Serdbufptr++ = '\0';
	ASSERT(Serdbufptr <= &Serdbuf[Serdsz]);
}

/*
 * serd_save -- persist the names of all serd engines into the
 * WOBUF_SERDS buffer, replacing any previously-saved copy.  No-op
 * unless Serd_need_save is set.
 */
void
serd_save()
{
	if (Serd_need_save == 0)
		return;

	/* figure out how big the serialized info is */
	Serdsz = 0;
	lut_walk(SerdEngines, (lut_cb)serdaddsize, NULL);

	if (Serdsz == 0) {
		/* no serd engines to save */
		fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);
		return;
	}

	/* create the serialized buffer */
	Serdbufptr = Serdbuf = MALLOC(Serdsz);
	lut_walk(SerdEngines, (lut_cb)serd2str, NULL);

	/* clear out current saved stats */
	fmd_buf_destroy(Hdl, NULL, WOBUF_SERDS);

	/* write out the new version */
	fmd_buf_write(Hdl, NULL, WOBUF_SERDS, Serdbuf, Serdsz);
	FREE(Serdbuf);
	Serd_need_save = 0;
}

/*
 * serd_cmp -- lut comparison for serd entries; ename and ipath are
 * interned, so pointer comparison is identity comparison.
 */
int
serd_cmp(struct serd_entry *ent1, struct serd_entry *ent2)
{
	if (ent1->ename != ent2->ename)
		return (ent2->ename - ent1->ename);
	if (ent1->ipath != ent2->ipath)
		return ((char *)ent2->ipath - (char *)ent1->ipath);

	return (0);
}

/*
 * fme_serd_load -- repopulate SerdEngines from the saved WOBUF_SERDS
 * buffer ("<ename>@<path>\0" records), dropping engines whose component
 * is no longer present in the topology.
 */
void
fme_serd_load(fmd_hdl_t *hdl)
{
	int sz;
	char *sbuf;
	char *sepptr;
	char *ptr;
	struct serd_entry *newentp;
	struct node *epname;
	nvlist_t *fmri;
	char *namestring;

	if ((sz = fmd_buf_size(hdl, NULL, WOBUF_SERDS)) == 0)
		return;
	sbuf = alloca(sz);
	fmd_buf_read(hdl, NULL, WOBUF_SERDS, sbuf, sz);
	ptr = sbuf;
	while (ptr < &sbuf[sz]) {
		/* split each record at the '@' between ename and path */
		sepptr = strchr(ptr, '@');
		*sepptr = '\0';
		namestring = ptr;
		sepptr++;
		ptr = sepptr;
		ptr += strlen(ptr);
		ptr++;	/* move past the '\0' separating paths */
		epname = pathstring2epnamenp(sepptr);
		fmri = node2fmri(epname);
		if (platform_path_exists(fmri)) {
			newentp = MALLOC(sizeof (*newentp));
			newentp->hdl = hdl;
			newentp->ipath = ipath(epname);
			newentp->ename =
			    stable(namestring);
			SerdEngines = lut_add(SerdEngines, (void *)newentp,
			    (void *)newentp, (lut_cmp)serd_cmp);
		} else
			Serd_need_save = 1;	/* entry dropped; rewrite */
		tree_free(epname);
		nvlist_free(fmri);
	}
	/* save it back again in case some of the paths no longer exist */
	serd_save();
}

/*
 * serd_destructor -- lut_free() callback; key and value are the same
 * serd_entry, so only the key is freed.
 */
/*ARGSUSED*/
static void
serd_destructor(void *left, void *right, void *arg)
{
	struct serd_entry *entp = (struct serd_entry *)left;
	FREE(entp);
}

/*
 * Callback used in a walk of the SerdEngines to reset matching serd engines.
 */
/*ARGSUSED*/
static void
serd_reset_cb(struct serd_entry *entp, void *unused, const struct ipath *ipp)
{
	char *path;

	/* ipaths are interned, so pointer equality is path equality */
	if (entp->ipath == ipp) {
		path = ipath2str(entp->ename, ipp);
		out(O_ALTFP, "serd_reset_cb: resetting %s", path);
		fmd_serd_reset(entp->hdl, path);
		FREE(path);
		Serd_need_save = 1;
	}
}

/*
 * serd_topo_chg_cb -- SerdEngines walk callback run on topology change;
 * resets engines whose component is no longer present.
 */
/*ARGSUSED*/
static void
serd_topo_chg_cb(struct serd_entry *entp, void *unused, void *unused2)
{
	char *path;
	nvlist_t *fmri;

	fmri = ipath2fmri((struct ipath *)(entp->ipath));
	if (!platform_path_exists(fmri)) {
		path = ipath2str(entp->ename, entp->ipath);
		out(O_ALTFP, "serd_topo_chg_cb: not present %s", path);
		fmd_serd_reset(entp->hdl, path);
		FREE(path);
		Serd_need_save = 1;
	}
	nvlist_free(fmri);
}

void
serd_fini(void)
{
	lut_free(SerdEngines, serd_destructor, NULL);
}

/*
 * publish_suspects -- compute certainties for the trimmed suspect list
 * and hand the resulting faults to fmd via fmd_case_add_suspect()/
 * fmd_case_solve().
 */
static void
publish_suspects(struct fme *fmep)
{
	struct rsl *srl = NULL;
	struct rsl *erl;
	struct rsl *rp;
	nvlist_t *fault;
	uint8_t cert;
	uint_t *frs;
	uint_t fravg, frsum, fr;
	uint_t messval;
	struct node *snp;
	int frcnt, fridx;
	boolean_t no_upsets = B_FALSE;
	boolean_t allfaulty = B_TRUE;

	stats_counter_bump(fmep->diags);

	/*
	 * If we're
	   auto-closing upsets, we don't want to include them
	 * in any produced suspect lists or certainty accounting.
	 */
	if (Autoclose != NULL)
		if (strcmp(Autoclose, "true") == 0 ||
		    strcmp(Autoclose, "all") == 0 ||
		    strcmp(Autoclose, "upsets") == 0)
			no_upsets = B_TRUE;

	trim_suspects(fmep, no_upsets, &srl, &erl);

	/*
	 * If the resulting suspect list has no members, we're
	 * done.  Returning here will simply close the case.
	 */
	if (fmep->nsuspects == 0) {
		out(O_ALTFP,
		    "[FME%d, case %s (all suspects are upsets)]",
		    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
		FREE(srl);
		restore_suspects(fmep);
		return;
	}

	/*
	 * If the suspect list is all faults, then for a given fault,
	 * say X of N, X's certainty is computed via:
	 *
	 * fitrate(X) / (fitrate(1) + ... + fitrate(N)) * 100
	 *
	 * If none of the suspects are faults, and there are N suspects,
	 * the certainty of a given suspect is 100/N.
	 *
	 * If there are a mixture of faults and other problems in
	 * the suspect list, we take an average of the faults'
	 * FITrates and treat this average as the FITrate for any
	 * non-faults.  The fitrate of any given suspect is then
	 * computed per the first formula above.
	 */
	if (fmep->nonfault == fmep->nsuspects) {
		/* NO faults in the suspect list */
		cert = percentof(1, fmep->nsuspects);
	} else {
		/* sum the fitrates; non-faults get placeholder 0 for now */
		frs = alloca(fmep->nsuspects * sizeof (uint_t));
		fridx = frcnt = frsum = 0;

		for (rp = srl; rp <= erl; rp++) {
			struct node *n;

			/* rsluniq() may have NULLed duplicate entries */
			if (rp->suspect == NULL)
				continue;
			if (!is_fault(rp->suspect->t)) {
				frs[fridx++] = 0;
				continue;
			}
			n = eventprop_lookup(rp->suspect, L_FITrate);
			if (node2uint(n, &fr) != 0) {
				out(O_DEBUG|O_NONL, "event ");
				ipath_print(O_DEBUG|O_NONL,
				    rp->suspect->enode->u.event.ename->u.name.s,
				    rp->suspect->ipp);
				out(O_DEBUG, " has no FITrate (using 1)");
				fr = 1;
			} else if (fr == 0) {
				out(O_DEBUG|O_NONL, "event ");
				ipath_print(O_DEBUG|O_NONL,
				    rp->suspect->enode->u.event.ename->u.name.s,
				    rp->suspect->ipp);
				out(O_DEBUG, " has zero FITrate (using 1)");
				fr = 1;
			}

			frs[fridx++] = fr;
			frsum += fr;
			frcnt++;
		}
		fravg = avg(frsum, frcnt);
		/* replace the 0 placeholders with the fault average */
		for (fridx = 0; fridx < fmep->nsuspects; fridx++)
			if (frs[fridx] == 0) {
				frs[fridx] = fravg;
				frsum += fravg;
			}
		/* fridx now == nsuspects; decremented back down below */
	}

	/* Add them in reverse order of our sort, as fmd reverses order */
	for (rp = erl; rp >= srl; rp--) {
		if (rp->suspect == NULL)
			continue;
		if (!is_fault(rp->suspect->t))
			allfaulty = B_FALSE;
		if (fmep->nonfault != fmep->nsuspects)
			cert = percentof(frs[--fridx], frsum);
		fault = fmd_nvl_create_fault(fmep->hdl,
		    rp->suspect->enode->u.event.ename->u.name.s,
		    cert,
		    rp->asru,
		    rp->fru,
		    rp->rsrc);
		if (fault == NULL)
			out(O_DIE, "fault creation failed");
		/* if "message" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_message),
		    &messval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds message=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    messval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_MESSAGE,
			    (messval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-message to fault");
			}
		}
		/* add any payload properties */
		lut_walk(rp->suspect->payloadprops,
		    (lut_cb)addpayloadprop, (void *)fault);
		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);
		rslfree(rp);

		/*
		 * If "action" property exists, evaluate it; this must be done
		 * before the dupclose check below since some actions may
		 * modify the asru to be used in fmd_nvl_fmri_faulty.  This
		 * needs to be restructured if any new actions are introduced
		 * that have effects that we do not want to be visible if
		 * we decide not to publish in the dupclose check below.
		 */
		if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
			struct evalue evalue;

			out(O_ALTFP|O_NONL,
			    "[FME%d, %s action ", fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s);
			ptree_name_iter(O_ALTFP|O_NONL, snp);
			out(O_ALTFP, "]");
			Action_nvl = fault;
			(void) eval_expr(snp, NULL, NULL, NULL, NULL,
			    NULL, 0, &evalue);
		}

		/*
		 * check if the asru is already marked as "faulty".
		 */
		if (allfaulty) {
			nvlist_t *asru;

			out(O_ALTFP|O_VERB, "FMD%d dup check ", fmep->id);
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
			out(O_ALTFP|O_VERB|O_NONL, " ");
			if (nvlist_lookup_nvlist(fault,
			    FM_FAULT_ASRU, &asru) != 0) {
				out(O_ALTFP|O_VERB, "NULL asru");
				allfaulty = B_FALSE;
			} else if (fmd_nvl_fmri_faulty(fmep->hdl, asru)) {
				out(O_ALTFP|O_VERB, "faulty");
			} else {
				out(O_ALTFP|O_VERB, "not faulty");
				allfaulty = B_FALSE;
			}
		}

	}

	/*
	 * We are going to publish so take any pre-publication actions.
	 */
	if (!allfaulty) {
		/*
		 * don't update the count stat if all asrus are already
		 * present and unrepaired in the asru cache
		 */
		for (rp = erl; rp >= srl; rp--) {
			struct event *suspect = rp->suspect;

			if (suspect == NULL)
				continue;

			/* if "count" exists, increment the appropriate stat */
			if ((snp = eventprop_lookup(suspect,
			    L_count)) != NULL) {
				out(O_ALTFP|O_NONL,
				    "[FME%d, %s count ", fmep->id,
				    suspect->enode->u.event.ename->u.name.s);
				ptree_name_iter(O_ALTFP|O_NONL, snp);
				out(O_ALTFP, "]");
				istat_bump(snp, 0);

			}
		}
		istat_save();	/* write out any istat changes */
	}

	out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
	    fmd_case_uuid(fmep->hdl, fmep->fmcase));
	fmd_case_solve(fmep->hdl, fmep->fmcase);

	/*
	 * revert to the original suspect list
	 */
	FREE(srl);
	restore_suspects(fmep);
}

/*
 * publish_undiagnosable -- create, solve and close a fresh case carrying
 * an undiagnosable defect, remembering it on Undiagablecaselist.
 */
static void
publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase)
{
	struct case_list *newcase;
	nvlist_t *defect;

	out(O_ALTFP,
	    "[undiagnosable ereport received, "
	    "creating and closing a new case (%s)]",
	    Undiag_reason ?
Undiag_reason : "reason not provided"); 2886 2887 newcase = MALLOC(sizeof (struct case_list)); 2888 newcase->next = NULL; 2889 newcase->fmcase = fmcase; 2890 if (Undiagablecaselist != NULL) 2891 newcase->next = Undiagablecaselist; 2892 Undiagablecaselist = newcase; 2893 2894 if (ffep != NULL) 2895 fmd_case_add_ereport(hdl, newcase->fmcase, ffep); 2896 2897 defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100, 2898 NULL, NULL, NULL); 2899 if (Undiag_reason != NULL) 2900 (void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason); 2901 fmd_case_add_suspect(hdl, newcase->fmcase, defect); 2902 2903 fmd_case_solve(hdl, newcase->fmcase); 2904 fmd_case_close(hdl, newcase->fmcase); 2905 } 2906 2907 static void 2908 fme_undiagnosble_pci(struct fme *f, nvlist_t *rc_detector) { 2909 nvlist_t *defect, *asru; 2910 char *path; 2911 2912 (void) nvlist_lookup_string(rc_detector, FM_FMRI_DEV_PATH, &path); 2913 out(O_ALTFP, "[solving/closing PCIE FME%d PATH %s]", f->id, path); 2914 2915 (void) nvlist_xalloc(&asru, NV_UNIQUE_NAME, &Eft_nv_hdl); 2916 (void) nvlist_add_uint8(asru, FM_VERSION, FM_HC_SCHEME_VERSION); 2917 (void) nvlist_add_string(asru, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV); 2918 (void) nvlist_add_string(asru, FM_FMRI_DEV_PATH, path); 2919 2920 defect = fmd_nvl_create_fault(f->hdl, 2921 "fault.sunos.eft.unknown_pci_fault", 100, 2922 asru, NULL, NULL); 2923 2924 (void) nvlist_add_string(defect, UNDIAG_REASON, UD_PCIUNSOLVD); 2925 fmd_case_pci_undiagnosable(f->hdl, f->fmcase, defect); 2926 2927 fmd_case_add_suspect(f->hdl, f->fmcase, defect); 2928 fmd_case_solve(f->hdl, f->fmcase); 2929 fmd_case_close(f->hdl, f->fmcase); 2930 } 2931 2932 static void 2933 fme_undiagnosable(struct fme *f) 2934 { 2935 nvlist_t *defect; 2936 nvlist_t *rc_detector; 2937 2938 out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]", 2939 f->id, fmd_case_uuid(f->hdl, f->fmcase), 2940 Undiag_reason ? 
Undiag_reason : "undiagnosable"); 2941 2942 if ((strcmp(Undiag_reason, UD_UNSOLVD) == 0) && 2943 fmd_case_is_pcie(f->hdl, f->fmcase, &rc_detector)) { 2944 fme_undiagnosble_pci(f, rc_detector); 2945 return; 2946 } 2947 2948 defect = fmd_nvl_create_fault(f->hdl, UNDIAGNOSABLE_DEFECT, 100, 2949 NULL, NULL, NULL); 2950 if (Undiag_reason != NULL) 2951 (void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason); 2952 fmd_case_add_suspect(f->hdl, f->fmcase, defect); 2953 fmd_case_solve(f->hdl, f->fmcase); 2954 fmd_case_close(f->hdl, f->fmcase); 2955 } 2956 2957 /* 2958 * fme_close_case 2959 * 2960 * Find the requested case amongst our fmes and close it. Free up 2961 * the related fme. 2962 */ 2963 void 2964 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase) 2965 { 2966 struct case_list *ucasep, *prevcasep = NULL; 2967 struct fme *prev = NULL; 2968 struct fme *fmep; 2969 2970 for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) { 2971 if (fmcase != ucasep->fmcase) { 2972 prevcasep = ucasep; 2973 continue; 2974 } 2975 2976 if (prevcasep == NULL) 2977 Undiagablecaselist = Undiagablecaselist->next; 2978 else 2979 prevcasep->next = ucasep->next; 2980 2981 FREE(ucasep); 2982 return; 2983 } 2984 2985 for (fmep = FMElist; fmep; fmep = fmep->next) { 2986 if (fmep->hdl == hdl && fmep->fmcase == fmcase) 2987 break; 2988 prev = fmep; 2989 } 2990 2991 if (fmep == NULL) { 2992 out(O_WARN, "Eft asked to close unrecognized case [%s].", 2993 fmd_case_uuid(hdl, fmcase)); 2994 return; 2995 } 2996 2997 if (EFMElist == fmep) 2998 EFMElist = prev; 2999 3000 if (prev == NULL) 3001 FMElist = FMElist->next; 3002 else 3003 prev->next = fmep->next; 3004 3005 fmep->next = NULL; 3006 3007 /* Get rid of any timer this fme has set */ 3008 if (fmep->wull != 0) 3009 fmd_timer_remove(fmep->hdl, fmep->timer); 3010 3011 if (ClosedFMEs == NULL) { 3012 ClosedFMEs = fmep; 3013 } else { 3014 fmep->next = ClosedFMEs; 3015 ClosedFMEs = fmep; 3016 } 3017 3018 Open_fme_count--; 3019 3020 /* See if we 
	   can close the overflow FME */
	if (Open_fme_count <= Max_fme) {
		for (fmep = FMElist; fmep; fmep = fmep->next) {
			if (fmep->overflow && !(fmd_case_closed(fmep->hdl,
			    fmep->fmcase)))
				break;
		}

		if (fmep != NULL)
			fmd_case_close(fmep->hdl, fmep->fmcase);
	}
}

/*
 * fme_set_timer()
 *	If the time we need to wait for the given FME is less than the
 *	current timer, kick that old timer out and establish a new one.
 *
 *	Returns 1 if a timer was (re)installed, 0 if no timer was needed.
 */
static int
fme_set_timer(struct fme *fmep, unsigned long long wull)
{
	out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait ");
	ptree_timeval(O_ALTFP|O_VERB, &wull);

	if (wull <= fmep->pull) {
		out(O_ALTFP|O_VERB|O_NONL, "already have waited at least ");
		ptree_timeval(O_ALTFP|O_VERB, &fmep->pull);
		out(O_ALTFP|O_VERB, NULL);
		/* we've waited at least wull already, don't need timer */
		return (0);
	}

	out(O_ALTFP|O_VERB|O_NONL, " currently ");
	if (fmep->wull != 0) {
		out(O_ALTFP|O_VERB|O_NONL, "waiting ");
		ptree_timeval(O_ALTFP|O_VERB, &fmep->wull);
		out(O_ALTFP|O_VERB, NULL);
	} else {
		out(O_ALTFP|O_VERB|O_NONL, "not waiting");
		out(O_ALTFP|O_VERB, NULL);
	}

	if (fmep->wull != 0)
		if (wull >= fmep->wull)
			/* New timer would fire later than established timer */
			return (0);

	if (fmep->wull != 0) {
		fmd_timer_remove(fmep->hdl, fmep->timer);
	}

	/* timers are installed relative to the initial ereport (e0r) */
	fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep,
	    fmep->e0r, wull);
	out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer);
	fmep->wull = wull;
	return (1);
}

/*
 * fme_timer_fired -- timer callback; records the elapsed wait (pull)
 * persistently and re-evaluates the FME.
 */
void
fme_timer_fired(struct fme *fmep, id_t tid)
{
	struct fme *ffmep = NULL;

	/* ignore timers for FMEs no longer on our list */
	for (ffmep = FMElist; ffmep; ffmep = ffmep->next)
		if (ffmep == fmep)
			break;

	if (ffmep == NULL) {
		out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.",
		    (void
	    *)fmep);
		return;
	}

	out(O_ALTFP|O_VERB, "Timer fired %lx", tid);
	/* the full wait has now elapsed; fold it into pull and persist */
	fmep->pull = fmep->wull;
	fmep->wull = 0;
	fmd_buf_write(fmep->hdl, fmep->fmcase,
	    WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull));

	fme_eval(fmep, fmep->e0r);
}

/*
 * Preserve the fme's suspect list in its psuspects list, NULLing the
 * suspects list in the meantime.
 */
static void
save_suspects(struct fme *fmep)
{
	struct event *ep;
	struct event *nextep;

	/* zero out the previous suspect list */
	for (ep = fmep->psuspects; ep; ep = nextep) {
		nextep = ep->psuspects;
		ep->psuspects = NULL;
	}
	fmep->psuspects = NULL;

	/* zero out the suspect list, copying it to previous suspect list */
	fmep->psuspects = fmep->suspects;
	for (ep = fmep->suspects; ep; ep = nextep) {
		nextep = ep->suspects;
		ep->psuspects = ep->suspects;
		ep->suspects = NULL;
		ep->is_suspect = 0;
	}
	fmep->suspects = NULL;
	fmep->nsuspects = 0;
	fmep->nonfault = 0;
}

/*
 * Retrieve the fme's suspect list from its psuspects list.
 */
static void
restore_suspects(struct fme *fmep)
{
	struct event *ep;
	struct event *nextep;

	/* rebuild the counts as we relink the list */
	fmep->nsuspects = fmep->nonfault = 0;
	fmep->suspects = fmep->psuspects;
	for (ep = fmep->psuspects; ep; ep = nextep) {
		fmep->nsuspects++;
		if (!is_fault(ep->t))
			fmep->nonfault++;
		nextep = ep->psuspects;
		ep->suspects = ep->psuspects;
	}
}

/*
 * this is what we use to call the Emrys prototype code instead of main()
 *
 * fme_eval -- run the inference algorithm from the FME's initial event
 * e0 and act on the resulting state (publish, wait, or declare the FME
 * undiagnosable).
 */
static void
fme_eval(struct fme *fmep, fmd_event_t *ffep)
{
	struct event *ep;
	unsigned long long my_delay = TIMEVAL_EVENTUALLY;

	save_suspects(fmep);

	out(O_ALTFP, "Evaluate FME %d", fmep->id);
	indent_set("  ");

	lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep);
	fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay);

	out(O_ALTFP|O_NONL, "FME%d state: %s, suspect list:", fmep->id,
	    fme_state2str(fmep->state));
	for (ep = fmep->suspects; ep; ep = ep->suspects) {
		out(O_ALTFP|O_NONL, " ");
		itree_pevent_brief(O_ALTFP|O_NONL, ep);
	}
	out(O_ALTFP, NULL);

	switch (fmep->state) {
	case FME_CREDIBLE:
		print_suspects(SLNEW, fmep);
		(void) upsets_eval(fmep, ffep);

		/*
		 * we may have already posted suspects in upsets_eval() which
		 * can recurse into fme_eval() again. If so then just return.
		 */
		if (fmep->posted_suspects)
			return;

		publish_suspects(fmep);
		fmep->posted_suspects = 1;
		fmd_buf_write(fmep->hdl, fmep->fmcase,
		    WOBUF_POSTD,
		    (void *)&fmep->posted_suspects,
		    sizeof (fmep->posted_suspects));

		/*
		 * Now the suspects have been posted, we can clear up
		 * the instance tree as we won't be looking at it again.
		 * Also cancel the timer as the case is now solved.
 */
		if (fmep->wull != 0) {
			fmd_timer_remove(fmep->hdl, fmep->timer);
			fmep->wull = 0;
		}
		break;

	case FME_WAIT:
		ASSERT(my_delay > fmep->ull);
		(void) fme_set_timer(fmep, my_delay);
		print_suspects(SLWAIT, fmep);
		itree_prune(fmep->eventtree);
		/* keep the tree alive; we'll re-evaluate when the timer fires */
		return;

	case FME_DISPROVED:
		print_suspects(SLDISPROVED, fmep);
		Undiag_reason = UD_UNSOLVD;
		fme_undiagnosable(fmep);
		break;
	}

	/* honor the Autoclose setting for cases we just solved */
	if (fmep->posted_suspects == 1 && Autoclose != NULL) {
		int doclose = 0;

		if (strcmp(Autoclose, "true") == 0 ||
		    strcmp(Autoclose, "all") == 0)
			doclose = 1;

		if (strcmp(Autoclose, "upsets") == 0) {
			doclose = 1;
			for (ep = fmep->suspects; ep; ep = ep->suspects) {
				if (ep->t != N_UPSET) {
					doclose = 0;
					break;
				}
			}
		}

		if (doclose) {
			out(O_ALTFP, "[closing FME%d, case %s (autoclose)]",
			    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
			fmd_case_close(fmep->hdl, fmep->fmcase);
		}
	}
	itree_free(fmep->eventtree);
	fmep->eventtree = NULL;
	structconfig_free(fmep->config);
	fmep->config = NULL;
	destroy_fme_bufs(fmep);
}

static void indent(void);
static int triggered(struct fme *fmep, struct event *ep, int mark);
static enum fme_state effects_test(struct fme *fmep,
    struct event *fault_event, unsigned long long at_latest_by,
    unsigned long long *pdelay);
static enum fme_state requirements_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static enum fme_state causes_test(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);

/*
 * checkconstraints -- evaluate an arrow's constraints.
 *
 * Returns 1 when the constraints hold (or can't be decided yet), 0 when
 * any constraint is known false.  Results are cached on the arrow via
 * the forever_true/forever_false flags.
 */
static int
checkconstraints(struct fme *fmep, struct arrow *arrowp)
{
	struct constraintlist *ctp;
	struct evalue value;
	char *sep = "";

	if (arrowp->forever_false) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "  Forever false constraint: ");
		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
			out(O_ALTFP|O_VERB|O_NONL, sep);
			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
			sep = ", ";
		}
		out(O_ALTFP|O_VERB, NULL);
		return (0);
	}
	if (arrowp->forever_true) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, "  Forever true constraint: ");
		for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
			out(O_ALTFP|O_VERB|O_NONL, sep);
			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
			sep = ", ";
		}
		out(O_ALTFP|O_VERB, NULL);
		return (1);
	}

	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
		if (eval_expr(ctp->cnode, NULL, NULL,
		    &fmep->globals, fmep->config,
		    arrowp, 0, &value)) {
			/* evaluation successful */
			if (value.t == UNDEFINED || value.v == 0) {
				/* known false */
				arrowp->forever_false = 1;
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  False constraint: ");
				ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
				out(O_ALTFP|O_VERB, NULL);
				return (0);
			}
		} else {
			/* evaluation unsuccessful -- unknown value */
			indent();
			out(O_ALTFP|O_VERB|O_NONL,
			    "  Deferred constraint: ");
			ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
			out(O_ALTFP|O_VERB, NULL);
			/* treat as satisfied for now; don't cache a result */
			return (1);
		}
	}
	/* known true */
	arrowp->forever_true = 1;
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "  True constraint: ");
	for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) {
		out(O_ALTFP|O_VERB|O_NONL, sep);
		ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0);
		sep = ", ";
	}
	out(O_ALTFP|O_VERB, NULL);
	return (1);
}

/*
 * triggered -- return 1 if at least K of the arrows into ep's B_TO
 * bubble carry the given mark, 0 otherwise.
 */
static int
triggered(struct fme *fmep, struct event *ep, int mark)
{
	struct bubble *bp;
	struct arrowlist *ap;
	int count = 0;

	stats_counter_bump(fmep->Tcallcount);
	for
	    (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		if (bp->t != B_TO)
			continue;
		for (ap = itree_next_arrow(bp, NULL); ap;
		    ap = itree_next_arrow(bp, ap)) {
			/* check count of marks against K in the bubble */
			if ((ap->arrowp->mark & mark) &&
			    ++count >= bp->nork)
				return (1);
		}
	}
	return (0);
}

/*
 * mark_arrows -- walk the propagation arrows out of ep, marking (or,
 * when mark == 0, clearing) effect state on downstream events.
 *
 * Returns WAIT_EFFECT (and sets *pdelay to the smallest outstanding
 * delay) if any downstream requirement is still waiting; 0 otherwise.
 */
static int
mark_arrows(struct fme *fmep, struct event *ep, int mark,
    unsigned long long at_latest_by, unsigned long long *pdelay, int keep)
{
	struct bubble *bp;
	struct arrowlist *ap;
	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
	unsigned long long my_delay;
	enum fme_state result;
	int retval = 0;

	for (bp = itree_next_bubble(ep, NULL); bp;
	    bp = itree_next_bubble(ep, bp)) {
		if (bp->t != B_FROM)
			continue;
		stats_counter_bump(fmep->Marrowcount);
		for (ap = itree_next_arrow(bp, NULL); ap;
		    ap = itree_next_arrow(bp, ap)) {
			struct event *ep2 = ap->arrowp->head->myevent;
			/*
			 * if we're clearing marks, we can avoid doing
			 * all that work evaluating constraints.
			 */
			if (mark == 0) {
				if (ap->arrowp->arrow_marked == 0)
					continue;
				ap->arrowp->arrow_marked = 0;
				ap->arrowp->mark &= ~EFFECTS_COUNTER;
				if (keep && (ep2->cached_state &
				    (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT)))
					ep2->keep_in_tree = 1;
				ep2->cached_state &=
				    ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT);
				(void) mark_arrows(fmep, ep2, mark, 0, NULL,
				    keep);
				continue;
			}
			ap->arrowp->arrow_marked = 1;
			/* skip events whose state was already decided */
			if (ep2->cached_state & REQMNTS_DISPROVED) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  ALREADY DISPROVED ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			if (ep2->cached_state & WAIT_EFFECT) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  ALREADY EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			if (ep2->cached_state & CREDIBLE_EFFECT) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  ALREADY EFFECTS CREDIBLE ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			if ((ep2->cached_state & PARENT_WAIT) &&
			    (mark & PARENT_WAIT)) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  ALREADY PARENT EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			/* constraints are evaluated against ep2's payload */
			platform_set_payloadnvp(ep2->nvp);
			if (checkconstraints(fmep, ap->arrowp) == 0) {
				platform_set_payloadnvp(NULL);
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  CONSTRAINTS FAIL ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			platform_set_payloadnvp(NULL);
			ap->arrowp->mark |= EFFECTS_COUNTER;
			if (!triggered(fmep, ep2, EFFECTS_COUNTER)) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  K-COUNT NOT YET MET ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				continue;
			}
			ep2->cached_state &= ~PARENT_WAIT;
			/*
			 * if we've reached an ereport and no propagation time
			 * is specified, use the Hesitate value
			 */
			if (ep2->t == N_EREPORT && at_latest_by == 0ULL &&
			    ap->arrowp->maxdelay == 0ULL) {
				out(O_ALTFP|O_VERB|O_NONL, "  default wait ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				result = requirements_test(fmep, ep2, Hesitate,
				    &my_delay);
			} else {
				result = requirements_test(fmep, ep2,
				    at_latest_by + ap->arrowp->maxdelay,
				    &my_delay);
			}
			if (result == FME_WAIT) {
				retval = WAIT_EFFECT;
				if (overall_delay > my_delay)
					overall_delay = my_delay;
				ep2->cached_state |= WAIT_EFFECT;
				indent();
				out(O_ALTFP|O_VERB|O_NONL, "  EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				indent_push("  E");
				if (mark_arrows(fmep, ep2, PARENT_WAIT,
				    at_latest_by, &my_delay, 0) ==
				    WAIT_EFFECT) {
					retval = WAIT_EFFECT;
					if (overall_delay > my_delay)
						overall_delay = my_delay;
				}
				indent_pop();
			} else if (result == FME_DISPROVED) {
				indent();
				out(O_ALTFP|O_VERB|O_NONL,
				    "  EFFECTS DISPROVED ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
			} else {
				ep2->cached_state |= mark;
				indent();
				if (mark == CREDIBLE_EFFECT)
					out(O_ALTFP|O_VERB|O_NONL,
					    "  EFFECTS CREDIBLE ");
				else
					out(O_ALTFP|O_VERB|O_NONL,
					    "  PARENT EFFECTS WAIT ");
				itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2);
				out(O_ALTFP|O_VERB, NULL);
				indent_push("  E");
				if (mark_arrows(fmep, ep2, mark, at_latest_by,
				    &my_delay, 0) == WAIT_EFFECT) {
					retval = WAIT_EFFECT;
					if (overall_delay > my_delay)
						overall_delay = my_delay;
				}
				indent_pop();
			}
		}
	}
	if (retval == WAIT_EFFECT)
		*pdelay = overall_delay;
	return (retval);
}

/*
 * effects_test -- check whether every observed error report is a
 * credible effect of the hypothesised fault_event.
 */
static enum fme_state
effects_test(struct fme *fmep, struct event *fault_event,
    unsigned long long at_latest_by, unsigned long long *pdelay)
{
	struct event *error_event;
	enum fme_state return_value = FME_CREDIBLE;
	unsigned long long overall_delay = TIMEVAL_EVENTUALLY;
	unsigned long long my_delay;

	stats_counter_bump(fmep->Ecallcount);
	indent_push("  E");
	indent();
	out(O_ALTFP|O_VERB|O_NONL, "->");
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
	out(O_ALTFP|O_VERB, NULL);

	if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by,
	    &my_delay, 0) == WAIT_EFFECT) {
		return_value = FME_WAIT;
		if (overall_delay > my_delay)
			overall_delay = my_delay;
	}
	/* every observation must have been reached by the marking walk */
	for (error_event = fmep->observations;
	    error_event; error_event = error_event->observations) {
		indent();
		out(O_ALTFP|O_VERB|O_NONL, " ");
		itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event);
		if (!(error_event->cached_state & CREDIBLE_EFFECT)) {
			if (error_event->cached_state &
			    (PARENT_WAIT|WAIT_EFFECT)) {
				out(O_ALTFP|O_VERB, " NOT YET triggered");
				continue;
			}
			return_value = FME_DISPROVED;
			out(O_ALTFP|O_VERB, " NOT triggered");
			break;
		} else {
			out(O_ALTFP|O_VERB, " triggered");
		}
	}
	if (return_value == FME_DISPROVED) {
		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0);
	} else {
		fault_event->keep_in_tree = 1;
		(void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1);
	}

	indent();
	out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ",
	    fme_state2str(return_value));
	itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event);
	out(O_ALTFP|O_VERB, NULL);
	indent_pop();
	if (return_value == FME_WAIT)
		*pdelay = overall_delay;
	return (return_value);
}
3569 static enum fme_state 3570 requirements_test(struct fme *fmep, struct event *ep, 3571 unsigned long long at_latest_by, unsigned long long *pdelay) 3572 { 3573 int waiting_events; 3574 int credible_events; 3575 int deferred_events; 3576 enum fme_state return_value = FME_CREDIBLE; 3577 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 3578 unsigned long long arrow_delay; 3579 unsigned long long my_delay; 3580 struct event *ep2; 3581 struct bubble *bp; 3582 struct arrowlist *ap; 3583 3584 if (ep->cached_state & REQMNTS_CREDIBLE) { 3585 indent(); 3586 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY CREDIBLE "); 3587 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3588 out(O_ALTFP|O_VERB, NULL); 3589 return (FME_CREDIBLE); 3590 } 3591 if (ep->cached_state & REQMNTS_DISPROVED) { 3592 indent(); 3593 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY DISPROVED "); 3594 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3595 out(O_ALTFP|O_VERB, NULL); 3596 return (FME_DISPROVED); 3597 } 3598 if (ep->cached_state & REQMNTS_WAIT) { 3599 indent(); 3600 *pdelay = ep->cached_delay; 3601 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY WAIT "); 3602 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3603 out(O_ALTFP|O_VERB|O_NONL, ", wait for: "); 3604 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3605 out(O_ALTFP|O_VERB, NULL); 3606 return (FME_WAIT); 3607 } 3608 stats_counter_bump(fmep->Rcallcount); 3609 indent_push(" R"); 3610 indent(); 3611 out(O_ALTFP|O_VERB|O_NONL, "->"); 3612 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3613 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: "); 3614 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3615 out(O_ALTFP|O_VERB, NULL); 3616 3617 if (ep->t == N_EREPORT) { 3618 if (ep->count == 0) { 3619 if (fmep->pull >= at_latest_by) { 3620 return_value = FME_DISPROVED; 3621 } else { 3622 ep->cached_delay = *pdelay = at_latest_by; 3623 return_value = FME_WAIT; 3624 } 3625 } 3626 3627 indent(); 3628 switch (return_value) { 3629 case FME_CREDIBLE: 3630 
ep->cached_state |= REQMNTS_CREDIBLE; 3631 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE "); 3632 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3633 break; 3634 case FME_DISPROVED: 3635 ep->cached_state |= REQMNTS_DISPROVED; 3636 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED "); 3637 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3638 break; 3639 case FME_WAIT: 3640 ep->cached_state |= REQMNTS_WAIT; 3641 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT "); 3642 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3643 out(O_ALTFP|O_VERB|O_NONL, " to "); 3644 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3645 break; 3646 default: 3647 out(O_DIE, "requirements_test: unexpected fme_state"); 3648 break; 3649 } 3650 out(O_ALTFP|O_VERB, NULL); 3651 indent_pop(); 3652 3653 return (return_value); 3654 } 3655 3656 /* this event is not a report, descend the tree */ 3657 for (bp = itree_next_bubble(ep, NULL); bp; 3658 bp = itree_next_bubble(ep, bp)) { 3659 int n; 3660 3661 if (bp->t != B_FROM) 3662 continue; 3663 3664 n = bp->nork; 3665 3666 credible_events = 0; 3667 waiting_events = 0; 3668 deferred_events = 0; 3669 arrow_delay = TIMEVAL_EVENTUALLY; 3670 /* 3671 * n is -1 for 'A' so adjust it. 3672 * XXX just count up the arrows for now. 
3673 */ 3674 if (n < 0) { 3675 n = 0; 3676 for (ap = itree_next_arrow(bp, NULL); ap; 3677 ap = itree_next_arrow(bp, ap)) 3678 n++; 3679 indent(); 3680 out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n); 3681 } else { 3682 indent(); 3683 out(O_ALTFP|O_VERB, " Bubble N=%d", n); 3684 } 3685 3686 if (n == 0) 3687 continue; 3688 if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) { 3689 for (ap = itree_next_arrow(bp, NULL); ap; 3690 ap = itree_next_arrow(bp, ap)) { 3691 ep2 = ap->arrowp->head->myevent; 3692 platform_set_payloadnvp(ep2->nvp); 3693 if (checkconstraints(fmep, ap->arrowp) == 0) { 3694 /* 3695 * if any arrow is invalidated by the 3696 * constraints, then we should elide the 3697 * whole bubble to be consistant with 3698 * the tree creation time behaviour 3699 */ 3700 bp->mark |= BUBBLE_ELIDED; 3701 platform_set_payloadnvp(NULL); 3702 break; 3703 } 3704 platform_set_payloadnvp(NULL); 3705 } 3706 } 3707 if (bp->mark & BUBBLE_ELIDED) 3708 continue; 3709 bp->mark |= BUBBLE_OK; 3710 for (ap = itree_next_arrow(bp, NULL); ap; 3711 ap = itree_next_arrow(bp, ap)) { 3712 ep2 = ap->arrowp->head->myevent; 3713 if (n <= credible_events) 3714 break; 3715 3716 ap->arrowp->mark |= REQMNTS_COUNTER; 3717 if (triggered(fmep, ep2, REQMNTS_COUNTER)) 3718 /* XXX adding max timevals! 
*/ 3719 switch (requirements_test(fmep, ep2, 3720 at_latest_by + ap->arrowp->maxdelay, 3721 &my_delay)) { 3722 case FME_DEFERRED: 3723 deferred_events++; 3724 break; 3725 case FME_CREDIBLE: 3726 credible_events++; 3727 break; 3728 case FME_DISPROVED: 3729 break; 3730 case FME_WAIT: 3731 if (my_delay < arrow_delay) 3732 arrow_delay = my_delay; 3733 waiting_events++; 3734 break; 3735 default: 3736 out(O_DIE, 3737 "Bug in requirements_test."); 3738 } 3739 else 3740 deferred_events++; 3741 } 3742 indent(); 3743 out(O_ALTFP|O_VERB, " Credible: %d Waiting %d", 3744 credible_events + deferred_events, waiting_events); 3745 if (credible_events + deferred_events + waiting_events < n) { 3746 /* Can never meet requirements */ 3747 ep->cached_state |= REQMNTS_DISPROVED; 3748 indent(); 3749 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED "); 3750 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3751 out(O_ALTFP|O_VERB, NULL); 3752 indent_pop(); 3753 return (FME_DISPROVED); 3754 } 3755 if (credible_events + deferred_events < n) { 3756 /* will have to wait */ 3757 /* wait time is shortest known */ 3758 if (arrow_delay < overall_delay) 3759 overall_delay = arrow_delay; 3760 return_value = FME_WAIT; 3761 } else if (credible_events < n) { 3762 if (return_value != FME_WAIT) 3763 return_value = FME_DEFERRED; 3764 } 3765 } 3766 3767 /* 3768 * don't mark as FME_DEFERRED. If this event isn't reached by another 3769 * path, then this will be considered FME_CREDIBLE. But if it is 3770 * reached by a different path so the K-count is met, then might 3771 * get overridden by FME_WAIT or FME_DISPROVED. 
3772 */ 3773 if (return_value == FME_WAIT) { 3774 ep->cached_state |= REQMNTS_WAIT; 3775 ep->cached_delay = *pdelay = overall_delay; 3776 } else if (return_value == FME_CREDIBLE) { 3777 ep->cached_state |= REQMNTS_CREDIBLE; 3778 } 3779 indent(); 3780 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ", 3781 fme_state2str(return_value)); 3782 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3783 out(O_ALTFP|O_VERB, NULL); 3784 indent_pop(); 3785 return (return_value); 3786 } 3787 3788 static enum fme_state 3789 causes_test(struct fme *fmep, struct event *ep, 3790 unsigned long long at_latest_by, unsigned long long *pdelay) 3791 { 3792 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 3793 unsigned long long my_delay; 3794 int credible_results = 0; 3795 int waiting_results = 0; 3796 enum fme_state fstate; 3797 struct event *tail_event; 3798 struct bubble *bp; 3799 struct arrowlist *ap; 3800 int k = 1; 3801 3802 stats_counter_bump(fmep->Ccallcount); 3803 indent_push(" C"); 3804 indent(); 3805 out(O_ALTFP|O_VERB|O_NONL, "->"); 3806 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3807 out(O_ALTFP|O_VERB, NULL); 3808 3809 for (bp = itree_next_bubble(ep, NULL); bp; 3810 bp = itree_next_bubble(ep, bp)) { 3811 if (bp->t != B_TO) 3812 continue; 3813 k = bp->nork; /* remember the K value */ 3814 for (ap = itree_next_arrow(bp, NULL); ap; 3815 ap = itree_next_arrow(bp, ap)) { 3816 int do_not_follow = 0; 3817 3818 /* 3819 * if we get to the same event multiple times 3820 * only worry about the first one. 
3821 */ 3822 if (ap->arrowp->tail->myevent->cached_state & 3823 CAUSES_TESTED) { 3824 indent(); 3825 out(O_ALTFP|O_VERB|O_NONL, 3826 " causes test already run for "); 3827 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, 3828 ap->arrowp->tail->myevent); 3829 out(O_ALTFP|O_VERB, NULL); 3830 continue; 3831 } 3832 3833 /* 3834 * see if false constraint prevents us 3835 * from traversing this arrow 3836 */ 3837 platform_set_payloadnvp(ep->nvp); 3838 if (checkconstraints(fmep, ap->arrowp) == 0) 3839 do_not_follow = 1; 3840 platform_set_payloadnvp(NULL); 3841 if (do_not_follow) { 3842 indent(); 3843 out(O_ALTFP|O_VERB|O_NONL, 3844 " False arrow from "); 3845 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, 3846 ap->arrowp->tail->myevent); 3847 out(O_ALTFP|O_VERB, NULL); 3848 continue; 3849 } 3850 3851 ap->arrowp->tail->myevent->cached_state |= 3852 CAUSES_TESTED; 3853 tail_event = ap->arrowp->tail->myevent; 3854 fstate = hypothesise(fmep, tail_event, at_latest_by, 3855 &my_delay); 3856 3857 switch (fstate) { 3858 case FME_WAIT: 3859 if (my_delay < overall_delay) 3860 overall_delay = my_delay; 3861 waiting_results++; 3862 break; 3863 case FME_CREDIBLE: 3864 credible_results++; 3865 break; 3866 case FME_DISPROVED: 3867 break; 3868 default: 3869 out(O_DIE, "Bug in causes_test"); 3870 } 3871 } 3872 } 3873 /* compare against K */ 3874 if (credible_results + waiting_results < k) { 3875 indent(); 3876 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED "); 3877 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3878 out(O_ALTFP|O_VERB, NULL); 3879 indent_pop(); 3880 return (FME_DISPROVED); 3881 } 3882 if (waiting_results != 0) { 3883 *pdelay = overall_delay; 3884 indent(); 3885 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT "); 3886 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3887 out(O_ALTFP|O_VERB|O_NONL, " to "); 3888 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3889 out(O_ALTFP|O_VERB, NULL); 3890 indent_pop(); 3891 return (FME_WAIT); 3892 } 3893 indent(); 3894 out(O_ALTFP|O_VERB|O_NONL, 
"<-CAUSES CREDIBLE "); 3895 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3896 out(O_ALTFP|O_VERB, NULL); 3897 indent_pop(); 3898 return (FME_CREDIBLE); 3899 } 3900 3901 static enum fme_state 3902 hypothesise(struct fme *fmep, struct event *ep, 3903 unsigned long long at_latest_by, unsigned long long *pdelay) 3904 { 3905 enum fme_state rtr, otr; 3906 unsigned long long my_delay; 3907 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 3908 3909 stats_counter_bump(fmep->Hcallcount); 3910 indent_push(" H"); 3911 indent(); 3912 out(O_ALTFP|O_VERB|O_NONL, "->"); 3913 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3914 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: "); 3915 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3916 out(O_ALTFP|O_VERB, NULL); 3917 3918 rtr = requirements_test(fmep, ep, at_latest_by, &my_delay); 3919 if ((rtr == FME_WAIT) && (my_delay < overall_delay)) 3920 overall_delay = my_delay; 3921 if (rtr != FME_DISPROVED) { 3922 if (is_problem(ep->t)) { 3923 otr = effects_test(fmep, ep, at_latest_by, &my_delay); 3924 if (otr != FME_DISPROVED) { 3925 if (fmep->peek == 0 && ep->is_suspect == 0) { 3926 ep->suspects = fmep->suspects; 3927 ep->is_suspect = 1; 3928 fmep->suspects = ep; 3929 fmep->nsuspects++; 3930 if (!is_fault(ep->t)) 3931 fmep->nonfault++; 3932 } 3933 } 3934 } else 3935 otr = causes_test(fmep, ep, at_latest_by, &my_delay); 3936 if ((otr == FME_WAIT) && (my_delay < overall_delay)) 3937 overall_delay = my_delay; 3938 if ((otr != FME_DISPROVED) && 3939 ((rtr == FME_WAIT) || (otr == FME_WAIT))) 3940 *pdelay = overall_delay; 3941 } 3942 if (rtr == FME_DISPROVED) { 3943 indent(); 3944 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 3945 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3946 out(O_ALTFP|O_VERB, " (doesn't meet requirements)"); 3947 indent_pop(); 3948 return (FME_DISPROVED); 3949 } 3950 if ((otr == FME_DISPROVED) && is_problem(ep->t)) { 3951 indent(); 3952 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 3953 
itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3954 out(O_ALTFP|O_VERB, " (doesn't explain all reports)"); 3955 indent_pop(); 3956 return (FME_DISPROVED); 3957 } 3958 if (otr == FME_DISPROVED) { 3959 indent(); 3960 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 3961 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3962 out(O_ALTFP|O_VERB, " (causes are not credible)"); 3963 indent_pop(); 3964 return (FME_DISPROVED); 3965 } 3966 if ((rtr == FME_WAIT) || (otr == FME_WAIT)) { 3967 indent(); 3968 out(O_ALTFP|O_VERB|O_NONL, "<-WAIT "); 3969 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3970 out(O_ALTFP|O_VERB|O_NONL, " to "); 3971 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay); 3972 out(O_ALTFP|O_VERB, NULL); 3973 indent_pop(); 3974 return (FME_WAIT); 3975 } 3976 indent(); 3977 out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE "); 3978 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3979 out(O_ALTFP|O_VERB, NULL); 3980 indent_pop(); 3981 return (FME_CREDIBLE); 3982 } 3983 3984 /* 3985 * fme_istat_load -- reconstitute any persistent istats 3986 */ 3987 void 3988 fme_istat_load(fmd_hdl_t *hdl) 3989 { 3990 int sz; 3991 char *sbuf; 3992 char *ptr; 3993 3994 if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) { 3995 out(O_ALTFP, "fme_istat_load: No stats"); 3996 return; 3997 } 3998 3999 sbuf = alloca(sz); 4000 4001 fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz); 4002 4003 /* 4004 * pick apart the serialized stats 4005 * 4006 * format is: 4007 * <class-name>, '@', <path>, '\0', <value>, '\0' 4008 * for example: 4009 * "stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0" 4010 * 4011 * since this is parsing our own serialized data, any parsing issues 4012 * are fatal, so we check for them all with ASSERT() below. 
4013 */ 4014 ptr = sbuf; 4015 while (ptr < &sbuf[sz]) { 4016 char *sepptr; 4017 struct node *np; 4018 int val; 4019 4020 sepptr = strchr(ptr, '@'); 4021 ASSERT(sepptr != NULL); 4022 *sepptr = '\0'; 4023 4024 /* construct the event */ 4025 np = newnode(T_EVENT, NULL, 0); 4026 np->u.event.ename = newnode(T_NAME, NULL, 0); 4027 np->u.event.ename->u.name.t = N_STAT; 4028 np->u.event.ename->u.name.s = stable(ptr); 4029 np->u.event.ename->u.name.it = IT_ENAME; 4030 np->u.event.ename->u.name.last = np->u.event.ename; 4031 4032 ptr = sepptr + 1; 4033 ASSERT(ptr < &sbuf[sz]); 4034 ptr += strlen(ptr); 4035 ptr++; /* move past the '\0' separating path from value */ 4036 ASSERT(ptr < &sbuf[sz]); 4037 ASSERT(isdigit(*ptr)); 4038 val = atoi(ptr); 4039 ASSERT(val > 0); 4040 ptr += strlen(ptr); 4041 ptr++; /* move past the final '\0' for this entry */ 4042 4043 np->u.event.epname = pathstring2epnamenp(sepptr + 1); 4044 ASSERT(np->u.event.epname != NULL); 4045 4046 istat_bump(np, val); 4047 tree_free(np); 4048 } 4049 4050 istat_save(); 4051 } 4052