1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * fme.c -- fault management exercise module 27 * 28 * this module provides the simulated fault management exercise. 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <string.h> 36 #include <strings.h> 37 #include <ctype.h> 38 #include <alloca.h> 39 #include <libnvpair.h> 40 #include <sys/fm/protocol.h> 41 #include <fm/fmd_api.h> 42 #include "alloc.h" 43 #include "out.h" 44 #include "stats.h" 45 #include "stable.h" 46 #include "literals.h" 47 #include "lut.h" 48 #include "tree.h" 49 #include "ptree.h" 50 #include "itree.h" 51 #include "ipath.h" 52 #include "fme.h" 53 #include "evnv.h" 54 #include "eval.h" 55 #include "config.h" 56 #include "platform.h" 57 58 /* imported from eft.c... 
*/
extern char *Autoclose;
extern int Dupclose;
extern hrtime_t Hesitate;
extern nv_alloc_t Eft_nv_hdl;
extern int Max_fme;
extern fmd_hdl_t *Hdl;

static int Istat_need_save;
void istat_save(void);

/* fme under construction is global so we can free it on module abort */
static struct fme *Nfmep;

/*
 * reason (one of the UD_* values) recorded when a report or case cannot
 * be diagnosed; fme_restart() attaches it to the undiagnosable defect.
 */
static const char *Undiag_reason;

/*
 * id handed to the next FME created by newfme(); fme_restart() bumps it
 * past the id of any FME it restores.
 */
static int Nextid = 0;

static int Open_fme_count = 0;	/* Count of open FMEs */

/*
 * list of fault management exercises underway
 *
 * FMElist is the head and EFMElist the tail of the list fme_ready()
 * appends to.  FMEs on ClosedFMEs are destroyed by fme_fini() and by
 * fme_receive_report().
 */
static struct fme {
	struct fme *next;		/* next exercise */
	unsigned long long ull;		/* time when fme was created */
	int id;				/* FME id */
	struct cfgdata *cfgdata;	/* full configuration data */
	struct lut *eventtree;		/* propagation tree for this FME */
	/*
	 * The initial error report that created this FME is kept in
	 * two forms.  e0 points to the instance tree node and is used
	 * by fme_eval() as the starting point for the inference
	 * algorithm.  e0r is the event handle FMD passed to us when
	 * the ereport first arrived and is used when setting timers,
	 * which are always relative to the time of this initial
	 * report.
	 */
	struct event *e0;
	fmd_event_t *e0r;

	id_t timer;			/* for setting an fmd time-out */

	struct event *ecurrent;		/* ereport under consideration */
	struct event *suspects;		/* current suspect list */
	struct event *psuspects;	/* previous suspect list */
	int nsuspects;			/* count of suspects */
	int nonfault;			/* zero if all suspects T_FAULT */
	int posted_suspects;		/* true if we've posted a diagnosis */
	int uniqobs;			/* number of unique events observed */
	int peek;			/* just peeking, don't track suspects */
	int overflow;			/* true if overflow FME */
	enum fme_state {
		FME_NOTHING = 5000,	/* not evaluated yet */
		FME_WAIT,		/* need to wait for more info */
		FME_CREDIBLE,		/* suspect list is credible */
		FME_DISPROVED,		/* no valid suspects found */
		FME_DEFERRED		/* don't know yet (k-count not met) */
	} state;

	unsigned long long pull;	/* time passed since created */
	unsigned long long wull;	/* wait until this time for re-eval */
	struct event *observations;	/* observation list */
	struct lut *globals;		/* values of global variables */
	/* fmd interfacing */
	fmd_hdl_t *hdl;			/* handle for talking with fmd */
	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
	/* stats (created by fme_ready(), deleted by destroy_fme()) */
	struct stats *Rcount;		/* ereports received */
	struct stats *Hcallcount;	/* calls to hypothesise() */
	struct stats *Rcallcount;	/* calls to requirements_test() */
	struct stats *Ccallcount;	/* calls to causes_test() */
	struct stats *Ecallcount;	/* calls to effects_test() */
	struct stats *Tcallcount;	/* calls to triggered() */
	struct stats *Marrowcount;	/* arrows marked by mark_arrows() */
	struct stats *diags;		/* suspect lists diagnosed */
} *FMElist, *EFMElist, *ClosedFMEs;

/* cases fme_restart() was unable to restart; freed by fme_fini() */
static struct case_list {
	fmd_case_t *fmcase;
	struct case_list *next;
} *Undiagablecaselist;

static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
	unsigned long long at_latest_by, unsigned long long *pdelay);
static struct node *eventprop_lookup(struct event *ep, const char *propname);
static struct node
*pathstring2epnamenp(char *path); 144 static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep); 145 static void restore_suspects(struct fme *fmep); 146 static void save_suspects(struct fme *fmep); 147 static void destroy_fme(struct fme *f); 148 static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep, 149 const char *eventstring, const struct ipath *ipp, nvlist_t *nvl); 150 static void istat_counter_reset_cb(struct istat_entry *entp, 151 struct stats *statp, const struct ipath *ipp); 152 153 static struct fme * 154 alloc_fme(void) 155 { 156 struct fme *fmep; 157 158 fmep = MALLOC(sizeof (*fmep)); 159 bzero(fmep, sizeof (*fmep)); 160 return (fmep); 161 } 162 163 /* 164 * fme_ready -- called when all initialization of the FME (except for 165 * stats) has completed successfully. Adds the fme to global lists 166 * and establishes its stats. 167 */ 168 static struct fme * 169 fme_ready(struct fme *fmep) 170 { 171 char nbuf[100]; 172 173 Nfmep = NULL; /* don't need to free this on module abort now */ 174 175 if (EFMElist) { 176 EFMElist->next = fmep; 177 EFMElist = fmep; 178 } else 179 FMElist = EFMElist = fmep; 180 181 (void) sprintf(nbuf, "fme%d.Rcount", fmep->id); 182 fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0); 183 (void) sprintf(nbuf, "fme%d.Hcall", fmep->id); 184 fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1); 185 (void) sprintf(nbuf, "fme%d.Rcall", fmep->id); 186 fmep->Rcallcount = stats_new_counter(nbuf, 187 "calls to requirements_test()", 1); 188 (void) sprintf(nbuf, "fme%d.Ccall", fmep->id); 189 fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1); 190 (void) sprintf(nbuf, "fme%d.Ecall", fmep->id); 191 fmep->Ecallcount = 192 stats_new_counter(nbuf, "calls to effects_test()", 1); 193 (void) sprintf(nbuf, "fme%d.Tcall", fmep->id); 194 fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1); 195 (void) sprintf(nbuf, "fme%d.Marrow", fmep->id); 196 fmep->Marrowcount = 
stats_new_counter(nbuf, 197 "arrows marked by mark_arrows()", 1); 198 (void) sprintf(nbuf, "fme%d.diags", fmep->id); 199 fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0); 200 201 out(O_ALTFP|O_VERB2, "newfme: config snapshot contains..."); 202 config_print(O_ALTFP|O_VERB2, fmep->cfgdata->cooked); 203 204 return (fmep); 205 } 206 207 static struct fme * 208 newfme(const char *e0class, const struct ipath *e0ipp) 209 { 210 struct cfgdata *cfgdata; 211 212 if ((cfgdata = config_snapshot()) == NULL) { 213 out(O_ALTFP, "newfme: NULL configuration"); 214 Undiag_reason = UD_NOCONF; 215 return (NULL); 216 } 217 218 Nfmep = alloc_fme(); 219 220 Nfmep->id = Nextid++; 221 Nfmep->cfgdata = cfgdata; 222 Nfmep->posted_suspects = 0; 223 Nfmep->uniqobs = 0; 224 Nfmep->state = FME_NOTHING; 225 Nfmep->pull = 0ULL; 226 Nfmep->overflow = 0; 227 228 Nfmep->fmcase = NULL; 229 Nfmep->hdl = NULL; 230 231 if ((Nfmep->eventtree = itree_create(cfgdata->cooked)) == NULL) { 232 out(O_ALTFP, "newfme: NULL instance tree"); 233 Undiag_reason = UD_INSTFAIL; 234 config_free(cfgdata); 235 FREE(Nfmep); 236 Nfmep = NULL; 237 return (NULL); 238 } 239 240 itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree); 241 242 if ((Nfmep->e0 = 243 itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) { 244 out(O_ALTFP, "newfme: e0 not in instance tree"); 245 Undiag_reason = UD_BADEVENTI; 246 itree_free(Nfmep->eventtree); 247 config_free(cfgdata); 248 FREE(Nfmep); 249 Nfmep = NULL; 250 return (NULL); 251 } 252 253 return (fme_ready(Nfmep)); 254 } 255 256 void 257 fme_fini(void) 258 { 259 struct fme *sfp, *fp; 260 struct case_list *ucasep, *nextcasep; 261 262 ucasep = Undiagablecaselist; 263 while (ucasep != NULL) { 264 nextcasep = ucasep->next; 265 FREE(ucasep); 266 ucasep = nextcasep; 267 } 268 Undiagablecaselist = NULL; 269 270 /* clean up closed fmes */ 271 fp = ClosedFMEs; 272 while (fp != NULL) { 273 sfp = fp->next; 274 destroy_fme(fp); 275 fp = sfp; 276 } 277 ClosedFMEs = NULL; 278 279 fp = 
FMElist; 280 while (fp != NULL) { 281 sfp = fp->next; 282 destroy_fme(fp); 283 fp = sfp; 284 } 285 FMElist = EFMElist = NULL; 286 287 /* if we were in the middle of creating an fme, free it now */ 288 if (Nfmep) { 289 destroy_fme(Nfmep); 290 Nfmep = NULL; 291 } 292 } 293 294 /* 295 * Allocated space for a buffer name. 20 bytes allows for 296 * a ridiculous 9,999,999 unique observations. 297 */ 298 #define OBBUFNMSZ 20 299 300 /* 301 * serialize_observation 302 * 303 * Create a recoverable version of the current observation 304 * (f->ecurrent). We keep a serialized version of each unique 305 * observation in order that we may resume correctly the fme in the 306 * correct state if eft or fmd crashes and we're restarted. 307 */ 308 static void 309 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp) 310 { 311 size_t pkdlen; 312 char tmpbuf[OBBUFNMSZ]; 313 char *pkd = NULL; 314 char *estr; 315 316 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs); 317 estr = ipath2str(cls, ipp); 318 fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1); 319 fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr, 320 strlen(estr) + 1); 321 FREE(estr); 322 323 if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) { 324 (void) snprintf(tmpbuf, 325 OBBUFNMSZ, "observed%d.nvp", fp->uniqobs); 326 if (nvlist_xpack(fp->ecurrent->nvp, 327 &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0) 328 out(O_DIE|O_SYS, "pack of observed nvl failed"); 329 fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen); 330 fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen); 331 FREE(pkd); 332 } 333 334 fp->uniqobs++; 335 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs, 336 sizeof (fp->uniqobs)); 337 } 338 339 /* 340 * init_fme_bufs -- We keep several bits of state about an fme for 341 * use if eft or fmd crashes and we're restarted. 
342 */ 343 static void 344 init_fme_bufs(struct fme *fp) 345 { 346 size_t cfglen = fp->cfgdata->nextfree - fp->cfgdata->begin; 347 348 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_CFGLEN, sizeof (cfglen)); 349 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_CFGLEN, (void *)&cfglen, 350 sizeof (cfglen)); 351 if (cfglen != 0) { 352 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_CFG, cfglen); 353 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_CFG, 354 fp->cfgdata->begin, cfglen); 355 } 356 357 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull)); 358 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull, 359 sizeof (fp->pull)); 360 361 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id)); 362 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id, 363 sizeof (fp->id)); 364 365 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs)); 366 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs, 367 sizeof (fp->uniqobs)); 368 369 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD, 370 sizeof (fp->posted_suspects)); 371 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD, 372 (void *)&fp->posted_suspects, sizeof (fp->posted_suspects)); 373 } 374 375 static void 376 destroy_fme_bufs(struct fme *fp) 377 { 378 char tmpbuf[OBBUFNMSZ]; 379 int o; 380 381 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN); 382 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG); 383 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL); 384 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID); 385 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD); 386 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS); 387 388 for (o = 0; o < fp->uniqobs; o++) { 389 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o); 390 fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf); 391 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o); 392 fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf); 393 } 394 } 395 396 /* 397 * reconstitute_observations -- convert a case's serialized observations 398 * back into 
struct events. Returns zero if all observations are 399 * successfully reconstituted. 400 */ 401 static int 402 reconstitute_observations(struct fme *fmep) 403 { 404 struct event *ep; 405 struct node *epnamenp = NULL; 406 size_t pkdlen; 407 char *pkd = NULL; 408 char *tmpbuf = alloca(OBBUFNMSZ); 409 char *sepptr; 410 char *estr; 411 int ocnt; 412 int elen; 413 414 for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) { 415 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt); 416 elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf); 417 if (elen == 0) { 418 out(O_ALTFP, 419 "reconstitute_observation: no %s buffer found.", 420 tmpbuf); 421 Undiag_reason = UD_MISSINGOBS; 422 break; 423 } 424 425 estr = MALLOC(elen); 426 fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen); 427 sepptr = strchr(estr, '@'); 428 if (sepptr == NULL) { 429 out(O_ALTFP, 430 "reconstitute_observation: %s: " 431 "missing @ separator in %s.", 432 tmpbuf, estr); 433 Undiag_reason = UD_MISSINGPATH; 434 FREE(estr); 435 break; 436 } 437 438 *sepptr = '\0'; 439 if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) { 440 out(O_ALTFP, 441 "reconstitute_observation: %s: " 442 "trouble converting path string \"%s\" " 443 "to internal representation.", 444 tmpbuf, sepptr + 1); 445 Undiag_reason = UD_MISSINGPATH; 446 FREE(estr); 447 break; 448 } 449 450 /* construct the event */ 451 ep = itree_lookup(fmep->eventtree, 452 stable(estr), ipath(epnamenp)); 453 if (ep == NULL) { 454 out(O_ALTFP, 455 "reconstitute_observation: %s: " 456 "lookup of \"%s\" in itree failed.", 457 tmpbuf, ipath2str(estr, ipath(epnamenp))); 458 Undiag_reason = UD_BADOBS; 459 tree_free(epnamenp); 460 FREE(estr); 461 break; 462 } 463 tree_free(epnamenp); 464 465 /* 466 * We may or may not have a saved nvlist for the observation 467 */ 468 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt); 469 pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf); 470 if (pkdlen != 0) { 471 pkd = MALLOC(pkdlen); 472 fmd_buf_read(fmep->hdl, 
473 fmep->fmcase, tmpbuf, pkd, pkdlen); 474 ASSERT(ep->nvp == NULL); 475 if (nvlist_xunpack(pkd, 476 pkdlen, &ep->nvp, &Eft_nv_hdl) != 0) 477 out(O_DIE|O_SYS, "pack of observed nvl failed"); 478 FREE(pkd); 479 } 480 481 if (ocnt == 0) 482 fmep->e0 = ep; 483 484 FREE(estr); 485 fmep->ecurrent = ep; 486 ep->count++; 487 488 /* link it into list of observations seen */ 489 ep->observations = fmep->observations; 490 fmep->observations = ep; 491 } 492 493 if (ocnt == fmep->uniqobs) { 494 (void) fme_ready(fmep); 495 return (0); 496 } 497 498 return (1); 499 } 500 501 /* 502 * restart_fme -- called during eft initialization. Reconstitutes 503 * an in-progress fme. 504 */ 505 void 506 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress) 507 { 508 nvlist_t *defect; 509 struct case_list *bad; 510 struct fme *fmep; 511 struct cfgdata *cfgdata = NULL; 512 size_t rawsz; 513 514 fmep = alloc_fme(); 515 fmep->fmcase = inprogress; 516 fmep->hdl = hdl; 517 518 if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) { 519 out(O_ALTFP, "restart_fme: no saved posted status"); 520 Undiag_reason = UD_MISSINGINFO; 521 goto badcase; 522 } else { 523 fmd_buf_read(hdl, inprogress, WOBUF_POSTD, 524 (void *)&fmep->posted_suspects, 525 sizeof (fmep->posted_suspects)); 526 } 527 528 /* 529 * ignore solved or closed cases 530 */ 531 if (fmep->posted_suspects || 532 fmd_case_solved(fmep->hdl, fmep->fmcase) || 533 fmd_case_closed(fmep->hdl, fmep->fmcase)) 534 goto badcase; 535 536 if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) { 537 out(O_ALTFP, "restart_fme: No config data"); 538 Undiag_reason = UD_MISSINGINFO; 539 goto badcase; 540 } 541 fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz, 542 sizeof (size_t)); 543 544 if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) { 545 out(O_ALTFP, "restart_fme: No event zero"); 546 Undiag_reason = UD_MISSINGZERO; 547 goto badcase; 548 } 549 550 cfgdata = MALLOC(sizeof (struct cfgdata)); 551 cfgdata->cooked = NULL; 
552 cfgdata->devcache = NULL; 553 cfgdata->cpucache = NULL; 554 cfgdata->refcnt = 1; 555 556 if (rawsz > 0) { 557 if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) { 558 out(O_ALTFP, "restart_fme: Config data size mismatch"); 559 Undiag_reason = UD_CFGMISMATCH; 560 goto badcase; 561 } 562 cfgdata->begin = MALLOC(rawsz); 563 cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz; 564 fmd_buf_read(hdl, 565 inprogress, WOBUF_CFG, cfgdata->begin, rawsz); 566 } else { 567 cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL; 568 } 569 fmep->cfgdata = cfgdata; 570 571 config_cook(cfgdata); 572 if ((fmep->eventtree = itree_create(cfgdata->cooked)) == NULL) { 573 /* case not properly saved or irretrievable */ 574 out(O_ALTFP, "restart_fme: NULL instance tree"); 575 Undiag_reason = UD_INSTFAIL; 576 goto badcase; 577 } 578 579 itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree); 580 581 if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) { 582 out(O_ALTFP, "restart_fme: no saved wait time"); 583 Undiag_reason = UD_MISSINGINFO; 584 goto badcase; 585 } else { 586 fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull, 587 sizeof (fmep->pull)); 588 } 589 590 if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) { 591 out(O_ALTFP, "restart_fme: no saved id"); 592 Undiag_reason = UD_MISSINGINFO; 593 goto badcase; 594 } else { 595 fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id, 596 sizeof (fmep->id)); 597 } 598 if (Nextid <= fmep->id) 599 Nextid = fmep->id + 1; 600 601 if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) { 602 out(O_ALTFP, "restart_fme: no count of observations"); 603 Undiag_reason = UD_MISSINGINFO; 604 goto badcase; 605 } else { 606 fmd_buf_read(hdl, inprogress, WOBUF_NOBS, 607 (void *)&fmep->uniqobs, sizeof (fmep->uniqobs)); 608 } 609 610 if (reconstitute_observations(fmep) != 0) 611 goto badcase; 612 613 Open_fme_count++; 614 615 /* give the diagnosis algorithm a shot at the new FME state */ 616 fme_eval(fmep, fmep->e0r); 617 return; 618 619 
badcase: 620 if (fmep->eventtree != NULL) 621 itree_free(fmep->eventtree); 622 config_free(cfgdata); 623 destroy_fme_bufs(fmep); 624 FREE(fmep); 625 626 /* 627 * Since we're unable to restart the case, add it to the undiagable 628 * list and solve and close it as appropriate. 629 */ 630 bad = MALLOC(sizeof (struct case_list)); 631 bad->next = NULL; 632 633 if (Undiagablecaselist != NULL) 634 bad->next = Undiagablecaselist; 635 Undiagablecaselist = bad; 636 bad->fmcase = inprogress; 637 638 out(O_ALTFP|O_NONL, "[case %s (unable to restart), ", 639 fmd_case_uuid(hdl, bad->fmcase)); 640 641 if (fmd_case_solved(hdl, bad->fmcase)) { 642 out(O_ALTFP|O_NONL, "already solved, "); 643 } else { 644 out(O_ALTFP|O_NONL, "solving, "); 645 defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100, 646 NULL, NULL, NULL); 647 if (Undiag_reason != NULL) 648 (void) nvlist_add_string(defect, 649 UNDIAG_REASON, Undiag_reason); 650 fmd_case_add_suspect(hdl, bad->fmcase, defect); 651 fmd_case_solve(hdl, bad->fmcase); 652 } 653 654 if (fmd_case_closed(hdl, bad->fmcase)) { 655 out(O_ALTFP, "already closed ]"); 656 } else { 657 out(O_ALTFP, "closing ]"); 658 fmd_case_close(hdl, bad->fmcase); 659 } 660 } 661 662 /*ARGSUSED*/ 663 static void 664 globals_destructor(void *left, void *right, void *arg) 665 { 666 struct evalue *evp = (struct evalue *)right; 667 if (evp->t == NODEPTR) 668 tree_free((struct node *)(uintptr_t)evp->v); 669 evp->v = NULL; 670 FREE(evp); 671 } 672 673 void 674 destroy_fme(struct fme *f) 675 { 676 stats_delete(f->Rcount); 677 stats_delete(f->Hcallcount); 678 stats_delete(f->Rcallcount); 679 stats_delete(f->Ccallcount); 680 stats_delete(f->Ecallcount); 681 stats_delete(f->Tcallcount); 682 stats_delete(f->Marrowcount); 683 stats_delete(f->diags); 684 685 if (f->eventtree != NULL) 686 itree_free(f->eventtree); 687 if (f->cfgdata != NULL) 688 config_free(f->cfgdata); 689 lut_free(f->globals, globals_destructor, NULL); 690 FREE(f); 691 } 692 693 static const char * 694 
fme_state2str(enum fme_state s) 695 { 696 switch (s) { 697 case FME_NOTHING: return ("NOTHING"); 698 case FME_WAIT: return ("WAIT"); 699 case FME_CREDIBLE: return ("CREDIBLE"); 700 case FME_DISPROVED: return ("DISPROVED"); 701 case FME_DEFERRED: return ("DEFERRED"); 702 default: return ("UNKNOWN"); 703 } 704 } 705 706 static int 707 is_problem(enum nametype t) 708 { 709 return (t == N_FAULT || t == N_DEFECT || t == N_UPSET); 710 } 711 712 static int 713 is_fault(enum nametype t) 714 { 715 return (t == N_FAULT); 716 } 717 718 static int 719 is_defect(enum nametype t) 720 { 721 return (t == N_DEFECT); 722 } 723 724 static int 725 is_upset(enum nametype t) 726 { 727 return (t == N_UPSET); 728 } 729 730 static void 731 fme_print(int flags, struct fme *fmep) 732 { 733 struct event *ep; 734 735 out(flags, "Fault Management Exercise %d", fmep->id); 736 out(flags, "\t State: %s", fme_state2str(fmep->state)); 737 out(flags|O_NONL, "\t Start time: "); 738 ptree_timeval(flags|O_NONL, &fmep->ull); 739 out(flags, NULL); 740 if (fmep->wull) { 741 out(flags|O_NONL, "\t Wait time: "); 742 ptree_timeval(flags|O_NONL, &fmep->wull); 743 out(flags, NULL); 744 } 745 out(flags|O_NONL, "\t E0: "); 746 if (fmep->e0) 747 itree_pevent_brief(flags|O_NONL, fmep->e0); 748 else 749 out(flags|O_NONL, "NULL"); 750 out(flags, NULL); 751 out(flags|O_NONL, "\tObservations:"); 752 for (ep = fmep->observations; ep; ep = ep->observations) { 753 out(flags|O_NONL, " "); 754 itree_pevent_brief(flags|O_NONL, ep); 755 } 756 out(flags, NULL); 757 out(flags|O_NONL, "\tSuspect list:"); 758 for (ep = fmep->suspects; ep; ep = ep->suspects) { 759 out(flags|O_NONL, " "); 760 itree_pevent_brief(flags|O_NONL, ep); 761 } 762 out(flags, NULL); 763 if (fmep->eventtree != NULL) { 764 out(flags|O_VERB2, "\t Tree:"); 765 itree_ptree(flags|O_VERB2, fmep->eventtree); 766 } 767 } 768 769 static struct node * 770 pathstring2epnamenp(char *path) 771 { 772 char *sep = "/"; 773 struct node *ret; 774 char *ptr; 775 776 if ((ptr = 
strtok(path, sep)) == NULL) 777 out(O_DIE, "pathstring2epnamenp: invalid empty class"); 778 779 ret = tree_iname(stable(ptr), NULL, 0); 780 781 while ((ptr = strtok(NULL, sep)) != NULL) 782 ret = tree_name_append(ret, 783 tree_iname(stable(ptr), NULL, 0)); 784 785 return (ret); 786 } 787 788 /* 789 * for a given upset sp, increment the corresponding SERD engine. if the 790 * SERD engine trips, return the ename and ipp of the resulting ereport. 791 * returns true if engine tripped and *enamep and *ippp were filled in. 792 */ 793 static int 794 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep, 795 fmd_case_t *fmcase, struct event *sp, const char **enamep, 796 const struct ipath **ippp) 797 { 798 struct node *serdinst; 799 char *serdname; 800 struct node *nid; 801 802 ASSERT(sp->t == N_UPSET); 803 ASSERT(ffep != NULL); 804 805 /* 806 * obtain instanced SERD engine from the upset sp. from this 807 * derive serdname, the string used to identify the SERD engine. 808 */ 809 serdinst = eventprop_lookup(sp, L_engine); 810 811 if (serdinst == NULL) 812 return (NULL); 813 814 serdname = ipath2str(serdinst->u.stmt.np->u.event.ename->u.name.s, 815 ipath(serdinst->u.stmt.np->u.event.epname)); 816 817 /* handle serd engine "id" property, if there is one */ 818 if ((nid = 819 lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) { 820 struct evalue *gval; 821 char suffixbuf[200]; 822 char *suffix; 823 char *nserdname; 824 size_t nname; 825 826 out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname); 827 ptree_name_iter(O_ALTFP|O_NONL, nid); 828 829 ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t)); 830 831 if ((gval = lut_lookup(fmep->globals, 832 (void *)nid->u.globid.s, NULL)) == NULL) { 833 out(O_ALTFP, " undefined"); 834 } else if (gval->t == UINT64) { 835 out(O_ALTFP, " %llu", gval->v); 836 (void) sprintf(suffixbuf, "%llu", gval->v); 837 suffix = suffixbuf; 838 } else { 839 out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v); 840 suffix = (char 
*)(uintptr_t)gval->v; 841 } 842 843 nname = strlen(serdname) + strlen(suffix) + 2; 844 nserdname = MALLOC(nname); 845 (void) snprintf(nserdname, nname, "%s:%s", serdname, suffix); 846 FREE(serdname); 847 serdname = nserdname; 848 } 849 850 if (!fmd_serd_exists(hdl, serdname)) { 851 struct node *nN, *nT; 852 853 /* no SERD engine yet, so create it */ 854 nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N, NULL); 855 nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T, NULL); 856 857 ASSERT(nN->t == T_NUM); 858 ASSERT(nT->t == T_TIMEVAL); 859 860 fmd_serd_create(hdl, serdname, (uint_t)nN->u.ull, 861 (hrtime_t)nT->u.ull); 862 } 863 864 865 /* 866 * increment SERD engine. if engine fires, reset serd 867 * engine and return trip_strcode 868 */ 869 if (fmd_serd_record(hdl, serdname, ffep)) { 870 struct node *tripinst = lut_lookup(serdinst->u.stmt.lutp, 871 (void *)L_trip, NULL); 872 873 ASSERT(tripinst != NULL); 874 875 *enamep = tripinst->u.event.ename->u.name.s; 876 *ippp = ipath(tripinst->u.event.epname); 877 878 fmd_case_add_serd(hdl, fmcase, serdname); 879 fmd_serd_reset(hdl, serdname); 880 out(O_ALTFP|O_NONL, "[engine fired: %s, sending: ", serdname); 881 ipath_print(O_ALTFP|O_NONL, *enamep, *ippp); 882 out(O_ALTFP, "]"); 883 884 FREE(serdname); 885 return (1); 886 } 887 888 FREE(serdname); 889 return (0); 890 } 891 892 /* 893 * search a suspect list for upsets. feed each upset to serd_eval() and 894 * build up tripped[], an array of ereports produced by the firing of 895 * any SERD engines. then feed each ereport back into 896 * fme_receive_report(). 897 * 898 * returns ntrip, the number of these ereports produced. 
899 */ 900 static int 901 upsets_eval(struct fme *fmep, fmd_event_t *ffep) 902 { 903 /* we build an array of tripped ereports that we send ourselves */ 904 struct { 905 const char *ename; 906 const struct ipath *ipp; 907 } *tripped; 908 struct event *sp; 909 int ntrip, nupset, i; 910 911 /* 912 * count the number of upsets to determine the upper limit on 913 * expected trip ereport strings. remember that one upset can 914 * lead to at most one ereport. 915 */ 916 nupset = 0; 917 for (sp = fmep->suspects; sp; sp = sp->suspects) { 918 if (sp->t == N_UPSET) 919 nupset++; 920 } 921 922 if (nupset == 0) 923 return (0); 924 925 /* 926 * get to this point if we have upsets and expect some trip 927 * ereports 928 */ 929 tripped = alloca(sizeof (*tripped) * nupset); 930 bzero((void *)tripped, sizeof (*tripped) * nupset); 931 932 ntrip = 0; 933 for (sp = fmep->suspects; sp; sp = sp->suspects) 934 if (sp->t == N_UPSET && 935 serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp, 936 &tripped[ntrip].ename, &tripped[ntrip].ipp)) 937 ntrip++; 938 939 for (i = 0; i < ntrip; i++) 940 fme_receive_report(fmep->hdl, ffep, 941 tripped[i].ename, tripped[i].ipp, NULL); 942 943 return (ntrip); 944 } 945 946 /* 947 * fme_receive_external_report -- call when an external ereport comes in 948 * 949 * this routine just converts the relevant information from the ereport 950 * into a format used internally and passes it on to fme_receive_report(). 951 */ 952 void 953 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl, 954 const char *eventstring) 955 { 956 struct node *epnamenp = platform_getpath(nvl); 957 const struct ipath *ipp; 958 959 /* 960 * XFILE: If we ended up without a path, it's an X-file. 961 * For now, use our undiagnosable interface. 
962 */ 963 if (epnamenp == NULL) { 964 out(O_ALTFP, "XFILE: Unable to get path from ereport"); 965 Undiag_reason = UD_NOPATH; 966 publish_undiagnosable(hdl, ffep); 967 return; 968 } 969 970 ipp = ipath(epnamenp); 971 tree_free(epnamenp); 972 fme_receive_report(hdl, ffep, stable(eventstring), ipp, nvl); 973 } 974 975 /*ARGSUSED*/ 976 void 977 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl, 978 const char *eventstring) 979 { 980 char *uuid; 981 nvlist_t **nva; 982 uint_t nvc; 983 const struct ipath *ipp; 984 985 if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 || 986 nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, 987 &nva, &nvc) != 0) { 988 out(O_ALTFP, "No uuid or fault list for list.repaired event"); 989 return; 990 } 991 992 out(O_ALTFP, "Processing list.repaired from case %s", uuid); 993 994 while (nvc-- != 0) { 995 /* 996 * Reset any istat associated with this path. 997 */ 998 char *path; 999 1000 if ((ipp = platform_fault2ipath(*nva++)) == NULL) 1001 continue; 1002 1003 path = ipath2str(NULL, ipp); 1004 out(O_ALTFP, "fme_receive_repair_list: resetting state for %s", 1005 path); 1006 FREE(path); 1007 1008 lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp); 1009 istat_save(); 1010 1011 /* 1012 * We do not have a list of stat engines in a form that 1013 * we can readily clear any associated serd engines. When we 1014 * do, this will be the place to clear them. 
1015 */ 1016 } 1017 } 1018 1019 static int mark_arrows(struct fme *fmep, struct event *ep, int mark, 1020 unsigned long long at_latest_by, unsigned long long *pdelay, int keep); 1021 1022 /* ARGSUSED */ 1023 static void 1024 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep) 1025 { 1026 struct bubble *bp; 1027 struct arrowlist *ap; 1028 1029 ep->cached_state = 0; 1030 ep->keep_in_tree = 0; 1031 for (bp = itree_next_bubble(ep, NULL); bp; 1032 bp = itree_next_bubble(ep, bp)) { 1033 if (bp->t != B_FROM) 1034 continue; 1035 bp->mark = 0; 1036 for (ap = itree_next_arrow(bp, NULL); ap; 1037 ap = itree_next_arrow(bp, ap)) 1038 ap->arrowp->mark = 0; 1039 } 1040 } 1041 1042 static void 1043 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep, 1044 const char *eventstring, const struct ipath *ipp, nvlist_t *nvl) 1045 { 1046 struct event *ep; 1047 struct fme *fmep = NULL; 1048 struct fme *ofmep = NULL; 1049 struct fme *cfmep, *svfmep; 1050 int matched = 0; 1051 nvlist_t *defect; 1052 1053 out(O_ALTFP|O_NONL, "fme_receive_report: "); 1054 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1055 out(O_ALTFP|O_STAMP, NULL); 1056 1057 /* decide which FME it goes to */ 1058 for (fmep = FMElist; fmep; fmep = fmep->next) { 1059 int prev_verbose; 1060 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 1061 enum fme_state state; 1062 nvlist_t *pre_peek_nvp = NULL; 1063 1064 if (fmep->overflow) { 1065 if (!(fmd_case_closed(fmep->hdl, fmep->fmcase))) 1066 ofmep = fmep; 1067 1068 continue; 1069 } 1070 1071 /* 1072 * ignore solved or closed cases 1073 */ 1074 if (fmep->posted_suspects || 1075 fmd_case_solved(fmep->hdl, fmep->fmcase) || 1076 fmd_case_closed(fmep->hdl, fmep->fmcase)) 1077 continue; 1078 1079 /* look up event in event tree for this FME */ 1080 if ((ep = itree_lookup(fmep->eventtree, 1081 eventstring, ipp)) == NULL) 1082 continue; 1083 1084 /* note observation */ 1085 fmep->ecurrent = ep; 1086 if (ep->count++ == 0) { 1087 /* link it into list of observations seen 
*/ 1088 ep->observations = fmep->observations; 1089 fmep->observations = ep; 1090 ep->nvp = evnv_dupnvl(nvl); 1091 } else { 1092 /* use new payload values for peek */ 1093 pre_peek_nvp = ep->nvp; 1094 ep->nvp = evnv_dupnvl(nvl); 1095 } 1096 1097 /* tell hypothesise() not to mess with suspect list */ 1098 fmep->peek = 1; 1099 1100 /* don't want this to be verbose (unless Debug is set) */ 1101 prev_verbose = Verbose; 1102 if (Debug == 0) 1103 Verbose = 0; 1104 1105 lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep); 1106 state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay); 1107 1108 fmep->peek = 0; 1109 1110 /* put verbose flag back */ 1111 Verbose = prev_verbose; 1112 1113 if (state != FME_DISPROVED) { 1114 /* found an FME that explains the ereport */ 1115 matched++; 1116 out(O_ALTFP|O_NONL, "["); 1117 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1118 out(O_ALTFP, " explained by FME%d]", fmep->id); 1119 1120 if (pre_peek_nvp) 1121 nvlist_free(pre_peek_nvp); 1122 1123 if (ep->count == 1) 1124 serialize_observation(fmep, eventstring, ipp); 1125 1126 if (ffep) 1127 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1128 1129 stats_counter_bump(fmep->Rcount); 1130 1131 /* re-eval FME */ 1132 fme_eval(fmep, ffep); 1133 } else { 1134 1135 /* not a match, undo noting of observation */ 1136 fmep->ecurrent = NULL; 1137 if (--ep->count == 0) { 1138 /* unlink it from observations */ 1139 fmep->observations = ep->observations; 1140 ep->observations = NULL; 1141 nvlist_free(ep->nvp); 1142 ep->nvp = NULL; 1143 } else { 1144 nvlist_free(ep->nvp); 1145 ep->nvp = pre_peek_nvp; 1146 } 1147 } 1148 } 1149 1150 if (matched) 1151 return; /* explained by at least one existing FME */ 1152 1153 /* clean up closed fmes */ 1154 cfmep = ClosedFMEs; 1155 while (cfmep != NULL) { 1156 svfmep = cfmep->next; 1157 destroy_fme(cfmep); 1158 cfmep = svfmep; 1159 } 1160 ClosedFMEs = NULL; 1161 1162 if (ofmep) { 1163 out(O_ALTFP|O_NONL, "["); 1164 ipath_print(O_ALTFP|O_NONL, eventstring, 
ipp); 1165 out(O_ALTFP, " ADDING TO OVERFLOW FME]"); 1166 if (ffep) 1167 fmd_case_add_ereport(hdl, ofmep->fmcase, ffep); 1168 1169 return; 1170 1171 } else if (Max_fme && (Open_fme_count >= Max_fme)) { 1172 out(O_ALTFP|O_NONL, "["); 1173 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1174 out(O_ALTFP, " MAX OPEN FME REACHED]"); 1175 /* Create overflow fme */ 1176 if ((fmep = newfme(eventstring, ipp)) == NULL) { 1177 out(O_ALTFP|O_NONL, "["); 1178 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1179 out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]"); 1180 publish_undiagnosable(hdl, ffep); 1181 return; 1182 } 1183 1184 Open_fme_count++; 1185 1186 fmep->fmcase = fmd_case_open(hdl, NULL); 1187 fmep->hdl = hdl; 1188 init_fme_bufs(fmep); 1189 fmep->overflow = B_TRUE; 1190 1191 if (ffep) 1192 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1193 1194 defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100, 1195 NULL, NULL, NULL); 1196 (void) nvlist_add_string(defect, UNDIAG_REASON, UD_MAXFME); 1197 fmd_case_add_suspect(hdl, fmep->fmcase, defect); 1198 fmd_case_solve(hdl, fmep->fmcase); 1199 return; 1200 } 1201 1202 /* start a new FME */ 1203 if ((fmep = newfme(eventstring, ipp)) == NULL) { 1204 out(O_ALTFP|O_NONL, "["); 1205 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1206 out(O_ALTFP, " CANNOT DIAGNOSE]"); 1207 publish_undiagnosable(hdl, ffep); 1208 return; 1209 } 1210 1211 Open_fme_count++; 1212 1213 /* open a case */ 1214 fmep->fmcase = fmd_case_open(hdl, NULL); 1215 fmep->hdl = hdl; 1216 init_fme_bufs(fmep); 1217 1218 out(O_ALTFP|O_NONL, "["); 1219 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1220 out(O_ALTFP, " created FME%d, case %s]", fmep->id, 1221 fmd_case_uuid(hdl, fmep->fmcase)); 1222 1223 ep = fmep->e0; 1224 ASSERT(ep != NULL); 1225 1226 /* note observation */ 1227 fmep->ecurrent = ep; 1228 if (ep->count++ == 0) { 1229 /* link it into list of observations seen */ 1230 ep->observations = fmep->observations; 1231 fmep->observations = ep; 1232 ep->nvp = 
evnv_dupnvl(nvl); 1233 serialize_observation(fmep, eventstring, ipp); 1234 } else { 1235 /* new payload overrides any previous */ 1236 nvlist_free(ep->nvp); 1237 ep->nvp = evnv_dupnvl(nvl); 1238 } 1239 1240 stats_counter_bump(fmep->Rcount); 1241 1242 if (ffep) { 1243 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1244 fmd_case_setprincipal(hdl, fmep->fmcase, ffep); 1245 fmep->e0r = ffep; 1246 } 1247 1248 /* give the diagnosis algorithm a shot at the new FME state */ 1249 fme_eval(fmep, ffep); 1250 } 1251 1252 void 1253 fme_status(int flags) 1254 { 1255 struct fme *fmep; 1256 1257 if (FMElist == NULL) { 1258 out(flags, "No fault management exercises underway."); 1259 return; 1260 } 1261 1262 for (fmep = FMElist; fmep; fmep = fmep->next) 1263 fme_print(flags, fmep); 1264 } 1265 1266 /* 1267 * "indent" routines used mostly for nicely formatted debug output, but also 1268 * for sanity checking for infinite recursion bugs. 1269 */ 1270 1271 #define MAX_INDENT 1024 1272 static const char *indent_s[MAX_INDENT]; 1273 static int current_indent; 1274 1275 static void 1276 indent_push(const char *s) 1277 { 1278 if (current_indent < MAX_INDENT) 1279 indent_s[current_indent++] = s; 1280 else 1281 out(O_DIE, "unexpected recursion depth (%d)", current_indent); 1282 } 1283 1284 static void 1285 indent_set(const char *s) 1286 { 1287 current_indent = 0; 1288 indent_push(s); 1289 } 1290 1291 static void 1292 indent_pop(void) 1293 { 1294 if (current_indent > 0) 1295 current_indent--; 1296 else 1297 out(O_DIE, "recursion underflow"); 1298 } 1299 1300 static void 1301 indent(void) 1302 { 1303 int i; 1304 if (!Verbose) 1305 return; 1306 for (i = 0; i < current_indent; i++) 1307 out(O_ALTFP|O_VERB|O_NONL, indent_s[i]); 1308 } 1309 1310 #define SLNEW 1 1311 #define SLCHANGED 2 1312 #define SLWAIT 3 1313 #define SLDISPROVED 4 1314 1315 static void 1316 print_suspects(int circumstance, struct fme *fmep) 1317 { 1318 struct event *ep; 1319 1320 out(O_ALTFP|O_NONL, "["); 1321 if (circumstance 
== SLCHANGED) { 1322 out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, " 1323 "suspect list:", fmep->id, fme_state2str(fmep->state)); 1324 } else if (circumstance == SLWAIT) { 1325 out(O_ALTFP|O_NONL, "FME%d set wait timer ", fmep->id); 1326 ptree_timeval(O_ALTFP|O_NONL, &fmep->wull); 1327 } else if (circumstance == SLDISPROVED) { 1328 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id); 1329 } else { 1330 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id); 1331 } 1332 1333 if (circumstance == SLWAIT || circumstance == SLDISPROVED) { 1334 out(O_ALTFP, "]"); 1335 return; 1336 } 1337 1338 for (ep = fmep->suspects; ep; ep = ep->suspects) { 1339 out(O_ALTFP|O_NONL, " "); 1340 itree_pevent_brief(O_ALTFP|O_NONL, ep); 1341 } 1342 out(O_ALTFP, "]"); 1343 } 1344 1345 static struct node * 1346 eventprop_lookup(struct event *ep, const char *propname) 1347 { 1348 return (lut_lookup(ep->props, (void *)propname, NULL)); 1349 } 1350 1351 #define MAXDIGITIDX 23 1352 static char numbuf[MAXDIGITIDX + 1]; 1353 1354 static int 1355 node2uint(struct node *n, uint_t *valp) 1356 { 1357 struct evalue value; 1358 struct lut *globals = NULL; 1359 1360 if (n == NULL) 1361 return (1); 1362 1363 /* 1364 * check value.v since we are being asked to convert an unsigned 1365 * long long int to an unsigned int 1366 */ 1367 if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) || 1368 value.t != UINT64 || value.v > (1ULL << 32)) 1369 return (1); 1370 1371 *valp = (uint_t)value.v; 1372 1373 return (0); 1374 } 1375 1376 static nvlist_t * 1377 node2fmri(struct node *n) 1378 { 1379 nvlist_t **pa, *f, *p; 1380 struct node *nc; 1381 uint_t depth = 0; 1382 char *numstr, *nullbyte; 1383 char *failure; 1384 int err, i; 1385 1386 /* XXX do we need to be able to handle a non-T_NAME node? 
*/ 1387 if (n == NULL || n->t != T_NAME) 1388 return (NULL); 1389 1390 for (nc = n; nc != NULL; nc = nc->u.name.next) { 1391 if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM) 1392 break; 1393 depth++; 1394 } 1395 1396 if (nc != NULL) { 1397 /* We bailed early, something went wrong */ 1398 return (NULL); 1399 } 1400 1401 if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0) 1402 out(O_DIE|O_SYS, "alloc of fmri nvl failed"); 1403 pa = alloca(depth * sizeof (nvlist_t *)); 1404 for (i = 0; i < depth; i++) 1405 pa[i] = NULL; 1406 1407 err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC); 1408 err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION); 1409 err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, ""); 1410 err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth); 1411 if (err != 0) { 1412 failure = "basic construction of FMRI failed"; 1413 goto boom; 1414 } 1415 1416 numbuf[MAXDIGITIDX] = '\0'; 1417 nullbyte = &numbuf[MAXDIGITIDX]; 1418 i = 0; 1419 1420 for (nc = n; nc != NULL; nc = nc->u.name.next) { 1421 err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl); 1422 if (err != 0) { 1423 failure = "alloc of an hc-pair failed"; 1424 goto boom; 1425 } 1426 err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s); 1427 numstr = ulltostr(nc->u.name.child->u.ull, nullbyte); 1428 err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr); 1429 if (err != 0) { 1430 failure = "construction of an hc-pair failed"; 1431 goto boom; 1432 } 1433 pa[i++] = p; 1434 } 1435 1436 err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth); 1437 if (err == 0) { 1438 for (i = 0; i < depth; i++) 1439 if (pa[i] != NULL) 1440 nvlist_free(pa[i]); 1441 return (f); 1442 } 1443 failure = "addition of hc-pair array to FMRI failed"; 1444 1445 boom: 1446 for (i = 0; i < depth; i++) 1447 if (pa[i] != NULL) 1448 nvlist_free(pa[i]); 1449 nvlist_free(f); 1450 out(O_DIE, "%s", failure); 1451 /*NOTREACHED*/ 1452 return (NULL); 1453 } 1454 1455 static uint_t 1456 avg(uint_t sum, 
uint_t cnt) 1457 { 1458 unsigned long long s = sum * 10; 1459 1460 return ((s / cnt / 10) + (((s / cnt % 10) >= 5) ? 1 : 0)); 1461 } 1462 1463 static uint8_t 1464 percentof(uint_t part, uint_t whole) 1465 { 1466 unsigned long long p = part * 1000; 1467 1468 return ((p / whole / 10) + (((p / whole % 10) >= 5) ? 1 : 0)); 1469 } 1470 1471 struct rsl { 1472 struct event *suspect; 1473 nvlist_t *asru; 1474 nvlist_t *fru; 1475 nvlist_t *rsrc; 1476 }; 1477 1478 /* 1479 * rslfree -- free internal members of struct rsl not expected to be 1480 * freed elsewhere. 1481 */ 1482 static void 1483 rslfree(struct rsl *freeme) 1484 { 1485 if (freeme->asru != NULL) 1486 nvlist_free(freeme->asru); 1487 if (freeme->fru != NULL) 1488 nvlist_free(freeme->fru); 1489 if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru) 1490 nvlist_free(freeme->rsrc); 1491 } 1492 1493 /* 1494 * rslcmp -- compare two rsl structures. Use the following 1495 * comparisons to establish cardinality: 1496 * 1497 * 1. Name of the suspect's class. (simple strcmp) 1498 * 2. Name of the suspect's ASRU. (trickier, since nvlist) 1499 * 1500 */ 1501 static int 1502 rslcmp(const void *a, const void *b) 1503 { 1504 struct rsl *r1 = (struct rsl *)a; 1505 struct rsl *r2 = (struct rsl *)b; 1506 int rv; 1507 1508 rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s, 1509 r2->suspect->enode->u.event.ename->u.name.s); 1510 if (rv != 0) 1511 return (rv); 1512 1513 if (r1->asru == NULL && r2->asru == NULL) 1514 return (0); 1515 if (r1->asru == NULL) 1516 return (-1); 1517 if (r2->asru == NULL) 1518 return (1); 1519 return (evnv_cmpnvl(r1->asru, r2->asru, 0)); 1520 } 1521 1522 /* 1523 * rsluniq -- given an array of rsl structures, seek out and "remove" 1524 * any duplicates. Dups are "remove"d by NULLing the suspect pointer 1525 * of the array element. Removal also means updating the number of 1526 * problems and the number of problems which are not faults. User 1527 * provides the first and last element pointers. 
1528 */ 1529 static void 1530 rsluniq(struct rsl *first, struct rsl *last, int *nprobs, int *nnonf) 1531 { 1532 struct rsl *cr; 1533 1534 if (*nprobs == 1) 1535 return; 1536 1537 /* 1538 * At this point, we only expect duplicate defects. 1539 * Eversholt's diagnosis algorithm prevents duplicate 1540 * suspects, but we rewrite defects in the platform code after 1541 * the diagnosis is made, and that can introduce new 1542 * duplicates. 1543 */ 1544 while (first <= last) { 1545 if (first->suspect == NULL || !is_defect(first->suspect->t)) { 1546 first++; 1547 continue; 1548 } 1549 cr = first + 1; 1550 while (cr <= last) { 1551 if (is_defect(first->suspect->t)) { 1552 if (rslcmp(first, cr) == 0) { 1553 cr->suspect = NULL; 1554 rslfree(cr); 1555 (*nprobs)--; 1556 (*nnonf)--; 1557 } 1558 } 1559 /* 1560 * assume all defects are in order after our 1561 * sort and short circuit here with "else break" ? 1562 */ 1563 cr++; 1564 } 1565 first++; 1566 } 1567 } 1568 1569 /* 1570 * get_resources -- for a given suspect, determine what ASRU, FRU and 1571 * RSRC nvlists should be advertised in the final suspect list. 1572 */ 1573 void 1574 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot) 1575 { 1576 struct node *asrudef, *frudef; 1577 nvlist_t *asru, *fru; 1578 nvlist_t *rsrc = NULL; 1579 char *pathstr; 1580 1581 /* 1582 * First find any ASRU and/or FRU defined in the 1583 * initial fault tree. 
1584 */ 1585 asrudef = eventprop_lookup(sp, L_ASRU); 1586 frudef = eventprop_lookup(sp, L_FRU); 1587 1588 /* 1589 * Create FMRIs based on those definitions 1590 */ 1591 asru = node2fmri(asrudef); 1592 fru = node2fmri(frudef); 1593 pathstr = ipath2str(NULL, sp->ipp); 1594 1595 /* 1596 * Allow for platform translations of the FMRIs 1597 */ 1598 platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc, 1599 pathstr); 1600 1601 FREE(pathstr); 1602 rsrcs->suspect = sp; 1603 rsrcs->asru = asru; 1604 rsrcs->fru = fru; 1605 rsrcs->rsrc = rsrc; 1606 } 1607 1608 /* 1609 * trim_suspects -- prior to publishing, we may need to remove some 1610 * suspects from the list. If we're auto-closing upsets, we don't 1611 * want any of those in the published list. If the ASRUs for multiple 1612 * defects resolve to the same ASRU (driver) we only want to publish 1613 * that as a single suspect. 1614 */ 1615 static void 1616 trim_suspects(struct fme *fmep, boolean_t no_upsets, struct rsl **begin, 1617 struct rsl **end) 1618 { 1619 struct event *ep; 1620 struct rsl *rp; 1621 int rpcnt; 1622 1623 /* 1624 * First save the suspects in the psuspects, then copy back 1625 * only the ones we wish to retain. This resets nsuspects to 1626 * zero. 1627 */ 1628 rpcnt = fmep->nsuspects; 1629 save_suspects(fmep); 1630 1631 /* 1632 * allocate an array of resource pointers for the suspects. 1633 * We may end up using less than the full allocation, but this 1634 * is a very short-lived array. publish_suspects() will free 1635 * this array when it's done using it. 
1636 */ 1637 rp = *begin = MALLOC(rpcnt * sizeof (struct rsl)); 1638 bzero(rp, rpcnt * sizeof (struct rsl)); 1639 1640 /* first pass, remove any unwanted upsets and populate our array */ 1641 for (ep = fmep->psuspects; ep; ep = ep->psuspects) { 1642 if (no_upsets && is_upset(ep->t)) 1643 continue; 1644 get_resources(ep, rp, fmep->cfgdata->cooked); 1645 rp++; 1646 fmep->nsuspects++; 1647 if (!is_fault(ep->t)) 1648 fmep->nonfault++; 1649 } 1650 1651 /* if all we had was unwanted upsets, we're done */ 1652 if (fmep->nsuspects == 0) 1653 return; 1654 1655 *end = rp - 1; 1656 1657 /* sort the array */ 1658 qsort(*begin, fmep->nsuspects, sizeof (struct rsl), rslcmp); 1659 rsluniq(*begin, *end, &fmep->nsuspects, &fmep->nonfault); 1660 } 1661 1662 /* 1663 * addpayloadprop -- add a payload prop to a problem 1664 */ 1665 static void 1666 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault) 1667 { 1668 ASSERT(fault != NULL); 1669 ASSERT(lhs != NULL); 1670 ASSERT(rhs != NULL); 1671 1672 if (rhs->t == UINT64) { 1673 out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v); 1674 1675 if (nvlist_add_uint64(fault, lhs, rhs->v) != 0) 1676 out(O_DIE, 1677 "cannot add payloadprop \"%s\" to fault", lhs); 1678 } else { 1679 out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"", 1680 lhs, (char *)(uintptr_t)rhs->v); 1681 1682 if (nvlist_add_string(fault, lhs, (char *)(uintptr_t)rhs->v) != 1683 0) 1684 out(O_DIE, 1685 "cannot add payloadprop \"%s\" to fault", lhs); 1686 } 1687 } 1688 1689 static char *Istatbuf; 1690 static char *Istatbufptr; 1691 static int Istatsz; 1692 1693 /* 1694 * istataddsize -- calculate size of istat and add it to Istatsz 1695 */ 1696 /*ARGSUSED2*/ 1697 static void 1698 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg) 1699 { 1700 int val; 1701 1702 ASSERT(lhs != NULL); 1703 ASSERT(rhs != NULL); 1704 1705 if ((val = stats_counter_value(rhs)) == 0) 1706 return; /* skip zero-valued stats */ 1707 1708 /* count up the size of 
the stat name */ 1709 Istatsz += ipath2strlen(lhs->ename, lhs->ipath); 1710 Istatsz++; /* for the trailing NULL byte */ 1711 1712 /* count up the size of the stat value */ 1713 Istatsz += snprintf(NULL, 0, "%d", val); 1714 Istatsz++; /* for the trailing NULL byte */ 1715 } 1716 1717 /* 1718 * istat2str -- serialize an istat, writing result to *Istatbufptr 1719 */ 1720 /*ARGSUSED2*/ 1721 static void 1722 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg) 1723 { 1724 char *str; 1725 int len; 1726 int val; 1727 1728 ASSERT(lhs != NULL); 1729 ASSERT(rhs != NULL); 1730 1731 if ((val = stats_counter_value(rhs)) == 0) 1732 return; /* skip zero-valued stats */ 1733 1734 /* serialize the stat name */ 1735 str = ipath2str(lhs->ename, lhs->ipath); 1736 len = strlen(str); 1737 1738 ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]); 1739 (void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr); 1740 Istatbufptr += len; 1741 FREE(str); 1742 *Istatbufptr++ = '\0'; 1743 1744 /* serialize the stat value */ 1745 Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr, 1746 "%d", val); 1747 *Istatbufptr++ = '\0'; 1748 1749 ASSERT(Istatbufptr <= &Istatbuf[Istatsz]); 1750 } 1751 1752 void 1753 istat_save() 1754 { 1755 if (Istat_need_save == 0) 1756 return; 1757 1758 /* figure out how big the serialzed info is */ 1759 Istatsz = 0; 1760 lut_walk(Istats, (lut_cb)istataddsize, NULL); 1761 1762 if (Istatsz == 0) { 1763 /* no stats to save */ 1764 fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS); 1765 return; 1766 } 1767 1768 /* create the serialized buffer */ 1769 Istatbufptr = Istatbuf = MALLOC(Istatsz); 1770 lut_walk(Istats, (lut_cb)istat2str, NULL); 1771 1772 /* clear out current saved stats */ 1773 fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS); 1774 1775 /* write out the new version */ 1776 fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz); 1777 FREE(Istatbuf); 1778 1779 Istat_need_save = 0; 1780 } 1781 1782 int 1783 istat_cmp(struct istat_entry 
*ent1, struct istat_entry *ent2) 1784 { 1785 if (ent1->ename != ent2->ename) 1786 return (ent2->ename - ent1->ename); 1787 if (ent1->ipath != ent2->ipath) 1788 return ((char *)ent2->ipath - (char *)ent1->ipath); 1789 1790 return (0); 1791 } 1792 1793 /* 1794 * istat-verify -- verify the component associated with a stat still exists 1795 * 1796 * if the component no longer exists, this routine resets the stat and 1797 * returns 0. if the component still exists, it returns 1. 1798 */ 1799 static int 1800 istat_verify(struct node *snp, struct istat_entry *entp) 1801 { 1802 struct stats *statp; 1803 nvlist_t *fmri; 1804 1805 fmri = node2fmri(snp->u.event.epname); 1806 if (platform_path_exists(fmri)) { 1807 nvlist_free(fmri); 1808 return (1); 1809 } 1810 nvlist_free(fmri); 1811 1812 /* component no longer in system. zero out the associated stats */ 1813 if ((statp = (struct stats *) 1814 lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL || 1815 stats_counter_value(statp) == 0) 1816 return (0); /* stat is already reset */ 1817 1818 Istat_need_save = 1; 1819 stats_counter_reset(statp); 1820 return (0); 1821 } 1822 1823 static void 1824 istat_bump(struct node *snp, int n) 1825 { 1826 struct stats *statp; 1827 struct istat_entry ent; 1828 1829 ASSERT(snp != NULL); 1830 ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t)); 1831 ASSERT(snp->u.event.epname != NULL); 1832 1833 /* class name should be hoisted into a single stable entry */ 1834 ASSERT(snp->u.event.ename->u.name.next == NULL); 1835 ent.ename = snp->u.event.ename->u.name.s; 1836 ent.ipath = ipath(snp->u.event.epname); 1837 1838 if (!istat_verify(snp, &ent)) { 1839 /* component no longer exists in system, nothing to do */ 1840 return; 1841 } 1842 1843 if ((statp = (struct stats *) 1844 lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) { 1845 /* need to create the counter */ 1846 int cnt = 0; 1847 struct node *np; 1848 char *sname; 1849 char *snamep; 1850 struct istat_entry *newentp; 1851 1852 /* count 
up the size of the stat name */ 1853 np = snp->u.event.ename; 1854 while (np != NULL) { 1855 cnt += strlen(np->u.name.s); 1856 cnt++; /* for the '.' or '@' */ 1857 np = np->u.name.next; 1858 } 1859 np = snp->u.event.epname; 1860 while (np != NULL) { 1861 cnt += snprintf(NULL, 0, "%s%llu", 1862 np->u.name.s, np->u.name.child->u.ull); 1863 cnt++; /* for the '/' or trailing NULL byte */ 1864 np = np->u.name.next; 1865 } 1866 1867 /* build the stat name */ 1868 snamep = sname = alloca(cnt); 1869 np = snp->u.event.ename; 1870 while (np != NULL) { 1871 snamep += snprintf(snamep, &sname[cnt] - snamep, 1872 "%s", np->u.name.s); 1873 np = np->u.name.next; 1874 if (np) 1875 *snamep++ = '.'; 1876 } 1877 *snamep++ = '@'; 1878 np = snp->u.event.epname; 1879 while (np != NULL) { 1880 snamep += snprintf(snamep, &sname[cnt] - snamep, 1881 "%s%llu", np->u.name.s, np->u.name.child->u.ull); 1882 np = np->u.name.next; 1883 if (np) 1884 *snamep++ = '/'; 1885 } 1886 *snamep++ = '\0'; 1887 1888 /* create the new stat & add it to our list */ 1889 newentp = MALLOC(sizeof (*newentp)); 1890 *newentp = ent; 1891 statp = stats_new_counter(NULL, sname, 0); 1892 Istats = lut_add(Istats, (void *)newentp, (void *)statp, 1893 (lut_cmp)istat_cmp); 1894 } 1895 1896 /* if n is non-zero, set that value instead of bumping */ 1897 if (n) { 1898 stats_counter_reset(statp); 1899 stats_counter_add(statp, n); 1900 } else 1901 stats_counter_bump(statp); 1902 Istat_need_save = 1; 1903 1904 ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath); 1905 out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented", 1906 stats_counter_value(statp)); 1907 } 1908 1909 /*ARGSUSED*/ 1910 static void 1911 istat_destructor(void *left, void *right, void *arg) 1912 { 1913 struct istat_entry *entp = (struct istat_entry *)left; 1914 struct stats *statp = (struct stats *)right; 1915 FREE(entp); 1916 stats_delete(statp); 1917 } 1918 1919 /* 1920 * Callback used in a walk of the Istats to reset matching stat counters. 
*/
static void
istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
    const struct ipath *ipp)
{
	char *path;

	/* entries match when they refer to the very same ipath pointer */
	if (entp->ipath == ipp) {
		path = ipath2str(entp->ename, ipp);
		out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
		FREE(path);
		stats_counter_reset(statp);
		Istat_need_save = 1;
	}
}

/*
 * istat_fini -- free the Istats table, running istat_destructor() on
 * every entry.
 */
void
istat_fini(void)
{
	lut_free(Istats, istat_destructor, NULL);
}

/*
 * publish_suspects -- turn the FME's suspect list into fmd suspects.
 *
 * The list is first trimmed by trim_suspects().  A certainty is then
 * computed for each remaining suspect, a fault nvlist is created for it
 * (with optional "message" and payload properties), added to the case,
 * and any "action" property is evaluated.  Finally the case is either
 * closed (Dupclose set and every ASRU already faulty) or solved, in the
 * latter case after bumping any "count" istats.  The FME's original
 * suspect list is restored before returning.
 */
static void
publish_suspects(struct fme *fmep)
{
	struct rsl *srl = NULL;
	struct rsl *erl;
	struct rsl *rp;
	nvlist_t *fault;
	uint8_t cert;
	uint_t *frs;
	uint_t fravg, frsum, fr;
	uint_t messval;
	struct node *snp;
	int frcnt, fridx;
	boolean_t no_upsets = B_FALSE;
	boolean_t allfaulty = B_TRUE;

	stats_counter_bump(fmep->diags);

	/*
	 * If we're auto-closing upsets, we don't want to include them
	 * in any produced suspect lists or certainty accounting.
	 */
	if (Autoclose != NULL)
		if (strcmp(Autoclose, "true") == 0 ||
		    strcmp(Autoclose, "all") == 0 ||
		    strcmp(Autoclose, "upsets") == 0)
			no_upsets = B_TRUE;

	trim_suspects(fmep, no_upsets, &srl, &erl);

	/*
	 * If the resulting suspect list has no members, we're
	 * done. Returning here will simply close the case.
	 */
	if (fmep->nsuspects == 0) {
		out(O_ALTFP,
		    "[FME%d, case %s (all suspects are upsets)]",
		    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
		FREE(srl);
		restore_suspects(fmep);
		return;
	}

	/*
	 * If the suspect list is all faults, then for a given fault,
	 * say X of N, X's certainty is computed via:
	 *
	 * fitrate(X) / (fitrate(1) + ... + fitrate(N)) * 100
	 *
	 * If none of the suspects are faults, and there are N suspects,
	 * the certainty of a given suspect is 100/N.
	 *
	 * If there is a mixture of faults and other problems in
	 * the suspect list, we take an average of the faults'
	 * FITrates and treat this average as the FITrate for any
	 * non-faults. The fitrate of any given suspect is then
	 * computed per the first formula above.
	 */
	if (fmep->nonfault == fmep->nsuspects) {
		/* NO faults in the suspect list */
		cert = percentof(1, fmep->nsuspects);
	} else {
		/* sum the fitrates */
		frs = alloca(fmep->nsuspects * sizeof (uint_t));
		fridx = frcnt = frsum = 0;

		/*
		 * frs[] gets one slot per surviving suspect, in array
		 * order; non-faults are recorded as 0 for now.
		 */
		for (rp = srl; rp <= erl; rp++) {
			struct node *n;

			if (rp->suspect == NULL)
				continue;
			if (!is_fault(rp->suspect->t)) {
				frs[fridx++] = 0;
				continue;
			}
			n = eventprop_lookup(rp->suspect, L_FITrate);
			if (node2uint(n, &fr) != 0) {
				out(O_DEBUG|O_NONL, "event ");
				ipath_print(O_DEBUG|O_NONL,
				    rp->suspect->enode->u.event.ename->u.name.s,
				    rp->suspect->ipp);
				out(O_DEBUG, " has no FITrate (using 1)");
				fr = 1;
			} else if (fr == 0) {
				out(O_DEBUG|O_NONL, "event ");
				ipath_print(O_DEBUG|O_NONL,
				    rp->suspect->enode->u.event.ename->u.name.s,
				    rp->suspect->ipp);
				out(O_DEBUG, " has zero FITrate (using 1)");
				fr = 1;
			}

			frs[fridx++] = fr;
			frsum += fr;
			frcnt++;
		}
		/* give every non-fault the average FITrate of the faults */
		fravg = avg(frsum, frcnt);
		for (fridx = 0; fridx < fmep->nsuspects; fridx++)
			if (frs[fridx] == 0) {
				frs[fridx] = fravg;
				frsum += fravg;
			}
		/* fridx is left at nsuspects for the reverse walk below */
	}

	/* Add them in reverse order of our sort, as fmd reverses order */
	for (rp = erl; rp >= srl; rp--) {
		if (rp->suspect == NULL)
			continue;
		if (!is_fault(rp->suspect->t))
			allfaulty = B_FALSE;
		/* walk frs[] backwards in step with the suspects */
		if (fmep->nonfault != fmep->nsuspects)
			cert = percentof(frs[--fridx], frsum);
		fault = fmd_nvl_create_fault(fmep->hdl,
		    rp->suspect->enode->u.event.ename->u.name.s,
		    cert,
		    rp->asru,
		    rp->fru,
		    rp->rsrc);
		if (fault == NULL)
			out(O_DIE, "fault creation failed");
		/* if "message" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_message),
		    &messval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds message=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    messval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_MESSAGE,
			    (messval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-message to fault");
			}
		}
		/* add any payload properties */
		lut_walk(rp->suspect->payloadprops,
		    (lut_cb)addpayloadprop, (void *)fault);
		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);
		rp->suspect->fault = fault;
		/* the resource nvlists are no longer needed */
		rslfree(rp);

		/*
		 * If "action" property exists, evaluate it; this must be done
		 * before the dupclose check below since some actions may
		 * modify the asru to be used in fmd_nvl_fmri_faulty. This
		 * needs to be restructured if any new actions are introduced
		 * that have effects that we do not want to be visible if
		 * we decide not to publish in the dupclose check below.
		 */
		if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
			struct evalue evalue;

			out(O_ALTFP|O_NONL,
			    "[FME%d, %s action ", fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s);
			ptree_name_iter(O_ALTFP|O_NONL, snp);
			out(O_ALTFP, "]");
			Action_nvl = fault;
			(void) eval_expr(snp, NULL, NULL, NULL, NULL,
			    NULL, 0, &evalue);
		}

		/*
		 * if "dupclose" tunable is set, check if the asru is
		 * already marked as "faulty".
		 */
		if (Dupclose && allfaulty) {
			nvlist_t *asru;

			out(O_ALTFP|O_VERB, "FMD%d dupclose check ", fmep->id);
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
			out(O_ALTFP|O_VERB|O_NONL, " ");
			if (nvlist_lookup_nvlist(fault,
			    FM_FAULT_ASRU, &asru) != 0) {
				out(O_ALTFP|O_VERB, "NULL asru");
				allfaulty = B_FALSE;
			} else if (fmd_nvl_fmri_faulty(fmep->hdl, asru)) {
				out(O_ALTFP|O_VERB, "faulty");
			} else {
				out(O_ALTFP|O_VERB, "not faulty");
				allfaulty = B_FALSE;
			}
		}

	}

	/*
	 * Close the case if all asrus are already known to be faulty and if
	 * Dupclose is enabled. Otherwise we are going to publish so take
	 * any pre-publication actions.
	 */
	if (Dupclose && allfaulty) {
		out(O_ALTFP, "[dupclose FME%d, case %s]", fmep->id,
		    fmd_case_uuid(fmep->hdl, fmep->fmcase));
		fmd_case_close(fmep->hdl, fmep->fmcase);
	} else {
		for (rp = erl; rp >= srl; rp--) {
			struct event *suspect = rp->suspect;

			if (suspect == NULL)
				continue;

			fault = suspect->fault;

			/* if "count" exists, increment the appropriate stat */
			if ((snp = eventprop_lookup(suspect,
			    L_count)) != NULL) {
				out(O_ALTFP|O_NONL,
				    "[FME%d, %s count ", fmep->id,
				    suspect->enode->u.event.ename->u.name.s);
				ptree_name_iter(O_ALTFP|O_NONL, snp);
				out(O_ALTFP, "]");
				istat_bump(snp, 0);

			}
		}
		istat_save();	/* write out any istat changes */

		out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
		    fmd_case_uuid(fmep->hdl, fmep->fmcase));
		fmd_case_solve(fmep->hdl, fmep->fmcase);
	}

	/*
	 * revert to the original suspect list
	 */
	FREE(srl);
	restore_suspects(fmep);
}

/*
 * publish_undiagnosable -- open a new case for an ereport that cannot be
 * diagnosed, attach the ereport (if any) and an UNDIAGNOSABLE_DEFECT
 * suspect carrying Undiag_reason (if set), then solve and close the
 * case.  The case is recorded at the head of Undiagablecaselist.
 */
static void
publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep)
{
	struct case_list *newcase;
	nvlist_t *defect;

	out(O_ALTFP,
	    "[undiagnosable ereport received, "
	    "creating and closing a new case (%s)]",
	    Undiag_reason ? Undiag_reason : "reason not provided");

	newcase = MALLOC(sizeof (struct case_list));
	newcase->next = NULL;

	newcase->fmcase = fmd_case_open(hdl, NULL);
	/* push the new case onto the front of the list */
	if (Undiagablecaselist != NULL)
		newcase->next = Undiagablecaselist;
	Undiagablecaselist = newcase;

	if (ffep != NULL)
		fmd_case_add_ereport(hdl, newcase->fmcase, ffep);

	defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
	    NULL, NULL, NULL);
	if (Undiag_reason != NULL)
		(void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason);
	fmd_case_add_suspect(hdl, newcase->fmcase, defect);

	fmd_case_solve(hdl, newcase->fmcase);
	fmd_case_close(hdl, newcase->fmcase);
}

/*
 * fme_undiagnosable -- give up on an existing FME: add an
 * UNDIAGNOSABLE_DEFECT suspect (carrying Undiag_reason if set) to its
 * case, solve the case, destroy the FME's buffers and close the case.
 */
static void
fme_undiagnosable(struct fme *f)
{
	nvlist_t *defect;

	out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
	    f->id, fmd_case_uuid(f->hdl, f->fmcase),
	    Undiag_reason ? Undiag_reason : "undiagnosable");

	defect = fmd_nvl_create_fault(f->hdl, UNDIAGNOSABLE_DEFECT, 100,
	    NULL, NULL, NULL);
	if (Undiag_reason != NULL)
		(void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason);
	fmd_case_add_suspect(f->hdl, f->fmcase, defect);
	fmd_case_solve(f->hdl, f->fmcase);
	destroy_fme_bufs(f);
	fmd_case_close(f->hdl, f->fmcase);
}

/*
 * fme_close_case
 *
 * Find the requested case amongst our fmes and close it. Free up
 * the related fme.
2230 */ 2231 void 2232 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase) 2233 { 2234 struct case_list *ucasep, *prevcasep = NULL; 2235 struct fme *prev = NULL; 2236 struct fme *fmep; 2237 2238 for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) { 2239 if (fmcase != ucasep->fmcase) { 2240 prevcasep = ucasep; 2241 continue; 2242 } 2243 2244 if (prevcasep == NULL) 2245 Undiagablecaselist = Undiagablecaselist->next; 2246 else 2247 prevcasep->next = ucasep->next; 2248 2249 FREE(ucasep); 2250 return; 2251 } 2252 2253 for (fmep = FMElist; fmep; fmep = fmep->next) { 2254 if (fmep->hdl == hdl && fmep->fmcase == fmcase) 2255 break; 2256 prev = fmep; 2257 } 2258 2259 if (fmep == NULL) { 2260 out(O_WARN, "Eft asked to close unrecognized case [%s].", 2261 fmd_case_uuid(hdl, fmcase)); 2262 return; 2263 } 2264 2265 if (EFMElist == fmep) 2266 EFMElist = prev; 2267 2268 if (prev == NULL) 2269 FMElist = FMElist->next; 2270 else 2271 prev->next = fmep->next; 2272 2273 fmep->next = NULL; 2274 2275 /* Get rid of any timer this fme has set */ 2276 if (fmep->wull != 0) 2277 fmd_timer_remove(fmep->hdl, fmep->timer); 2278 2279 if (ClosedFMEs == NULL) { 2280 ClosedFMEs = fmep; 2281 } else { 2282 fmep->next = ClosedFMEs; 2283 ClosedFMEs = fmep; 2284 } 2285 2286 Open_fme_count--; 2287 2288 /* See if we can close the overflow FME */ 2289 if (Open_fme_count <= Max_fme) { 2290 for (fmep = FMElist; fmep; fmep = fmep->next) { 2291 if (fmep->overflow && !(fmd_case_closed(fmep->hdl, 2292 fmep->fmcase))) 2293 break; 2294 } 2295 2296 if (fmep != NULL) 2297 fmd_case_close(fmep->hdl, fmep->fmcase); 2298 } 2299 } 2300 2301 /* 2302 * fme_set_timer() 2303 * If the time we need to wait for the given FME is less than the 2304 * current timer, kick that old timer out and establish a new one. 
2305 */ 2306 static int 2307 fme_set_timer(struct fme *fmep, unsigned long long wull) 2308 { 2309 out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait "); 2310 ptree_timeval(O_ALTFP|O_VERB, &wull); 2311 2312 if (wull <= fmep->pull) { 2313 out(O_ALTFP|O_VERB|O_NONL, "already have waited at least "); 2314 ptree_timeval(O_ALTFP|O_VERB, &fmep->pull); 2315 out(O_ALTFP|O_VERB, NULL); 2316 /* we've waited at least wull already, don't need timer */ 2317 return (0); 2318 } 2319 2320 out(O_ALTFP|O_VERB|O_NONL, " currently "); 2321 if (fmep->wull != 0) { 2322 out(O_ALTFP|O_VERB|O_NONL, "waiting "); 2323 ptree_timeval(O_ALTFP|O_VERB, &fmep->wull); 2324 out(O_ALTFP|O_VERB, NULL); 2325 } else { 2326 out(O_ALTFP|O_VERB|O_NONL, "not waiting"); 2327 out(O_ALTFP|O_VERB, NULL); 2328 } 2329 2330 if (fmep->wull != 0) 2331 if (wull >= fmep->wull) 2332 /* New timer would fire later than established timer */ 2333 return (0); 2334 2335 if (fmep->wull != 0) { 2336 fmd_timer_remove(fmep->hdl, fmep->timer); 2337 } 2338 2339 fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep, 2340 fmep->e0r, wull); 2341 out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer); 2342 fmep->wull = wull; 2343 return (1); 2344 } 2345 2346 void 2347 fme_timer_fired(struct fme *fmep, id_t tid) 2348 { 2349 struct fme *ffmep = NULL; 2350 2351 for (ffmep = FMElist; ffmep; ffmep = ffmep->next) 2352 if (ffmep == fmep) 2353 break; 2354 2355 if (ffmep == NULL) { 2356 out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.", 2357 (void *)fmep); 2358 return; 2359 } 2360 2361 out(O_ALTFP, "Timer fired %lx", tid); 2362 fmep->pull = fmep->wull; 2363 fmep->wull = 0; 2364 fmd_buf_write(fmep->hdl, fmep->fmcase, 2365 WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull)); 2366 fme_eval(fmep, fmep->e0r); 2367 } 2368 2369 /* 2370 * Preserve the fme's suspect list in its psuspects list, NULLing the 2371 * suspects list in the meantime. 
2372 */ 2373 static void 2374 save_suspects(struct fme *fmep) 2375 { 2376 struct event *ep; 2377 struct event *nextep; 2378 2379 /* zero out the previous suspect list */ 2380 for (ep = fmep->psuspects; ep; ep = nextep) { 2381 nextep = ep->psuspects; 2382 ep->psuspects = NULL; 2383 } 2384 fmep->psuspects = NULL; 2385 2386 /* zero out the suspect list, copying it to previous suspect list */ 2387 fmep->psuspects = fmep->suspects; 2388 for (ep = fmep->suspects; ep; ep = nextep) { 2389 nextep = ep->suspects; 2390 ep->psuspects = ep->suspects; 2391 ep->suspects = NULL; 2392 ep->is_suspect = 0; 2393 } 2394 fmep->suspects = NULL; 2395 fmep->nsuspects = 0; 2396 fmep->nonfault = 0; 2397 } 2398 2399 /* 2400 * Retrieve the fme's suspect list from its psuspects list. 2401 */ 2402 static void 2403 restore_suspects(struct fme *fmep) 2404 { 2405 struct event *ep; 2406 struct event *nextep; 2407 2408 fmep->nsuspects = fmep->nonfault = 0; 2409 fmep->suspects = fmep->psuspects; 2410 for (ep = fmep->psuspects; ep; ep = nextep) { 2411 fmep->nsuspects++; 2412 if (!is_fault(ep->t)) 2413 fmep->nonfault++; 2414 nextep = ep->psuspects; 2415 ep->suspects = ep->psuspects; 2416 } 2417 } 2418 2419 /* 2420 * this is what we use to call the Emrys prototype code instead of main() 2421 */ 2422 static void 2423 fme_eval(struct fme *fmep, fmd_event_t *ffep) 2424 { 2425 struct event *ep; 2426 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 2427 2428 save_suspects(fmep); 2429 2430 out(O_ALTFP|O_VERB, "Evaluate FME %d", fmep->id); 2431 indent_set(" "); 2432 2433 lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep); 2434 fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay); 2435 2436 out(O_ALTFP|O_VERB|O_NONL, "FME%d state: %s, suspect list:", fmep->id, 2437 fme_state2str(fmep->state)); 2438 for (ep = fmep->suspects; ep; ep = ep->suspects) { 2439 out(O_ALTFP|O_VERB|O_NONL, " "); 2440 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2441 } 2442 out(O_ALTFP|O_VERB, NULL); 2443 2444 
switch (fmep->state) { 2445 case FME_CREDIBLE: 2446 print_suspects(SLNEW, fmep); 2447 (void) upsets_eval(fmep, ffep); 2448 2449 /* 2450 * we may have already posted suspects in upsets_eval() which 2451 * can recurse into fme_eval() again. If so then just return. 2452 */ 2453 if (fmep->posted_suspects) 2454 return; 2455 2456 publish_suspects(fmep); 2457 fmep->posted_suspects = 1; 2458 fmd_buf_write(fmep->hdl, fmep->fmcase, 2459 WOBUF_POSTD, 2460 (void *)&fmep->posted_suspects, 2461 sizeof (fmep->posted_suspects)); 2462 2463 /* 2464 * Now the suspects have been posted, we can clear up 2465 * the instance tree as we won't be looking at it again. 2466 * Also cancel the timer as the case is now solved. 2467 */ 2468 if (fmep->wull != 0) { 2469 fmd_timer_remove(fmep->hdl, fmep->timer); 2470 fmep->wull = 0; 2471 } 2472 break; 2473 2474 case FME_WAIT: 2475 ASSERT(my_delay > fmep->ull); 2476 (void) fme_set_timer(fmep, my_delay); 2477 print_suspects(SLWAIT, fmep); 2478 break; 2479 2480 case FME_DISPROVED: 2481 print_suspects(SLDISPROVED, fmep); 2482 Undiag_reason = UD_UNSOLVD; 2483 fme_undiagnosable(fmep); 2484 break; 2485 } 2486 2487 if (fmep->posted_suspects == 1 && Autoclose != NULL) { 2488 int doclose = 0; 2489 2490 if (strcmp(Autoclose, "true") == 0 || 2491 strcmp(Autoclose, "all") == 0) 2492 doclose = 1; 2493 2494 if (strcmp(Autoclose, "upsets") == 0) { 2495 doclose = 1; 2496 for (ep = fmep->suspects; ep; ep = ep->suspects) { 2497 if (ep->t != N_UPSET) { 2498 doclose = 0; 2499 break; 2500 } 2501 } 2502 } 2503 2504 if (doclose) { 2505 out(O_ALTFP, "[closing FME%d, case %s (autoclose)]", 2506 fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase)); 2507 2508 destroy_fme_bufs(fmep); 2509 fmd_case_close(fmep->hdl, fmep->fmcase); 2510 } 2511 } 2512 if (fmep->posted_suspects == 1) { 2513 itree_free(fmep->eventtree); 2514 fmep->eventtree = NULL; 2515 config_free(fmep->cfgdata); 2516 fmep->cfgdata = NULL; 2517 } else { 2518 itree_prune(fmep->eventtree); 2519 } 2520 } 2521 2522 static 
void indent(void); 2523 static int triggered(struct fme *fmep, struct event *ep, int mark); 2524 static enum fme_state effects_test(struct fme *fmep, 2525 struct event *fault_event, unsigned long long at_latest_by, 2526 unsigned long long *pdelay); 2527 static enum fme_state requirements_test(struct fme *fmep, struct event *ep, 2528 unsigned long long at_latest_by, unsigned long long *pdelay); 2529 static enum fme_state causes_test(struct fme *fmep, struct event *ep, 2530 unsigned long long at_latest_by, unsigned long long *pdelay); 2531 2532 static int 2533 checkconstraints(struct fme *fmep, struct arrow *arrowp) 2534 { 2535 struct constraintlist *ctp; 2536 struct evalue value; 2537 2538 if (arrowp->forever_false) { 2539 char *sep = ""; 2540 indent(); 2541 out(O_ALTFP|O_VERB|O_NONL, " Forever false constraint: "); 2542 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) { 2543 out(O_ALTFP|O_VERB|O_NONL, sep); 2544 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0); 2545 sep = ", "; 2546 } 2547 out(O_ALTFP|O_VERB, NULL); 2548 return (0); 2549 } 2550 2551 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) { 2552 if (eval_expr(ctp->cnode, NULL, NULL, 2553 &fmep->globals, fmep->cfgdata->cooked, 2554 arrowp, 0, &value)) { 2555 /* evaluation successful */ 2556 if (value.t == UNDEFINED || value.v == 0) { 2557 /* known false */ 2558 arrowp->forever_false = 1; 2559 indent(); 2560 out(O_ALTFP|O_VERB|O_NONL, 2561 " False constraint: "); 2562 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0); 2563 out(O_ALTFP|O_VERB, NULL); 2564 return (0); 2565 } 2566 } else { 2567 /* evaluation unsuccessful -- unknown value */ 2568 indent(); 2569 out(O_ALTFP|O_VERB|O_NONL, 2570 " Deferred constraint: "); 2571 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0); 2572 out(O_ALTFP|O_VERB, NULL); 2573 return (2); 2574 } 2575 } 2576 /* known true */ 2577 return (1); 2578 } 2579 2580 static int 2581 triggered(struct fme *fmep, struct event *ep, int mark) 2582 { 2583 struct bubble *bp; 
2584 struct arrowlist *ap; 2585 int count = 0; 2586 2587 stats_counter_bump(fmep->Tcallcount); 2588 for (bp = itree_next_bubble(ep, NULL); bp; 2589 bp = itree_next_bubble(ep, bp)) { 2590 if (bp->t != B_TO) 2591 continue; 2592 for (ap = itree_next_arrow(bp, NULL); ap; 2593 ap = itree_next_arrow(bp, ap)) { 2594 /* check count of marks against K in the bubble */ 2595 if ((ap->arrowp->mark & mark) && 2596 ++count >= bp->nork) 2597 return (1); 2598 } 2599 } 2600 return (0); 2601 } 2602 2603 static int 2604 mark_arrows(struct fme *fmep, struct event *ep, int mark, 2605 unsigned long long at_latest_by, unsigned long long *pdelay, int keep) 2606 { 2607 struct bubble *bp; 2608 struct arrowlist *ap; 2609 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 2610 unsigned long long my_delay; 2611 enum fme_state result; 2612 int retval = 0; 2613 2614 for (bp = itree_next_bubble(ep, NULL); bp; 2615 bp = itree_next_bubble(ep, bp)) { 2616 if (bp->t != B_FROM) 2617 continue; 2618 stats_counter_bump(fmep->Marrowcount); 2619 for (ap = itree_next_arrow(bp, NULL); ap; 2620 ap = itree_next_arrow(bp, ap)) { 2621 struct event *ep2 = ap->arrowp->head->myevent; 2622 /* 2623 * if we're clearing marks, we can avoid doing 2624 * all that work evaluating constraints. 
2625 */ 2626 if (mark == 0) { 2627 ap->arrowp->mark &= ~EFFECTS_COUNTER; 2628 if (keep && (ep2->cached_state & 2629 (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT))) 2630 ep2->keep_in_tree = 1; 2631 ep2->cached_state &= 2632 ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT); 2633 (void) mark_arrows(fmep, ep2, mark, 0, NULL, 2634 keep); 2635 continue; 2636 } 2637 if (ep2->cached_state & REQMNTS_DISPROVED) { 2638 indent(); 2639 out(O_ALTFP|O_VERB|O_NONL, 2640 " ALREADY DISPROVED "); 2641 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2642 out(O_ALTFP|O_VERB, NULL); 2643 continue; 2644 } 2645 if (ep2->cached_state & WAIT_EFFECT) { 2646 indent(); 2647 out(O_ALTFP|O_VERB|O_NONL, 2648 " ALREADY EFFECTS WAIT "); 2649 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2650 out(O_ALTFP|O_VERB, NULL); 2651 continue; 2652 } 2653 if (ep2->cached_state & CREDIBLE_EFFECT) { 2654 indent(); 2655 out(O_ALTFP|O_VERB|O_NONL, 2656 " ALREADY EFFECTS CREDIBLE "); 2657 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2658 out(O_ALTFP|O_VERB, NULL); 2659 continue; 2660 } 2661 if ((ep2->cached_state & PARENT_WAIT) && 2662 (mark & PARENT_WAIT)) { 2663 indent(); 2664 out(O_ALTFP|O_VERB|O_NONL, 2665 " ALREADY PARENT EFFECTS WAIT "); 2666 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2667 out(O_ALTFP|O_VERB, NULL); 2668 continue; 2669 } 2670 platform_set_payloadnvp(ep2->nvp); 2671 if (checkconstraints(fmep, ap->arrowp) == 0) { 2672 platform_set_payloadnvp(NULL); 2673 indent(); 2674 out(O_ALTFP|O_VERB|O_NONL, 2675 " CONSTRAINTS FAIL "); 2676 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2677 out(O_ALTFP|O_VERB, NULL); 2678 continue; 2679 } 2680 platform_set_payloadnvp(NULL); 2681 ap->arrowp->mark |= EFFECTS_COUNTER; 2682 if (!triggered(fmep, ep2, EFFECTS_COUNTER)) { 2683 indent(); 2684 out(O_ALTFP|O_VERB|O_NONL, 2685 " K-COUNT NOT YET MET "); 2686 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2687 out(O_ALTFP|O_VERB, NULL); 2688 continue; 2689 } 2690 ep2->cached_state &= ~PARENT_WAIT; 2691 /* 2692 * if 
we've reached an ereport and no propagation time 2693 * is specified, use the Hesitate value 2694 */ 2695 if (ep2->t == N_EREPORT && at_latest_by == 0ULL && 2696 ap->arrowp->maxdelay == 0ULL) { 2697 result = requirements_test(fmep, ep2, Hesitate, 2698 &my_delay); 2699 out(O_ALTFP|O_VERB|O_NONL, " default wait "); 2700 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2701 out(O_ALTFP|O_VERB, NULL); 2702 } else { 2703 result = requirements_test(fmep, ep2, 2704 at_latest_by + ap->arrowp->maxdelay, 2705 &my_delay); 2706 } 2707 if (result == FME_WAIT) { 2708 retval = WAIT_EFFECT; 2709 if (overall_delay > my_delay) 2710 overall_delay = my_delay; 2711 ep2->cached_state |= WAIT_EFFECT; 2712 indent(); 2713 out(O_ALTFP|O_VERB|O_NONL, " EFFECTS WAIT "); 2714 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2715 out(O_ALTFP|O_VERB, NULL); 2716 indent_push(" E"); 2717 if (mark_arrows(fmep, ep2, PARENT_WAIT, 2718 at_latest_by, &my_delay, 0) == 2719 WAIT_EFFECT) { 2720 retval = WAIT_EFFECT; 2721 if (overall_delay > my_delay) 2722 overall_delay = my_delay; 2723 } 2724 indent_pop(); 2725 } else if (result == FME_DISPROVED) { 2726 indent(); 2727 out(O_ALTFP|O_VERB|O_NONL, 2728 " EFFECTS DISPROVED "); 2729 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2730 out(O_ALTFP|O_VERB, NULL); 2731 } else { 2732 ep2->cached_state |= mark; 2733 indent(); 2734 if (mark == CREDIBLE_EFFECT) 2735 out(O_ALTFP|O_VERB|O_NONL, 2736 " EFFECTS CREDIBLE "); 2737 else 2738 out(O_ALTFP|O_VERB|O_NONL, 2739 " PARENT EFFECTS WAIT "); 2740 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2741 out(O_ALTFP|O_VERB, NULL); 2742 indent_push(" E"); 2743 if (mark_arrows(fmep, ep2, mark, at_latest_by, 2744 &my_delay, 0) == WAIT_EFFECT) { 2745 retval = WAIT_EFFECT; 2746 if (overall_delay > my_delay) 2747 overall_delay = my_delay; 2748 } 2749 indent_pop(); 2750 } 2751 } 2752 } 2753 if (retval == WAIT_EFFECT) 2754 *pdelay = overall_delay; 2755 return (retval); 2756 } 2757 2758 static enum fme_state 2759 effects_test(struct 
fme *fmep, struct event *fault_event, 2760 unsigned long long at_latest_by, unsigned long long *pdelay) 2761 { 2762 struct event *error_event; 2763 enum fme_state return_value = FME_CREDIBLE; 2764 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 2765 unsigned long long my_delay; 2766 2767 stats_counter_bump(fmep->Ecallcount); 2768 indent_push(" E"); 2769 indent(); 2770 out(O_ALTFP|O_VERB|O_NONL, "->"); 2771 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event); 2772 out(O_ALTFP|O_VERB, NULL); 2773 2774 if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by, 2775 &my_delay, 0) == WAIT_EFFECT) { 2776 return_value = FME_WAIT; 2777 if (overall_delay > my_delay) 2778 overall_delay = my_delay; 2779 } 2780 for (error_event = fmep->observations; 2781 error_event; error_event = error_event->observations) { 2782 indent(); 2783 out(O_ALTFP|O_VERB|O_NONL, " "); 2784 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event); 2785 if (!(error_event->cached_state & CREDIBLE_EFFECT)) { 2786 if (error_event->cached_state & 2787 (PARENT_WAIT|WAIT_EFFECT)) { 2788 out(O_ALTFP|O_VERB, " NOT YET triggered"); 2789 continue; 2790 } 2791 return_value = FME_DISPROVED; 2792 out(O_ALTFP|O_VERB, " NOT triggered"); 2793 break; 2794 } else { 2795 out(O_ALTFP|O_VERB, " triggered"); 2796 } 2797 } 2798 if (return_value == FME_DISPROVED) { 2799 (void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0); 2800 } else { 2801 fault_event->keep_in_tree = 1; 2802 (void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1); 2803 } 2804 2805 indent(); 2806 out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ", 2807 fme_state2str(return_value)); 2808 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event); 2809 out(O_ALTFP|O_VERB, NULL); 2810 indent_pop(); 2811 if (return_value == FME_WAIT) 2812 *pdelay = overall_delay; 2813 return (return_value); 2814 } 2815 2816 static enum fme_state 2817 requirements_test(struct fme *fmep, struct event *ep, 2818 unsigned long long at_latest_by, unsigned long long *pdelay) 2819 { 
2820 int waiting_events; 2821 int credible_events; 2822 int deferred_events; 2823 enum fme_state return_value = FME_CREDIBLE; 2824 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 2825 unsigned long long arrow_delay; 2826 unsigned long long my_delay; 2827 struct event *ep2; 2828 struct bubble *bp; 2829 struct arrowlist *ap; 2830 2831 if (ep->cached_state & REQMNTS_CREDIBLE) { 2832 indent(); 2833 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY CREDIBLE "); 2834 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2835 out(O_ALTFP|O_VERB, NULL); 2836 return (FME_CREDIBLE); 2837 } 2838 if (ep->cached_state & REQMNTS_DISPROVED) { 2839 indent(); 2840 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY DISPROVED "); 2841 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2842 out(O_ALTFP|O_VERB, NULL); 2843 return (FME_DISPROVED); 2844 } 2845 if (ep->cached_state & REQMNTS_WAIT) { 2846 indent(); 2847 *pdelay = ep->cached_delay; 2848 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY WAIT "); 2849 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2850 out(O_ALTFP|O_VERB|O_NONL, ", wait for: "); 2851 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 2852 out(O_ALTFP|O_VERB, NULL); 2853 return (FME_WAIT); 2854 } 2855 stats_counter_bump(fmep->Rcallcount); 2856 indent_push(" R"); 2857 indent(); 2858 out(O_ALTFP|O_VERB|O_NONL, "->"); 2859 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2860 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: "); 2861 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 2862 out(O_ALTFP|O_VERB, NULL); 2863 2864 if (ep->t == N_EREPORT) { 2865 if (ep->count == 0) { 2866 if (fmep->pull >= at_latest_by) { 2867 return_value = FME_DISPROVED; 2868 } else { 2869 ep->cached_delay = *pdelay = at_latest_by; 2870 return_value = FME_WAIT; 2871 } 2872 } 2873 2874 indent(); 2875 switch (return_value) { 2876 case FME_CREDIBLE: 2877 ep->cached_state |= REQMNTS_CREDIBLE; 2878 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE "); 2879 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2880 break; 
2881 case FME_DISPROVED: 2882 ep->cached_state |= REQMNTS_DISPROVED; 2883 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED "); 2884 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2885 break; 2886 case FME_WAIT: 2887 ep->cached_state |= REQMNTS_WAIT; 2888 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT "); 2889 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2890 out(O_ALTFP|O_VERB|O_NONL, " to "); 2891 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 2892 break; 2893 default: 2894 out(O_DIE, "requirements_test: unexpected fme_state"); 2895 break; 2896 } 2897 out(O_ALTFP|O_VERB, NULL); 2898 indent_pop(); 2899 2900 return (return_value); 2901 } 2902 2903 /* this event is not a report, descend the tree */ 2904 for (bp = itree_next_bubble(ep, NULL); bp; 2905 bp = itree_next_bubble(ep, bp)) { 2906 int n; 2907 2908 if (bp->t != B_FROM) 2909 continue; 2910 2911 n = bp->nork; 2912 2913 credible_events = 0; 2914 waiting_events = 0; 2915 deferred_events = 0; 2916 arrow_delay = TIMEVAL_EVENTUALLY; 2917 /* 2918 * n is -1 for 'A' so adjust it. 2919 * XXX just count up the arrows for now. 
2920 */ 2921 if (n < 0) { 2922 n = 0; 2923 for (ap = itree_next_arrow(bp, NULL); ap; 2924 ap = itree_next_arrow(bp, ap)) 2925 n++; 2926 indent(); 2927 out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n); 2928 } else { 2929 indent(); 2930 out(O_ALTFP|O_VERB, " Bubble N=%d", n); 2931 } 2932 2933 if (n == 0) 2934 continue; 2935 if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) { 2936 for (ap = itree_next_arrow(bp, NULL); ap; 2937 ap = itree_next_arrow(bp, ap)) { 2938 ep2 = ap->arrowp->head->myevent; 2939 platform_set_payloadnvp(ep2->nvp); 2940 if (checkconstraints(fmep, ap->arrowp) == 0) { 2941 /* 2942 * if any arrow is invalidated by the 2943 * constraints, then we should elide the 2944 * whole bubble to be consistant with 2945 * the tree creation time behaviour 2946 */ 2947 bp->mark |= BUBBLE_ELIDED; 2948 platform_set_payloadnvp(NULL); 2949 break; 2950 } 2951 platform_set_payloadnvp(NULL); 2952 } 2953 } 2954 if (bp->mark & BUBBLE_ELIDED) 2955 continue; 2956 bp->mark |= BUBBLE_OK; 2957 for (ap = itree_next_arrow(bp, NULL); ap; 2958 ap = itree_next_arrow(bp, ap)) { 2959 ep2 = ap->arrowp->head->myevent; 2960 if (n <= credible_events) 2961 break; 2962 2963 ap->arrowp->mark |= REQMNTS_COUNTER; 2964 if (triggered(fmep, ep2, REQMNTS_COUNTER)) 2965 /* XXX adding max timevals! 
*/ 2966 switch (requirements_test(fmep, ep2, 2967 at_latest_by + ap->arrowp->maxdelay, 2968 &my_delay)) { 2969 case FME_DEFERRED: 2970 deferred_events++; 2971 break; 2972 case FME_CREDIBLE: 2973 credible_events++; 2974 break; 2975 case FME_DISPROVED: 2976 break; 2977 case FME_WAIT: 2978 if (my_delay < arrow_delay) 2979 arrow_delay = my_delay; 2980 waiting_events++; 2981 break; 2982 default: 2983 out(O_DIE, 2984 "Bug in requirements_test."); 2985 } 2986 else 2987 deferred_events++; 2988 } 2989 indent(); 2990 out(O_ALTFP|O_VERB, " Credible: %d Waiting %d", 2991 credible_events + deferred_events, waiting_events); 2992 if (credible_events + deferred_events + waiting_events < n) { 2993 /* Can never meet requirements */ 2994 ep->cached_state |= REQMNTS_DISPROVED; 2995 indent(); 2996 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED "); 2997 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2998 out(O_ALTFP|O_VERB, NULL); 2999 indent_pop(); 3000 return (FME_DISPROVED); 3001 } 3002 if (credible_events + deferred_events < n) { 3003 /* will have to wait */ 3004 /* wait time is shortest known */ 3005 if (arrow_delay < overall_delay) 3006 overall_delay = arrow_delay; 3007 return_value = FME_WAIT; 3008 } else if (credible_events < n) { 3009 if (return_value != FME_WAIT) 3010 return_value = FME_DEFERRED; 3011 } 3012 } 3013 3014 /* 3015 * don't mark as FME_DEFERRED. If this event isn't reached by another 3016 * path, then this will be considered FME_CREDIBLE. But if it is 3017 * reached by a different path so the K-count is met, then might 3018 * get overridden by FME_WAIT or FME_DISPROVED. 
3019 */ 3020 if (return_value == FME_WAIT) { 3021 ep->cached_state |= REQMNTS_WAIT; 3022 ep->cached_delay = *pdelay = overall_delay; 3023 } else if (return_value == FME_CREDIBLE) { 3024 ep->cached_state |= REQMNTS_CREDIBLE; 3025 } 3026 indent(); 3027 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ", 3028 fme_state2str(return_value)); 3029 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3030 out(O_ALTFP|O_VERB, NULL); 3031 indent_pop(); 3032 return (return_value); 3033 } 3034 3035 static enum fme_state 3036 causes_test(struct fme *fmep, struct event *ep, 3037 unsigned long long at_latest_by, unsigned long long *pdelay) 3038 { 3039 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 3040 unsigned long long my_delay; 3041 int credible_results = 0; 3042 int waiting_results = 0; 3043 enum fme_state fstate; 3044 struct event *tail_event; 3045 struct bubble *bp; 3046 struct arrowlist *ap; 3047 int k = 1; 3048 3049 stats_counter_bump(fmep->Ccallcount); 3050 indent_push(" C"); 3051 indent(); 3052 out(O_ALTFP|O_VERB|O_NONL, "->"); 3053 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3054 out(O_ALTFP|O_VERB, NULL); 3055 3056 for (bp = itree_next_bubble(ep, NULL); bp; 3057 bp = itree_next_bubble(ep, bp)) { 3058 if (bp->t != B_TO) 3059 continue; 3060 k = bp->nork; /* remember the K value */ 3061 for (ap = itree_next_arrow(bp, NULL); ap; 3062 ap = itree_next_arrow(bp, ap)) { 3063 int do_not_follow = 0; 3064 3065 /* 3066 * if we get to the same event multiple times 3067 * only worry about the first one. 
3068 */ 3069 if (ap->arrowp->tail->myevent->cached_state & 3070 CAUSES_TESTED) { 3071 indent(); 3072 out(O_ALTFP|O_VERB|O_NONL, 3073 " causes test already run for "); 3074 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, 3075 ap->arrowp->tail->myevent); 3076 out(O_ALTFP|O_VERB, NULL); 3077 continue; 3078 } 3079 3080 /* 3081 * see if false constraint prevents us 3082 * from traversing this arrow 3083 */ 3084 platform_set_payloadnvp(ep->nvp); 3085 if (checkconstraints(fmep, ap->arrowp) == 0) 3086 do_not_follow = 1; 3087 platform_set_payloadnvp(NULL); 3088 if (do_not_follow) { 3089 indent(); 3090 out(O_ALTFP|O_VERB|O_NONL, 3091 " False arrow from "); 3092 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, 3093 ap->arrowp->tail->myevent); 3094 out(O_ALTFP|O_VERB, NULL); 3095 continue; 3096 } 3097 3098 ap->arrowp->tail->myevent->cached_state |= 3099 CAUSES_TESTED; 3100 tail_event = ap->arrowp->tail->myevent; 3101 fstate = hypothesise(fmep, tail_event, at_latest_by, 3102 &my_delay); 3103 3104 switch (fstate) { 3105 case FME_WAIT: 3106 if (my_delay < overall_delay) 3107 overall_delay = my_delay; 3108 waiting_results++; 3109 break; 3110 case FME_CREDIBLE: 3111 credible_results++; 3112 break; 3113 case FME_DISPROVED: 3114 break; 3115 default: 3116 out(O_DIE, "Bug in causes_test"); 3117 } 3118 } 3119 } 3120 /* compare against K */ 3121 if (credible_results + waiting_results < k) { 3122 indent(); 3123 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED "); 3124 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3125 out(O_ALTFP|O_VERB, NULL); 3126 indent_pop(); 3127 return (FME_DISPROVED); 3128 } 3129 if (waiting_results != 0) { 3130 *pdelay = overall_delay; 3131 indent(); 3132 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT "); 3133 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3134 out(O_ALTFP|O_VERB|O_NONL, " to "); 3135 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3136 out(O_ALTFP|O_VERB, NULL); 3137 indent_pop(); 3138 return (FME_WAIT); 3139 } 3140 indent(); 3141 out(O_ALTFP|O_VERB|O_NONL, 
"<-CAUSES CREDIBLE "); 3142 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3143 out(O_ALTFP|O_VERB, NULL); 3144 indent_pop(); 3145 return (FME_CREDIBLE); 3146 } 3147 3148 static enum fme_state 3149 hypothesise(struct fme *fmep, struct event *ep, 3150 unsigned long long at_latest_by, unsigned long long *pdelay) 3151 { 3152 enum fme_state rtr, otr; 3153 unsigned long long my_delay; 3154 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 3155 3156 stats_counter_bump(fmep->Hcallcount); 3157 indent_push(" H"); 3158 indent(); 3159 out(O_ALTFP|O_VERB|O_NONL, "->"); 3160 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3161 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: "); 3162 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3163 out(O_ALTFP|O_VERB, NULL); 3164 3165 rtr = requirements_test(fmep, ep, at_latest_by, &my_delay); 3166 if ((rtr == FME_WAIT) && (my_delay < overall_delay)) 3167 overall_delay = my_delay; 3168 if (rtr != FME_DISPROVED) { 3169 if (is_problem(ep->t)) { 3170 otr = effects_test(fmep, ep, at_latest_by, &my_delay); 3171 if (otr != FME_DISPROVED) { 3172 if (fmep->peek == 0 && ep->is_suspect++ == 0) { 3173 ep->suspects = fmep->suspects; 3174 fmep->suspects = ep; 3175 fmep->nsuspects++; 3176 if (!is_fault(ep->t)) 3177 fmep->nonfault++; 3178 } 3179 } 3180 } else 3181 otr = causes_test(fmep, ep, at_latest_by, &my_delay); 3182 if ((otr == FME_WAIT) && (my_delay < overall_delay)) 3183 overall_delay = my_delay; 3184 if ((otr != FME_DISPROVED) && 3185 ((rtr == FME_WAIT) || (otr == FME_WAIT))) 3186 *pdelay = overall_delay; 3187 } 3188 if (rtr == FME_DISPROVED) { 3189 indent(); 3190 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 3191 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3192 out(O_ALTFP|O_VERB, " (doesn't meet requirements)"); 3193 indent_pop(); 3194 return (FME_DISPROVED); 3195 } 3196 if ((otr == FME_DISPROVED) && is_problem(ep->t)) { 3197 indent(); 3198 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 3199 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3200 
out(O_ALTFP|O_VERB, " (doesn't explain all reports)"); 3201 indent_pop(); 3202 return (FME_DISPROVED); 3203 } 3204 if (otr == FME_DISPROVED) { 3205 indent(); 3206 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 3207 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3208 out(O_ALTFP|O_VERB, " (causes are not credible)"); 3209 indent_pop(); 3210 return (FME_DISPROVED); 3211 } 3212 if ((rtr == FME_WAIT) || (otr == FME_WAIT)) { 3213 indent(); 3214 out(O_ALTFP|O_VERB|O_NONL, "<-WAIT "); 3215 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3216 out(O_ALTFP|O_VERB|O_NONL, " to "); 3217 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay); 3218 out(O_ALTFP|O_VERB, NULL); 3219 indent_pop(); 3220 return (FME_WAIT); 3221 } 3222 indent(); 3223 out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE "); 3224 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3225 out(O_ALTFP|O_VERB, NULL); 3226 indent_pop(); 3227 return (FME_CREDIBLE); 3228 } 3229 3230 /* 3231 * fme_istat_load -- reconstitute any persistent istats 3232 */ 3233 void 3234 fme_istat_load(fmd_hdl_t *hdl) 3235 { 3236 int sz; 3237 char *sbuf; 3238 char *ptr; 3239 3240 if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) { 3241 out(O_ALTFP, "fme_istat_load: No stats"); 3242 return; 3243 } 3244 3245 sbuf = alloca(sz); 3246 3247 fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz); 3248 3249 /* 3250 * pick apart the serialized stats 3251 * 3252 * format is: 3253 * <class-name>, '@', <path>, '\0', <value>, '\0' 3254 * for example: 3255 * "stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0" 3256 * 3257 * since this is parsing our own serialized data, any parsing issues 3258 * are fatal, so we check for them all with ASSERT() below. 
3259 */ 3260 ptr = sbuf; 3261 while (ptr < &sbuf[sz]) { 3262 char *sepptr; 3263 struct node *np; 3264 int val; 3265 3266 sepptr = strchr(ptr, '@'); 3267 ASSERT(sepptr != NULL); 3268 *sepptr = '\0'; 3269 3270 /* construct the event */ 3271 np = newnode(T_EVENT, NULL, 0); 3272 np->u.event.ename = newnode(T_NAME, NULL, 0); 3273 np->u.event.ename->u.name.t = N_STAT; 3274 np->u.event.ename->u.name.s = stable(ptr); 3275 np->u.event.ename->u.name.it = IT_ENAME; 3276 np->u.event.ename->u.name.last = np->u.event.ename; 3277 3278 ptr = sepptr + 1; 3279 ASSERT(ptr < &sbuf[sz]); 3280 ptr += strlen(ptr); 3281 ptr++; /* move past the '\0' separating path from value */ 3282 ASSERT(ptr < &sbuf[sz]); 3283 ASSERT(isdigit(*ptr)); 3284 val = atoi(ptr); 3285 ASSERT(val > 0); 3286 ptr += strlen(ptr); 3287 ptr++; /* move past the final '\0' for this entry */ 3288 3289 np->u.event.epname = pathstring2epnamenp(sepptr + 1); 3290 ASSERT(np->u.event.epname != NULL); 3291 3292 istat_bump(np, val); 3293 tree_free(np); 3294 } 3295 3296 istat_save(); 3297 } 3298