1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * fme.c -- fault management exercise module 27 * 28 * this module provides the simulated fault management exercise. 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <string.h> 36 #include <strings.h> 37 #include <ctype.h> 38 #include <alloca.h> 39 #include <libnvpair.h> 40 #include <sys/fm/protocol.h> 41 #include <fm/fmd_api.h> 42 #include "alloc.h" 43 #include "out.h" 44 #include "stats.h" 45 #include "stable.h" 46 #include "literals.h" 47 #include "lut.h" 48 #include "tree.h" 49 #include "ptree.h" 50 #include "itree.h" 51 #include "ipath.h" 52 #include "fme.h" 53 #include "evnv.h" 54 #include "eval.h" 55 #include "config.h" 56 #include "platform.h" 57 58 /* imported from eft.c... 
*/ 59 extern char *Autoclose; 60 extern int Dupclose; 61 extern hrtime_t Hesitate; 62 extern nv_alloc_t Eft_nv_hdl; 63 extern int Max_fme; 64 extern fmd_hdl_t *Hdl; 65 66 static int Istat_need_save; 67 68 /* fme under construction is global so we can free it on module abort */ 69 static struct fme *Nfmep; 70 71 static const char *Undiag_reason; 72 73 static int Nextid = 0; 74 75 static int Open_fme_count = 0; /* Count of open FMEs */ 76 77 /* list of fault management exercises underway */ 78 static struct fme { 79 struct fme *next; /* next exercise */ 80 unsigned long long ull; /* time when fme was created */ 81 int id; /* FME id */ 82 struct cfgdata *cfgdata; /* full configuration data */ 83 struct lut *eventtree; /* propagation tree for this FME */ 84 /* 85 * The initial error report that created this FME is kept in 86 * two forms. e0 points to the instance tree node and is used 87 * by fme_eval() as the starting point for the inference 88 * algorithm. e0r is the event handle FMD passed to us when 89 * the ereport first arrived and is used when setting timers, 90 * which are always relative to the time of this initial 91 * report. 
92 */ 93 struct event *e0; 94 fmd_event_t *e0r; 95 96 id_t timer; /* for setting an fmd time-out */ 97 id_t htid; /* for setting hesitation timer */ 98 99 struct event *ecurrent; /* ereport under consideration */ 100 struct event *suspects; /* current suspect list */ 101 struct event *psuspects; /* previous suspect list */ 102 int nsuspects; /* count of suspects */ 103 int nonfault; /* zero if all suspects T_FAULT */ 104 int posted_suspects; /* true if we've posted a diagnosis */ 105 int hesitated; /* true if we hesitated */ 106 int uniqobs; /* number of unique events observed */ 107 int peek; /* just peeking, don't track suspects */ 108 int overflow; /* true if overflow FME */ 109 enum fme_state { 110 FME_NOTHING = 5000, /* not evaluated yet */ 111 FME_WAIT, /* need to wait for more info */ 112 FME_CREDIBLE, /* suspect list is credible */ 113 FME_DISPROVED, /* no valid suspects found */ 114 FME_DEFERRED /* don't know yet (k-count not met) */ 115 } state; 116 117 unsigned long long pull; /* time passed since created */ 118 unsigned long long wull; /* wait until this time for re-eval */ 119 struct event *observations; /* observation list */ 120 struct lut *globals; /* values of global variables */ 121 /* fmd interfacing */ 122 fmd_hdl_t *hdl; /* handle for talking with fmd */ 123 fmd_case_t *fmcase; /* what fmd 'case' we associate with */ 124 /* stats */ 125 struct stats *Rcount; 126 struct stats *Hcallcount; 127 struct stats *Rcallcount; 128 struct stats *Ccallcount; 129 struct stats *Ecallcount; 130 struct stats *Tcallcount; 131 struct stats *Marrowcount; 132 struct stats *diags; 133 } *FMElist, *EFMElist, *ClosedFMEs; 134 135 static struct case_list { 136 fmd_case_t *fmcase; 137 struct case_list *next; 138 } *Undiagablecaselist; 139 140 static void fme_eval(struct fme *fmep, fmd_event_t *ffep); 141 static enum fme_state hypothesise(struct fme *fmep, struct event *ep, 142 unsigned long long at_latest_by, unsigned long long *pdelay); 143 static struct node 
*eventprop_lookup(struct event *ep, const char *propname); 144 static struct node *pathstring2epnamenp(char *path); 145 static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep); 146 static void restore_suspects(struct fme *fmep); 147 static void save_suspects(struct fme *fmep); 148 static void destroy_fme(struct fme *f); 149 static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep, 150 const char *eventstring, const struct ipath *ipp, nvlist_t *nvl); 151 152 static struct fme * 153 alloc_fme(void) 154 { 155 struct fme *fmep; 156 157 fmep = MALLOC(sizeof (*fmep)); 158 bzero(fmep, sizeof (*fmep)); 159 return (fmep); 160 } 161 162 /* 163 * fme_ready -- called when all initialization of the FME (except for 164 * stats) has completed successfully. Adds the fme to global lists 165 * and establishes its stats. 166 */ 167 static struct fme * 168 fme_ready(struct fme *fmep) 169 { 170 char nbuf[100]; 171 172 Nfmep = NULL; /* don't need to free this on module abort now */ 173 174 if (EFMElist) { 175 EFMElist->next = fmep; 176 EFMElist = fmep; 177 } else 178 FMElist = EFMElist = fmep; 179 180 (void) sprintf(nbuf, "fme%d.Rcount", fmep->id); 181 fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0); 182 (void) sprintf(nbuf, "fme%d.Hcall", fmep->id); 183 fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1); 184 (void) sprintf(nbuf, "fme%d.Rcall", fmep->id); 185 fmep->Rcallcount = stats_new_counter(nbuf, 186 "calls to requirements_test()", 1); 187 (void) sprintf(nbuf, "fme%d.Ccall", fmep->id); 188 fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1); 189 (void) sprintf(nbuf, "fme%d.Ecall", fmep->id); 190 fmep->Ecallcount = 191 stats_new_counter(nbuf, "calls to effects_test()", 1); 192 (void) sprintf(nbuf, "fme%d.Tcall", fmep->id); 193 fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1); 194 (void) sprintf(nbuf, "fme%d.Marrow", fmep->id); 195 fmep->Marrowcount = stats_new_counter(nbuf, 196 "arrows 
marked by mark_arrows()", 1); 197 (void) sprintf(nbuf, "fme%d.diags", fmep->id); 198 fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0); 199 200 out(O_ALTFP|O_VERB2, "newfme: config snapshot contains..."); 201 config_print(O_ALTFP|O_VERB2, fmep->cfgdata->cooked); 202 203 return (fmep); 204 } 205 206 static struct fme * 207 newfme(const char *e0class, const struct ipath *e0ipp) 208 { 209 struct cfgdata *cfgdata; 210 211 if ((cfgdata = config_snapshot()) == NULL) { 212 out(O_ALTFP, "newfme: NULL configuration"); 213 Undiag_reason = UD_NOCONF; 214 return (NULL); 215 } 216 217 Nfmep = alloc_fme(); 218 219 Nfmep->id = Nextid++; 220 Nfmep->cfgdata = cfgdata; 221 Nfmep->posted_suspects = 0; 222 Nfmep->uniqobs = 0; 223 Nfmep->state = FME_NOTHING; 224 Nfmep->pull = 0ULL; 225 Nfmep->overflow = 0; 226 227 Nfmep->fmcase = NULL; 228 Nfmep->hdl = NULL; 229 230 if ((Nfmep->eventtree = itree_create(cfgdata->cooked)) == NULL) { 231 out(O_ALTFP, "newfme: NULL instance tree"); 232 Undiag_reason = UD_INSTFAIL; 233 config_free(cfgdata); 234 FREE(Nfmep); 235 Nfmep = NULL; 236 return (NULL); 237 } 238 239 itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree); 240 241 if ((Nfmep->e0 = 242 itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) { 243 out(O_ALTFP, "newfme: e0 not in instance tree"); 244 Undiag_reason = UD_BADEVENTI; 245 itree_free(Nfmep->eventtree); 246 config_free(cfgdata); 247 FREE(Nfmep); 248 Nfmep = NULL; 249 return (NULL); 250 } 251 252 return (fme_ready(Nfmep)); 253 } 254 255 void 256 fme_fini(void) 257 { 258 struct fme *sfp, *fp; 259 struct case_list *ucasep, *nextcasep; 260 261 ucasep = Undiagablecaselist; 262 while (ucasep != NULL) { 263 nextcasep = ucasep->next; 264 FREE(ucasep); 265 ucasep = nextcasep; 266 } 267 Undiagablecaselist = NULL; 268 269 /* clean up closed fmes */ 270 fp = ClosedFMEs; 271 while (fp != NULL) { 272 sfp = fp->next; 273 destroy_fme(fp); 274 fp = sfp; 275 } 276 ClosedFMEs = NULL; 277 278 fp = FMElist; 279 while (fp != NULL) { 280 
sfp = fp->next; 281 destroy_fme(fp); 282 fp = sfp; 283 } 284 FMElist = EFMElist = NULL; 285 286 /* if we were in the middle of creating an fme, free it now */ 287 if (Nfmep) { 288 destroy_fme(Nfmep); 289 Nfmep = NULL; 290 } 291 } 292 293 /* 294 * Allocated space for a buffer name. 20 bytes allows for 295 * a ridiculous 9,999,999 unique observations. 296 */ 297 #define OBBUFNMSZ 20 298 299 /* 300 * serialize_observation 301 * 302 * Create a recoverable version of the current observation 303 * (f->ecurrent). We keep a serialized version of each unique 304 * observation in order that we may resume correctly the fme in the 305 * correct state if eft or fmd crashes and we're restarted. 306 */ 307 static void 308 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp) 309 { 310 size_t pkdlen; 311 char tmpbuf[OBBUFNMSZ]; 312 char *pkd = NULL; 313 char *estr; 314 315 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs); 316 estr = ipath2str(cls, ipp); 317 fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1); 318 fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr, 319 strlen(estr) + 1); 320 FREE(estr); 321 322 if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) { 323 (void) snprintf(tmpbuf, 324 OBBUFNMSZ, "observed%d.nvp", fp->uniqobs); 325 if (nvlist_xpack(fp->ecurrent->nvp, 326 &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0) 327 out(O_DIE|O_SYS, "pack of observed nvl failed"); 328 fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen); 329 fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen); 330 FREE(pkd); 331 } 332 333 fp->uniqobs++; 334 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs, 335 sizeof (fp->uniqobs)); 336 } 337 338 /* 339 * init_fme_bufs -- We keep several bits of state about an fme for 340 * use if eft or fmd crashes and we're restarted. 
341 */ 342 static void 343 init_fme_bufs(struct fme *fp) 344 { 345 size_t cfglen = fp->cfgdata->nextfree - fp->cfgdata->begin; 346 347 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_CFGLEN, sizeof (cfglen)); 348 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_CFGLEN, (void *)&cfglen, 349 sizeof (cfglen)); 350 if (cfglen != 0) { 351 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_CFG, cfglen); 352 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_CFG, 353 fp->cfgdata->begin, cfglen); 354 } 355 356 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull)); 357 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull, 358 sizeof (fp->pull)); 359 360 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id)); 361 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id, 362 sizeof (fp->id)); 363 364 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs)); 365 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs, 366 sizeof (fp->uniqobs)); 367 368 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD, 369 sizeof (fp->posted_suspects)); 370 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD, 371 (void *)&fp->posted_suspects, sizeof (fp->posted_suspects)); 372 } 373 374 static void 375 destroy_fme_bufs(struct fme *fp) 376 { 377 char tmpbuf[OBBUFNMSZ]; 378 int o; 379 380 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN); 381 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG); 382 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL); 383 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID); 384 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD); 385 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS); 386 387 for (o = 0; o < fp->uniqobs; o++) { 388 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o); 389 fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf); 390 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o); 391 fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf); 392 } 393 } 394 395 /* 396 * reconstitute_observations -- convert a case's serialized observations 397 * back into 
struct events. Returns zero if all observations are 398 * successfully reconstituted. 399 */ 400 static int 401 reconstitute_observations(struct fme *fmep) 402 { 403 struct event *ep; 404 struct node *epnamenp = NULL; 405 size_t pkdlen; 406 char *pkd = NULL; 407 char *tmpbuf = alloca(OBBUFNMSZ); 408 char *sepptr; 409 char *estr; 410 int ocnt; 411 int elen; 412 413 for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) { 414 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt); 415 elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf); 416 if (elen == 0) { 417 out(O_ALTFP, 418 "reconstitute_observation: no %s buffer found.", 419 tmpbuf); 420 Undiag_reason = UD_MISSINGOBS; 421 break; 422 } 423 424 estr = MALLOC(elen); 425 fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen); 426 sepptr = strchr(estr, '@'); 427 if (sepptr == NULL) { 428 out(O_ALTFP, 429 "reconstitute_observation: %s: " 430 "missing @ separator in %s.", 431 tmpbuf, estr); 432 Undiag_reason = UD_MISSINGPATH; 433 FREE(estr); 434 break; 435 } 436 437 *sepptr = '\0'; 438 if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) { 439 out(O_ALTFP, 440 "reconstitute_observation: %s: " 441 "trouble converting path string \"%s\" " 442 "to internal representation.", 443 tmpbuf, sepptr + 1); 444 Undiag_reason = UD_MISSINGPATH; 445 FREE(estr); 446 break; 447 } 448 449 /* construct the event */ 450 ep = itree_lookup(fmep->eventtree, 451 stable(estr), ipath(epnamenp)); 452 if (ep == NULL) { 453 out(O_ALTFP, 454 "reconstitute_observation: %s: " 455 "lookup of \"%s\" in itree failed.", 456 tmpbuf, ipath2str(estr, ipath(epnamenp))); 457 Undiag_reason = UD_BADOBS; 458 tree_free(epnamenp); 459 FREE(estr); 460 break; 461 } 462 tree_free(epnamenp); 463 464 /* 465 * We may or may not have a saved nvlist for the observation 466 */ 467 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt); 468 pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf); 469 if (pkdlen != 0) { 470 pkd = MALLOC(pkdlen); 471 fmd_buf_read(fmep->hdl, 
472 fmep->fmcase, tmpbuf, pkd, pkdlen); 473 ASSERT(ep->nvp == NULL); 474 if (nvlist_xunpack(pkd, 475 pkdlen, &ep->nvp, &Eft_nv_hdl) != 0) 476 out(O_DIE|O_SYS, "pack of observed nvl failed"); 477 FREE(pkd); 478 } 479 480 if (ocnt == 0) 481 fmep->e0 = ep; 482 483 FREE(estr); 484 fmep->ecurrent = ep; 485 ep->count++; 486 487 /* link it into list of observations seen */ 488 ep->observations = fmep->observations; 489 fmep->observations = ep; 490 } 491 492 if (ocnt == fmep->uniqobs) { 493 (void) fme_ready(fmep); 494 return (0); 495 } 496 497 return (1); 498 } 499 500 /* 501 * restart_fme -- called during eft initialization. Reconstitutes 502 * an in-progress fme. 503 */ 504 void 505 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress) 506 { 507 nvlist_t *defect; 508 struct case_list *bad; 509 struct fme *fmep; 510 struct cfgdata *cfgdata = NULL; 511 size_t rawsz; 512 513 fmep = alloc_fme(); 514 fmep->fmcase = inprogress; 515 fmep->hdl = hdl; 516 517 if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) { 518 out(O_ALTFP, "restart_fme: No config data"); 519 Undiag_reason = UD_MISSINGINFO; 520 goto badcase; 521 } 522 fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz, 523 sizeof (size_t)); 524 525 if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) { 526 out(O_ALTFP, "restart_fme: No event zero"); 527 Undiag_reason = UD_MISSINGZERO; 528 goto badcase; 529 } 530 531 cfgdata = MALLOC(sizeof (struct cfgdata)); 532 cfgdata->cooked = NULL; 533 cfgdata->devcache = NULL; 534 cfgdata->cpucache = NULL; 535 cfgdata->refcnt = 1; 536 537 if (rawsz > 0) { 538 if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) { 539 out(O_ALTFP, "restart_fme: Config data size mismatch"); 540 Undiag_reason = UD_CFGMISMATCH; 541 goto badcase; 542 } 543 cfgdata->begin = MALLOC(rawsz); 544 cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz; 545 fmd_buf_read(hdl, 546 inprogress, WOBUF_CFG, cfgdata->begin, rawsz); 547 } else { 548 cfgdata->begin = cfgdata->end = 
cfgdata->nextfree = NULL; 549 } 550 fmep->cfgdata = cfgdata; 551 552 config_cook(cfgdata); 553 if ((fmep->eventtree = itree_create(cfgdata->cooked)) == NULL) { 554 /* case not properly saved or irretrievable */ 555 out(O_ALTFP, "restart_fme: NULL instance tree"); 556 Undiag_reason = UD_INSTFAIL; 557 goto badcase; 558 } 559 560 itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree); 561 562 if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) { 563 out(O_ALTFP, "restart_fme: no saved wait time"); 564 Undiag_reason = UD_MISSINGINFO; 565 goto badcase; 566 } else { 567 fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull, 568 sizeof (fmep->pull)); 569 } 570 571 if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) { 572 out(O_ALTFP, "restart_fme: no saved posted status"); 573 Undiag_reason = UD_MISSINGINFO; 574 goto badcase; 575 } else { 576 fmd_buf_read(hdl, inprogress, WOBUF_POSTD, 577 (void *)&fmep->posted_suspects, 578 sizeof (fmep->posted_suspects)); 579 } 580 581 if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) { 582 out(O_ALTFP, "restart_fme: no saved id"); 583 Undiag_reason = UD_MISSINGINFO; 584 goto badcase; 585 } else { 586 fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id, 587 sizeof (fmep->id)); 588 } 589 if (Nextid <= fmep->id) 590 Nextid = fmep->id + 1; 591 592 if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) { 593 out(O_ALTFP, "restart_fme: no count of observations"); 594 Undiag_reason = UD_MISSINGINFO; 595 goto badcase; 596 } else { 597 fmd_buf_read(hdl, inprogress, WOBUF_NOBS, 598 (void *)&fmep->uniqobs, sizeof (fmep->uniqobs)); 599 } 600 601 if (reconstitute_observations(fmep) != 0) 602 goto badcase; 603 604 Open_fme_count++; 605 606 /* give the diagnosis algorithm a shot at the new FME state */ 607 fme_eval(fmep, NULL); 608 return; 609 610 badcase: 611 if (fmep->eventtree != NULL) 612 itree_free(fmep->eventtree); 613 config_free(cfgdata); 614 destroy_fme_bufs(fmep); 615 FREE(fmep); 616 617 /* 618 * Since we're unable to restart the case, 
add it to the undiagable 619 * list and solve and close it as appropriate. 620 */ 621 bad = MALLOC(sizeof (struct case_list)); 622 bad->next = NULL; 623 624 if (Undiagablecaselist != NULL) 625 bad->next = Undiagablecaselist; 626 Undiagablecaselist = bad; 627 bad->fmcase = inprogress; 628 629 out(O_ALTFP, "[case %s (unable to restart), ", 630 fmd_case_uuid(hdl, bad->fmcase)); 631 632 if (fmd_case_solved(hdl, bad->fmcase)) { 633 out(O_ALTFP, "already solved, "); 634 } else { 635 out(O_ALTFP, "solving, "); 636 defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100, 637 NULL, NULL, NULL); 638 if (Undiag_reason != NULL) 639 (void) nvlist_add_string(defect, 640 UNDIAG_REASON, Undiag_reason); 641 fmd_case_add_suspect(hdl, bad->fmcase, defect); 642 fmd_case_solve(hdl, bad->fmcase); 643 } 644 645 if (fmd_case_closed(hdl, bad->fmcase)) { 646 out(O_ALTFP, "already closed ]"); 647 } else { 648 out(O_ALTFP, "closing ]"); 649 fmd_case_close(hdl, bad->fmcase); 650 } 651 } 652 653 /*ARGSUSED*/ 654 static void 655 globals_destructor(void *left, void *right, void *arg) 656 { 657 struct evalue *evp = (struct evalue *)right; 658 if (evp->t == NODEPTR) 659 tree_free((struct node *)(uintptr_t)evp->v); 660 evp->v = NULL; 661 FREE(evp); 662 } 663 664 void 665 destroy_fme(struct fme *f) 666 { 667 stats_delete(f->Rcount); 668 stats_delete(f->Hcallcount); 669 stats_delete(f->Rcallcount); 670 stats_delete(f->Ccallcount); 671 stats_delete(f->Ecallcount); 672 stats_delete(f->Tcallcount); 673 stats_delete(f->Marrowcount); 674 stats_delete(f->diags); 675 676 itree_free(f->eventtree); 677 config_free(f->cfgdata); 678 lut_free(f->globals, globals_destructor, NULL); 679 FREE(f); 680 } 681 682 static const char * 683 fme_state2str(enum fme_state s) 684 { 685 switch (s) { 686 case FME_NOTHING: return ("NOTHING"); 687 case FME_WAIT: return ("WAIT"); 688 case FME_CREDIBLE: return ("CREDIBLE"); 689 case FME_DISPROVED: return ("DISPROVED"); 690 case FME_DEFERRED: return ("DEFERRED"); 691 default: 
return ("UNKNOWN"); 692 } 693 } 694 695 static int 696 is_problem(enum nametype t) 697 { 698 return (t == N_FAULT || t == N_DEFECT || t == N_UPSET); 699 } 700 701 static int 702 is_fault(enum nametype t) 703 { 704 return (t == N_FAULT); 705 } 706 707 static int 708 is_defect(enum nametype t) 709 { 710 return (t == N_DEFECT); 711 } 712 713 static int 714 is_upset(enum nametype t) 715 { 716 return (t == N_UPSET); 717 } 718 719 static void 720 fme_print(int flags, struct fme *fmep) 721 { 722 struct event *ep; 723 724 out(flags, "Fault Management Exercise %d", fmep->id); 725 out(flags, "\t State: %s", fme_state2str(fmep->state)); 726 out(flags|O_NONL, "\t Start time: "); 727 ptree_timeval(flags|O_NONL, &fmep->ull); 728 out(flags, NULL); 729 if (fmep->wull) { 730 out(flags|O_NONL, "\t Wait time: "); 731 ptree_timeval(flags|O_NONL, &fmep->wull); 732 out(flags, NULL); 733 } 734 out(flags|O_NONL, "\t E0: "); 735 if (fmep->e0) 736 itree_pevent_brief(flags|O_NONL, fmep->e0); 737 else 738 out(flags|O_NONL, "NULL"); 739 out(flags, NULL); 740 out(flags|O_NONL, "\tObservations:"); 741 for (ep = fmep->observations; ep; ep = ep->observations) { 742 out(flags|O_NONL, " "); 743 itree_pevent_brief(flags|O_NONL, ep); 744 } 745 out(flags, NULL); 746 out(flags|O_NONL, "\tSuspect list:"); 747 for (ep = fmep->suspects; ep; ep = ep->suspects) { 748 out(flags|O_NONL, " "); 749 itree_pevent_brief(flags|O_NONL, ep); 750 } 751 out(flags, NULL); 752 out(flags|O_VERB2, "\t Tree:"); 753 itree_ptree(flags|O_VERB2, fmep->eventtree); 754 } 755 756 static struct node * 757 pathstring2epnamenp(char *path) 758 { 759 char *sep = "/"; 760 struct node *ret; 761 char *ptr; 762 763 if ((ptr = strtok(path, sep)) == NULL) 764 out(O_DIE, "pathstring2epnamenp: invalid empty class"); 765 766 ret = tree_iname(stable(ptr), NULL, 0); 767 768 while ((ptr = strtok(NULL, sep)) != NULL) 769 ret = tree_name_append(ret, 770 tree_iname(stable(ptr), NULL, 0)); 771 772 return (ret); 773 } 774 775 /* 776 * for a given upset 
sp, increment the corresponding SERD engine. if the 777 * SERD engine trips, return the ename and ipp of the resulting ereport. 778 * returns true if engine tripped and *enamep and *ippp were filled in. 779 */ 780 static int 781 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep, 782 fmd_case_t *fmcase, struct event *sp, const char **enamep, 783 const struct ipath **ippp) 784 { 785 struct node *serdinst; 786 char *serdname; 787 struct node *nid; 788 789 ASSERT(sp->t == N_UPSET); 790 ASSERT(ffep != NULL); 791 792 /* 793 * obtain instanced SERD engine from the upset sp. from this 794 * derive serdname, the string used to identify the SERD engine. 795 */ 796 serdinst = eventprop_lookup(sp, L_engine); 797 798 if (serdinst == NULL) 799 return (NULL); 800 801 serdname = ipath2str(serdinst->u.stmt.np->u.event.ename->u.name.s, 802 ipath(serdinst->u.stmt.np->u.event.epname)); 803 804 /* handle serd engine "id" property, if there is one */ 805 if ((nid = 806 lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) { 807 struct evalue *gval; 808 char suffixbuf[200]; 809 char *suffix; 810 char *nserdname; 811 size_t nname; 812 813 out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname); 814 ptree_name_iter(O_ALTFP|O_NONL, nid); 815 816 ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t)); 817 818 if ((gval = lut_lookup(fmep->globals, 819 (void *)nid->u.globid.s, NULL)) == NULL) { 820 out(O_ALTFP, " undefined"); 821 } else if (gval->t == UINT64) { 822 out(O_ALTFP, " %llu", gval->v); 823 (void) sprintf(suffixbuf, "%llu", gval->v); 824 suffix = suffixbuf; 825 } else { 826 out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v); 827 suffix = (char *)(uintptr_t)gval->v; 828 } 829 830 nname = strlen(serdname) + strlen(suffix) + 2; 831 nserdname = MALLOC(nname); 832 (void) snprintf(nserdname, nname, "%s:%s", serdname, suffix); 833 FREE(serdname); 834 serdname = nserdname; 835 } 836 837 if (!fmd_serd_exists(hdl, serdname)) { 838 struct node *nN, *nT; 839 840 /* no SERD 
engine yet, so create it */ 841 nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N, NULL); 842 nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T, NULL); 843 844 ASSERT(nN->t == T_NUM); 845 ASSERT(nT->t == T_TIMEVAL); 846 847 fmd_serd_create(hdl, serdname, (uint_t)nN->u.ull, 848 (hrtime_t)nT->u.ull); 849 } 850 851 852 /* 853 * increment SERD engine. if engine fires, reset serd 854 * engine and return trip_strcode 855 */ 856 if (fmd_serd_record(hdl, serdname, ffep)) { 857 struct node *tripinst = lut_lookup(serdinst->u.stmt.lutp, 858 (void *)L_trip, NULL); 859 860 ASSERT(tripinst != NULL); 861 862 *enamep = tripinst->u.event.ename->u.name.s; 863 *ippp = ipath(tripinst->u.event.epname); 864 865 fmd_case_add_serd(hdl, fmcase, serdname); 866 fmd_serd_reset(hdl, serdname); 867 out(O_ALTFP|O_NONL, "[engine fired: %s, sending: ", serdname); 868 ipath_print(O_ALTFP|O_NONL, *enamep, *ippp); 869 out(O_ALTFP, "]"); 870 871 FREE(serdname); 872 return (1); 873 } 874 875 FREE(serdname); 876 return (0); 877 } 878 879 /* 880 * search a suspect list for upsets. feed each upset to serd_eval() and 881 * build up tripped[], an array of ereports produced by the firing of 882 * any SERD engines. then feed each ereport back into 883 * fme_receive_report(). 884 * 885 * returns ntrip, the number of these ereports produced. 886 */ 887 static int 888 upsets_eval(struct fme *fmep, fmd_event_t *ffep) 889 { 890 /* we build an array of tripped ereports that we send ourselves */ 891 struct { 892 const char *ename; 893 const struct ipath *ipp; 894 } *tripped; 895 struct event *sp; 896 int ntrip, nupset, i; 897 898 /* 899 * count the number of upsets to determine the upper limit on 900 * expected trip ereport strings. remember that one upset can 901 * lead to at most one ereport. 
902 */ 903 nupset = 0; 904 for (sp = fmep->suspects; sp; sp = sp->suspects) { 905 if (sp->t == N_UPSET) 906 nupset++; 907 } 908 909 if (nupset == 0) 910 return (0); 911 912 /* 913 * get to this point if we have upsets and expect some trip 914 * ereports 915 */ 916 tripped = alloca(sizeof (*tripped) * nupset); 917 bzero((void *)tripped, sizeof (*tripped) * nupset); 918 919 ntrip = 0; 920 for (sp = fmep->suspects; sp; sp = sp->suspects) 921 if (sp->t == N_UPSET && 922 serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp, 923 &tripped[ntrip].ename, &tripped[ntrip].ipp)) 924 ntrip++; 925 926 for (i = 0; i < ntrip; i++) 927 fme_receive_report(fmep->hdl, ffep, 928 tripped[i].ename, tripped[i].ipp, NULL); 929 930 return (ntrip); 931 } 932 933 /* 934 * fme_receive_external_report -- call when an external ereport comes in 935 * 936 * this routine just converts the relevant information from the ereport 937 * into a format used internally and passes it on to fme_receive_report(). 938 */ 939 void 940 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl, 941 const char *eventstring) 942 { 943 struct node *epnamenp = platform_getpath(nvl); 944 const struct ipath *ipp; 945 946 /* 947 * XFILE: If we ended up without a path, it's an X-file. 948 * For now, use our undiagnosable interface. 
949 */ 950 if (epnamenp == NULL) { 951 out(O_ALTFP, "XFILE: Unable to get path from ereport"); 952 Undiag_reason = UD_NOPATH; 953 publish_undiagnosable(hdl, ffep); 954 return; 955 } 956 957 ipp = ipath(epnamenp); 958 tree_free(epnamenp); 959 fme_receive_report(hdl, ffep, stable(eventstring), ipp, nvl); 960 } 961 962 static int mark_arrows(struct fme *fmep, struct event *ep, int mark, 963 unsigned long long at_latest_by, unsigned long long *pdelay, int keep); 964 965 /* ARGSUSED */ 966 static void 967 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep) 968 { 969 struct bubble *bp; 970 struct arrowlist *ap; 971 972 ep->cached_state = 0; 973 ep->keep_in_tree = 0; 974 for (bp = itree_next_bubble(ep, NULL); bp; 975 bp = itree_next_bubble(ep, bp)) { 976 if (bp->t != B_FROM) 977 continue; 978 bp->mark = 0; 979 for (ap = itree_next_arrow(bp, NULL); ap; 980 ap = itree_next_arrow(bp, ap)) 981 ap->arrowp->mark = 0; 982 } 983 } 984 985 static void 986 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep, 987 const char *eventstring, const struct ipath *ipp, nvlist_t *nvl) 988 { 989 struct event *ep; 990 struct fme *fmep = NULL; 991 struct fme *ofmep = NULL; 992 struct fme *cfmep, *svfmep; 993 int matched = 0; 994 nvlist_t *defect; 995 996 out(O_ALTFP|O_NONL, "fme_receive_report: "); 997 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 998 out(O_ALTFP|O_STAMP, NULL); 999 1000 /* decide which FME it goes to */ 1001 for (fmep = FMElist; fmep; fmep = fmep->next) { 1002 int prev_verbose; 1003 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 1004 enum fme_state state; 1005 nvlist_t *pre_peek_nvp = NULL; 1006 1007 if (fmep->overflow) { 1008 if (!(fmd_case_closed(fmep->hdl, fmep->fmcase))) 1009 ofmep = fmep; 1010 1011 continue; 1012 } 1013 1014 /* look up event in event tree for this FME */ 1015 if ((ep = itree_lookup(fmep->eventtree, 1016 eventstring, ipp)) == NULL) 1017 continue; 1018 1019 /* note observation */ 1020 fmep->ecurrent = ep; 1021 if (ep->count++ == 0) { 
1022 /* link it into list of observations seen */ 1023 ep->observations = fmep->observations; 1024 fmep->observations = ep; 1025 ep->nvp = evnv_dupnvl(nvl); 1026 } else { 1027 /* use new payload values for peek */ 1028 pre_peek_nvp = ep->nvp; 1029 ep->nvp = evnv_dupnvl(nvl); 1030 } 1031 1032 /* tell hypothesise() not to mess with suspect list */ 1033 fmep->peek = 1; 1034 1035 /* don't want this to be verbose (unless Debug is set) */ 1036 prev_verbose = Verbose; 1037 if (Debug == 0) 1038 Verbose = 0; 1039 1040 lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep); 1041 state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay); 1042 1043 fmep->peek = 0; 1044 1045 /* put verbose flag back */ 1046 Verbose = prev_verbose; 1047 1048 if (state != FME_DISPROVED) { 1049 /* found an FME that explains the ereport */ 1050 matched++; 1051 out(O_ALTFP|O_NONL, "["); 1052 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1053 out(O_ALTFP, " explained by FME%d]", fmep->id); 1054 1055 if (pre_peek_nvp) 1056 nvlist_free(pre_peek_nvp); 1057 1058 if (ep->count == 1) 1059 serialize_observation(fmep, eventstring, ipp); 1060 1061 if (ffep) 1062 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1063 1064 stats_counter_bump(fmep->Rcount); 1065 1066 /* re-eval FME */ 1067 fme_eval(fmep, ffep); 1068 } else { 1069 1070 /* not a match, undo noting of observation */ 1071 fmep->ecurrent = NULL; 1072 if (--ep->count == 0) { 1073 /* unlink it from observations */ 1074 fmep->observations = ep->observations; 1075 ep->observations = NULL; 1076 nvlist_free(ep->nvp); 1077 ep->nvp = NULL; 1078 } else { 1079 nvlist_free(ep->nvp); 1080 ep->nvp = pre_peek_nvp; 1081 } 1082 } 1083 } 1084 1085 if (matched) 1086 return; /* explained by at least one existing FME */ 1087 1088 /* clean up closed fmes */ 1089 cfmep = ClosedFMEs; 1090 while (cfmep != NULL) { 1091 svfmep = cfmep->next; 1092 destroy_fme(cfmep); 1093 cfmep = svfmep; 1094 } 1095 ClosedFMEs = NULL; 1096 1097 if (ofmep) { 1098 out(O_ALTFP|O_NONL, "["); 
1099 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1100 out(O_ALTFP, " ADDING TO OVERFLOW FME]"); 1101 if (ffep) 1102 fmd_case_add_ereport(hdl, ofmep->fmcase, ffep); 1103 1104 return; 1105 1106 } else if (Max_fme && (Open_fme_count >= Max_fme)) { 1107 out(O_ALTFP|O_NONL, "["); 1108 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1109 out(O_ALTFP, " MAX OPEN FME REACHED]"); 1110 /* Create overflow fme */ 1111 if ((fmep = newfme(eventstring, ipp)) == NULL) { 1112 out(O_ALTFP|O_NONL, "["); 1113 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1114 out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]"); 1115 publish_undiagnosable(hdl, ffep); 1116 return; 1117 } 1118 1119 Open_fme_count++; 1120 1121 fmep->fmcase = fmd_case_open(hdl, NULL); 1122 fmep->hdl = hdl; 1123 init_fme_bufs(fmep); 1124 fmep->overflow = B_TRUE; 1125 1126 if (ffep) 1127 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1128 1129 defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100, 1130 NULL, NULL, NULL); 1131 (void) nvlist_add_string(defect, UNDIAG_REASON, UD_MAXFME); 1132 fmd_case_add_suspect(hdl, fmep->fmcase, defect); 1133 fmd_case_solve(hdl, fmep->fmcase); 1134 return; 1135 } 1136 1137 /* start a new FME */ 1138 if ((fmep = newfme(eventstring, ipp)) == NULL) { 1139 out(O_ALTFP|O_NONL, "["); 1140 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1141 out(O_ALTFP, " CANNOT DIAGNOSE]"); 1142 publish_undiagnosable(hdl, ffep); 1143 return; 1144 } 1145 1146 Open_fme_count++; 1147 1148 /* open a case */ 1149 fmep->fmcase = fmd_case_open(hdl, NULL); 1150 fmep->hdl = hdl; 1151 init_fme_bufs(fmep); 1152 1153 out(O_ALTFP|O_NONL, "["); 1154 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1155 out(O_ALTFP, " created FME%d, case %s]", fmep->id, 1156 fmd_case_uuid(hdl, fmep->fmcase)); 1157 1158 ep = fmep->e0; 1159 ASSERT(ep != NULL); 1160 1161 /* note observation */ 1162 fmep->ecurrent = ep; 1163 if (ep->count++ == 0) { 1164 /* link it into list of observations seen */ 1165 ep->observations = fmep->observations; 1166 
fmep->observations = ep; 1167 ep->nvp = evnv_dupnvl(nvl); 1168 serialize_observation(fmep, eventstring, ipp); 1169 } else { 1170 /* new payload overrides any previous */ 1171 nvlist_free(ep->nvp); 1172 ep->nvp = evnv_dupnvl(nvl); 1173 } 1174 1175 stats_counter_bump(fmep->Rcount); 1176 1177 if (ffep) { 1178 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1179 fmd_case_setprincipal(hdl, fmep->fmcase, ffep); 1180 fmep->e0r = ffep; 1181 } 1182 1183 /* give the diagnosis algorithm a shot at the new FME state */ 1184 fme_eval(fmep, ffep); 1185 } 1186 1187 void 1188 fme_status(int flags) 1189 { 1190 struct fme *fmep; 1191 1192 if (FMElist == NULL) { 1193 out(flags, "No fault management exercises underway."); 1194 return; 1195 } 1196 1197 for (fmep = FMElist; fmep; fmep = fmep->next) 1198 fme_print(flags, fmep); 1199 } 1200 1201 /* 1202 * "indent" routines used mostly for nicely formatted debug output, but also 1203 * for sanity checking for infinite recursion bugs. 1204 */ 1205 1206 #define MAX_INDENT 1024 1207 static const char *indent_s[MAX_INDENT]; 1208 static int current_indent; 1209 1210 static void 1211 indent_push(const char *s) 1212 { 1213 if (current_indent < MAX_INDENT) 1214 indent_s[current_indent++] = s; 1215 else 1216 out(O_DIE, "unexpected recursion depth (%d)", current_indent); 1217 } 1218 1219 static void 1220 indent_set(const char *s) 1221 { 1222 current_indent = 0; 1223 indent_push(s); 1224 } 1225 1226 static void 1227 indent_pop(void) 1228 { 1229 if (current_indent > 0) 1230 current_indent--; 1231 else 1232 out(O_DIE, "recursion underflow"); 1233 } 1234 1235 static void 1236 indent(void) 1237 { 1238 int i; 1239 if (!Verbose) 1240 return; 1241 for (i = 0; i < current_indent; i++) 1242 out(O_ALTFP|O_VERB|O_NONL, indent_s[i]); 1243 } 1244 1245 static int 1246 suspects_changed(struct fme *fmep) 1247 { 1248 struct event *suspects = fmep->suspects; 1249 struct event *psuspects = fmep->psuspects; 1250 1251 while (suspects != NULL && psuspects != NULL) { 1252 if 
(suspects != psuspects) 1253 return (1); 1254 suspects = suspects->suspects; 1255 psuspects = psuspects->psuspects; 1256 } 1257 1258 return (suspects != psuspects); 1259 } 1260 1261 #define SLNEW 1 1262 #define SLCHANGED 2 1263 #define SLWAIT 3 1264 #define SLDISPROVED 4 1265 1266 static void 1267 print_suspects(int circumstance, struct fme *fmep) 1268 { 1269 struct event *ep; 1270 1271 out(O_ALTFP|O_NONL, "["); 1272 if (circumstance == SLCHANGED) { 1273 out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, " 1274 "suspect list:", fmep->id, fme_state2str(fmep->state)); 1275 } else if (circumstance == SLWAIT) { 1276 out(O_ALTFP|O_NONL, "FME%d set wait timer ", fmep->id); 1277 ptree_timeval(O_ALTFP|O_NONL, &fmep->wull); 1278 } else if (circumstance == SLDISPROVED) { 1279 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id); 1280 } else { 1281 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id); 1282 } 1283 1284 if (circumstance == SLWAIT || circumstance == SLDISPROVED) { 1285 out(O_ALTFP, "]"); 1286 return; 1287 } 1288 1289 for (ep = fmep->suspects; ep; ep = ep->suspects) { 1290 out(O_ALTFP|O_NONL, " "); 1291 itree_pevent_brief(O_ALTFP|O_NONL, ep); 1292 } 1293 out(O_ALTFP, "]"); 1294 } 1295 1296 static struct node * 1297 eventprop_lookup(struct event *ep, const char *propname) 1298 { 1299 return (lut_lookup(ep->props, (void *)propname, NULL)); 1300 } 1301 1302 #define MAXDIGITIDX 23 1303 static char numbuf[MAXDIGITIDX + 1]; 1304 1305 static int 1306 node2uint(struct node *n, uint_t *valp) 1307 { 1308 struct evalue value; 1309 struct lut *globals = NULL; 1310 1311 if (n == NULL) 1312 return (1); 1313 1314 /* 1315 * check value.v since we are being asked to convert an unsigned 1316 * long long int to an unsigned int 1317 */ 1318 if (! 
eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) || 1319 value.t != UINT64 || value.v > (1ULL << 32)) 1320 return (1); 1321 1322 *valp = (uint_t)value.v; 1323 1324 return (0); 1325 } 1326 1327 static nvlist_t * 1328 node2fmri(struct node *n) 1329 { 1330 nvlist_t **pa, *f, *p; 1331 struct node *nc; 1332 uint_t depth = 0; 1333 char *numstr, *nullbyte; 1334 char *failure; 1335 int err, i; 1336 1337 /* XXX do we need to be able to handle a non-T_NAME node? */ 1338 if (n == NULL || n->t != T_NAME) 1339 return (NULL); 1340 1341 for (nc = n; nc != NULL; nc = nc->u.name.next) { 1342 if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM) 1343 break; 1344 depth++; 1345 } 1346 1347 if (nc != NULL) { 1348 /* We bailed early, something went wrong */ 1349 return (NULL); 1350 } 1351 1352 if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0) 1353 out(O_DIE|O_SYS, "alloc of fmri nvl failed"); 1354 pa = alloca(depth * sizeof (nvlist_t *)); 1355 for (i = 0; i < depth; i++) 1356 pa[i] = NULL; 1357 1358 err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC); 1359 err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION); 1360 err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, ""); 1361 err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth); 1362 if (err != 0) { 1363 failure = "basic construction of FMRI failed"; 1364 goto boom; 1365 } 1366 1367 numbuf[MAXDIGITIDX] = '\0'; 1368 nullbyte = &numbuf[MAXDIGITIDX]; 1369 i = 0; 1370 1371 for (nc = n; nc != NULL; nc = nc->u.name.next) { 1372 err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl); 1373 if (err != 0) { 1374 failure = "alloc of an hc-pair failed"; 1375 goto boom; 1376 } 1377 err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s); 1378 numstr = ulltostr(nc->u.name.child->u.ull, nullbyte); 1379 err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr); 1380 if (err != 0) { 1381 failure = "construction of an hc-pair failed"; 1382 goto boom; 1383 } 1384 pa[i++] = p; 1385 } 1386 1387 err = 
/*
 * avg -- return sum / cnt rounded to the nearest whole number, with
 * halves rounding up.
 *
 * The sum is widened to unsigned long long before it is scaled by 10;
 * scaling first, in unsigned int arithmetic, would wrap for any sum
 * above UINT_MAX / 10.  A cnt of zero yields 0 instead of dividing by
 * zero.
 */
static uint_t
avg(uint_t sum, uint_t cnt)
{
	unsigned long long tenths;

	if (cnt == 0)
		return (0);

	tenths = (unsigned long long)sum * 10 / cnt;

	return ((uint_t)(tenths / 10 + ((tenths % 10 >= 5) ? 1 : 0)));
}

/*
 * percentof -- return part as a percentage of whole, rounded to the
 * nearest whole percent, with halves rounding up.
 *
 * part is widened to unsigned long long before it is scaled by 1000;
 * scaling first, in unsigned int arithmetic, would wrap for any part
 * above UINT_MAX / 1000.  A whole of zero yields 0 instead of dividing
 * by zero.  The result is narrowed to uint8_t, so callers are expected
 * to pass part <= whole.
 */
static uint8_t
percentof(uint_t part, uint_t whole)
{
	unsigned long long permille;

	if (whole == 0)
		return (0);

	permille = (unsigned long long)part * 1000 / whole;

	return ((uint8_t)(permille / 10 + ((permille % 10 >= 5) ? 1 : 0)));
}
(trickier, since nvlist) 1450 * 1451 */ 1452 static int 1453 rslcmp(const void *a, const void *b) 1454 { 1455 struct rsl *r1 = (struct rsl *)a; 1456 struct rsl *r2 = (struct rsl *)b; 1457 int rv; 1458 1459 rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s, 1460 r2->suspect->enode->u.event.ename->u.name.s); 1461 if (rv != 0) 1462 return (rv); 1463 1464 if (r1->asru == NULL && r2->asru == NULL) 1465 return (0); 1466 if (r1->asru == NULL) 1467 return (-1); 1468 if (r2->asru == NULL) 1469 return (1); 1470 return (evnv_cmpnvl(r1->asru, r2->asru, 0)); 1471 } 1472 1473 /* 1474 * rsluniq -- given an array of rsl structures, seek out and "remove" 1475 * any duplicates. Dups are "remove"d by NULLing the suspect pointer 1476 * of the array element. Removal also means updating the number of 1477 * problems and the number of problems which are not faults. User 1478 * provides the first and last element pointers. 1479 */ 1480 static void 1481 rsluniq(struct rsl *first, struct rsl *last, int *nprobs, int *nnonf) 1482 { 1483 struct rsl *cr; 1484 1485 if (*nprobs == 1) 1486 return; 1487 1488 /* 1489 * At this point, we only expect duplicate defects. 1490 * Eversholt's diagnosis algorithm prevents duplicate 1491 * suspects, but we rewrite defects in the platform code after 1492 * the diagnosis is made, and that can introduce new 1493 * duplicates. 1494 */ 1495 while (first <= last) { 1496 if (first->suspect == NULL || !is_defect(first->suspect->t)) { 1497 first++; 1498 continue; 1499 } 1500 cr = first + 1; 1501 while (cr <= last) { 1502 if (is_defect(first->suspect->t)) { 1503 if (rslcmp(first, cr) == 0) { 1504 cr->suspect = NULL; 1505 rslfree(cr); 1506 (*nprobs)--; 1507 (*nnonf)--; 1508 } 1509 } 1510 /* 1511 * assume all defects are in order after our 1512 * sort and short circuit here with "else break" ? 
1513 */ 1514 cr++; 1515 } 1516 first++; 1517 } 1518 } 1519 1520 /* 1521 * get_resources -- for a given suspect, determine what ASRU, FRU and 1522 * RSRC nvlists should be advertised in the final suspect list. 1523 */ 1524 void 1525 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot) 1526 { 1527 struct node *asrudef, *frudef; 1528 nvlist_t *asru, *fru; 1529 nvlist_t *rsrc = NULL; 1530 char *pathstr; 1531 1532 /* 1533 * First find any ASRU and/or FRU defined in the 1534 * initial fault tree. 1535 */ 1536 asrudef = eventprop_lookup(sp, L_ASRU); 1537 frudef = eventprop_lookup(sp, L_FRU); 1538 1539 /* 1540 * Create FMRIs based on those definitions 1541 */ 1542 asru = node2fmri(asrudef); 1543 fru = node2fmri(frudef); 1544 pathstr = ipath2str(NULL, sp->ipp); 1545 1546 /* 1547 * Allow for platform translations of the FMRIs 1548 */ 1549 platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc, 1550 pathstr); 1551 1552 FREE(pathstr); 1553 rsrcs->suspect = sp; 1554 rsrcs->asru = asru; 1555 rsrcs->fru = fru; 1556 rsrcs->rsrc = rsrc; 1557 } 1558 1559 /* 1560 * trim_suspects -- prior to publishing, we may need to remove some 1561 * suspects from the list. If we're auto-closing upsets, we don't 1562 * want any of those in the published list. If the ASRUs for multiple 1563 * defects resolve to the same ASRU (driver) we only want to publish 1564 * that as a single suspect. 1565 */ 1566 static void 1567 trim_suspects(struct fme *fmep, boolean_t no_upsets, struct rsl **begin, 1568 struct rsl **end) 1569 { 1570 struct event *ep; 1571 struct rsl *rp; 1572 int rpcnt; 1573 1574 /* 1575 * First save the suspects in the psuspects, then copy back 1576 * only the ones we wish to retain. This resets nsuspects to 1577 * zero. 1578 */ 1579 rpcnt = fmep->nsuspects; 1580 save_suspects(fmep); 1581 1582 /* 1583 * allocate an array of resource pointers for the suspects. 
1584 * We may end up using less than the full allocation, but this 1585 * is a very short-lived array. publish_suspects() will free 1586 * this array when it's done using it. 1587 */ 1588 rp = *begin = MALLOC(rpcnt * sizeof (struct rsl)); 1589 bzero(rp, rpcnt * sizeof (struct rsl)); 1590 1591 /* first pass, remove any unwanted upsets and populate our array */ 1592 for (ep = fmep->psuspects; ep; ep = ep->psuspects) { 1593 if (no_upsets && is_upset(ep->t)) 1594 continue; 1595 get_resources(ep, rp, fmep->cfgdata->cooked); 1596 rp++; 1597 fmep->nsuspects++; 1598 if (!is_fault(ep->t)) 1599 fmep->nonfault++; 1600 } 1601 1602 /* if all we had was unwanted upsets, we're done */ 1603 if (fmep->nsuspects == 0) 1604 return; 1605 1606 *end = rp - 1; 1607 1608 /* sort the array */ 1609 qsort(*begin, fmep->nsuspects, sizeof (struct rsl), rslcmp); 1610 rsluniq(*begin, *end, &fmep->nsuspects, &fmep->nonfault); 1611 } 1612 1613 /* 1614 * addpayloadprop -- add a payload prop to a problem 1615 */ 1616 static void 1617 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault) 1618 { 1619 ASSERT(fault != NULL); 1620 ASSERT(lhs != NULL); 1621 ASSERT(rhs != NULL); 1622 1623 if (rhs->t == UINT64) { 1624 out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v); 1625 1626 if (nvlist_add_uint64(fault, lhs, rhs->v) != 0) 1627 out(O_DIE, 1628 "cannot add payloadprop \"%s\" to fault", lhs); 1629 } else { 1630 out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"", 1631 lhs, (char *)(uintptr_t)rhs->v); 1632 1633 if (nvlist_add_string(fault, lhs, (char *)(uintptr_t)rhs->v) != 1634 0) 1635 out(O_DIE, 1636 "cannot add payloadprop \"%s\" to fault", lhs); 1637 } 1638 } 1639 1640 static char *Istatbuf; 1641 static char *Istatbufptr; 1642 static int Istatsz; 1643 1644 /* 1645 * istataddsize -- calculate size of istat and add it to Istatsz 1646 */ 1647 /*ARGSUSED2*/ 1648 static void 1649 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg) 1650 { 1651 int val; 1652 1653 
ASSERT(lhs != NULL); 1654 ASSERT(rhs != NULL); 1655 1656 if ((val = stats_counter_value(rhs)) == 0) 1657 return; /* skip zero-valued stats */ 1658 1659 /* count up the size of the stat name */ 1660 Istatsz += ipath2strlen(lhs->ename, lhs->ipath); 1661 Istatsz++; /* for the trailing NULL byte */ 1662 1663 /* count up the size of the stat value */ 1664 Istatsz += snprintf(NULL, 0, "%d", val); 1665 Istatsz++; /* for the trailing NULL byte */ 1666 } 1667 1668 /* 1669 * istat2str -- serialize an istat, writing result to *Istatbufptr 1670 */ 1671 /*ARGSUSED2*/ 1672 static void 1673 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg) 1674 { 1675 char *str; 1676 int len; 1677 int val; 1678 1679 ASSERT(lhs != NULL); 1680 ASSERT(rhs != NULL); 1681 1682 if ((val = stats_counter_value(rhs)) == 0) 1683 return; /* skip zero-valued stats */ 1684 1685 /* serialize the stat name */ 1686 str = ipath2str(lhs->ename, lhs->ipath); 1687 len = strlen(str); 1688 1689 ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]); 1690 (void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr); 1691 Istatbufptr += len; 1692 FREE(str); 1693 *Istatbufptr++ = '\0'; 1694 1695 /* serialize the stat value */ 1696 Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr, 1697 "%d", val); 1698 *Istatbufptr++ = '\0'; 1699 1700 ASSERT(Istatbufptr <= &Istatbuf[Istatsz]); 1701 } 1702 1703 void 1704 istat_save() 1705 { 1706 if (Istat_need_save == 0) 1707 return; 1708 1709 /* figure out how big the serialzed info is */ 1710 Istatsz = 0; 1711 lut_walk(Istats, (lut_cb)istataddsize, NULL); 1712 1713 if (Istatsz == 0) { 1714 /* no stats to save */ 1715 fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS); 1716 return; 1717 } 1718 1719 /* create the serialized buffer */ 1720 Istatbufptr = Istatbuf = MALLOC(Istatsz); 1721 lut_walk(Istats, (lut_cb)istat2str, NULL); 1722 1723 /* clear out current saved stats */ 1724 fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS); 1725 1726 /* write out the new version 
*/ 1727 fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz); 1728 FREE(Istatbuf); 1729 1730 Istat_need_save = 0; 1731 } 1732 1733 int 1734 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2) 1735 { 1736 if (ent1->ename != ent2->ename) 1737 return (ent2->ename - ent1->ename); 1738 if (ent1->ipath != ent2->ipath) 1739 return ((char *)ent2->ipath - (char *)ent1->ipath); 1740 1741 return (0); 1742 } 1743 1744 /* 1745 * istat-verify -- verify the component associated with a stat still exists 1746 * 1747 * if the component no longer exists, this routine resets the stat and 1748 * returns 0. if the component still exists, it returns 1. 1749 */ 1750 static int 1751 istat_verify(struct node *snp, struct istat_entry *entp) 1752 { 1753 struct stats *statp; 1754 nvlist_t *fmri; 1755 1756 fmri = node2fmri(snp->u.event.epname); 1757 if (platform_path_exists(fmri)) { 1758 nvlist_free(fmri); 1759 return (1); 1760 } 1761 nvlist_free(fmri); 1762 1763 /* component no longer in system. zero out the associated stats */ 1764 if ((statp = (struct stats *) 1765 lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL || 1766 stats_counter_value(statp) == 0) 1767 return (0); /* stat is already reset */ 1768 1769 Istat_need_save = 1; 1770 stats_counter_reset(statp); 1771 return (0); 1772 } 1773 1774 static void 1775 istat_bump(struct node *snp, int n) 1776 { 1777 struct stats *statp; 1778 struct istat_entry ent; 1779 1780 ASSERT(snp != NULL); 1781 ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t)); 1782 ASSERT(snp->u.event.epname != NULL); 1783 1784 /* class name should be hoisted into a single stable entry */ 1785 ASSERT(snp->u.event.ename->u.name.next == NULL); 1786 ent.ename = snp->u.event.ename->u.name.s; 1787 ent.ipath = ipath(snp->u.event.epname); 1788 1789 if (!istat_verify(snp, &ent)) { 1790 /* component no longer exists in system, nothing to do */ 1791 return; 1792 } 1793 1794 if ((statp = (struct stats *) 1795 lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == 
NULL) { 1796 /* need to create the counter */ 1797 int cnt = 0; 1798 struct node *np; 1799 char *sname; 1800 char *snamep; 1801 struct istat_entry *newentp; 1802 1803 /* count up the size of the stat name */ 1804 np = snp->u.event.ename; 1805 while (np != NULL) { 1806 cnt += strlen(np->u.name.s); 1807 cnt++; /* for the '.' or '@' */ 1808 np = np->u.name.next; 1809 } 1810 np = snp->u.event.epname; 1811 while (np != NULL) { 1812 cnt += snprintf(NULL, 0, "%s%llu", 1813 np->u.name.s, np->u.name.child->u.ull); 1814 cnt++; /* for the '/' or trailing NULL byte */ 1815 np = np->u.name.next; 1816 } 1817 1818 /* build the stat name */ 1819 snamep = sname = alloca(cnt); 1820 np = snp->u.event.ename; 1821 while (np != NULL) { 1822 snamep += snprintf(snamep, &sname[cnt] - snamep, 1823 "%s", np->u.name.s); 1824 np = np->u.name.next; 1825 if (np) 1826 *snamep++ = '.'; 1827 } 1828 *snamep++ = '@'; 1829 np = snp->u.event.epname; 1830 while (np != NULL) { 1831 snamep += snprintf(snamep, &sname[cnt] - snamep, 1832 "%s%llu", np->u.name.s, np->u.name.child->u.ull); 1833 np = np->u.name.next; 1834 if (np) 1835 *snamep++ = '/'; 1836 } 1837 *snamep++ = '\0'; 1838 1839 /* create the new stat & add it to our list */ 1840 newentp = MALLOC(sizeof (*newentp)); 1841 *newentp = ent; 1842 statp = stats_new_counter(NULL, sname, 0); 1843 Istats = lut_add(Istats, (void *)newentp, (void *)statp, 1844 (lut_cmp)istat_cmp); 1845 } 1846 1847 /* if n is non-zero, set that value instead of bumping */ 1848 if (n) { 1849 stats_counter_reset(statp); 1850 stats_counter_add(statp, n); 1851 } else 1852 stats_counter_bump(statp); 1853 Istat_need_save = 1; 1854 } 1855 1856 /*ARGSUSED*/ 1857 static void 1858 istat_destructor(void *left, void *right, void *arg) 1859 { 1860 struct istat_entry *entp = (struct istat_entry *)left; 1861 struct stats *statp = (struct stats *)right; 1862 FREE(entp); 1863 stats_delete(statp); 1864 } 1865 1866 void 1867 istat_fini(void) 1868 { 1869 lut_free(Istats, istat_destructor, NULL); 
1870 } 1871 1872 static void 1873 publish_suspects(struct fme *fmep) 1874 { 1875 struct event *ep; 1876 struct rsl *srl = NULL; 1877 struct rsl *erl; 1878 struct rsl *rp; 1879 nvlist_t *fault; 1880 uint8_t cert; 1881 uint_t *frs; 1882 uint_t fravg, frsum, fr; 1883 uint_t messval; 1884 struct node *snp; 1885 int frcnt, fridx; 1886 boolean_t no_upsets = B_FALSE; 1887 boolean_t allfaulty = B_TRUE; 1888 1889 stats_counter_bump(fmep->diags); 1890 1891 /* 1892 * The current fmd interfaces don't allow us to solve a case 1893 * that's already solved. If we make a new case, what of the 1894 * ereports? We don't appear to have an interface that allows 1895 * us to access the ereports attached to a case (if we wanted 1896 * to copy the original case's ereport attachments to the new 1897 * case) and it's also a bit unclear if there would be any 1898 * problems with having ereports attached to multiple cases 1899 * and/or attaching DIAGNOSED ereports to a case. For now, 1900 * we'll just output a message. 1901 */ 1902 if (fmep->posted_suspects || 1903 fmd_case_solved(fmep->hdl, fmep->fmcase)) { 1904 out(O_ALTFP|O_NONL, "Revised diagnosis for case %s: ", 1905 fmd_case_uuid(fmep->hdl, fmep->fmcase)); 1906 for (ep = fmep->suspects; ep; ep = ep->suspects) { 1907 out(O_ALTFP|O_NONL, " "); 1908 itree_pevent_brief(O_ALTFP|O_NONL, ep); 1909 } 1910 out(O_ALTFP, NULL); 1911 return; 1912 } 1913 1914 /* 1915 * If we're auto-closing upsets, we don't want to include them 1916 * in any produced suspect lists or certainty accounting. 1917 */ 1918 if (Autoclose != NULL) 1919 if (strcmp(Autoclose, "true") == 0 || 1920 strcmp(Autoclose, "all") == 0 || 1921 strcmp(Autoclose, "upsets") == 0) 1922 no_upsets = B_TRUE; 1923 1924 trim_suspects(fmep, no_upsets, &srl, &erl); 1925 1926 /* 1927 * If the resulting suspect list has no members, we're 1928 * done. Returning here will simply close the case. 
1929 */ 1930 if (fmep->nsuspects == 0) { 1931 out(O_ALTFP, 1932 "[FME%d, case %s (all suspects are upsets)]", 1933 fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase)); 1934 FREE(srl); 1935 restore_suspects(fmep); 1936 return; 1937 } 1938 1939 /* 1940 * If the suspect list is all faults, then for a given fault, 1941 * say X of N, X's certainty is computed via: 1942 * 1943 * fitrate(X) / (fitrate(1) + ... + fitrate(N)) * 100 1944 * 1945 * If none of the suspects are faults, and there are N suspects, 1946 * the certainty of a given suspect is 100/N. 1947 * 1948 * If there are are a mixture of faults and other problems in 1949 * the suspect list, we take an average of the faults' 1950 * FITrates and treat this average as the FITrate for any 1951 * non-faults. The fitrate of any given suspect is then 1952 * computed per the first formula above. 1953 */ 1954 if (fmep->nonfault == fmep->nsuspects) { 1955 /* NO faults in the suspect list */ 1956 cert = percentof(1, fmep->nsuspects); 1957 } else { 1958 /* sum the fitrates */ 1959 frs = alloca(fmep->nsuspects * sizeof (uint_t)); 1960 fridx = frcnt = frsum = 0; 1961 1962 for (rp = srl; rp <= erl; rp++) { 1963 struct node *n; 1964 1965 if (rp->suspect == NULL) 1966 continue; 1967 if (!is_fault(rp->suspect->t)) { 1968 frs[fridx++] = 0; 1969 continue; 1970 } 1971 n = eventprop_lookup(rp->suspect, L_FITrate); 1972 if (node2uint(n, &fr) != 0) { 1973 out(O_DEBUG|O_NONL, "event "); 1974 ipath_print(O_DEBUG|O_NONL, 1975 ep->enode->u.event.ename->u.name.s, 1976 ep->ipp); 1977 out(O_DEBUG, " has no FITrate (using 1)"); 1978 fr = 1; 1979 } else if (fr == 0) { 1980 out(O_DEBUG|O_NONL, "event "); 1981 ipath_print(O_DEBUG|O_NONL, 1982 ep->enode->u.event.ename->u.name.s, 1983 ep->ipp); 1984 out(O_DEBUG, " has zero FITrate (using 1)"); 1985 fr = 1; 1986 } 1987 1988 frs[fridx++] = fr; 1989 frsum += fr; 1990 frcnt++; 1991 } 1992 fravg = avg(frsum, frcnt); 1993 for (fridx = 0; fridx < fmep->nsuspects; fridx++) 1994 if (frs[fridx] == 0) { 1995 
frs[fridx] = fravg; 1996 frsum += fravg; 1997 } 1998 } 1999 2000 /* Add them in reverse order of our sort, as fmd reverses order */ 2001 for (rp = erl; rp >= srl; rp--) { 2002 if (rp->suspect == NULL) 2003 continue; 2004 if (!is_fault(rp->suspect->t)) 2005 allfaulty = B_FALSE; 2006 if (fmep->nonfault != fmep->nsuspects) 2007 cert = percentof(frs[--fridx], frsum); 2008 fault = fmd_nvl_create_fault(fmep->hdl, 2009 rp->suspect->enode->u.event.ename->u.name.s, 2010 cert, 2011 rp->asru, 2012 rp->fru, 2013 rp->rsrc); 2014 if (fault == NULL) 2015 out(O_DIE, "fault creation failed"); 2016 /* if "message" property exists, add it to the fault */ 2017 if (node2uint(eventprop_lookup(rp->suspect, L_message), 2018 &messval) == 0) { 2019 2020 out(O_ALTFP, 2021 "[FME%d, %s adds message=%d to suspect list]", 2022 fmep->id, 2023 rp->suspect->enode->u.event.ename->u.name.s, 2024 messval); 2025 if (nvlist_add_boolean_value(fault, 2026 FM_SUSPECT_MESSAGE, 2027 (messval) ? B_TRUE : B_FALSE) != 0) { 2028 out(O_DIE, "cannot add no-message to fault"); 2029 } 2030 } 2031 /* add any payload properties */ 2032 lut_walk(rp->suspect->payloadprops, 2033 (lut_cb)addpayloadprop, (void *)fault); 2034 fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault); 2035 rp->suspect->fault = fault; 2036 rslfree(rp); 2037 2038 /* 2039 * If "action" property exists, evaluate it; this must be done 2040 * before the dupclose check below since some actions may 2041 * modify the asru to be used in fmd_nvl_fmri_faulty. This 2042 * needs to be restructured if any new actions are introduced 2043 * that have effects that we do not want to be visible if 2044 * we decide not to publish in the dupclose check below. 
2045 */ 2046 if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) { 2047 struct evalue evalue; 2048 2049 out(O_ALTFP|O_NONL, 2050 "[FME%d, %s action ", fmep->id, 2051 rp->suspect->enode->u.event.ename->u.name.s); 2052 ptree_name_iter(O_ALTFP|O_NONL, snp); 2053 out(O_ALTFP, "]"); 2054 Action_nvl = fault; 2055 (void) eval_expr(snp, NULL, NULL, NULL, NULL, 2056 NULL, 0, &evalue); 2057 } 2058 2059 /* 2060 * if "dupclose" tunable is set, check if the asru is 2061 * already marked as "faulty". 2062 */ 2063 if (Dupclose && allfaulty) { 2064 nvlist_t *asru; 2065 2066 out(O_ALTFP|O_VERB, "FMD%d dupclose check ", fmep->id); 2067 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect); 2068 out(O_ALTFP|O_VERB|O_NONL, " "); 2069 if (nvlist_lookup_nvlist(fault, 2070 FM_FAULT_ASRU, &asru) != 0) { 2071 out(O_ALTFP|O_VERB, "NULL asru"); 2072 allfaulty = B_FALSE; 2073 } else if (fmd_nvl_fmri_faulty(fmep->hdl, asru)) { 2074 out(O_ALTFP|O_VERB, "faulty"); 2075 } else { 2076 out(O_ALTFP|O_VERB, "not faulty"); 2077 allfaulty = B_FALSE; 2078 } 2079 } 2080 2081 } 2082 2083 /* 2084 * Close the case if all asrus are already known to be faulty and if 2085 * Dupclose is enabled. Otherwise we are going to publish so take 2086 * any pre-publication actions. 
2087 */ 2088 if (Dupclose && allfaulty) { 2089 out(O_ALTFP, "[dupclose FME%d, case %s]", fmep->id, 2090 fmd_case_uuid(fmep->hdl, fmep->fmcase)); 2091 fmd_case_close(fmep->hdl, fmep->fmcase); 2092 } else { 2093 for (rp = erl; rp >= srl; rp--) { 2094 struct event *suspect = rp->suspect; 2095 2096 if (suspect == NULL) 2097 continue; 2098 2099 fault = suspect->fault; 2100 2101 /* if "count" exists, increment the appropriate stat */ 2102 if ((snp = eventprop_lookup(suspect, 2103 L_count)) != NULL) { 2104 out(O_ALTFP|O_NONL, 2105 "[FME%d, %s count ", fmep->id, 2106 suspect->enode->u.event.ename->u.name.s); 2107 ptree_name_iter(O_ALTFP|O_NONL, snp); 2108 out(O_ALTFP, "]"); 2109 istat_bump(snp, 0); 2110 2111 } 2112 } 2113 istat_save(); /* write out any istat changes */ 2114 2115 out(O_ALTFP, "[solving FME%d, case %s]", fmep->id, 2116 fmd_case_uuid(fmep->hdl, fmep->fmcase)); 2117 fmd_case_solve(fmep->hdl, fmep->fmcase); 2118 } 2119 2120 /* 2121 * revert to the original suspect list 2122 */ 2123 FREE(srl); 2124 restore_suspects(fmep); 2125 } 2126 2127 static void 2128 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep) 2129 { 2130 struct case_list *newcase; 2131 nvlist_t *defect; 2132 2133 out(O_ALTFP, 2134 "[undiagnosable ereport received, " 2135 "creating and closing a new case (%s)]", 2136 Undiag_reason ? 
Undiag_reason : "reason not provided"); 2137 2138 newcase = MALLOC(sizeof (struct case_list)); 2139 newcase->next = NULL; 2140 2141 newcase->fmcase = fmd_case_open(hdl, NULL); 2142 if (Undiagablecaselist != NULL) 2143 newcase->next = Undiagablecaselist; 2144 Undiagablecaselist = newcase; 2145 2146 if (ffep != NULL) 2147 fmd_case_add_ereport(hdl, newcase->fmcase, ffep); 2148 2149 defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100, 2150 NULL, NULL, NULL); 2151 if (Undiag_reason != NULL) 2152 (void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason); 2153 fmd_case_add_suspect(hdl, newcase->fmcase, defect); 2154 2155 fmd_case_solve(hdl, newcase->fmcase); 2156 fmd_case_close(hdl, newcase->fmcase); 2157 } 2158 2159 static void 2160 fme_undiagnosable(struct fme *f) 2161 { 2162 nvlist_t *defect; 2163 2164 out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]", 2165 f->id, fmd_case_uuid(f->hdl, f->fmcase), 2166 Undiag_reason ? Undiag_reason : "undiagnosable"); 2167 2168 defect = fmd_nvl_create_fault(f->hdl, UNDIAGNOSABLE_DEFECT, 100, 2169 NULL, NULL, NULL); 2170 if (Undiag_reason != NULL) 2171 (void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason); 2172 fmd_case_add_suspect(f->hdl, f->fmcase, defect); 2173 fmd_case_solve(f->hdl, f->fmcase); 2174 destroy_fme_bufs(f); 2175 fmd_case_close(f->hdl, f->fmcase); 2176 } 2177 2178 /* 2179 * fme_close_case 2180 * 2181 * Find the requested case amongst our fmes and close it. Free up 2182 * the related fme. 
2183 */ 2184 void 2185 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase) 2186 { 2187 struct case_list *ucasep, *prevcasep = NULL; 2188 struct fme *prev = NULL; 2189 struct fme *fmep; 2190 2191 for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) { 2192 if (fmcase != ucasep->fmcase) { 2193 prevcasep = ucasep; 2194 continue; 2195 } 2196 2197 if (prevcasep == NULL) 2198 Undiagablecaselist = Undiagablecaselist->next; 2199 else 2200 prevcasep->next = ucasep->next; 2201 2202 FREE(ucasep); 2203 return; 2204 } 2205 2206 for (fmep = FMElist; fmep; fmep = fmep->next) { 2207 if (fmep->hdl == hdl && fmep->fmcase == fmcase) 2208 break; 2209 prev = fmep; 2210 } 2211 2212 if (fmep == NULL) { 2213 out(O_WARN, "Eft asked to close unrecognized case [%s].", 2214 fmd_case_uuid(hdl, fmcase)); 2215 return; 2216 } 2217 2218 if (EFMElist == fmep) 2219 EFMElist = prev; 2220 2221 if (prev == NULL) 2222 FMElist = FMElist->next; 2223 else 2224 prev->next = fmep->next; 2225 2226 fmep->next = NULL; 2227 2228 /* Get rid of any timer this fme has set */ 2229 if (fmep->wull != 0) 2230 fmd_timer_remove(fmep->hdl, fmep->timer); 2231 2232 if (ClosedFMEs == NULL) { 2233 ClosedFMEs = fmep; 2234 } else { 2235 fmep->next = ClosedFMEs; 2236 ClosedFMEs = fmep; 2237 } 2238 2239 Open_fme_count--; 2240 2241 /* See if we can close the overflow FME */ 2242 if (Open_fme_count <= Max_fme) { 2243 for (fmep = FMElist; fmep; fmep = fmep->next) { 2244 if (fmep->overflow && !(fmd_case_closed(fmep->hdl, 2245 fmep->fmcase))) 2246 break; 2247 } 2248 2249 if (fmep != NULL) 2250 fmd_case_close(fmep->hdl, fmep->fmcase); 2251 } 2252 } 2253 2254 /* 2255 * fme_set_timer() 2256 * If the time we need to wait for the given FME is less than the 2257 * current timer, kick that old timer out and establish a new one. 
2258 */ 2259 static int 2260 fme_set_timer(struct fme *fmep, unsigned long long wull) 2261 { 2262 out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait "); 2263 ptree_timeval(O_ALTFP|O_VERB, &wull); 2264 2265 if (wull <= fmep->pull) { 2266 out(O_ALTFP|O_VERB|O_NONL, "already have waited at least "); 2267 ptree_timeval(O_ALTFP|O_VERB, &fmep->pull); 2268 out(O_ALTFP|O_VERB, NULL); 2269 /* we've waited at least wull already, don't need timer */ 2270 return (0); 2271 } 2272 2273 out(O_ALTFP|O_VERB|O_NONL, " currently "); 2274 if (fmep->wull != 0) { 2275 out(O_ALTFP|O_VERB|O_NONL, "waiting "); 2276 ptree_timeval(O_ALTFP|O_VERB, &fmep->wull); 2277 out(O_ALTFP|O_VERB, NULL); 2278 } else { 2279 out(O_ALTFP|O_VERB|O_NONL, "not waiting"); 2280 out(O_ALTFP|O_VERB, NULL); 2281 } 2282 2283 if (fmep->wull != 0) 2284 if (wull >= fmep->wull) 2285 /* New timer would fire later than established timer */ 2286 return (0); 2287 2288 if (fmep->wull != 0) { 2289 fmd_timer_remove(fmep->hdl, fmep->timer); 2290 if (fmep->timer == fmep->htid) { 2291 out(O_ALTFP, 2292 "[stopped hesitating FME%d, case %s]", 2293 fmep->id, 2294 fmd_case_uuid(fmep->hdl, 2295 fmep->fmcase)); 2296 fmep->htid = 0; 2297 } 2298 } 2299 2300 fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep, 2301 fmep->e0r, wull); 2302 out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer); 2303 fmep->wull = wull; 2304 return (1); 2305 } 2306 2307 void 2308 fme_timer_fired(struct fme *fmep, id_t tid) 2309 { 2310 struct fme *ffmep = NULL; 2311 2312 for (ffmep = FMElist; ffmep; ffmep = ffmep->next) 2313 if (ffmep == fmep) 2314 break; 2315 2316 if (ffmep == NULL) { 2317 out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.", 2318 (void *)fmep); 2319 return; 2320 } 2321 2322 out(O_ALTFP, "Timer fired %lx %lx", tid, fmep->htid); 2323 if (tid != fmep->htid) { 2324 /* 2325 * normal timer (not the hesitation timer) 2326 */ 2327 fmep->pull = fmep->wull; 2328 fmep->wull = 0; 2329 fmd_buf_write(fmep->hdl, fmep->fmcase, 2330 
WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull)); 2331 /* 2332 * no point in heistating if we've already waited. 2333 */ 2334 fmep->hesitated = 1; 2335 } else { 2336 fmep->hesitated = 1; 2337 } 2338 fme_eval(fmep, fmep->e0r); 2339 } 2340 2341 /* 2342 * Preserve the fme's suspect list in its psuspects list, NULLing the 2343 * suspects list in the meantime. 2344 */ 2345 static void 2346 save_suspects(struct fme *fmep) 2347 { 2348 struct event *ep; 2349 struct event *nextep; 2350 2351 /* zero out the previous suspect list */ 2352 for (ep = fmep->psuspects; ep; ep = nextep) { 2353 nextep = ep->psuspects; 2354 ep->psuspects = NULL; 2355 } 2356 fmep->psuspects = NULL; 2357 2358 /* zero out the suspect list, copying it to previous suspect list */ 2359 fmep->psuspects = fmep->suspects; 2360 for (ep = fmep->suspects; ep; ep = nextep) { 2361 nextep = ep->suspects; 2362 ep->psuspects = ep->suspects; 2363 ep->suspects = NULL; 2364 ep->is_suspect = 0; 2365 } 2366 fmep->suspects = NULL; 2367 fmep->nsuspects = 0; 2368 fmep->nonfault = 0; 2369 } 2370 2371 /* 2372 * Retrieve the fme's suspect list from its psuspects list. 
2373 */ 2374 static void 2375 restore_suspects(struct fme *fmep) 2376 { 2377 struct event *ep; 2378 struct event *nextep; 2379 2380 fmep->nsuspects = fmep->nonfault = 0; 2381 fmep->suspects = fmep->psuspects; 2382 for (ep = fmep->psuspects; ep; ep = nextep) { 2383 fmep->nsuspects++; 2384 if (!is_fault(ep->t)) 2385 fmep->nonfault++; 2386 nextep = ep->psuspects; 2387 ep->suspects = ep->psuspects; 2388 } 2389 } 2390 2391 /* 2392 * this is what we use to call the Emrys prototype code instead of main() 2393 */ 2394 static void 2395 fme_eval(struct fme *fmep, fmd_event_t *ffep) 2396 { 2397 struct event *ep; 2398 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 2399 2400 save_suspects(fmep); 2401 2402 out(O_ALTFP|O_VERB, "Evaluate FME %d", fmep->id); 2403 indent_set(" "); 2404 2405 lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep); 2406 fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay); 2407 2408 out(O_ALTFP|O_VERB|O_NONL, "FME%d state: %s, suspect list:", fmep->id, 2409 fme_state2str(fmep->state)); 2410 for (ep = fmep->suspects; ep; ep = ep->suspects) { 2411 out(O_ALTFP|O_VERB|O_NONL, " "); 2412 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2413 } 2414 out(O_ALTFP|O_VERB, NULL); 2415 2416 if (fmep->posted_suspects) { 2417 /* 2418 * this FME has already posted a diagnosis, so see if 2419 * the event changed the diagnosis and print a warning 2420 * if it did. 2421 * 2422 */ 2423 if (suspects_changed(fmep)) { 2424 print_suspects(SLCHANGED, fmep); 2425 publish_suspects(fmep); 2426 } 2427 } else { 2428 switch (fmep->state) { 2429 case FME_CREDIBLE: 2430 /* 2431 * if the suspect list contains any upsets, we 2432 * turn off the hesitation logic (by setting 2433 * the hesitate flag which normally indicates 2434 * we've already done the hesitate logic). 2435 * this is done because hesitating with upsets 2436 * causes us to explain away additional soft errors 2437 * while the upset FME stays open. 
2438 */ 2439 if (fmep->hesitated == 0) { 2440 struct event *s; 2441 2442 for (s = fmep->suspects; s; s = s->suspects) { 2443 if (s->t == N_UPSET) { 2444 fmep->hesitated = 1; 2445 break; 2446 } 2447 } 2448 } 2449 2450 if (Hesitate && 2451 fmep->suspects != NULL && 2452 fmep->suspects->suspects != NULL && 2453 fmep->hesitated == 0) { 2454 /* 2455 * about to publish multi-entry suspect list, 2456 * set the hesitation timer if not already set. 2457 */ 2458 if (fmep->htid == 0) { 2459 out(O_ALTFP|O_NONL, 2460 "[hesitate FME%d, case %s ", 2461 fmep->id, 2462 fmd_case_uuid(fmep->hdl, 2463 fmep->fmcase)); 2464 ptree_timeval(O_ALTFP|O_NONL, 2465 (unsigned long long *)&Hesitate); 2466 out(O_ALTFP, "]"); 2467 if (fme_set_timer(fmep, Hesitate)) 2468 fmep->htid = fmep->timer; 2469 } else { 2470 out(O_ALTFP, 2471 "[still hesitating FME%d, case %s]", 2472 fmep->id, 2473 fmd_case_uuid(fmep->hdl, 2474 fmep->fmcase)); 2475 } 2476 } else { 2477 print_suspects(SLNEW, fmep); 2478 (void) upsets_eval(fmep, ffep); 2479 publish_suspects(fmep); 2480 fmep->posted_suspects = 1; 2481 fmd_buf_write(fmep->hdl, fmep->fmcase, 2482 WOBUF_POSTD, 2483 (void *)&fmep->posted_suspects, 2484 sizeof (fmep->posted_suspects)); 2485 } 2486 break; 2487 2488 case FME_WAIT: 2489 /* 2490 * singleton suspect list implies 2491 * no point in waiting 2492 */ 2493 if (fmep->suspects && 2494 fmep->suspects->suspects == NULL) { 2495 print_suspects(SLNEW, fmep); 2496 (void) upsets_eval(fmep, ffep); 2497 publish_suspects(fmep); 2498 fmep->posted_suspects = 1; 2499 fmd_buf_write(fmep->hdl, fmep->fmcase, 2500 WOBUF_POSTD, 2501 (void *)&fmep->posted_suspects, 2502 sizeof (fmep->posted_suspects)); 2503 fmep->state = FME_CREDIBLE; 2504 } else { 2505 ASSERT(my_delay > fmep->ull); 2506 (void) fme_set_timer(fmep, my_delay); 2507 print_suspects(SLWAIT, fmep); 2508 } 2509 break; 2510 2511 case FME_DISPROVED: 2512 print_suspects(SLDISPROVED, fmep); 2513 Undiag_reason = UD_UNSOLVD; 2514 fme_undiagnosable(fmep); 2515 break; 2516 } 
2517 } 2518 2519 if (fmep->posted_suspects == 1 && Autoclose != NULL) { 2520 int doclose = 0; 2521 2522 if (strcmp(Autoclose, "true") == 0 || 2523 strcmp(Autoclose, "all") == 0) 2524 doclose = 1; 2525 2526 if (strcmp(Autoclose, "upsets") == 0) { 2527 doclose = 1; 2528 for (ep = fmep->suspects; ep; ep = ep->suspects) { 2529 if (ep->t != N_UPSET) { 2530 doclose = 0; 2531 break; 2532 } 2533 } 2534 } 2535 2536 if (doclose) { 2537 out(O_ALTFP, "[closing FME%d, case %s (autoclose)]", 2538 fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase)); 2539 2540 destroy_fme_bufs(fmep); 2541 fmd_case_close(fmep->hdl, fmep->fmcase); 2542 } 2543 } 2544 itree_prune(fmep->eventtree); 2545 } 2546 2547 static void indent(void); 2548 static int triggered(struct fme *fmep, struct event *ep, int mark); 2549 static enum fme_state effects_test(struct fme *fmep, 2550 struct event *fault_event, unsigned long long at_latest_by, 2551 unsigned long long *pdelay); 2552 static enum fme_state requirements_test(struct fme *fmep, struct event *ep, 2553 unsigned long long at_latest_by, unsigned long long *pdelay); 2554 static enum fme_state causes_test(struct fme *fmep, struct event *ep, 2555 unsigned long long at_latest_by, unsigned long long *pdelay); 2556 2557 static int 2558 checkconstraints(struct fme *fmep, struct arrow *arrowp) 2559 { 2560 struct constraintlist *ctp; 2561 struct evalue value; 2562 2563 if (arrowp->forever_false) { 2564 char *sep = ""; 2565 indent(); 2566 out(O_ALTFP|O_VERB|O_NONL, " Forever false constraint: "); 2567 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) { 2568 out(O_ALTFP|O_VERB|O_NONL, sep); 2569 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0); 2570 sep = ", "; 2571 } 2572 out(O_ALTFP|O_VERB, NULL); 2573 return (0); 2574 } 2575 2576 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) { 2577 if (eval_expr(ctp->cnode, NULL, NULL, 2578 &fmep->globals, fmep->cfgdata->cooked, 2579 arrowp, 0, &value)) { 2580 /* evaluation successful */ 2581 if (value.t == 
UNDEFINED || value.v == 0) { 2582 /* known false */ 2583 arrowp->forever_false = 1; 2584 indent(); 2585 out(O_ALTFP|O_VERB|O_NONL, 2586 " False constraint: "); 2587 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0); 2588 out(O_ALTFP|O_VERB, NULL); 2589 return (0); 2590 } 2591 } else { 2592 /* evaluation unsuccessful -- unknown value */ 2593 indent(); 2594 out(O_ALTFP|O_VERB|O_NONL, 2595 " Deferred constraint: "); 2596 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0); 2597 out(O_ALTFP|O_VERB, NULL); 2598 return (2); 2599 } 2600 } 2601 /* known true */ 2602 return (1); 2603 } 2604 2605 static int 2606 triggered(struct fme *fmep, struct event *ep, int mark) 2607 { 2608 struct bubble *bp; 2609 struct arrowlist *ap; 2610 int count = 0; 2611 2612 stats_counter_bump(fmep->Tcallcount); 2613 for (bp = itree_next_bubble(ep, NULL); bp; 2614 bp = itree_next_bubble(ep, bp)) { 2615 if (bp->t != B_TO) 2616 continue; 2617 for (ap = itree_next_arrow(bp, NULL); ap; 2618 ap = itree_next_arrow(bp, ap)) { 2619 /* check count of marks against K in the bubble */ 2620 if ((ap->arrowp->mark & mark) && 2621 ++count >= bp->nork) 2622 return (1); 2623 } 2624 } 2625 return (0); 2626 } 2627 2628 static int 2629 mark_arrows(struct fme *fmep, struct event *ep, int mark, 2630 unsigned long long at_latest_by, unsigned long long *pdelay, int keep) 2631 { 2632 struct bubble *bp; 2633 struct arrowlist *ap; 2634 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 2635 unsigned long long my_delay; 2636 enum fme_state result; 2637 int retval = 0; 2638 2639 for (bp = itree_next_bubble(ep, NULL); bp; 2640 bp = itree_next_bubble(ep, bp)) { 2641 if (bp->t != B_FROM) 2642 continue; 2643 stats_counter_bump(fmep->Marrowcount); 2644 for (ap = itree_next_arrow(bp, NULL); ap; 2645 ap = itree_next_arrow(bp, ap)) { 2646 struct event *ep2 = ap->arrowp->head->myevent; 2647 /* 2648 * if we're clearing marks, we can avoid doing 2649 * all that work evaluating constraints. 
2650 */ 2651 if (mark == 0) { 2652 ap->arrowp->mark &= ~EFFECTS_COUNTER; 2653 if (keep && (ep2->cached_state & 2654 (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT))) 2655 ep2->keep_in_tree = 1; 2656 ep2->cached_state &= 2657 ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT); 2658 (void) mark_arrows(fmep, ep2, mark, 0, NULL, 2659 keep); 2660 continue; 2661 } 2662 if (ep2->cached_state & REQMNTS_DISPROVED) { 2663 indent(); 2664 out(O_ALTFP|O_VERB|O_NONL, 2665 " ALREADY DISPROVED "); 2666 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2667 out(O_ALTFP|O_VERB, NULL); 2668 continue; 2669 } 2670 if (ep2->cached_state & WAIT_EFFECT) { 2671 indent(); 2672 out(O_ALTFP|O_VERB|O_NONL, 2673 " ALREADY EFFECTS WAIT "); 2674 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2675 out(O_ALTFP|O_VERB, NULL); 2676 continue; 2677 } 2678 if (ep2->cached_state & CREDIBLE_EFFECT) { 2679 indent(); 2680 out(O_ALTFP|O_VERB|O_NONL, 2681 " ALREADY EFFECTS CREDIBLE "); 2682 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2683 out(O_ALTFP|O_VERB, NULL); 2684 continue; 2685 } 2686 if ((ep2->cached_state & PARENT_WAIT) && 2687 (mark & PARENT_WAIT)) { 2688 indent(); 2689 out(O_ALTFP|O_VERB|O_NONL, 2690 " ALREADY PARENT EFFECTS WAIT "); 2691 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2692 out(O_ALTFP|O_VERB, NULL); 2693 continue; 2694 } 2695 platform_set_payloadnvp(ep2->nvp); 2696 if (checkconstraints(fmep, ap->arrowp) == 0) { 2697 platform_set_payloadnvp(NULL); 2698 indent(); 2699 out(O_ALTFP|O_VERB|O_NONL, 2700 " CONSTRAINTS FAIL "); 2701 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2702 out(O_ALTFP|O_VERB, NULL); 2703 continue; 2704 } 2705 platform_set_payloadnvp(NULL); 2706 ap->arrowp->mark |= EFFECTS_COUNTER; 2707 if (!triggered(fmep, ep2, EFFECTS_COUNTER)) { 2708 indent(); 2709 out(O_ALTFP|O_VERB|O_NONL, 2710 " K-COUNT NOT YET MET "); 2711 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2712 out(O_ALTFP|O_VERB, NULL); 2713 continue; 2714 } 2715 ep2->cached_state &= ~PARENT_WAIT; 2716 result = 
requirements_test(fmep, ep2, at_latest_by + 2717 ap->arrowp->maxdelay, 2718 &my_delay); 2719 if (result == FME_WAIT) { 2720 retval = WAIT_EFFECT; 2721 if (overall_delay > my_delay) 2722 overall_delay = my_delay; 2723 ep2->cached_state |= WAIT_EFFECT; 2724 indent(); 2725 out(O_ALTFP|O_VERB|O_NONL, " EFFECTS WAIT "); 2726 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2727 out(O_ALTFP|O_VERB, NULL); 2728 indent_push(" E"); 2729 if (mark_arrows(fmep, ep2, PARENT_WAIT, 2730 at_latest_by, &my_delay, 0) == 2731 WAIT_EFFECT) { 2732 retval = WAIT_EFFECT; 2733 if (overall_delay > my_delay) 2734 overall_delay = my_delay; 2735 } 2736 indent_pop(); 2737 } else if (result == FME_DISPROVED) { 2738 indent(); 2739 out(O_ALTFP|O_VERB|O_NONL, 2740 " EFFECTS DISPROVED "); 2741 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2742 out(O_ALTFP|O_VERB, NULL); 2743 } else { 2744 ep2->cached_state |= mark; 2745 indent(); 2746 if (mark == CREDIBLE_EFFECT) 2747 out(O_ALTFP|O_VERB|O_NONL, 2748 " EFFECTS CREDIBLE "); 2749 else 2750 out(O_ALTFP|O_VERB|O_NONL, 2751 " PARENT EFFECTS WAIT "); 2752 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2753 out(O_ALTFP|O_VERB, NULL); 2754 indent_push(" E"); 2755 if (mark_arrows(fmep, ep2, mark, at_latest_by, 2756 &my_delay, 0) == WAIT_EFFECT) { 2757 retval = WAIT_EFFECT; 2758 if (overall_delay > my_delay) 2759 overall_delay = my_delay; 2760 } 2761 indent_pop(); 2762 } 2763 } 2764 } 2765 if (retval == WAIT_EFFECT) 2766 *pdelay = overall_delay; 2767 return (retval); 2768 } 2769 2770 static enum fme_state 2771 effects_test(struct fme *fmep, struct event *fault_event, 2772 unsigned long long at_latest_by, unsigned long long *pdelay) 2773 { 2774 struct event *error_event; 2775 enum fme_state return_value = FME_CREDIBLE; 2776 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 2777 unsigned long long my_delay; 2778 2779 stats_counter_bump(fmep->Ecallcount); 2780 indent_push(" E"); 2781 indent(); 2782 out(O_ALTFP|O_VERB|O_NONL, "->"); 2783 
itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event); 2784 out(O_ALTFP|O_VERB, NULL); 2785 2786 (void) mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by, 2787 &my_delay, 0); 2788 for (error_event = fmep->observations; 2789 error_event; error_event = error_event->observations) { 2790 indent(); 2791 out(O_ALTFP|O_VERB|O_NONL, " "); 2792 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event); 2793 if (!(error_event->cached_state & CREDIBLE_EFFECT)) { 2794 if (error_event->cached_state & 2795 (PARENT_WAIT|WAIT_EFFECT)) { 2796 return_value = FME_WAIT; 2797 if (overall_delay > my_delay) 2798 overall_delay = my_delay; 2799 out(O_ALTFP|O_VERB, " NOT YET triggered"); 2800 continue; 2801 } 2802 return_value = FME_DISPROVED; 2803 out(O_ALTFP|O_VERB, " NOT triggered"); 2804 break; 2805 } else { 2806 out(O_ALTFP|O_VERB, " triggered"); 2807 } 2808 } 2809 if (return_value == FME_DISPROVED) { 2810 (void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0); 2811 } else { 2812 fault_event->keep_in_tree = 1; 2813 (void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1); 2814 } 2815 2816 indent(); 2817 out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ", 2818 fme_state2str(return_value)); 2819 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event); 2820 out(O_ALTFP|O_VERB, NULL); 2821 indent_pop(); 2822 if (return_value == FME_WAIT) 2823 *pdelay = overall_delay; 2824 return (return_value); 2825 } 2826 2827 static enum fme_state 2828 requirements_test(struct fme *fmep, struct event *ep, 2829 unsigned long long at_latest_by, unsigned long long *pdelay) 2830 { 2831 int waiting_events; 2832 int credible_events; 2833 int deferred_events; 2834 enum fme_state return_value = FME_CREDIBLE; 2835 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 2836 unsigned long long arrow_delay; 2837 unsigned long long my_delay; 2838 struct event *ep2; 2839 struct bubble *bp; 2840 struct arrowlist *ap; 2841 2842 if (ep->cached_state & REQMNTS_CREDIBLE) { 2843 indent(); 2844 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS 
ALREADY CREDIBLE "); 2845 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2846 out(O_ALTFP|O_VERB, NULL); 2847 return (FME_CREDIBLE); 2848 } 2849 if (ep->cached_state & REQMNTS_DISPROVED) { 2850 indent(); 2851 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY DISPROVED "); 2852 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2853 out(O_ALTFP|O_VERB, NULL); 2854 return (FME_DISPROVED); 2855 } 2856 if (ep->cached_state & REQMNTS_WAIT) { 2857 indent(); 2858 *pdelay = ep->cached_delay; 2859 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY WAIT "); 2860 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2861 out(O_ALTFP|O_VERB|O_NONL, ", wait for: "); 2862 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 2863 out(O_ALTFP|O_VERB, NULL); 2864 return (FME_WAIT); 2865 } 2866 stats_counter_bump(fmep->Rcallcount); 2867 indent_push(" R"); 2868 indent(); 2869 out(O_ALTFP|O_VERB|O_NONL, "->"); 2870 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2871 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: "); 2872 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 2873 out(O_ALTFP|O_VERB, NULL); 2874 2875 if (ep->t == N_EREPORT) { 2876 if (ep->count == 0) { 2877 if (fmep->pull >= at_latest_by) { 2878 return_value = FME_DISPROVED; 2879 } else { 2880 ep->cached_delay = *pdelay = at_latest_by; 2881 return_value = FME_WAIT; 2882 } 2883 } 2884 2885 indent(); 2886 switch (return_value) { 2887 case FME_CREDIBLE: 2888 ep->cached_state |= REQMNTS_CREDIBLE; 2889 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE "); 2890 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2891 break; 2892 case FME_DISPROVED: 2893 ep->cached_state |= REQMNTS_DISPROVED; 2894 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED "); 2895 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2896 break; 2897 case FME_WAIT: 2898 ep->cached_state |= REQMNTS_WAIT; 2899 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT "); 2900 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2901 out(O_ALTFP|O_VERB|O_NONL, " to "); 2902 ptree_timeval(O_ALTFP|O_VERB|O_NONL, 
&at_latest_by); 2903 break; 2904 default: 2905 out(O_DIE, "requirements_test: unexpected fme_state"); 2906 break; 2907 } 2908 out(O_ALTFP|O_VERB, NULL); 2909 indent_pop(); 2910 2911 return (return_value); 2912 } 2913 2914 /* this event is not a report, descend the tree */ 2915 for (bp = itree_next_bubble(ep, NULL); bp; 2916 bp = itree_next_bubble(ep, bp)) { 2917 int n; 2918 2919 if (bp->t != B_FROM) 2920 continue; 2921 2922 n = bp->nork; 2923 2924 credible_events = 0; 2925 waiting_events = 0; 2926 deferred_events = 0; 2927 arrow_delay = TIMEVAL_EVENTUALLY; 2928 /* 2929 * n is -1 for 'A' so adjust it. 2930 * XXX just count up the arrows for now. 2931 */ 2932 if (n < 0) { 2933 n = 0; 2934 for (ap = itree_next_arrow(bp, NULL); ap; 2935 ap = itree_next_arrow(bp, ap)) 2936 n++; 2937 indent(); 2938 out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n); 2939 } else { 2940 indent(); 2941 out(O_ALTFP|O_VERB, " Bubble N=%d", n); 2942 } 2943 2944 if (n == 0) 2945 continue; 2946 if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) { 2947 for (ap = itree_next_arrow(bp, NULL); ap; 2948 ap = itree_next_arrow(bp, ap)) { 2949 ep2 = ap->arrowp->head->myevent; 2950 platform_set_payloadnvp(ep2->nvp); 2951 if (checkconstraints(fmep, ap->arrowp) == 0) { 2952 /* 2953 * if any arrow is invalidated by the 2954 * constraints, then we should elide the 2955 * whole bubble to be consistant with 2956 * the tree creation time behaviour 2957 */ 2958 bp->mark |= BUBBLE_ELIDED; 2959 platform_set_payloadnvp(NULL); 2960 break; 2961 } 2962 platform_set_payloadnvp(NULL); 2963 } 2964 } 2965 if (bp->mark & BUBBLE_ELIDED) 2966 continue; 2967 bp->mark |= BUBBLE_OK; 2968 for (ap = itree_next_arrow(bp, NULL); ap; 2969 ap = itree_next_arrow(bp, ap)) { 2970 ep2 = ap->arrowp->head->myevent; 2971 if (n <= credible_events) 2972 break; 2973 2974 ap->arrowp->mark |= REQMNTS_COUNTER; 2975 if (triggered(fmep, ep2, REQMNTS_COUNTER)) 2976 /* XXX adding max timevals! 
*/ 2977 switch (requirements_test(fmep, ep2, 2978 at_latest_by + ap->arrowp->maxdelay, 2979 &my_delay)) { 2980 case FME_DEFERRED: 2981 deferred_events++; 2982 break; 2983 case FME_CREDIBLE: 2984 credible_events++; 2985 break; 2986 case FME_DISPROVED: 2987 break; 2988 case FME_WAIT: 2989 if (my_delay < arrow_delay) 2990 arrow_delay = my_delay; 2991 waiting_events++; 2992 break; 2993 default: 2994 out(O_DIE, 2995 "Bug in requirements_test."); 2996 } 2997 else 2998 deferred_events++; 2999 } 3000 indent(); 3001 out(O_ALTFP|O_VERB, " Credible: %d Waiting %d", 3002 credible_events + deferred_events, waiting_events); 3003 if (credible_events + deferred_events + waiting_events < n) { 3004 /* Can never meet requirements */ 3005 ep->cached_state |= REQMNTS_DISPROVED; 3006 indent(); 3007 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED "); 3008 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3009 out(O_ALTFP|O_VERB, NULL); 3010 indent_pop(); 3011 return (FME_DISPROVED); 3012 } 3013 if (credible_events + deferred_events < n) { 3014 /* will have to wait */ 3015 /* wait time is shortest known */ 3016 if (arrow_delay < overall_delay) 3017 overall_delay = arrow_delay; 3018 return_value = FME_WAIT; 3019 } else if (credible_events < n) { 3020 if (return_value != FME_WAIT) 3021 return_value = FME_DEFERRED; 3022 } 3023 } 3024 3025 /* 3026 * don't mark as FME_DEFERRED. If this event isn't reached by another 3027 * path, then this will be considered FME_CREDIBLE. But if it is 3028 * reached by a different path so the K-count is met, then might 3029 * get overridden by FME_WAIT or FME_DISPROVED. 
3030 */ 3031 if (return_value == FME_WAIT) { 3032 ep->cached_state |= REQMNTS_WAIT; 3033 ep->cached_delay = *pdelay = overall_delay; 3034 } else if (return_value == FME_CREDIBLE) { 3035 ep->cached_state |= REQMNTS_CREDIBLE; 3036 } 3037 indent(); 3038 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ", 3039 fme_state2str(return_value)); 3040 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3041 out(O_ALTFP|O_VERB, NULL); 3042 indent_pop(); 3043 return (return_value); 3044 } 3045 3046 static enum fme_state 3047 causes_test(struct fme *fmep, struct event *ep, 3048 unsigned long long at_latest_by, unsigned long long *pdelay) 3049 { 3050 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 3051 unsigned long long my_delay; 3052 int credible_results = 0; 3053 int waiting_results = 0; 3054 enum fme_state fstate; 3055 struct event *tail_event; 3056 struct bubble *bp; 3057 struct arrowlist *ap; 3058 int k = 1; 3059 3060 stats_counter_bump(fmep->Ccallcount); 3061 indent_push(" C"); 3062 indent(); 3063 out(O_ALTFP|O_VERB|O_NONL, "->"); 3064 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3065 out(O_ALTFP|O_VERB, NULL); 3066 3067 for (bp = itree_next_bubble(ep, NULL); bp; 3068 bp = itree_next_bubble(ep, bp)) { 3069 if (bp->t != B_TO) 3070 continue; 3071 k = bp->nork; /* remember the K value */ 3072 for (ap = itree_next_arrow(bp, NULL); ap; 3073 ap = itree_next_arrow(bp, ap)) { 3074 int do_not_follow = 0; 3075 3076 /* 3077 * if we get to the same event multiple times 3078 * only worry about the first one. 
3079 */ 3080 if (ap->arrowp->tail->myevent->cached_state & 3081 CAUSES_TESTED) { 3082 indent(); 3083 out(O_ALTFP|O_VERB|O_NONL, 3084 " causes test already run for "); 3085 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, 3086 ap->arrowp->tail->myevent); 3087 out(O_ALTFP|O_VERB, NULL); 3088 continue; 3089 } 3090 3091 /* 3092 * see if false constraint prevents us 3093 * from traversing this arrow 3094 */ 3095 platform_set_payloadnvp(ep->nvp); 3096 if (checkconstraints(fmep, ap->arrowp) == 0) 3097 do_not_follow = 1; 3098 platform_set_payloadnvp(NULL); 3099 if (do_not_follow) { 3100 indent(); 3101 out(O_ALTFP|O_VERB|O_NONL, 3102 " False arrow from "); 3103 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, 3104 ap->arrowp->tail->myevent); 3105 out(O_ALTFP|O_VERB, NULL); 3106 continue; 3107 } 3108 3109 ap->arrowp->tail->myevent->cached_state |= 3110 CAUSES_TESTED; 3111 tail_event = ap->arrowp->tail->myevent; 3112 fstate = hypothesise(fmep, tail_event, at_latest_by, 3113 &my_delay); 3114 3115 switch (fstate) { 3116 case FME_WAIT: 3117 if (my_delay < overall_delay) 3118 overall_delay = my_delay; 3119 waiting_results++; 3120 break; 3121 case FME_CREDIBLE: 3122 credible_results++; 3123 break; 3124 case FME_DISPROVED: 3125 break; 3126 default: 3127 out(O_DIE, "Bug in causes_test"); 3128 } 3129 } 3130 } 3131 /* compare against K */ 3132 if (credible_results + waiting_results < k) { 3133 indent(); 3134 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED "); 3135 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3136 out(O_ALTFP|O_VERB, NULL); 3137 indent_pop(); 3138 return (FME_DISPROVED); 3139 } 3140 if (waiting_results != 0) { 3141 *pdelay = overall_delay; 3142 indent(); 3143 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT "); 3144 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3145 out(O_ALTFP|O_VERB|O_NONL, " to "); 3146 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3147 out(O_ALTFP|O_VERB, NULL); 3148 indent_pop(); 3149 return (FME_WAIT); 3150 } 3151 indent(); 3152 out(O_ALTFP|O_VERB|O_NONL, 
"<-CAUSES CREDIBLE "); 3153 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3154 out(O_ALTFP|O_VERB, NULL); 3155 indent_pop(); 3156 return (FME_CREDIBLE); 3157 } 3158 3159 static enum fme_state 3160 hypothesise(struct fme *fmep, struct event *ep, 3161 unsigned long long at_latest_by, unsigned long long *pdelay) 3162 { 3163 enum fme_state rtr, otr; 3164 unsigned long long my_delay; 3165 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 3166 3167 stats_counter_bump(fmep->Hcallcount); 3168 indent_push(" H"); 3169 indent(); 3170 out(O_ALTFP|O_VERB|O_NONL, "->"); 3171 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3172 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: "); 3173 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3174 out(O_ALTFP|O_VERB, NULL); 3175 3176 rtr = requirements_test(fmep, ep, at_latest_by, &my_delay); 3177 if ((rtr == FME_WAIT) && (my_delay < overall_delay)) 3178 overall_delay = my_delay; 3179 if (rtr != FME_DISPROVED) { 3180 if (is_problem(ep->t)) { 3181 otr = effects_test(fmep, ep, at_latest_by, &my_delay); 3182 if (otr != FME_DISPROVED) { 3183 if (fmep->peek == 0 && ep->is_suspect++ == 0) { 3184 ep->suspects = fmep->suspects; 3185 fmep->suspects = ep; 3186 fmep->nsuspects++; 3187 if (!is_fault(ep->t)) 3188 fmep->nonfault++; 3189 } 3190 } 3191 } else 3192 otr = causes_test(fmep, ep, at_latest_by, &my_delay); 3193 if ((otr == FME_WAIT) && (my_delay < overall_delay)) 3194 overall_delay = my_delay; 3195 if ((otr != FME_DISPROVED) && 3196 ((rtr == FME_WAIT) || (otr == FME_WAIT))) 3197 *pdelay = overall_delay; 3198 } 3199 if (rtr == FME_DISPROVED) { 3200 indent(); 3201 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 3202 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3203 out(O_ALTFP|O_VERB, " (doesn't meet requirements)"); 3204 indent_pop(); 3205 return (FME_DISPROVED); 3206 } 3207 if ((otr == FME_DISPROVED) && is_problem(ep->t)) { 3208 indent(); 3209 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 3210 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3211 
out(O_ALTFP|O_VERB, " (doesn't explain all reports)"); 3212 indent_pop(); 3213 return (FME_DISPROVED); 3214 } 3215 if (otr == FME_DISPROVED) { 3216 indent(); 3217 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 3218 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3219 out(O_ALTFP|O_VERB, " (causes are not credible)"); 3220 indent_pop(); 3221 return (FME_DISPROVED); 3222 } 3223 if ((rtr == FME_WAIT) || (otr == FME_WAIT)) { 3224 indent(); 3225 out(O_ALTFP|O_VERB|O_NONL, "<-WAIT "); 3226 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3227 out(O_ALTFP|O_VERB|O_NONL, " to "); 3228 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay); 3229 out(O_ALTFP|O_VERB, NULL); 3230 indent_pop(); 3231 return (FME_WAIT); 3232 } 3233 indent(); 3234 out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE "); 3235 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3236 out(O_ALTFP|O_VERB, NULL); 3237 indent_pop(); 3238 return (FME_CREDIBLE); 3239 } 3240 3241 /* 3242 * fme_istat_load -- reconstitute any persistent istats 3243 */ 3244 void 3245 fme_istat_load(fmd_hdl_t *hdl) 3246 { 3247 int sz; 3248 char *sbuf; 3249 char *ptr; 3250 3251 if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) { 3252 out(O_ALTFP, "fme_istat_load: No stats"); 3253 return; 3254 } 3255 3256 sbuf = alloca(sz); 3257 3258 fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz); 3259 3260 /* 3261 * pick apart the serialized stats 3262 * 3263 * format is: 3264 * <class-name>, '@', <path>, '\0', <value>, '\0' 3265 * for example: 3266 * "stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0" 3267 * 3268 * since this is parsing our own serialized data, any parsing issues 3269 * are fatal, so we check for them all with ASSERT() below. 
3270 */ 3271 ptr = sbuf; 3272 while (ptr < &sbuf[sz]) { 3273 char *sepptr; 3274 struct node *np; 3275 int val; 3276 3277 sepptr = strchr(ptr, '@'); 3278 ASSERT(sepptr != NULL); 3279 *sepptr = '\0'; 3280 3281 /* construct the event */ 3282 np = newnode(T_EVENT, NULL, 0); 3283 np->u.event.ename = newnode(T_NAME, NULL, 0); 3284 np->u.event.ename->u.name.t = N_STAT; 3285 np->u.event.ename->u.name.s = stable(ptr); 3286 np->u.event.ename->u.name.it = IT_ENAME; 3287 np->u.event.ename->u.name.last = np->u.event.ename; 3288 3289 ptr = sepptr + 1; 3290 ASSERT(ptr < &sbuf[sz]); 3291 ptr += strlen(ptr); 3292 ptr++; /* move past the '\0' separating path from value */ 3293 ASSERT(ptr < &sbuf[sz]); 3294 ASSERT(isdigit(*ptr)); 3295 val = atoi(ptr); 3296 ASSERT(val > 0); 3297 ptr += strlen(ptr); 3298 ptr++; /* move past the final '\0' for this entry */ 3299 3300 np->u.event.epname = pathstring2epnamenp(sepptr + 1); 3301 ASSERT(np->u.event.epname != NULL); 3302 3303 istat_bump(np, val); 3304 tree_free(np); 3305 } 3306 3307 istat_save(); 3308 } 3309