1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * fme.c -- fault management exercise module 27 * 28 * this module provides the simulated fault management exercise. 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <string.h> 36 #include <strings.h> 37 #include <ctype.h> 38 #include <alloca.h> 39 #include <libnvpair.h> 40 #include <sys/fm/protocol.h> 41 #include <fm/fmd_api.h> 42 #include "alloc.h" 43 #include "out.h" 44 #include "stats.h" 45 #include "stable.h" 46 #include "literals.h" 47 #include "lut.h" 48 #include "tree.h" 49 #include "ptree.h" 50 #include "itree.h" 51 #include "ipath.h" 52 #include "fme.h" 53 #include "evnv.h" 54 #include "eval.h" 55 #include "config.h" 56 #include "platform.h" 57 58 /* imported from eft.c... 
*/ 59 extern int Autoconvict; 60 extern char *Autoclose; 61 extern hrtime_t Hesitate; 62 extern nv_alloc_t Eft_nv_hdl; 63 extern int Max_fme; 64 65 /* fme under construction is global so we can free it on module abort */ 66 static struct fme *Nfmep; 67 68 static const char *Undiag_reason; 69 70 static int Nextid = 0; 71 72 static int Open_fme_count = 0; /* Count of open FMEs */ 73 74 /* list of fault management exercises underway */ 75 static struct fme { 76 struct fme *next; /* next exercise */ 77 unsigned long long ull; /* time when fme was created */ 78 int id; /* FME id */ 79 struct cfgdata *cfgdata; /* full configuration data */ 80 struct lut *eventtree; /* propagation tree for this FME */ 81 /* 82 * The initial error report that created this FME is kept in 83 * two forms. e0 points to the instance tree node and is used 84 * by fme_eval() as the starting point for the inference 85 * algorithm. e0r is the event handle FMD passed to us when 86 * the ereport first arrived and is used when setting timers, 87 * which are always relative to the time of this initial 88 * report. 
89 */ 90 struct event *e0; 91 fmd_event_t *e0r; 92 93 id_t timer; /* for setting an fmd time-out */ 94 id_t htid; /* for setting hesitation timer */ 95 96 struct event *ecurrent; /* ereport under consideration */ 97 struct event *suspects; /* current suspect list */ 98 struct event *psuspects; /* previous suspect list */ 99 int nsuspects; /* count of suspects */ 100 int nonfault; /* zero if all suspects T_FAULT */ 101 int posted_suspects; /* true if we've posted a diagnosis */ 102 int hesitated; /* true if we hesitated */ 103 int uniqobs; /* number of unique events observed */ 104 int peek; /* just peeking, don't track suspects */ 105 int overflow; /* true if overflow FME */ 106 enum fme_state { 107 FME_NOTHING = 5000, /* not evaluated yet */ 108 FME_WAIT, /* need to wait for more info */ 109 FME_CREDIBLE, /* suspect list is credible */ 110 FME_DISPROVED /* no valid suspects found */ 111 } state; 112 113 unsigned long long pull; /* time passed since created */ 114 unsigned long long wull; /* wait until this time for re-eval */ 115 struct event *observations; /* observation list */ 116 struct lut *globals; /* values of global variables */ 117 /* fmd interfacing */ 118 fmd_hdl_t *hdl; /* handle for talking with fmd */ 119 fmd_case_t *fmcase; /* what fmd 'case' we associate with */ 120 /* stats */ 121 struct stats *Rcount; 122 struct stats *Hcallcount; 123 struct stats *Rcallcount; 124 struct stats *Ccallcount; 125 struct stats *Ecallcount; 126 struct stats *Tcallcount; 127 struct stats *Marrowcount; 128 struct stats *diags; 129 } *FMElist, *EFMElist, *ClosedFMEs; 130 131 static struct case_list { 132 fmd_case_t *fmcase; 133 struct case_list *next; 134 } *Undiagablecaselist; 135 136 static void fme_eval(struct fme *fmep, fmd_event_t *ffep); 137 static enum fme_state hypothesise(struct fme *fmep, struct event *ep, 138 unsigned long long at_latest_by, unsigned long long *pdelay, 139 struct arrow *arrowp); 140 static struct node *eventprop_lookup(struct event *ep, const 
char *propname); 141 static struct node *pathstring2epnamenp(char *path); 142 static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep); 143 static void restore_suspects(struct fme *fmep); 144 static void save_suspects(struct fme *fmep); 145 static void destroy_fme(struct fme *f); 146 static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep, 147 const char *eventstring, const struct ipath *ipp, nvlist_t *nvl); 148 149 static struct fme * 150 alloc_fme(void) 151 { 152 struct fme *fmep; 153 154 fmep = MALLOC(sizeof (*fmep)); 155 bzero(fmep, sizeof (*fmep)); 156 return (fmep); 157 } 158 159 /* 160 * fme_ready -- called when all initialization of the FME (except for 161 * stats) has completed successfully. Adds the fme to global lists 162 * and establishes its stats. 163 */ 164 static struct fme * 165 fme_ready(struct fme *fmep) 166 { 167 char nbuf[100]; 168 169 Nfmep = NULL; /* don't need to free this on module abort now */ 170 171 if (EFMElist) { 172 EFMElist->next = fmep; 173 EFMElist = fmep; 174 } else 175 FMElist = EFMElist = fmep; 176 177 (void) sprintf(nbuf, "fme%d.Rcount", fmep->id); 178 fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0); 179 (void) sprintf(nbuf, "fme%d.Hcall", fmep->id); 180 fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1); 181 (void) sprintf(nbuf, "fme%d.Rcall", fmep->id); 182 fmep->Rcallcount = stats_new_counter(nbuf, 183 "calls to requirements_test()", 1); 184 (void) sprintf(nbuf, "fme%d.Ccall", fmep->id); 185 fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1); 186 (void) sprintf(nbuf, "fme%d.Ecall", fmep->id); 187 fmep->Ecallcount = 188 stats_new_counter(nbuf, "calls to effects_test()", 1); 189 (void) sprintf(nbuf, "fme%d.Tcall", fmep->id); 190 fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1); 191 (void) sprintf(nbuf, "fme%d.Marrow", fmep->id); 192 fmep->Marrowcount = stats_new_counter(nbuf, 193 "arrows marked by mark_arrows()", 1); 194 (void) 
sprintf(nbuf, "fme%d.diags", fmep->id); 195 fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0); 196 197 out(O_ALTFP|O_VERB2, "newfme: config snapshot contains..."); 198 config_print(O_ALTFP|O_VERB2, fmep->cfgdata->cooked); 199 200 return (fmep); 201 } 202 203 static struct fme * 204 newfme(const char *e0class, const struct ipath *e0ipp) 205 { 206 struct cfgdata *cfgdata; 207 208 if ((cfgdata = config_snapshot()) == NULL) { 209 out(O_ALTFP, "newfme: NULL configuration"); 210 Undiag_reason = UD_NOCONF; 211 return (NULL); 212 } 213 214 Nfmep = alloc_fme(); 215 216 Nfmep->id = Nextid++; 217 Nfmep->cfgdata = cfgdata; 218 Nfmep->posted_suspects = 0; 219 Nfmep->uniqobs = 0; 220 Nfmep->state = FME_NOTHING; 221 Nfmep->pull = 0ULL; 222 Nfmep->overflow = 0; 223 224 Nfmep->fmcase = NULL; 225 Nfmep->hdl = NULL; 226 227 if ((Nfmep->eventtree = itree_create(cfgdata->cooked)) == NULL) { 228 out(O_ALTFP, "newfme: NULL instance tree"); 229 Undiag_reason = UD_INSTFAIL; 230 config_free(cfgdata); 231 FREE(Nfmep); 232 Nfmep = NULL; 233 return (NULL); 234 } 235 236 itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree); 237 238 if ((Nfmep->e0 = 239 itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) { 240 out(O_ALTFP, "newfme: e0 not in instance tree"); 241 Undiag_reason = UD_BADEVENTI; 242 itree_free(Nfmep->eventtree); 243 config_free(cfgdata); 244 FREE(Nfmep); 245 Nfmep = NULL; 246 return (NULL); 247 } 248 249 return (fme_ready(Nfmep)); 250 } 251 252 void 253 fme_fini(void) 254 { 255 struct fme *sfp, *fp; 256 struct case_list *ucasep, *nextcasep; 257 258 ucasep = Undiagablecaselist; 259 while (ucasep != NULL) { 260 nextcasep = ucasep->next; 261 FREE(ucasep); 262 ucasep = nextcasep; 263 } 264 Undiagablecaselist = NULL; 265 266 /* clean up closed fmes */ 267 fp = ClosedFMEs; 268 while (fp != NULL) { 269 sfp = fp->next; 270 destroy_fme(fp); 271 fp = sfp; 272 } 273 ClosedFMEs = NULL; 274 275 fp = FMElist; 276 while (fp != NULL) { 277 sfp = fp->next; 278 destroy_fme(fp); 279 
fp = sfp; 280 } 281 FMElist = EFMElist = NULL; 282 283 /* if we were in the middle of creating an fme, free it now */ 284 if (Nfmep) { 285 destroy_fme(Nfmep); 286 Nfmep = NULL; 287 } 288 } 289 290 /* 291 * Allocated space for a buffer name. 20 bytes allows for 292 * a ridiculous 9,999,999 unique observations. 293 */ 294 #define OBBUFNMSZ 20 295 296 /* 297 * serialize_observation 298 * 299 * Create a recoverable version of the current observation 300 * (f->ecurrent). We keep a serialized version of each unique 301 * observation in order that we may resume correctly the fme in the 302 * correct state if eft or fmd crashes and we're restarted. 303 */ 304 static void 305 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp) 306 { 307 size_t pkdlen; 308 char tmpbuf[OBBUFNMSZ]; 309 char *pkd = NULL; 310 char *estr; 311 312 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs); 313 estr = ipath2str(cls, ipp); 314 fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1); 315 fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr, 316 strlen(estr) + 1); 317 FREE(estr); 318 319 if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) { 320 (void) snprintf(tmpbuf, 321 OBBUFNMSZ, "observed%d.nvp", fp->uniqobs); 322 if (nvlist_xpack(fp->ecurrent->nvp, 323 &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0) 324 out(O_DIE|O_SYS, "pack of observed nvl failed"); 325 fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen); 326 fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen); 327 FREE(pkd); 328 } 329 330 fp->uniqobs++; 331 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs, 332 sizeof (fp->uniqobs)); 333 } 334 335 /* 336 * init_fme_bufs -- We keep several bits of state about an fme for 337 * use if eft or fmd crashes and we're restarted. 
338 */ 339 static void 340 init_fme_bufs(struct fme *fp) 341 { 342 size_t cfglen = fp->cfgdata->nextfree - fp->cfgdata->begin; 343 344 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_CFGLEN, sizeof (cfglen)); 345 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_CFGLEN, (void *)&cfglen, 346 sizeof (cfglen)); 347 if (cfglen != 0) { 348 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_CFG, cfglen); 349 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_CFG, 350 fp->cfgdata->begin, cfglen); 351 } 352 353 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull)); 354 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull, 355 sizeof (fp->pull)); 356 357 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id)); 358 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id, 359 sizeof (fp->id)); 360 361 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs)); 362 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs, 363 sizeof (fp->uniqobs)); 364 365 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD, 366 sizeof (fp->posted_suspects)); 367 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD, 368 (void *)&fp->posted_suspects, sizeof (fp->posted_suspects)); 369 } 370 371 static void 372 destroy_fme_bufs(struct fme *fp) 373 { 374 char tmpbuf[OBBUFNMSZ]; 375 int o; 376 377 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN); 378 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG); 379 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL); 380 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID); 381 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD); 382 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS); 383 384 for (o = 0; o < fp->uniqobs; o++) { 385 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o); 386 fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf); 387 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o); 388 fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf); 389 } 390 } 391 392 /* 393 * reconstitute_observations -- convert a case's serialized observations 394 * back into 
struct events. Returns zero if all observations are 395 * successfully reconstituted. 396 */ 397 static int 398 reconstitute_observations(struct fme *fmep) 399 { 400 struct event *ep; 401 struct node *epnamenp = NULL; 402 size_t pkdlen; 403 char *pkd = NULL; 404 char *tmpbuf = alloca(OBBUFNMSZ); 405 char *sepptr; 406 char *estr; 407 int ocnt; 408 int elen; 409 410 for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) { 411 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt); 412 elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf); 413 if (elen == 0) { 414 out(O_ALTFP, 415 "reconstitute_observation: no %s buffer found.", 416 tmpbuf); 417 Undiag_reason = UD_MISSINGOBS; 418 break; 419 } 420 421 estr = MALLOC(elen); 422 fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen); 423 sepptr = strchr(estr, '@'); 424 if (sepptr == NULL) { 425 out(O_ALTFP, 426 "reconstitute_observation: %s: " 427 "missing @ separator in %s.", 428 tmpbuf, estr); 429 Undiag_reason = UD_MISSINGPATH; 430 FREE(estr); 431 break; 432 } 433 434 *sepptr = '\0'; 435 if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) { 436 out(O_ALTFP, 437 "reconstitute_observation: %s: " 438 "trouble converting path string \"%s\" " 439 "to internal representation.", 440 tmpbuf, sepptr + 1); 441 Undiag_reason = UD_MISSINGPATH; 442 FREE(estr); 443 break; 444 } 445 446 /* construct the event */ 447 ep = itree_lookup(fmep->eventtree, 448 stable(estr), ipath(epnamenp)); 449 if (ep == NULL) { 450 out(O_ALTFP, 451 "reconstitute_observation: %s: " 452 "lookup of \"%s\" in itree failed.", 453 tmpbuf, ipath2str(estr, ipath(epnamenp))); 454 Undiag_reason = UD_BADOBS; 455 tree_free(epnamenp); 456 FREE(estr); 457 break; 458 } 459 tree_free(epnamenp); 460 461 /* 462 * We may or may not have a saved nvlist for the observation 463 */ 464 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt); 465 pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf); 466 if (pkdlen != 0) { 467 pkd = MALLOC(pkdlen); 468 fmd_buf_read(fmep->hdl, 
469 fmep->fmcase, tmpbuf, pkd, pkdlen); 470 if (nvlist_xunpack(pkd, 471 pkdlen, &ep->nvp, &Eft_nv_hdl) != 0) 472 out(O_DIE|O_SYS, "pack of observed nvl failed"); 473 FREE(pkd); 474 } 475 476 if (ocnt == 0) 477 fmep->e0 = ep; 478 479 FREE(estr); 480 fmep->ecurrent = ep; 481 ep->count++; 482 483 /* link it into list of observations seen */ 484 ep->observations = fmep->observations; 485 fmep->observations = ep; 486 } 487 488 if (ocnt == fmep->uniqobs) { 489 (void) fme_ready(fmep); 490 return (0); 491 } 492 493 return (1); 494 } 495 496 /* 497 * restart_fme -- called during eft initialization. Reconstitutes 498 * an in-progress fme. 499 */ 500 void 501 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress) 502 { 503 nvlist_t *defect; 504 struct case_list *bad; 505 struct fme *fmep; 506 struct cfgdata *cfgdata = NULL; 507 size_t rawsz; 508 509 fmep = alloc_fme(); 510 fmep->fmcase = inprogress; 511 fmep->hdl = hdl; 512 513 if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) { 514 out(O_ALTFP, "restart_fme: No config data"); 515 Undiag_reason = UD_MISSINGINFO; 516 goto badcase; 517 } 518 fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz, 519 sizeof (size_t)); 520 521 if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) { 522 out(O_ALTFP, "restart_fme: No event zero"); 523 Undiag_reason = UD_MISSINGZERO; 524 goto badcase; 525 } 526 527 cfgdata = MALLOC(sizeof (struct cfgdata)); 528 cfgdata->cooked = NULL; 529 cfgdata->devcache = NULL; 530 cfgdata->cpucache = NULL; 531 cfgdata->refcnt = 1; 532 533 if (rawsz > 0) { 534 if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) { 535 out(O_ALTFP, "restart_fme: Config data size mismatch"); 536 Undiag_reason = UD_CFGMISMATCH; 537 goto badcase; 538 } 539 cfgdata->begin = MALLOC(rawsz); 540 cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz; 541 fmd_buf_read(hdl, 542 inprogress, WOBUF_CFG, cfgdata->begin, rawsz); 543 } else { 544 cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL; 545 } 
546 fmep->cfgdata = cfgdata; 547 548 config_cook(cfgdata); 549 if ((fmep->eventtree = itree_create(cfgdata->cooked)) == NULL) { 550 /* case not properly saved or irretrievable */ 551 out(O_ALTFP, "restart_fme: NULL instance tree"); 552 Undiag_reason = UD_INSTFAIL; 553 goto badcase; 554 } 555 556 itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree); 557 558 if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) { 559 out(O_ALTFP, "restart_fme: no saved wait time"); 560 Undiag_reason = UD_MISSINGINFO; 561 goto badcase; 562 } else { 563 fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull, 564 sizeof (fmep->pull)); 565 } 566 567 if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) { 568 out(O_ALTFP, "restart_fme: no saved posted status"); 569 Undiag_reason = UD_MISSINGINFO; 570 goto badcase; 571 } else { 572 fmd_buf_read(hdl, inprogress, WOBUF_POSTD, 573 (void *)&fmep->posted_suspects, 574 sizeof (fmep->posted_suspects)); 575 } 576 577 if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) { 578 out(O_ALTFP, "restart_fme: no saved id"); 579 Undiag_reason = UD_MISSINGINFO; 580 goto badcase; 581 } else { 582 fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id, 583 sizeof (fmep->id)); 584 } 585 if (Nextid <= fmep->id) 586 Nextid = fmep->id + 1; 587 588 if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) { 589 out(O_ALTFP, "restart_fme: no count of observations"); 590 Undiag_reason = UD_MISSINGINFO; 591 goto badcase; 592 } else { 593 fmd_buf_read(hdl, inprogress, WOBUF_NOBS, 594 (void *)&fmep->uniqobs, sizeof (fmep->uniqobs)); 595 } 596 597 if (reconstitute_observations(fmep) != 0) 598 goto badcase; 599 600 Open_fme_count++; 601 602 /* give the diagnosis algorithm a shot at the new FME state */ 603 fme_eval(fmep, NULL); 604 return; 605 606 badcase: 607 if (fmep->eventtree != NULL) 608 itree_free(fmep->eventtree); 609 config_free(cfgdata); 610 destroy_fme_bufs(fmep); 611 FREE(fmep); 612 613 /* 614 * Since we're unable to restart the case, add it to the undiagable 615 * 
list and solve and close it as appropriate. 616 */ 617 bad = MALLOC(sizeof (struct case_list)); 618 bad->next = NULL; 619 620 if (Undiagablecaselist != NULL) 621 bad->next = Undiagablecaselist; 622 Undiagablecaselist = bad; 623 bad->fmcase = inprogress; 624 625 out(O_ALTFP, "[case %s (unable to restart), ", 626 fmd_case_uuid(hdl, bad->fmcase)); 627 628 if (fmd_case_solved(hdl, bad->fmcase)) { 629 out(O_ALTFP, "already solved, "); 630 } else { 631 out(O_ALTFP, "solving, "); 632 defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100, 633 NULL, NULL, NULL); 634 if (Undiag_reason != NULL) 635 (void) nvlist_add_string(defect, 636 UNDIAG_REASON, Undiag_reason); 637 fmd_case_add_suspect(hdl, bad->fmcase, defect); 638 fmd_case_solve(hdl, bad->fmcase); 639 } 640 641 if (fmd_case_closed(hdl, bad->fmcase)) { 642 out(O_ALTFP, "already closed ]"); 643 } else { 644 out(O_ALTFP, "closing ]"); 645 fmd_case_close(hdl, bad->fmcase); 646 } 647 } 648 649 void 650 destroy_fme(struct fme *f) 651 { 652 stats_delete(f->Rcount); 653 stats_delete(f->Hcallcount); 654 stats_delete(f->Rcallcount); 655 stats_delete(f->Ccallcount); 656 stats_delete(f->Ecallcount); 657 stats_delete(f->Tcallcount); 658 stats_delete(f->Marrowcount); 659 stats_delete(f->diags); 660 661 itree_free(f->eventtree); 662 config_free(f->cfgdata); 663 FREE(f); 664 } 665 666 static const char * 667 fme_state2str(enum fme_state s) 668 { 669 switch (s) { 670 case FME_NOTHING: return ("NOTHING"); 671 case FME_WAIT: return ("WAIT"); 672 case FME_CREDIBLE: return ("CREDIBLE"); 673 case FME_DISPROVED: return ("DISPROVED"); 674 default: return ("UNKNOWN"); 675 } 676 } 677 678 static int 679 is_problem(enum nametype t) 680 { 681 return (t == N_FAULT || t == N_DEFECT || t == N_UPSET); 682 } 683 684 static int 685 is_fault(enum nametype t) 686 { 687 return (t == N_FAULT); 688 } 689 690 static int 691 is_defect(enum nametype t) 692 { 693 return (t == N_DEFECT); 694 } 695 696 static int 697 is_upset(enum nametype t) 698 { 699 
return (t == N_UPSET); 700 } 701 702 /*ARGSUSED*/ 703 static void 704 clear_causes_tested(struct event *lhs, struct event *ep, void *arg) 705 { 706 struct bubble *bp; 707 struct arrowlist *ap; 708 709 for (bp = itree_next_bubble(ep, NULL); bp; 710 bp = itree_next_bubble(ep, bp)) { 711 if (bp->t != B_FROM) 712 continue; 713 for (ap = itree_next_arrow(bp, NULL); ap; 714 ap = itree_next_arrow(bp, ap)) 715 ap->arrowp->causes_tested = 0; 716 } 717 } 718 719 /* 720 * call this function with initcode set to 0 to initialize cycle tracking 721 */ 722 static void 723 initialize_cycles(struct fme *fmep) 724 { 725 lut_walk(fmep->eventtree, (lut_cb)clear_causes_tested, NULL); 726 } 727 728 static void 729 fme_print(int flags, struct fme *fmep) 730 { 731 struct event *ep; 732 733 out(flags, "Fault Management Exercise %d", fmep->id); 734 out(flags, "\t State: %s", fme_state2str(fmep->state)); 735 out(flags|O_NONL, "\t Start time: "); 736 ptree_timeval(flags|O_NONL, &fmep->ull); 737 out(flags, NULL); 738 if (fmep->wull) { 739 out(flags|O_NONL, "\t Wait time: "); 740 ptree_timeval(flags|O_NONL, &fmep->wull); 741 out(flags, NULL); 742 } 743 out(flags|O_NONL, "\t E0: "); 744 if (fmep->e0) 745 itree_pevent_brief(flags|O_NONL, fmep->e0); 746 else 747 out(flags|O_NONL, "NULL"); 748 out(flags, NULL); 749 out(flags|O_NONL, "\tObservations:"); 750 for (ep = fmep->observations; ep; ep = ep->observations) { 751 out(flags|O_NONL, " "); 752 itree_pevent_brief(flags|O_NONL, ep); 753 } 754 out(flags, NULL); 755 out(flags|O_NONL, "\tSuspect list:"); 756 for (ep = fmep->suspects; ep; ep = ep->suspects) { 757 out(flags|O_NONL, " "); 758 itree_pevent_brief(flags|O_NONL, ep); 759 } 760 out(flags, NULL); 761 out(flags|O_VERB2, "\t Tree:"); 762 itree_ptree(flags|O_VERB2, fmep->eventtree); 763 } 764 765 static struct node * 766 pathstring2epnamenp(char *path) 767 { 768 char *sep = "/"; 769 struct node *ret; 770 char *ptr; 771 772 if ((ptr = strtok(path, sep)) == NULL) 773 out(O_DIE, 
"pathstring2epnamenp: invalid empty class"); 774 775 ret = tree_iname(stable(ptr), NULL, 0); 776 777 while ((ptr = strtok(NULL, sep)) != NULL) 778 ret = tree_name_append(ret, 779 tree_iname(stable(ptr), NULL, 0)); 780 781 return (ret); 782 } 783 784 /* 785 * for a given upset sp, increment the corresponding SERD engine. if the 786 * SERD engine trips, return the ename and ipp of the resulting ereport. 787 * returns true if engine tripped and *enamep and *ippp were filled in. 788 */ 789 static int 790 serd_eval(fmd_hdl_t *hdl, fmd_event_t *ffep, fmd_case_t *fmcase, 791 struct event *sp, const char **enamep, const struct ipath **ippp) 792 { 793 struct node *serdinst; 794 char *serdname; 795 796 ASSERT(sp->t == N_UPSET); 797 ASSERT(ffep != NULL); 798 799 /* 800 * obtain instanced SERD engine from the upset sp. from this 801 * derive serdname, the string used to identify the SERD engine. 802 */ 803 serdinst = eventprop_lookup(sp, L_engine); 804 805 if (serdinst == NULL) 806 return (NULL); 807 808 serdname = ipath2str(serdinst->u.stmt.np->u.event.ename->u.name.s, 809 ipath(serdinst->u.stmt.np->u.event.epname)); 810 811 if (!fmd_serd_exists(hdl, serdname)) { 812 struct node *nN, *nT; 813 814 /* no SERD engine yet, so create it */ 815 nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N, NULL); 816 nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T, NULL); 817 818 ASSERT(nN->t == T_NUM); 819 ASSERT(nT->t == T_TIMEVAL); 820 821 fmd_serd_create(hdl, serdname, (uint_t)nN->u.ull, 822 (hrtime_t)nT->u.ull); 823 } 824 825 826 /* 827 * increment SERD engine. 
if engine fires, reset serd 828 * engine and return trip_strcode 829 */ 830 if (fmd_serd_record(hdl, serdname, ffep)) { 831 struct node *tripinst = lut_lookup(serdinst->u.stmt.lutp, 832 (void *)L_trip, NULL); 833 834 ASSERT(tripinst != NULL); 835 836 *enamep = tripinst->u.event.ename->u.name.s; 837 *ippp = ipath(tripinst->u.event.epname); 838 839 fmd_case_add_serd(hdl, fmcase, serdname); 840 fmd_serd_reset(hdl, serdname); 841 out(O_ALTFP|O_NONL, "[engine fired: %s, sending: ", serdname); 842 ipath_print(O_ALTFP|O_NONL, *enamep, *ippp); 843 out(O_ALTFP, "]"); 844 845 FREE(serdname); 846 return (1); 847 } 848 849 FREE(serdname); 850 return (0); 851 } 852 853 /* 854 * search a suspect list for upsets. feed each upset to serd_eval() and 855 * build up tripped[], an array of ereports produced by the firing of 856 * any SERD engines. then feed each ereport back into 857 * fme_receive_report(). 858 * 859 * returns ntrip, the number of these ereports produced. 860 */ 861 static int 862 upsets_eval(struct fme *fmep, fmd_event_t *ffep) 863 { 864 /* we build an array of tripped ereports that we send ourselves */ 865 struct { 866 const char *ename; 867 const struct ipath *ipp; 868 } *tripped; 869 struct event *sp; 870 int ntrip, nupset, i; 871 872 /* 873 * we avoid recursion by calling fme_receive_report() at the end of 874 * this function with a NULL ffep 875 */ 876 if (ffep == NULL) 877 return (0); 878 879 /* 880 * count the number of upsets to determine the upper limit on 881 * expected trip ereport strings. remember that one upset can 882 * lead to at most one ereport. 
883 */ 884 nupset = 0; 885 for (sp = fmep->suspects; sp; sp = sp->suspects) { 886 if (sp->t == N_UPSET) 887 nupset++; 888 } 889 890 if (nupset == 0) 891 return (0); 892 893 /* 894 * get to this point if we have upsets and expect some trip 895 * ereports 896 */ 897 tripped = alloca(sizeof (*tripped) * nupset); 898 bzero((void *)tripped, sizeof (*tripped) * nupset); 899 900 ntrip = 0; 901 for (sp = fmep->suspects; sp; sp = sp->suspects) 902 if (sp->t == N_UPSET && 903 serd_eval(fmep->hdl, ffep, fmep->fmcase, sp, 904 &tripped[ntrip].ename, &tripped[ntrip].ipp)) 905 ntrip++; 906 907 for (i = 0; i < ntrip; i++) 908 fme_receive_report(fmep->hdl, NULL, 909 tripped[i].ename, tripped[i].ipp, NULL); 910 911 return (ntrip); 912 } 913 914 /* 915 * fme_receive_external_report -- call when an external ereport comes in 916 * 917 * this routine just converts the relevant information from the ereport 918 * into a format used internally and passes it on to fme_receive_report(). 919 */ 920 void 921 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl, 922 const char *eventstring) 923 { 924 struct node *epnamenp = platform_getpath(nvl); 925 const struct ipath *ipp; 926 927 /* 928 * XFILE: If we ended up without a path, it's an X-file. 929 * For now, use our undiagnosable interface. 
930 */ 931 if (epnamenp == NULL) { 932 out(O_ALTFP, "XFILE: Unable to get path from ereport"); 933 Undiag_reason = UD_NOPATH; 934 publish_undiagnosable(hdl, ffep); 935 return; 936 } 937 938 ipp = ipath(epnamenp); 939 tree_free(epnamenp); 940 fme_receive_report(hdl, ffep, stable(eventstring), ipp, nvl); 941 } 942 943 static void 944 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep, 945 const char *eventstring, const struct ipath *ipp, nvlist_t *nvl) 946 { 947 struct event *ep; 948 struct fme *fmep = NULL; 949 struct fme *ofmep = NULL; 950 struct fme *cfmep, *svfmep; 951 int matched = 0; 952 nvlist_t *defect; 953 954 out(O_ALTFP|O_NONL, "fme_receive_report: "); 955 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 956 out(O_ALTFP|O_STAMP, NULL); 957 958 /* decide which FME it goes to */ 959 for (fmep = FMElist; fmep; fmep = fmep->next) { 960 int prev_verbose; 961 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 962 enum fme_state state; 963 964 if (fmep->overflow) { 965 if (!(fmd_case_closed(fmep->hdl, fmep->fmcase))) 966 ofmep = fmep; 967 968 continue; 969 } 970 971 /* look up event in event tree for this FME */ 972 if ((ep = itree_lookup(fmep->eventtree, 973 eventstring, ipp)) == NULL) 974 continue; 975 976 /* note observation */ 977 fmep->ecurrent = ep; 978 if (ep->count++ == 0) { 979 /* link it into list of observations seen */ 980 ep->observations = fmep->observations; 981 fmep->observations = ep; 982 ep->nvp = evnv_dupnvl(nvl); 983 } 984 985 /* tell hypothesise() not to mess with suspect list */ 986 fmep->peek = 1; 987 988 /* don't want this to be verbose (unless Debug is set) */ 989 prev_verbose = Verbose; 990 if (Debug == 0) 991 Verbose = 0; 992 993 initialize_cycles(fmep); 994 state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay, NULL); 995 996 fmep->peek = 0; 997 998 /* put verbose flag back */ 999 Verbose = prev_verbose; 1000 1001 if (state != FME_DISPROVED) { 1002 /* found an FME that explains the ereport */ 1003 matched++; 1004 out(O_ALTFP|O_NONL, 
"["); 1005 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1006 out(O_ALTFP, " explained by FME%d]", fmep->id); 1007 1008 if (ep->count == 1) 1009 serialize_observation(fmep, eventstring, ipp); 1010 1011 if (ffep) 1012 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1013 1014 stats_counter_bump(fmep->Rcount); 1015 1016 /* re-eval FME */ 1017 fme_eval(fmep, ffep); 1018 } else { 1019 1020 /* not a match, undo noting of observation */ 1021 fmep->ecurrent = NULL; 1022 if (--ep->count == 0) { 1023 /* unlink it from observations */ 1024 fmep->observations = ep->observations; 1025 ep->observations = NULL; 1026 nvlist_free(ep->nvp); 1027 ep->nvp = NULL; 1028 } 1029 } 1030 } 1031 1032 if (matched) 1033 return; /* explained by at least one existing FME */ 1034 1035 /* clean up closed fmes */ 1036 cfmep = ClosedFMEs; 1037 while (cfmep != NULL) { 1038 svfmep = cfmep->next; 1039 destroy_fme(cfmep); 1040 cfmep = svfmep; 1041 } 1042 ClosedFMEs = NULL; 1043 1044 if (ofmep) { 1045 out(O_ALTFP|O_NONL, "["); 1046 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1047 out(O_ALTFP, " ADDING TO OVERFLOW FME]"); 1048 if (ffep) 1049 fmd_case_add_ereport(hdl, ofmep->fmcase, ffep); 1050 1051 return; 1052 1053 } else if (Max_fme && (Open_fme_count >= Max_fme)) { 1054 out(O_ALTFP|O_NONL, "["); 1055 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1056 out(O_ALTFP, " MAX OPEN FME REACHED]"); 1057 /* Create overflow fme */ 1058 if ((fmep = newfme(eventstring, ipp)) == NULL) { 1059 out(O_ALTFP|O_NONL, "["); 1060 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1061 out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]"); 1062 publish_undiagnosable(hdl, ffep); 1063 return; 1064 } 1065 1066 Open_fme_count++; 1067 1068 fmep->fmcase = fmd_case_open(hdl, NULL); 1069 fmep->hdl = hdl; 1070 init_fme_bufs(fmep); 1071 fmep->overflow = B_TRUE; 1072 1073 if (ffep) 1074 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1075 1076 defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100, 1077 NULL, NULL, NULL); 1078 (void) 
nvlist_add_string(defect, UNDIAG_REASON, UD_MAXFME); 1079 fmd_case_add_suspect(hdl, fmep->fmcase, defect); 1080 fmd_case_solve(hdl, fmep->fmcase); 1081 return; 1082 } 1083 1084 /* start a new FME */ 1085 if ((fmep = newfme(eventstring, ipp)) == NULL) { 1086 out(O_ALTFP|O_NONL, "["); 1087 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1088 out(O_ALTFP, " CANNOT DIAGNOSE]"); 1089 publish_undiagnosable(hdl, ffep); 1090 return; 1091 } 1092 1093 Open_fme_count++; 1094 1095 /* open a case */ 1096 fmep->fmcase = fmd_case_open(hdl, NULL); 1097 fmep->hdl = hdl; 1098 init_fme_bufs(fmep); 1099 1100 out(O_ALTFP|O_NONL, "["); 1101 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1102 out(O_ALTFP, " created FME%d, case %s]", fmep->id, 1103 fmd_case_uuid(hdl, fmep->fmcase)); 1104 1105 ep = fmep->e0; 1106 ASSERT(ep != NULL); 1107 1108 /* note observation */ 1109 fmep->ecurrent = ep; 1110 if (ep->count++ == 0) { 1111 /* link it into list of observations seen */ 1112 ep->observations = fmep->observations; 1113 fmep->observations = ep; 1114 ep->nvp = evnv_dupnvl(nvl); 1115 serialize_observation(fmep, eventstring, ipp); 1116 } 1117 1118 stats_counter_bump(fmep->Rcount); 1119 1120 if (ffep) { 1121 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1122 fmd_case_setprincipal(hdl, fmep->fmcase, ffep); 1123 fmep->e0r = ffep; 1124 } 1125 1126 /* give the diagnosis algorithm a shot at the new FME state */ 1127 fme_eval(fmep, ffep); 1128 } 1129 1130 void 1131 fme_status(int flags) 1132 { 1133 struct fme *fmep; 1134 1135 if (FMElist == NULL) { 1136 out(flags, "No fault management exercises underway."); 1137 return; 1138 } 1139 1140 for (fmep = FMElist; fmep; fmep = fmep->next) 1141 fme_print(flags, fmep); 1142 } 1143 1144 /* 1145 * "indent" routines used mostly for nicely formatted debug output, but also 1146 * for sanity checking for infinite recursion bugs. 
1147 */ 1148 1149 #define MAX_INDENT 1024 1150 static const char *indent_s[MAX_INDENT]; 1151 static int current_indent; 1152 1153 static void 1154 indent_push(const char *s) 1155 { 1156 if (current_indent < MAX_INDENT) 1157 indent_s[current_indent++] = s; 1158 else 1159 out(O_DIE, "unexpected recursion depth (%d)", current_indent); 1160 } 1161 1162 static void 1163 indent_set(const char *s) 1164 { 1165 current_indent = 0; 1166 indent_push(s); 1167 } 1168 1169 static void 1170 indent_pop(void) 1171 { 1172 if (current_indent > 0) 1173 current_indent--; 1174 else 1175 out(O_DIE, "recursion underflow"); 1176 } 1177 1178 static void 1179 indent(void) 1180 { 1181 int i; 1182 if (!Verbose) 1183 return; 1184 for (i = 0; i < current_indent; i++) 1185 out(O_ALTFP|O_VERB|O_NONL, indent_s[i]); 1186 } 1187 1188 static int 1189 suspects_changed(struct fme *fmep) 1190 { 1191 struct event *suspects = fmep->suspects; 1192 struct event *psuspects = fmep->psuspects; 1193 1194 while (suspects != NULL && psuspects != NULL) { 1195 if (suspects != psuspects) 1196 return (1); 1197 suspects = suspects->suspects; 1198 psuspects = psuspects->psuspects; 1199 } 1200 1201 return (suspects != psuspects); 1202 } 1203 1204 #define SLNEW 1 1205 #define SLCHANGED 2 1206 #define SLWAIT 3 1207 #define SLDISPROVED 4 1208 1209 static void 1210 print_suspects(int circumstance, struct fme *fmep) 1211 { 1212 struct event *ep; 1213 1214 out(O_ALTFP|O_NONL, "["); 1215 if (circumstance == SLCHANGED) { 1216 out(O_ALTFP|O_NONL, "FME%d diagnosis changed. 
state: %s, " 1217 "suspect list:", fmep->id, fme_state2str(fmep->state)); 1218 } else if (circumstance == SLWAIT) { 1219 out(O_ALTFP|O_NONL, "FME%d set wait timer ", fmep->id); 1220 ptree_timeval(O_ALTFP|O_NONL, &fmep->wull); 1221 } else if (circumstance == SLDISPROVED) { 1222 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id); 1223 } else { 1224 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id); 1225 } 1226 1227 if (circumstance == SLWAIT || circumstance == SLDISPROVED) { 1228 out(O_ALTFP, "]"); 1229 return; 1230 } 1231 1232 for (ep = fmep->suspects; ep; ep = ep->suspects) { 1233 out(O_ALTFP|O_NONL, " "); 1234 itree_pevent_brief(O_ALTFP|O_NONL, ep); 1235 } 1236 out(O_ALTFP, "]"); 1237 } 1238 1239 static struct node * 1240 eventprop_lookup(struct event *ep, const char *propname) 1241 { 1242 return (lut_lookup(ep->props, (void *)propname, NULL)); 1243 } 1244 1245 #define MAXDIGITIDX 23 1246 static char numbuf[MAXDIGITIDX + 1]; 1247 1248 static int 1249 node2uint(struct node *n, uint_t *valp) 1250 { 1251 struct evalue value; 1252 struct lut *globals = NULL; 1253 1254 if (n == NULL) 1255 return (1); 1256 1257 /* 1258 * check value.v since we are being asked to convert an unsigned 1259 * long long int to an unsigned int 1260 */ 1261 if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) || 1262 value.t != UINT64 || value.v > (1ULL << 32)) 1263 return (1); 1264 1265 *valp = (uint_t)value.v; 1266 1267 return (0); 1268 } 1269 1270 static nvlist_t * 1271 node2fmri(struct node *n) 1272 { 1273 nvlist_t **pa, *f, *p; 1274 struct node *nc; 1275 uint_t depth = 0; 1276 char *numstr, *nullbyte; 1277 char *failure; 1278 int err, i; 1279 1280 /* XXX do we need to be able to handle a non-T_NAME node? 
/*
 * avg -- integer average of sum over cnt items, rounded half up.
 *
 * The intermediate value is widened to 64 bits before scaling so that
 * a large sum cannot wrap in 32-bit arithmetic.  A cnt of zero yields
 * 0 instead of dividing by zero.
 */
static uint_t
avg(uint_t sum, uint_t cnt)
{
	unsigned long long s;

	if (cnt == 0)
		return (0);

	/* one extra decimal digit is carried to decide the rounding */
	s = (unsigned long long)sum * 10;

	return ((s / cnt / 10) + (((s / cnt % 10) >= 5) ? 1 : 0));
}

/*
 * percentof -- part as a percentage of whole, rounded half up.
 *
 * The intermediate value is widened to 64 bits before scaling so that
 * a large part cannot wrap in 32-bit arithmetic.  A whole of zero
 * yields 0 instead of dividing by zero.  The result is narrowed to
 * uint8_t, so it is only meaningful when it does not exceed 255.
 */
static uint8_t
percentof(uint_t part, uint_t whole)
{
	unsigned long long p;

	if (whole == 0)
		return (0);

	/* tenths of a percent; the last digit decides the rounding */
	p = (unsigned long long)part * 1000;

	return ((p / whole / 10) + (((p / whole % 10) >= 5) ? 1 : 0));
}
1421 */ 1422 static void 1423 rsluniq(struct rsl *first, struct rsl *last, int *nprobs, int *nnonf) 1424 { 1425 struct rsl *cr; 1426 1427 if (*nprobs == 1) 1428 return; 1429 1430 /* 1431 * At this point, we only expect duplicate defects. 1432 * Eversholt's diagnosis algorithm prevents duplicate 1433 * suspects, but we rewrite defects in the platform code after 1434 * the diagnosis is made, and that can introduce new 1435 * duplicates. 1436 */ 1437 while (first <= last) { 1438 if (first->suspect == NULL || !is_defect(first->suspect->t)) { 1439 first++; 1440 continue; 1441 } 1442 cr = first + 1; 1443 while (cr <= last) { 1444 if (is_defect(first->suspect->t)) { 1445 if (rslcmp(first, cr) == 0) { 1446 cr->suspect = NULL; 1447 rslfree(cr); 1448 (*nprobs)--; 1449 (*nnonf)--; 1450 } 1451 } 1452 /* 1453 * assume all defects are in order after our 1454 * sort and short circuit here with "else break" ? 1455 */ 1456 cr++; 1457 } 1458 first++; 1459 } 1460 } 1461 1462 /* 1463 * get_resources -- for a given suspect, determine what ASRU, FRU and 1464 * RSRC nvlists should be advertised in the final suspect list. 1465 */ 1466 void 1467 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot) 1468 { 1469 struct node *asrudef, *frudef; 1470 nvlist_t *asru, *fru; 1471 nvlist_t *rsrc = NULL; 1472 char *pathstr; 1473 1474 /* 1475 * First find any ASRU and/or FRU defined in the 1476 * initial fault tree. 
1477 */ 1478 asrudef = eventprop_lookup(sp, L_ASRU); 1479 frudef = eventprop_lookup(sp, L_FRU); 1480 1481 /* 1482 * Create FMRIs based on those definitions 1483 */ 1484 asru = node2fmri(asrudef); 1485 fru = node2fmri(frudef); 1486 pathstr = ipath2str(NULL, sp->ipp); 1487 1488 /* 1489 * Allow for platform translations of the FMRIs 1490 */ 1491 platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc, 1492 pathstr); 1493 1494 FREE(pathstr); 1495 rsrcs->suspect = sp; 1496 rsrcs->asru = asru; 1497 rsrcs->fru = fru; 1498 rsrcs->rsrc = rsrc; 1499 } 1500 1501 /* 1502 * trim_suspects -- prior to publishing, we may need to remove some 1503 * suspects from the list. If we're auto-closing upsets, we don't 1504 * want any of those in the published list. If the ASRUs for multiple 1505 * defects resolve to the same ASRU (driver) we only want to publish 1506 * that as a single suspect. 1507 */ 1508 static void 1509 trim_suspects(struct fme *fmep, boolean_t no_upsets, struct rsl **begin, 1510 struct rsl **end) 1511 { 1512 struct event *ep; 1513 struct rsl *rp; 1514 int rpcnt; 1515 1516 /* 1517 * First save the suspects in the psuspects, then copy back 1518 * only the ones we wish to retain. This resets nsuspects to 1519 * zero. 1520 */ 1521 rpcnt = fmep->nsuspects; 1522 save_suspects(fmep); 1523 1524 /* 1525 * allocate an array of resource pointers for the suspects. 1526 * We may end up using less than the full allocation, but this 1527 * is a very short-lived array. publish_suspects() will free 1528 * this array when it's done using it. 
1529 */ 1530 rp = *begin = MALLOC(rpcnt * sizeof (struct rsl)); 1531 bzero(rp, rpcnt * sizeof (struct rsl)); 1532 1533 /* first pass, remove any unwanted upsets and populate our array */ 1534 for (ep = fmep->psuspects; ep; ep = ep->psuspects) { 1535 if (no_upsets && is_upset(ep->t)) 1536 continue; 1537 get_resources(ep, rp, fmep->cfgdata->cooked); 1538 rp++; 1539 fmep->nsuspects++; 1540 if (!is_fault(ep->t)) 1541 fmep->nonfault++; 1542 } 1543 1544 /* if all we had was unwanted upsets, we're done */ 1545 if (fmep->nsuspects == 0) 1546 return; 1547 1548 *end = rp - 1; 1549 1550 /* sort the array */ 1551 qsort(*begin, fmep->nsuspects, sizeof (struct rsl), rslcmp); 1552 rsluniq(*begin, *end, &fmep->nsuspects, &fmep->nonfault); 1553 } 1554 1555 static void 1556 publish_suspects(struct fme *fmep) 1557 { 1558 struct event *ep; 1559 struct rsl *srl = NULL; 1560 struct rsl *erl; 1561 struct rsl *rp; 1562 nvlist_t *fault; 1563 uint8_t cert; 1564 uint_t *frs; 1565 uint_t fravg, frsum, fr; 1566 int frcnt, fridx; 1567 boolean_t no_upsets = B_FALSE; 1568 1569 stats_counter_bump(fmep->diags); 1570 1571 /* 1572 * The current fmd interfaces don't allow us to solve a case 1573 * that's already solved. If we make a new case, what of the 1574 * ereports? We don't appear to have an interface that allows 1575 * us to access the ereports attached to a case (if we wanted 1576 * to copy the original case's ereport attachments to the new 1577 * case) and it's also a bit unclear if there would be any 1578 * problems with having ereports attached to multiple cases 1579 * and/or attaching DIAGNOSED ereports to a case. For now, 1580 * we'll just output a message. 
1581 */ 1582 if (fmep->posted_suspects || 1583 fmd_case_solved(fmep->hdl, fmep->fmcase)) { 1584 out(O_ALTFP|O_NONL, "Revised diagnosis for case %s: ", 1585 fmd_case_uuid(fmep->hdl, fmep->fmcase)); 1586 for (ep = fmep->suspects; ep; ep = ep->suspects) { 1587 out(O_ALTFP|O_NONL, " "); 1588 itree_pevent_brief(O_ALTFP|O_NONL, ep); 1589 } 1590 out(O_ALTFP, NULL); 1591 return; 1592 } 1593 1594 /* 1595 * If we're auto-closing upsets, we don't want to include them 1596 * in any produced suspect lists or certainty accounting. 1597 */ 1598 if (Autoclose != NULL) 1599 if (strcmp(Autoclose, "true") == 0 || 1600 strcmp(Autoclose, "all") == 0 || 1601 strcmp(Autoclose, "upsets") == 0) 1602 no_upsets = B_TRUE; 1603 1604 trim_suspects(fmep, no_upsets, &srl, &erl); 1605 1606 /* 1607 * If the resulting suspect list has no members, we're 1608 * done. Returning here will simply close the case. 1609 */ 1610 if (fmep->nsuspects == 0) { 1611 out(O_ALTFP, 1612 "[FME%d, case %s (all suspects are upsets)]", 1613 fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase)); 1614 FREE(srl); 1615 restore_suspects(fmep); 1616 return; 1617 } 1618 1619 /* 1620 * If the suspect list is all faults, then for a given fault, 1621 * say X of N, X's certainty is computed via: 1622 * 1623 * fitrate(X) / (fitrate(1) + ... + fitrate(N)) * 100 1624 * 1625 * If none of the suspects are faults, and there are N suspects, 1626 * the certainty of a given suspect is 100/N. 1627 * 1628 * If there are are a mixture of faults and other problems in 1629 * the suspect list, we take an average of the faults' 1630 * FITrates and treat this average as the FITrate for any 1631 * non-faults. The fitrate of any given suspect is then 1632 * computed per the first formula above. 
1633 */ 1634 if (fmep->nonfault == fmep->nsuspects) { 1635 /* NO faults in the suspect list */ 1636 cert = percentof(1, fmep->nsuspects); 1637 } else { 1638 /* sum the fitrates */ 1639 frs = alloca(fmep->nsuspects * sizeof (uint_t)); 1640 fridx = frcnt = frsum = 0; 1641 1642 for (rp = srl; rp <= erl; rp++) { 1643 struct node *n; 1644 1645 if (rp->suspect == NULL) 1646 continue; 1647 if (!is_fault(rp->suspect->t)) { 1648 frs[fridx++] = 0; 1649 continue; 1650 } 1651 n = eventprop_lookup(rp->suspect, L_FITrate); 1652 if (node2uint(n, &fr) != 0) { 1653 out(O_DEBUG|O_NONL, "event "); 1654 ipath_print(O_DEBUG|O_NONL, 1655 ep->enode->u.event.ename->u.name.s, 1656 ep->ipp); 1657 out(O_DEBUG, " has no FITrate (using 1)"); 1658 fr = 1; 1659 } else if (fr == 0) { 1660 out(O_DEBUG|O_NONL, "event "); 1661 ipath_print(O_DEBUG|O_NONL, 1662 ep->enode->u.event.ename->u.name.s, 1663 ep->ipp); 1664 out(O_DEBUG, " has zero FITrate (using 1)"); 1665 fr = 1; 1666 } 1667 1668 frs[fridx++] = fr; 1669 frsum += fr; 1670 frcnt++; 1671 } 1672 fravg = avg(frsum, frcnt); 1673 for (fridx = 0; fridx < fmep->nsuspects; fridx++) 1674 if (frs[fridx] == 0) { 1675 frs[fridx] = fravg; 1676 frsum += fravg; 1677 } 1678 } 1679 1680 /* Add them in reverse order of our sort, as fmd reverses order */ 1681 for (rp = erl; rp >= srl; rp--) { 1682 if (rp->suspect == NULL) 1683 continue; 1684 if (fmep->nonfault != fmep->nsuspects) 1685 cert = percentof(frs[--fridx], frsum); 1686 fault = fmd_nvl_create_fault(fmep->hdl, 1687 rp->suspect->enode->u.event.ename->u.name.s, 1688 cert, 1689 rp->asru, 1690 rp->fru, 1691 rp->rsrc); 1692 if (fault == NULL) 1693 out(O_DIE, "fault creation failed"); 1694 fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault); 1695 rp->suspect->fault = fault; 1696 rslfree(rp); 1697 } 1698 fmd_case_solve(fmep->hdl, fmep->fmcase); 1699 out(O_ALTFP, "[solving FME%d, case %s]", fmep->id, 1700 fmd_case_uuid(fmep->hdl, fmep->fmcase)); 1701 1702 if (Autoconvict) { 1703 for (rp = srl; rp <= erl; rp++) { 
1704 if (rp->suspect == NULL) 1705 continue; 1706 fmd_case_convict(fmep->hdl, 1707 fmep->fmcase, rp->suspect->fault); 1708 } 1709 out(O_ALTFP, "[convicting FME%d, case %s]", fmep->id, 1710 fmd_case_uuid(fmep->hdl, fmep->fmcase)); 1711 } 1712 1713 /* 1714 * revert to the original suspect list 1715 */ 1716 FREE(srl); 1717 restore_suspects(fmep); 1718 } 1719 1720 static void 1721 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep) 1722 { 1723 struct case_list *newcase; 1724 nvlist_t *defect; 1725 1726 out(O_ALTFP, 1727 "[undiagnosable ereport received, " 1728 "creating and closing a new case (%s)]", 1729 Undiag_reason ? Undiag_reason : "reason not provided"); 1730 1731 newcase = MALLOC(sizeof (struct case_list)); 1732 newcase->next = NULL; 1733 1734 newcase->fmcase = fmd_case_open(hdl, NULL); 1735 if (Undiagablecaselist != NULL) 1736 newcase->next = Undiagablecaselist; 1737 Undiagablecaselist = newcase; 1738 1739 if (ffep != NULL) 1740 fmd_case_add_ereport(hdl, newcase->fmcase, ffep); 1741 1742 defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100, 1743 NULL, NULL, NULL); 1744 if (Undiag_reason != NULL) 1745 (void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason); 1746 fmd_case_add_suspect(hdl, newcase->fmcase, defect); 1747 1748 fmd_case_solve(hdl, newcase->fmcase); 1749 fmd_case_close(hdl, newcase->fmcase); 1750 } 1751 1752 static void 1753 fme_undiagnosable(struct fme *f) 1754 { 1755 nvlist_t *defect; 1756 1757 out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]", 1758 f->id, fmd_case_uuid(f->hdl, f->fmcase), 1759 Undiag_reason ? 
Undiag_reason : "undiagnosable"); 1760 1761 defect = fmd_nvl_create_fault(f->hdl, UNDIAGNOSABLE_DEFECT, 100, 1762 NULL, NULL, NULL); 1763 if (Undiag_reason != NULL) 1764 (void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason); 1765 fmd_case_add_suspect(f->hdl, f->fmcase, defect); 1766 fmd_case_solve(f->hdl, f->fmcase); 1767 destroy_fme_bufs(f); 1768 fmd_case_close(f->hdl, f->fmcase); 1769 } 1770 1771 /* 1772 * fme_close_case 1773 * 1774 * Find the requested case amongst our fmes and close it. Free up 1775 * the related fme. 1776 */ 1777 void 1778 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase) 1779 { 1780 struct case_list *ucasep, *prevcasep = NULL; 1781 struct fme *prev = NULL; 1782 struct fme *fmep; 1783 1784 for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) { 1785 if (fmcase != ucasep->fmcase) { 1786 prevcasep = ucasep; 1787 continue; 1788 } 1789 1790 if (prevcasep == NULL) 1791 Undiagablecaselist = Undiagablecaselist->next; 1792 else 1793 prevcasep->next = ucasep->next; 1794 1795 FREE(ucasep); 1796 return; 1797 } 1798 1799 for (fmep = FMElist; fmep; fmep = fmep->next) { 1800 if (fmep->hdl == hdl && fmep->fmcase == fmcase) 1801 break; 1802 prev = fmep; 1803 } 1804 1805 if (fmep == NULL) { 1806 out(O_WARN, "Eft asked to close unrecognized case [%s].", 1807 fmd_case_uuid(hdl, fmcase)); 1808 return; 1809 } 1810 1811 if (EFMElist == fmep) 1812 EFMElist = prev; 1813 1814 if (prev == NULL) 1815 FMElist = FMElist->next; 1816 else 1817 prev->next = fmep->next; 1818 1819 fmep->next = NULL; 1820 1821 /* Get rid of any timer this fme has set */ 1822 if (fmep->wull != 0) 1823 fmd_timer_remove(fmep->hdl, fmep->timer); 1824 1825 if (ClosedFMEs == NULL) { 1826 ClosedFMEs = fmep; 1827 } else { 1828 fmep->next = ClosedFMEs; 1829 ClosedFMEs = fmep; 1830 } 1831 1832 Open_fme_count--; 1833 1834 /* See if we can close the overflow FME */ 1835 if (Open_fme_count <= Max_fme) { 1836 for (fmep = FMElist; fmep; fmep = fmep->next) { 1837 if (fmep->overflow && 
!(fmd_case_closed(fmep->hdl, 1838 fmep->fmcase))) 1839 break; 1840 } 1841 1842 if (fmep != NULL) 1843 fmd_case_close(fmep->hdl, fmep->fmcase); 1844 } 1845 } 1846 1847 /* 1848 * fme_set_timer() 1849 * If the time we need to wait for the given FME is less than the 1850 * current timer, kick that old timer out and establish a new one. 1851 */ 1852 static void 1853 fme_set_timer(struct fme *fmep, unsigned long long wull) 1854 { 1855 out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait "); 1856 ptree_timeval(O_ALTFP|O_VERB, &wull); 1857 1858 if (wull <= fmep->pull) { 1859 out(O_ALTFP|O_VERB|O_NONL, "already have waited at least "); 1860 ptree_timeval(O_ALTFP|O_VERB, &fmep->pull); 1861 out(O_ALTFP|O_VERB, NULL); 1862 /* we've waited at least wull already, don't need timer */ 1863 return; 1864 } 1865 1866 out(O_ALTFP|O_VERB|O_NONL, " currently "); 1867 if (fmep->wull != 0) { 1868 out(O_ALTFP|O_VERB|O_NONL, "waiting "); 1869 ptree_timeval(O_ALTFP|O_VERB, &fmep->wull); 1870 out(O_ALTFP|O_VERB, NULL); 1871 } else { 1872 out(O_ALTFP|O_VERB|O_NONL, "not waiting"); 1873 out(O_ALTFP|O_VERB, NULL); 1874 } 1875 1876 if (fmep->wull != 0) 1877 if (wull >= fmep->wull) 1878 /* New timer would fire later than established timer */ 1879 return; 1880 1881 if (fmep->wull != 0) 1882 fmd_timer_remove(fmep->hdl, fmep->timer); 1883 1884 fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep, 1885 fmep->e0r, wull); 1886 out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer); 1887 fmep->wull = wull; 1888 } 1889 1890 void 1891 fme_timer_fired(struct fme *fmep, id_t tid) 1892 { 1893 struct fme *ffmep = NULL; 1894 1895 for (ffmep = FMElist; ffmep; ffmep = ffmep->next) 1896 if (ffmep == fmep) 1897 break; 1898 1899 if (ffmep == NULL) { 1900 out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.", 1901 (void *)fmep); 1902 return; 1903 } 1904 1905 if (tid != fmep->htid) { 1906 /* 1907 * normal timer (not the hesitation timer 1908 */ 1909 fmep->pull = fmep->wull; 1910 fmep->wull = 0; 1911 
fmd_buf_write(fmep->hdl, fmep->fmcase, 1912 WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull)); 1913 } else { 1914 fmep->hesitated = 1; 1915 } 1916 fme_eval(fmep, NULL); 1917 } 1918 1919 /* 1920 * Preserve the fme's suspect list in its psuspects list, NULLing the 1921 * suspects list in the meantime. 1922 */ 1923 static void 1924 save_suspects(struct fme *fmep) 1925 { 1926 struct event *ep; 1927 struct event *nextep; 1928 1929 /* zero out the previous suspect list */ 1930 for (ep = fmep->psuspects; ep; ep = nextep) { 1931 nextep = ep->psuspects; 1932 ep->psuspects = NULL; 1933 } 1934 fmep->psuspects = NULL; 1935 1936 /* zero out the suspect list, copying it to previous suspect list */ 1937 fmep->psuspects = fmep->suspects; 1938 for (ep = fmep->suspects; ep; ep = nextep) { 1939 nextep = ep->suspects; 1940 ep->psuspects = ep->suspects; 1941 ep->suspects = NULL; 1942 ep->is_suspect = 0; 1943 } 1944 fmep->suspects = NULL; 1945 fmep->nsuspects = 0; 1946 fmep->nonfault = 0; 1947 } 1948 1949 /* 1950 * Retrieve the fme's suspect list from its psuspects list. 
1951 */ 1952 static void 1953 restore_suspects(struct fme *fmep) 1954 { 1955 struct event *ep; 1956 struct event *nextep; 1957 1958 fmep->nsuspects = fmep->nonfault = 0; 1959 fmep->suspects = fmep->psuspects; 1960 for (ep = fmep->psuspects; ep; ep = nextep) { 1961 fmep->nsuspects++; 1962 if (!is_fault(ep->t)) 1963 fmep->nonfault++; 1964 nextep = ep->psuspects; 1965 ep->suspects = ep->psuspects; 1966 } 1967 } 1968 1969 /* 1970 * this is what we use to call the Emrys prototype code instead of main() 1971 */ 1972 static void 1973 fme_eval(struct fme *fmep, fmd_event_t *ffep) 1974 { 1975 struct event *ep; 1976 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 1977 1978 save_suspects(fmep); 1979 1980 out(O_ALTFP|O_VERB, "Evaluate FME %d", fmep->id); 1981 indent_set(" "); 1982 1983 initialize_cycles(fmep); 1984 fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay, NULL); 1985 1986 out(O_ALTFP|O_VERB|O_NONL, "FME%d state: %s, suspect list:", fmep->id, 1987 fme_state2str(fmep->state)); 1988 for (ep = fmep->suspects; ep; ep = ep->suspects) { 1989 out(O_ALTFP|O_VERB|O_NONL, " "); 1990 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 1991 } 1992 out(O_ALTFP|O_VERB, NULL); 1993 1994 if (fmep->posted_suspects) { 1995 /* 1996 * this FME has already posted a diagnosis, so see if 1997 * the event changed the diagnosis and print a warning 1998 * if it did. 1999 * 2000 */ 2001 if (suspects_changed(fmep)) { 2002 print_suspects(SLCHANGED, fmep); 2003 publish_suspects(fmep); 2004 } 2005 } else { 2006 switch (fmep->state) { 2007 case FME_CREDIBLE: 2008 /* 2009 * if the suspect list contains any upsets, we 2010 * turn off the hesitation logic (by setting 2011 * the hesitate flag which normally indicates 2012 * we've already done the hesitate logic). 2013 * this is done because hesitating with upsets 2014 * causes us to explain away additional soft errors 2015 * while the upset FME stays open. 
2016 */ 2017 if (fmep->hesitated == 0) { 2018 struct event *s; 2019 2020 for (s = fmep->suspects; s; s = s->suspects) { 2021 if (s->t == N_UPSET) { 2022 fmep->hesitated = 1; 2023 break; 2024 } 2025 } 2026 } 2027 2028 if (Hesitate && 2029 fmep->suspects != NULL && 2030 fmep->suspects->suspects != NULL && 2031 fmep->hesitated == 0) { 2032 /* 2033 * about to publish multi-entry suspect list, 2034 * set the hesitation timer if not already set. 2035 */ 2036 if (fmep->htid == 0) { 2037 out(O_ALTFP|O_NONL, 2038 "[hesitate FME%d, case %s ", 2039 fmep->id, 2040 fmd_case_uuid(fmep->hdl, 2041 fmep->fmcase)); 2042 ptree_timeval(O_ALTFP|O_NONL, 2043 (unsigned long long *)&Hesitate); 2044 out(O_ALTFP, "]"); 2045 fme_set_timer(fmep, my_delay); 2046 fmep->htid = 2047 fmd_timer_install(fmep->hdl, 2048 (void *)fmep, NULL, Hesitate); 2049 } else { 2050 out(O_ALTFP, 2051 "[still hesitating FME%d, case %s]", 2052 fmep->id, 2053 fmd_case_uuid(fmep->hdl, 2054 fmep->fmcase)); 2055 } 2056 } else { 2057 print_suspects(SLNEW, fmep); 2058 (void) upsets_eval(fmep, ffep); 2059 publish_suspects(fmep); 2060 fmep->posted_suspects = 1; 2061 fmd_buf_write(fmep->hdl, fmep->fmcase, 2062 WOBUF_POSTD, 2063 (void *)&fmep->posted_suspects, 2064 sizeof (fmep->posted_suspects)); 2065 } 2066 break; 2067 2068 case FME_WAIT: 2069 /* 2070 * singleton suspect list implies 2071 * no point in waiting 2072 */ 2073 if (fmep->suspects && 2074 fmep->suspects->suspects == NULL) { 2075 print_suspects(SLNEW, fmep); 2076 (void) upsets_eval(fmep, ffep); 2077 publish_suspects(fmep); 2078 fmep->posted_suspects = 1; 2079 fmd_buf_write(fmep->hdl, fmep->fmcase, 2080 WOBUF_POSTD, 2081 (void *)&fmep->posted_suspects, 2082 sizeof (fmep->posted_suspects)); 2083 fmep->state = FME_CREDIBLE; 2084 } else { 2085 ASSERT(my_delay > fmep->ull); 2086 fme_set_timer(fmep, my_delay); 2087 print_suspects(SLWAIT, fmep); 2088 } 2089 break; 2090 2091 case FME_DISPROVED: 2092 print_suspects(SLDISPROVED, fmep); 2093 Undiag_reason = UD_UNSOLVD; 2094 
fme_undiagnosable(fmep); 2095 break; 2096 } 2097 } 2098 2099 if (fmep->posted_suspects == 1 && Autoclose != NULL) { 2100 int doclose = 0; 2101 2102 if (strcmp(Autoclose, "true") == 0 || 2103 strcmp(Autoclose, "all") == 0) 2104 doclose = 1; 2105 2106 if (strcmp(Autoclose, "upsets") == 0) { 2107 doclose = 1; 2108 for (ep = fmep->suspects; ep; ep = ep->suspects) { 2109 if (ep->t != N_UPSET) { 2110 doclose = 0; 2111 break; 2112 } 2113 } 2114 } 2115 2116 if (doclose) { 2117 out(O_ALTFP, "[closing FME%d, case %s (autoclose)]", 2118 fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase)); 2119 2120 destroy_fme_bufs(fmep); 2121 fmd_case_close(fmep->hdl, fmep->fmcase); 2122 } 2123 } 2124 } 2125 2126 /* 2127 * below here is the code derived from the Emrys prototype 2128 */ 2129 2130 static void indent(void); 2131 static int triggered(struct fme *fmep, struct event *ep, int mark); 2132 static void mark_arrows(struct fme *fmep, struct event *ep, int mark); 2133 static enum fme_state effects_test(struct fme *fmep, 2134 struct event *fault_event); 2135 static enum fme_state requirements_test(struct fme *fmep, struct event *ep, 2136 unsigned long long at_latest_by, unsigned long long *pdelay, 2137 struct arrow *arrowp); 2138 static enum fme_state causes_test(struct fme *fmep, struct event *ep, 2139 unsigned long long at_latest_by, unsigned long long *pdelay); 2140 2141 static int 2142 triggered(struct fme *fmep, struct event *ep, int mark) 2143 { 2144 struct bubble *bp; 2145 struct arrowlist *ap; 2146 int count = 0; 2147 2148 stats_counter_bump(fmep->Tcallcount); 2149 for (bp = itree_next_bubble(ep, NULL); bp; 2150 bp = itree_next_bubble(ep, bp)) { 2151 if (bp->t != B_TO) 2152 continue; 2153 for (ap = itree_next_arrow(bp, NULL); ap; 2154 ap = itree_next_arrow(bp, ap)) { 2155 /* check count of marks against K in the bubble */ 2156 if (ap->arrowp->tail->mark == mark && 2157 ++count >= bp->nork) 2158 return (1); 2159 } 2160 } 2161 return (0); 2162 } 2163 2164 static void 2165 
mark_arrows(struct fme *fmep, struct event *ep, int mark) 2166 { 2167 struct bubble *bp; 2168 struct arrowlist *ap; 2169 2170 for (bp = itree_next_bubble(ep, NULL); bp; 2171 bp = itree_next_bubble(ep, bp)) { 2172 if (bp->t != B_FROM) 2173 continue; 2174 if (bp->mark != mark) { 2175 stats_counter_bump(fmep->Marrowcount); 2176 bp->mark = mark; 2177 for (ap = itree_next_arrow(bp, NULL); ap; 2178 ap = itree_next_arrow(bp, ap)) { 2179 struct constraintlist *ctp; 2180 struct evalue value; 2181 int do_not_follow = 0; 2182 /* 2183 * see if false constraint prevents us 2184 * from traversing this arrow, but don't 2185 * bother if the event is an ereport we 2186 * haven't seen 2187 */ 2188 if (ap->arrowp->head->myevent->t != N_EREPORT || 2189 ap->arrowp->head->myevent->count != 0) { 2190 platform_set_payloadnvp( 2191 ap->arrowp->head->myevent->nvp); 2192 for (ctp = ap->arrowp->constraints; 2193 ctp != NULL; ctp = ctp->next) { 2194 if (eval_expr(ctp->cnode, 2195 NULL, NULL, 2196 &fmep->globals, 2197 fmep->cfgdata->cooked, 2198 ap->arrowp, 0, 2199 &value) == 0 || 2200 value.t == UNDEFINED || 2201 value.v == 0) { 2202 do_not_follow = 1; 2203 break; 2204 } 2205 } 2206 platform_set_payloadnvp(NULL); 2207 } 2208 2209 if (do_not_follow) { 2210 indent(); 2211 out(O_ALTFP|O_VERB|O_NONL, 2212 " False arrow to "); 2213 itree_pevent_brief( 2214 O_ALTFP|O_VERB|O_NONL, 2215 ap->arrowp->head->myevent); 2216 out(O_ALTFP|O_VERB|O_NONL, " "); 2217 ptree(O_ALTFP|O_VERB|O_NONL, 2218 ctp->cnode, 1, 0); 2219 out(O_ALTFP|O_VERB, NULL); 2220 continue; 2221 } 2222 2223 if (triggered(fmep, ap->arrowp->head->myevent, 2224 mark)) 2225 mark_arrows(fmep, 2226 ap->arrowp->head->myevent, mark); 2227 } 2228 } 2229 } 2230 } 2231 2232 static enum fme_state 2233 effects_test(struct fme *fmep, struct event *fault_event) 2234 { 2235 struct event *error_event; 2236 enum fme_state return_value = FME_CREDIBLE; 2237 2238 stats_counter_bump(fmep->Ecallcount); 2239 indent_push(" E"); 2240 indent(); 2241 
out(O_ALTFP|O_VERB|O_NONL, "->"); 2242 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event); 2243 out(O_ALTFP|O_VERB, NULL); 2244 2245 mark_arrows(fmep, fault_event, 1); 2246 for (error_event = fmep->observations; 2247 error_event; error_event = error_event->observations) { 2248 indent(); 2249 out(O_ALTFP|O_VERB|O_NONL, " "); 2250 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event); 2251 if (!triggered(fmep, error_event, 1)) { 2252 return_value = FME_DISPROVED; 2253 out(O_ALTFP|O_VERB, " NOT triggered"); 2254 break; 2255 } else { 2256 out(O_ALTFP|O_VERB, " triggered"); 2257 } 2258 } 2259 mark_arrows(fmep, fault_event, 0); 2260 2261 indent(); 2262 out(O_ALTFP|O_VERB|O_NONL, "<-%s ", fme_state2str(return_value)); 2263 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event); 2264 out(O_ALTFP|O_VERB, NULL); 2265 indent_pop(); 2266 return (return_value); 2267 } 2268 2269 static enum fme_state 2270 requirements_test(struct fme *fmep, struct event *ep, 2271 unsigned long long at_latest_by, unsigned long long *pdelay, 2272 struct arrow *arrowp) 2273 { 2274 int waiting_events; 2275 int credible_events; 2276 enum fme_state return_value = FME_CREDIBLE; 2277 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 2278 unsigned long long arrow_delay; 2279 unsigned long long my_delay; 2280 struct event *ep2; 2281 struct bubble *bp; 2282 struct arrowlist *ap; 2283 2284 stats_counter_bump(fmep->Rcallcount); 2285 indent_push(" R"); 2286 indent(); 2287 out(O_ALTFP|O_VERB|O_NONL, "->"); 2288 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2289 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: "); 2290 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 2291 out(O_ALTFP|O_VERB, NULL); 2292 2293 if (ep->t == N_EREPORT) { 2294 if (ep->count == 0) { 2295 if (fmep->pull >= at_latest_by) { 2296 return_value = FME_DISPROVED; 2297 } else { 2298 *pdelay = at_latest_by; 2299 return_value = FME_WAIT; 2300 } 2301 } else if (arrowp != NULL) { 2302 /* 2303 * evaluate constraints only for current 
observation 2304 */ 2305 struct constraintlist *ctp; 2306 struct evalue value; 2307 2308 platform_set_payloadnvp(ep->nvp); 2309 for (ctp = arrowp->constraints; ctp != NULL; 2310 ctp = ctp->next) { 2311 if (eval_expr(ctp->cnode, NULL, NULL, 2312 &fmep->globals, fmep->cfgdata->cooked, 2313 arrowp, 0, &value) == 0 || 2314 value.t == UNDEFINED || value.v == 0) { 2315 indent(); 2316 out(O_ALTFP|O_VERB|O_NONL, 2317 " False constraint "); 2318 out(O_ALTFP|O_VERB|O_NONL, " "); 2319 ptree(O_ALTFP|O_VERB|O_NONL, 2320 ctp->cnode, 1, 0); 2321 out(O_ALTFP|O_VERB, NULL); 2322 return_value = FME_DISPROVED; 2323 break; 2324 } 2325 } 2326 platform_set_payloadnvp(NULL); 2327 } 2328 2329 indent(); 2330 switch (return_value) { 2331 case FME_CREDIBLE: 2332 out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE "); 2333 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2334 break; 2335 case FME_DISPROVED: 2336 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 2337 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2338 break; 2339 case FME_WAIT: 2340 out(O_ALTFP|O_VERB|O_NONL, "<-WAIT "); 2341 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2342 out(O_ALTFP|O_VERB|O_NONL, " to "); 2343 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 2344 break; 2345 default: 2346 out(O_DIE, "requirements_test: unexpected fme_state"); 2347 break; 2348 } 2349 out(O_ALTFP|O_VERB, NULL); 2350 indent_pop(); 2351 2352 return (return_value); 2353 } 2354 2355 /* this event is not a report, descend the tree */ 2356 for (bp = itree_next_bubble(ep, NULL); bp; 2357 bp = itree_next_bubble(ep, bp)) { 2358 if (bp->t != B_FROM) 2359 continue; 2360 if (bp->mark == 0) { 2361 int n = bp->nork; 2362 2363 bp->mark = 1; 2364 credible_events = 0; 2365 waiting_events = 0; 2366 arrow_delay = TIMEVAL_EVENTUALLY; 2367 /* 2368 * n is -1 for 'A' so adjust it. 2369 * XXX just count up the arrows for now. 
2370 */ 2371 if (n < 0) { 2372 n = 0; 2373 for (ap = itree_next_arrow(bp, NULL); ap; 2374 ap = itree_next_arrow(bp, ap)) 2375 n++; 2376 indent(); 2377 out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n); 2378 } else { 2379 indent(); 2380 out(O_ALTFP|O_VERB, " Bubble N=%d", n); 2381 } 2382 2383 for (ap = itree_next_arrow(bp, NULL); ap; 2384 ap = itree_next_arrow(bp, ap)) { 2385 ep2 = ap->arrowp->head->myevent; 2386 if (n <= credible_events) 2387 break; 2388 2389 if (triggered(fmep, ep2, 1)) 2390 /* XXX adding max timevals! */ 2391 switch (requirements_test(fmep, ep2, 2392 at_latest_by + ap->arrowp->maxdelay, 2393 &my_delay, ap->arrowp)) { 2394 case FME_CREDIBLE: 2395 credible_events++; 2396 break; 2397 case FME_DISPROVED: 2398 break; 2399 case FME_WAIT: 2400 if (my_delay < arrow_delay) 2401 arrow_delay = my_delay; 2402 waiting_events++; 2403 break; 2404 default: 2405 out(O_DIE, 2406 "Bug in requirements_test."); 2407 } 2408 else 2409 credible_events++; 2410 } 2411 indent(); 2412 out(O_ALTFP|O_VERB, " Credible: %d Waiting %d", 2413 credible_events, waiting_events); 2414 if (credible_events + waiting_events < n) { 2415 /* Can never meet requirements */ 2416 indent(); 2417 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 2418 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2419 out(O_ALTFP|O_VERB, NULL); 2420 indent_pop(); 2421 return (FME_DISPROVED); 2422 } 2423 if (credible_events < n) { /* will have to wait */ 2424 /* wait time is shortest known */ 2425 if (arrow_delay < overall_delay) 2426 overall_delay = arrow_delay; 2427 return_value = FME_WAIT; 2428 } 2429 } else { 2430 indent(); 2431 out(O_ALTFP|O_VERB|O_NONL, " Mark was set: "); 2432 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2433 out(O_ALTFP|O_VERB|O_NONL, " to"); 2434 for (ap = itree_next_arrow(bp, NULL); ap; 2435 ap = itree_next_arrow(bp, ap)) { 2436 out(O_ALTFP|O_VERB|O_NONL, " "); 2437 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, 2438 ap->arrowp->head->myevent); 2439 } 2440 out(O_ALTFP|O_VERB, NULL); 2441 } 2442 } 
2443 2444 /* 2445 * evaluate constraints for ctlist, which is the list of 2446 * constraints for the arrow pointing into this node of the tree 2447 */ 2448 if (return_value == FME_CREDIBLE && arrowp != NULL) { 2449 struct constraintlist *ctp; 2450 struct evalue value; 2451 2452 platform_set_payloadnvp(ep->nvp); 2453 for (ctp = arrowp->constraints; ctp != NULL; 2454 ctp = ctp->next) { 2455 if (eval_expr(ctp->cnode, NULL, NULL, &fmep->globals, 2456 fmep->cfgdata->cooked, arrowp, 0, &value) == 0 || 2457 value.t == UNDEFINED || value.v == 0) { 2458 indent(); 2459 out(O_ALTFP|O_VERB|O_NONL, 2460 " False constraint "); 2461 out(O_ALTFP|O_VERB|O_NONL, " "); 2462 ptree(O_ALTFP|O_VERB|O_NONL, 2463 ctp->cnode, 1, 0); 2464 out(O_ALTFP|O_VERB, NULL); 2465 return_value = FME_DISPROVED; 2466 break; 2467 } 2468 } 2469 platform_set_payloadnvp(NULL); 2470 } 2471 2472 if (return_value == FME_WAIT) 2473 *pdelay = overall_delay; 2474 indent(); 2475 out(O_ALTFP|O_VERB|O_NONL, "<-%s ", fme_state2str(return_value)); 2476 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2477 out(O_ALTFP|O_VERB, NULL); 2478 indent_pop(); 2479 return (return_value); 2480 } 2481 2482 static enum fme_state 2483 causes_test(struct fme *fmep, struct event *ep, 2484 unsigned long long at_latest_by, unsigned long long *pdelay) 2485 { 2486 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 2487 unsigned long long my_delay; 2488 int credible_results = 0; 2489 int waiting_results = 0; 2490 enum fme_state fstate; 2491 struct event *tail_event; 2492 struct bubble *bp; 2493 struct arrowlist *ap; 2494 int k = 1; 2495 2496 stats_counter_bump(fmep->Ccallcount); 2497 indent_push(" C"); 2498 indent(); 2499 out(O_ALTFP|O_VERB|O_NONL, "->"); 2500 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2501 out(O_ALTFP|O_VERB, NULL); 2502 2503 for (bp = itree_next_bubble(ep, NULL); bp; 2504 bp = itree_next_bubble(ep, bp)) { 2505 if (bp->t != B_TO) 2506 continue; 2507 k = bp->nork; /* remember the K value */ 2508 for (ap = 
itree_next_arrow(bp, NULL); ap; 2509 ap = itree_next_arrow(bp, ap)) { 2510 struct constraintlist *ctp; 2511 struct evalue value; 2512 int do_not_follow = 0; 2513 /* 2514 * see if false constraint prevents us 2515 * from traversing this arrow 2516 */ 2517 platform_set_payloadnvp(ep->nvp); 2518 for (ctp = ap->arrowp->constraints; 2519 ctp != NULL; ctp = ctp->next) { 2520 if (eval_expr(ctp->cnode, NULL, NULL, 2521 &fmep->globals, 2522 fmep->cfgdata->cooked, 2523 ap->arrowp, 0, 2524 &value) == 0 || 2525 value.t == UNDEFINED || 2526 value.v == 0) { 2527 do_not_follow = 1; 2528 break; 2529 } 2530 } 2531 platform_set_payloadnvp(NULL); 2532 if (do_not_follow) { 2533 indent(); 2534 out(O_ALTFP|O_VERB|O_NONL, 2535 " False arrow from "); 2536 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, 2537 ap->arrowp->tail->myevent); 2538 out(O_ALTFP|O_VERB|O_NONL, " "); 2539 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0); 2540 out(O_ALTFP|O_VERB, NULL); 2541 continue; 2542 } 2543 2544 if (ap->arrowp->causes_tested++ > 0) { 2545 /* 2546 * get to this point if this is not the 2547 * first time we're going through this 2548 * arrow in the causes test. consider this 2549 * branch to be credible and let the 2550 * credible/noncredible outcome depend on 2551 * the other branches in this cycle. 2552 */ 2553 fstate = FME_CREDIBLE; 2554 } else { 2555 /* 2556 * get to this point if this is the first 2557 * time we're going through this arrow. 
2558 */ 2559 tail_event = ap->arrowp->tail->myevent; 2560 fstate = hypothesise(fmep, tail_event, 2561 at_latest_by, 2562 &my_delay, ap->arrowp); 2563 } 2564 2565 switch (fstate) { 2566 case FME_WAIT: 2567 if (my_delay < overall_delay) 2568 overall_delay = my_delay; 2569 waiting_results++; 2570 break; 2571 case FME_CREDIBLE: 2572 credible_results++; 2573 break; 2574 case FME_DISPROVED: 2575 break; 2576 default: 2577 out(O_DIE, "Bug in causes_test"); 2578 } 2579 2580 ap->arrowp->causes_tested--; 2581 ASSERT(ap->arrowp->causes_tested >= 0); 2582 } 2583 } 2584 /* compare against K */ 2585 if (credible_results + waiting_results < k) { 2586 indent(); 2587 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 2588 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2589 out(O_ALTFP|O_VERB, NULL); 2590 indent_pop(); 2591 return (FME_DISPROVED); 2592 } 2593 if (waiting_results != 0) { 2594 *pdelay = overall_delay; 2595 indent(); 2596 out(O_ALTFP|O_VERB|O_NONL, "<-WAIT "); 2597 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2598 out(O_ALTFP|O_VERB|O_NONL, " to "); 2599 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 2600 out(O_ALTFP|O_VERB, NULL); 2601 indent_pop(); 2602 return (FME_WAIT); 2603 } 2604 indent(); 2605 out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE "); 2606 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2607 out(O_ALTFP|O_VERB, NULL); 2608 indent_pop(); 2609 return (FME_CREDIBLE); 2610 } 2611 2612 static enum fme_state 2613 hypothesise(struct fme *fmep, struct event *ep, 2614 unsigned long long at_latest_by, unsigned long long *pdelay, 2615 struct arrow *arrowp) 2616 { 2617 enum fme_state rtr, otr; 2618 unsigned long long my_delay; 2619 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 2620 2621 stats_counter_bump(fmep->Hcallcount); 2622 indent_push(" H"); 2623 indent(); 2624 out(O_ALTFP|O_VERB|O_NONL, "->"); 2625 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2626 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: "); 2627 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 2628 
out(O_ALTFP|O_VERB, NULL); 2629 2630 rtr = requirements_test(fmep, ep, at_latest_by, &my_delay, arrowp); 2631 mark_arrows(fmep, ep, 0); /* clean up after requirements test */ 2632 if ((rtr == FME_WAIT) && (my_delay < overall_delay)) 2633 overall_delay = my_delay; 2634 if (rtr != FME_DISPROVED) { 2635 if (is_problem(ep->t)) { 2636 otr = effects_test(fmep, ep); 2637 if (otr != FME_DISPROVED) { 2638 if (fmep->peek == 0 && ep->is_suspect++ == 0) { 2639 ep->suspects = fmep->suspects; 2640 fmep->suspects = ep; 2641 fmep->nsuspects++; 2642 if (!is_fault(ep->t)) 2643 fmep->nonfault++; 2644 } 2645 } 2646 } else 2647 otr = causes_test(fmep, ep, at_latest_by, &my_delay); 2648 if ((otr == FME_WAIT) && (my_delay < overall_delay)) 2649 overall_delay = my_delay; 2650 if ((otr != FME_DISPROVED) && 2651 ((rtr == FME_WAIT) || (otr == FME_WAIT))) 2652 *pdelay = overall_delay; 2653 } 2654 if (rtr == FME_DISPROVED) { 2655 indent(); 2656 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 2657 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2658 out(O_ALTFP|O_VERB, " (doesn't meet requirements)"); 2659 indent_pop(); 2660 return (FME_DISPROVED); 2661 } 2662 if ((otr == FME_DISPROVED) && is_problem(ep->t)) { 2663 indent(); 2664 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 2665 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2666 out(O_ALTFP|O_VERB, " (doesn't explain all reports)"); 2667 indent_pop(); 2668 return (FME_DISPROVED); 2669 } 2670 if (otr == FME_DISPROVED) { 2671 indent(); 2672 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 2673 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2674 out(O_ALTFP|O_VERB, " (causes are not credible)"); 2675 indent_pop(); 2676 return (FME_DISPROVED); 2677 } 2678 if ((rtr == FME_WAIT) || (otr == FME_WAIT)) { 2679 indent(); 2680 out(O_ALTFP|O_VERB|O_NONL, "<-WAIT "); 2681 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2682 out(O_ALTFP|O_VERB|O_NONL, " to "); 2683 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay); 2684 out(O_ALTFP|O_VERB, NULL); 2685 indent_pop(); 
2686 return (FME_WAIT); 2687 } 2688 indent(); 2689 out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE "); 2690 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2691 out(O_ALTFP|O_VERB, NULL); 2692 indent_pop(); 2693 return (FME_CREDIBLE); 2694 } 2695