1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * fme.c -- fault management exercise module 27 * 28 * this module provides the simulated fault management exercise. 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <string.h> 36 #include <strings.h> 37 #include <ctype.h> 38 #include <alloca.h> 39 #include <libnvpair.h> 40 #include <sys/fm/protocol.h> 41 #include <fm/fmd_api.h> 42 #include "alloc.h" 43 #include "out.h" 44 #include "stats.h" 45 #include "stable.h" 46 #include "literals.h" 47 #include "lut.h" 48 #include "tree.h" 49 #include "ptree.h" 50 #include "itree.h" 51 #include "ipath.h" 52 #include "fme.h" 53 #include "evnv.h" 54 #include "eval.h" 55 #include "config.h" 56 #include "platform.h" 57 58 /* imported from eft.c... */ 59 extern char *Autoclose; 60 extern int Dupclose; 61 extern hrtime_t Hesitate; 62 extern nv_alloc_t Eft_nv_hdl; 63 extern int Max_fme; 64 extern fmd_hdl_t *Hdl; 65 66 static int Istat_need_save; 67 void istat_save(void); 68 69 /* fme under construction is global so we can free it on module abort */ 70 static struct fme *Nfmep; 71 72 static const char *Undiag_reason; 73 74 static int Nextid = 0; 75 76 static int Open_fme_count = 0; /* Count of open FMEs */ 77 78 /* list of fault management exercises underway */ 79 static struct fme { 80 struct fme *next; /* next exercise */ 81 unsigned long long ull; /* time when fme was created */ 82 int id; /* FME id */ 83 struct cfgdata *cfgdata; /* full configuration data */ 84 struct lut *eventtree; /* propagation tree for this FME */ 85 /* 86 * The initial error report that created this FME is kept in 87 * two forms. e0 points to the instance tree node and is used 88 * by fme_eval() as the starting point for the inference 89 * algorithm. e0r is the event handle FMD passed to us when 90 * the ereport first arrived and is used when setting timers, 91 * which are always relative to the time of this initial 92 * report. 
93 */ 94 struct event *e0; 95 fmd_event_t *e0r; 96 97 id_t timer; /* for setting an fmd time-out */ 98 id_t htid; /* for setting hesitation timer */ 99 100 struct event *ecurrent; /* ereport under consideration */ 101 struct event *suspects; /* current suspect list */ 102 struct event *psuspects; /* previous suspect list */ 103 int nsuspects; /* count of suspects */ 104 int nonfault; /* zero if all suspects T_FAULT */ 105 int posted_suspects; /* true if we've posted a diagnosis */ 106 int hesitated; /* true if we hesitated */ 107 int uniqobs; /* number of unique events observed */ 108 int peek; /* just peeking, don't track suspects */ 109 int overflow; /* true if overflow FME */ 110 enum fme_state { 111 FME_NOTHING = 5000, /* not evaluated yet */ 112 FME_WAIT, /* need to wait for more info */ 113 FME_CREDIBLE, /* suspect list is credible */ 114 FME_DISPROVED, /* no valid suspects found */ 115 FME_DEFERRED /* don't know yet (k-count not met) */ 116 } state; 117 118 unsigned long long pull; /* time passed since created */ 119 unsigned long long wull; /* wait until this time for re-eval */ 120 struct event *observations; /* observation list */ 121 struct lut *globals; /* values of global variables */ 122 /* fmd interfacing */ 123 fmd_hdl_t *hdl; /* handle for talking with fmd */ 124 fmd_case_t *fmcase; /* what fmd 'case' we associate with */ 125 /* stats */ 126 struct stats *Rcount; 127 struct stats *Hcallcount; 128 struct stats *Rcallcount; 129 struct stats *Ccallcount; 130 struct stats *Ecallcount; 131 struct stats *Tcallcount; 132 struct stats *Marrowcount; 133 struct stats *diags; 134 } *FMElist, *EFMElist, *ClosedFMEs; 135 136 static struct case_list { 137 fmd_case_t *fmcase; 138 struct case_list *next; 139 } *Undiagablecaselist; 140 141 static void fme_eval(struct fme *fmep, fmd_event_t *ffep); 142 static enum fme_state hypothesise(struct fme *fmep, struct event *ep, 143 unsigned long long at_latest_by, unsigned long long *pdelay); 144 static struct node *eventprop_lookup(struct event *ep, const char *propname); 145 static struct node *pathstring2epnamenp(char *path); 146 static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep); 147 static void restore_suspects(struct fme *fmep); 148 static void save_suspects(struct fme *fmep); 149 static void destroy_fme(struct fme *f); 150 static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep, 151 const char *eventstring, const struct ipath *ipp, nvlist_t *nvl); 152 static void istat_counter_reset_cb(struct istat_entry *entp, 153 struct stats *statp, const struct ipath *ipp); 154 155 static struct fme * 156 alloc_fme(void) 157 { 158 struct fme *fmep; 159 160 fmep = MALLOC(sizeof (*fmep)); 161 bzero(fmep, sizeof (*fmep)); 162 return (fmep); 163 } 164 165 /* 166 * fme_ready -- called when all initialization of the FME (except for 167 * stats) has completed successfully. Adds the fme to global lists 168 * and establishes its stats. 
169 */ 170 static struct fme * 171 fme_ready(struct fme *fmep) 172 { 173 char nbuf[100]; 174 175 Nfmep = NULL; /* don't need to free this on module abort now */ 176 177 if (EFMElist) { 178 EFMElist->next = fmep; 179 EFMElist = fmep; 180 } else 181 FMElist = EFMElist = fmep; 182 183 (void) sprintf(nbuf, "fme%d.Rcount", fmep->id); 184 fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0); 185 (void) sprintf(nbuf, "fme%d.Hcall", fmep->id); 186 fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1); 187 (void) sprintf(nbuf, "fme%d.Rcall", fmep->id); 188 fmep->Rcallcount = stats_new_counter(nbuf, 189 "calls to requirements_test()", 1); 190 (void) sprintf(nbuf, "fme%d.Ccall", fmep->id); 191 fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1); 192 (void) sprintf(nbuf, "fme%d.Ecall", fmep->id); 193 fmep->Ecallcount = 194 stats_new_counter(nbuf, "calls to effects_test()", 1); 195 (void) sprintf(nbuf, "fme%d.Tcall", fmep->id); 196 fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1); 197 (void) sprintf(nbuf, "fme%d.Marrow", fmep->id); 198 fmep->Marrowcount = stats_new_counter(nbuf, 199 "arrows marked by mark_arrows()", 1); 200 (void) sprintf(nbuf, "fme%d.diags", fmep->id); 201 fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0); 202 203 out(O_ALTFP|O_VERB2, "newfme: config snapshot contains..."); 204 config_print(O_ALTFP|O_VERB2, fmep->cfgdata->cooked); 205 206 return (fmep); 207 } 208 209 static struct fme * 210 newfme(const char *e0class, const struct ipath *e0ipp) 211 { 212 struct cfgdata *cfgdata; 213 214 if ((cfgdata = config_snapshot()) == NULL) { 215 out(O_ALTFP, "newfme: NULL configuration"); 216 Undiag_reason = UD_NOCONF; 217 return (NULL); 218 } 219 220 Nfmep = alloc_fme(); 221 222 Nfmep->id = Nextid++; 223 Nfmep->cfgdata = cfgdata; 224 Nfmep->posted_suspects = 0; 225 Nfmep->uniqobs = 0; 226 Nfmep->state = FME_NOTHING; 227 Nfmep->pull = 0ULL; 228 Nfmep->overflow = 0; 229 230 Nfmep->fmcase = NULL; 231 Nfmep->hdl = NULL; 232 233 if ((Nfmep->eventtree = itree_create(cfgdata->cooked)) == NULL) { 234 out(O_ALTFP, "newfme: NULL instance tree"); 235 Undiag_reason = UD_INSTFAIL; 236 config_free(cfgdata); 237 FREE(Nfmep); 238 Nfmep = NULL; 239 return (NULL); 240 } 241 242 itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree); 243 244 if ((Nfmep->e0 = 245 itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) { 246 out(O_ALTFP, "newfme: e0 not in instance tree"); 247 Undiag_reason = UD_BADEVENTI; 248 itree_free(Nfmep->eventtree); 249 config_free(cfgdata); 250 FREE(Nfmep); 251 Nfmep = NULL; 252 return (NULL); 253 } 254 255 return (fme_ready(Nfmep)); 256 } 257 258 void 259 fme_fini(void) 260 { 261 struct fme *sfp, *fp; 262 struct case_list *ucasep, *nextcasep; 263 264 ucasep = Undiagablecaselist; 265 while (ucasep != NULL) { 266 nextcasep = ucasep->next; 267 FREE(ucasep); 268 ucasep = nextcasep; 269 } 270 Undiagablecaselist = NULL; 271 272 /* clean up closed fmes */ 273 fp = ClosedFMEs; 274 while (fp != NULL) { 275 sfp = fp->next; 276 destroy_fme(fp); 277 fp = sfp; 278 } 279 ClosedFMEs = NULL; 280 281 fp = FMElist; 282 while (fp != NULL) { 283 sfp = fp->next; 284 destroy_fme(fp); 285 fp = sfp; 286 } 287 FMElist = EFMElist = NULL; 288 289 /* if we were in the middle of creating an fme, free it now */ 290 if (Nfmep) { 291 destroy_fme(Nfmep); 292 Nfmep = NULL; 293 } 294 } 295 296 /* 297 * Allocated space for a buffer name. 20 bytes allows for 298 * a ridiculous 9,999,999 unique observations. 
299 */ 300 #define OBBUFNMSZ 20 301 302 /* 303 * serialize_observation 304 * 305 * Create a recoverable version of the current observation 306 * (f->ecurrent). We keep a serialized version of each unique 307 * observation in order that we may resume correctly the fme in the 308 * correct state if eft or fmd crashes and we're restarted. 309 */ 310 static void 311 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp) 312 { 313 size_t pkdlen; 314 char tmpbuf[OBBUFNMSZ]; 315 char *pkd = NULL; 316 char *estr; 317 318 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs); 319 estr = ipath2str(cls, ipp); 320 fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1); 321 fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr, 322 strlen(estr) + 1); 323 FREE(estr); 324 325 if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) { 326 (void) snprintf(tmpbuf, 327 OBBUFNMSZ, "observed%d.nvp", fp->uniqobs); 328 if (nvlist_xpack(fp->ecurrent->nvp, 329 &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0) 330 out(O_DIE|O_SYS, "pack of observed nvl failed"); 331 fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen); 332 fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen); 333 FREE(pkd); 334 } 335 336 fp->uniqobs++; 337 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs, 338 sizeof (fp->uniqobs)); 339 } 340 341 /* 342 * init_fme_bufs -- We keep several bits of state about an fme for 343 * use if eft or fmd crashes and we're restarted. 344 */ 345 static void 346 init_fme_bufs(struct fme *fp) 347 { 348 size_t cfglen = fp->cfgdata->nextfree - fp->cfgdata->begin; 349 350 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_CFGLEN, sizeof (cfglen)); 351 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_CFGLEN, (void *)&cfglen, 352 sizeof (cfglen)); 353 if (cfglen != 0) { 354 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_CFG, cfglen); 355 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_CFG, 356 fp->cfgdata->begin, cfglen); 357 } 358 359 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull)); 360 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull, 361 sizeof (fp->pull)); 362 363 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id)); 364 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id, 365 sizeof (fp->id)); 366 367 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs)); 368 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs, 369 sizeof (fp->uniqobs)); 370 371 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD, 372 sizeof (fp->posted_suspects)); 373 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD, 374 (void *)&fp->posted_suspects, sizeof (fp->posted_suspects)); 375 } 376 377 static void 378 destroy_fme_bufs(struct fme *fp) 379 { 380 char tmpbuf[OBBUFNMSZ]; 381 int o; 382 383 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN); 384 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG); 385 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL); 386 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID); 387 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD); 388 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS); 389 390 for (o = 0; o < fp->uniqobs; o++) { 391 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o); 392 fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf); 393 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o); 394 fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf); 395 } 396 } 397 398 /* 399 * reconstitute_observations -- convert a case's serialized observations 400 * back into struct events. 
Returns zero if all observations are 401 * successfully reconstituted. 402 */ 403 static int 404 reconstitute_observations(struct fme *fmep) 405 { 406 struct event *ep; 407 struct node *epnamenp = NULL; 408 size_t pkdlen; 409 char *pkd = NULL; 410 char *tmpbuf = alloca(OBBUFNMSZ); 411 char *sepptr; 412 char *estr; 413 int ocnt; 414 int elen; 415 416 for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) { 417 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt); 418 elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf); 419 if (elen == 0) { 420 out(O_ALTFP, 421 "reconstitute_observation: no %s buffer found.", 422 tmpbuf); 423 Undiag_reason = UD_MISSINGOBS; 424 break; 425 } 426 427 estr = MALLOC(elen); 428 fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen); 429 sepptr = strchr(estr, '@'); 430 if (sepptr == NULL) { 431 out(O_ALTFP, 432 "reconstitute_observation: %s: " 433 "missing @ separator in %s.", 434 tmpbuf, estr); 435 Undiag_reason = UD_MISSINGPATH; 436 FREE(estr); 437 break; 438 } 439 440 *sepptr = '\0'; 441 if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) { 442 out(O_ALTFP, 443 "reconstitute_observation: %s: " 444 "trouble converting path string \"%s\" " 445 "to internal representation.", 446 tmpbuf, sepptr + 1); 447 Undiag_reason = UD_MISSINGPATH; 448 FREE(estr); 449 break; 450 } 451 452 /* construct the event */ 453 ep = itree_lookup(fmep->eventtree, 454 stable(estr), ipath(epnamenp)); 455 if (ep == NULL) { 456 out(O_ALTFP, 457 "reconstitute_observation: %s: " 458 "lookup of \"%s\" in itree failed.", 459 tmpbuf, ipath2str(estr, ipath(epnamenp))); 460 Undiag_reason = UD_BADOBS; 461 tree_free(epnamenp); 462 FREE(estr); 463 break; 464 } 465 tree_free(epnamenp); 466 467 /* 468 * We may or may not have a saved nvlist for the observation 469 */ 470 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt); 471 pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf); 472 if (pkdlen != 0) { 473 pkd = MALLOC(pkdlen); 474 fmd_buf_read(fmep->hdl, 475 fmep->fmcase, tmpbuf, pkd, pkdlen); 476 ASSERT(ep->nvp == NULL); 477 if (nvlist_xunpack(pkd, 478 pkdlen, &ep->nvp, &Eft_nv_hdl) != 0) 479 out(O_DIE|O_SYS, "pack of observed nvl failed"); 480 FREE(pkd); 481 } 482 483 if (ocnt == 0) 484 fmep->e0 = ep; 485 486 FREE(estr); 487 fmep->ecurrent = ep; 488 ep->count++; 489 490 /* link it into list of observations seen */ 491 ep->observations = fmep->observations; 492 fmep->observations = ep; 493 } 494 495 if (ocnt == fmep->uniqobs) { 496 (void) fme_ready(fmep); 497 return (0); 498 } 499 500 return (1); 501 } 502 503 /* 504 * restart_fme -- called during eft initialization. Reconstitutes 505 * an in-progress fme. 
506 */ 507 void 508 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress) 509 { 510 nvlist_t *defect; 511 struct case_list *bad; 512 struct fme *fmep; 513 struct cfgdata *cfgdata = NULL; 514 size_t rawsz; 515 516 fmep = alloc_fme(); 517 fmep->fmcase = inprogress; 518 fmep->hdl = hdl; 519 520 if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) { 521 out(O_ALTFP, "restart_fme: No config data"); 522 Undiag_reason = UD_MISSINGINFO; 523 goto badcase; 524 } 525 fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz, 526 sizeof (size_t)); 527 528 if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) { 529 out(O_ALTFP, "restart_fme: No event zero"); 530 Undiag_reason = UD_MISSINGZERO; 531 goto badcase; 532 } 533 534 cfgdata = MALLOC(sizeof (struct cfgdata)); 535 cfgdata->cooked = NULL; 536 cfgdata->devcache = NULL; 537 cfgdata->cpucache = NULL; 538 cfgdata->refcnt = 1; 539 540 if (rawsz > 0) { 541 if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) { 542 out(O_ALTFP, "restart_fme: Config data size mismatch"); 543 Undiag_reason = UD_CFGMISMATCH; 544 goto badcase; 545 } 546 cfgdata->begin = MALLOC(rawsz); 547 cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz; 548 fmd_buf_read(hdl, 549 inprogress, WOBUF_CFG, cfgdata->begin, rawsz); 550 } else { 551 cfgdata->begin = cfgdata->end = cfgdata->nextfree = NULL; 552 } 553 fmep->cfgdata = cfgdata; 554 555 config_cook(cfgdata); 556 if ((fmep->eventtree = itree_create(cfgdata->cooked)) == NULL) { 557 /* case not properly saved or irretrievable */ 558 out(O_ALTFP, "restart_fme: NULL instance tree"); 559 Undiag_reason = UD_INSTFAIL; 560 goto badcase; 561 } 562 563 itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree); 564 565 if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) { 566 out(O_ALTFP, "restart_fme: no saved wait time"); 567 Undiag_reason = UD_MISSINGINFO; 568 goto badcase; 569 } else { 570 fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull, 571 sizeof (fmep->pull)); 572 } 573 574 if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) { 575 out(O_ALTFP, "restart_fme: no saved posted status"); 576 Undiag_reason = UD_MISSINGINFO; 577 goto badcase; 578 } else { 579 fmd_buf_read(hdl, inprogress, WOBUF_POSTD, 580 (void *)&fmep->posted_suspects, 581 sizeof (fmep->posted_suspects)); 582 } 583 584 if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) { 585 out(O_ALTFP, "restart_fme: no saved id"); 586 Undiag_reason = UD_MISSINGINFO; 587 goto badcase; 588 } else { 589 fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id, 590 sizeof (fmep->id)); 591 } 592 if (Nextid <= fmep->id) 593 Nextid = fmep->id + 1; 594 595 if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) { 596 out(O_ALTFP, "restart_fme: no count of observations"); 597 Undiag_reason = UD_MISSINGINFO; 598 goto badcase; 599 } else { 600 fmd_buf_read(hdl, inprogress, WOBUF_NOBS, 601 (void *)&fmep->uniqobs, sizeof (fmep->uniqobs)); 602 } 603 604 if (reconstitute_observations(fmep) != 0) 605 goto badcase; 606 607 Open_fme_count++; 608 609 /* give the diagnosis algorithm a shot at the new FME state */ 610 fme_eval(fmep, NULL); 611 return; 612 613 badcase: 614 if (fmep->eventtree != NULL) 615 itree_free(fmep->eventtree); 616 config_free(cfgdata); 617 destroy_fme_bufs(fmep); 618 FREE(fmep); 619 620 /* 621 * Since we're unable to restart the case, add it to the undiagable 622 * list and solve and close it as appropriate. 
623 */ 624 bad = MALLOC(sizeof (struct case_list)); 625 bad->next = NULL; 626 627 if (Undiagablecaselist != NULL) 628 bad->next = Undiagablecaselist; 629 Undiagablecaselist = bad; 630 bad->fmcase = inprogress; 631 632 out(O_ALTFP, "[case %s (unable to restart), ", 633 fmd_case_uuid(hdl, bad->fmcase)); 634 635 if (fmd_case_solved(hdl, bad->fmcase)) { 636 out(O_ALTFP, "already solved, "); 637 } else { 638 out(O_ALTFP, "solving, "); 639 defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100, 640 NULL, NULL, NULL); 641 if (Undiag_reason != NULL) 642 (void) nvlist_add_string(defect, 643 UNDIAG_REASON, Undiag_reason); 644 fmd_case_add_suspect(hdl, bad->fmcase, defect); 645 fmd_case_solve(hdl, bad->fmcase); 646 } 647 648 if (fmd_case_closed(hdl, bad->fmcase)) { 649 out(O_ALTFP, "already closed ]"); 650 } else { 651 out(O_ALTFP, "closing ]"); 652 fmd_case_close(hdl, bad->fmcase); 653 } 654 } 655 656 /*ARGSUSED*/ 657 static void 658 globals_destructor(void *left, void *right, void *arg) 659 { 660 struct evalue *evp = (struct evalue *)right; 661 if (evp->t == NODEPTR) 662 tree_free((struct node *)(uintptr_t)evp->v); 663 evp->v = NULL; 664 FREE(evp); 665 } 666 667 void 668 destroy_fme(struct fme *f) 669 { 670 stats_delete(f->Rcount); 671 stats_delete(f->Hcallcount); 672 stats_delete(f->Rcallcount); 673 stats_delete(f->Ccallcount); 674 stats_delete(f->Ecallcount); 675 stats_delete(f->Tcallcount); 676 stats_delete(f->Marrowcount); 677 stats_delete(f->diags); 678 679 itree_free(f->eventtree); 680 config_free(f->cfgdata); 681 lut_free(f->globals, globals_destructor, NULL); 682 FREE(f); 683 } 684 685 static const char * 686 fme_state2str(enum fme_state s) 687 { 688 switch (s) { 689 case FME_NOTHING: return ("NOTHING"); 690 case FME_WAIT: return ("WAIT"); 691 case FME_CREDIBLE: return ("CREDIBLE"); 692 case FME_DISPROVED: return ("DISPROVED"); 693 case FME_DEFERRED: return ("DEFERRED"); 694 default: return ("UNKNOWN"); 695 } 696 } 697 698 static int 699 is_problem(enum nametype t) 700 { 701 return (t == N_FAULT || t == N_DEFECT || t == N_UPSET); 702 } 703 704 static int 705 is_fault(enum nametype t) 706 { 707 return (t == N_FAULT); 708 } 709 710 static int 711 is_defect(enum nametype t) 712 { 713 return (t == N_DEFECT); 714 } 715 716 static int 717 is_upset(enum nametype t) 718 { 719 return (t == N_UPSET); 720 } 721 722 static void 723 fme_print(int flags, struct fme *fmep) 724 { 725 struct event *ep; 726 727 out(flags, "Fault Management Exercise %d", fmep->id); 728 out(flags, "\t State: %s", fme_state2str(fmep->state)); 729 out(flags|O_NONL, "\t Start time: "); 730 ptree_timeval(flags|O_NONL, &fmep->ull); 731 out(flags, NULL); 732 if (fmep->wull) { 733 out(flags|O_NONL, "\t Wait time: "); 734 ptree_timeval(flags|O_NONL, &fmep->wull); 735 out(flags, NULL); 736 } 737 out(flags|O_NONL, "\t E0: "); 738 if (fmep->e0) 739 itree_pevent_brief(flags|O_NONL, fmep->e0); 740 else 741 out(flags|O_NONL, "NULL"); 742 out(flags, NULL); 743 out(flags|O_NONL, "\tObservations:"); 744 for (ep = fmep->observations; ep; ep = ep->observations) { 745 out(flags|O_NONL, " "); 746 itree_pevent_brief(flags|O_NONL, ep); 747 } 748 out(flags, NULL); 749 out(flags|O_NONL, "\tSuspect list:"); 750 for (ep = fmep->suspects; ep; ep = ep->suspects) { 751 out(flags|O_NONL, " "); 752 itree_pevent_brief(flags|O_NONL, ep); 753 } 754 out(flags, NULL); 755 out(flags|O_VERB2, "\t Tree:"); 756 itree_ptree(flags|O_VERB2, fmep->eventtree); 757 } 758 759 static struct node * 760 pathstring2epnamenp(char *path) 761 { 762 char *sep = "/"; 763 
struct node *ret; 764 char *ptr; 765 766 if ((ptr = strtok(path, sep)) == NULL) 767 out(O_DIE, "pathstring2epnamenp: invalid empty class"); 768 769 ret = tree_iname(stable(ptr), NULL, 0); 770 771 while ((ptr = strtok(NULL, sep)) != NULL) 772 ret = tree_name_append(ret, 773 tree_iname(stable(ptr), NULL, 0)); 774 775 return (ret); 776 } 777 778 /* 779 * for a given upset sp, increment the corresponding SERD engine. if the 780 * SERD engine trips, return the ename and ipp of the resulting ereport. 781 * returns true if engine tripped and *enamep and *ippp were filled in. 782 */ 783 static int 784 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep, 785 fmd_case_t *fmcase, struct event *sp, const char **enamep, 786 const struct ipath **ippp) 787 { 788 struct node *serdinst; 789 char *serdname; 790 struct node *nid; 791 792 ASSERT(sp->t == N_UPSET); 793 ASSERT(ffep != NULL); 794 795 /* 796 * obtain instanced SERD engine from the upset sp. from this 797 * derive serdname, the string used to identify the SERD engine. 798 */ 799 serdinst = eventprop_lookup(sp, L_engine); 800 801 if (serdinst == NULL) 802 return (NULL); 803 804 serdname = ipath2str(serdinst->u.stmt.np->u.event.ename->u.name.s, 805 ipath(serdinst->u.stmt.np->u.event.epname)); 806 807 /* handle serd engine "id" property, if there is one */ 808 if ((nid = 809 lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) { 810 struct evalue *gval; 811 char suffixbuf[200]; 812 char *suffix; 813 char *nserdname; 814 size_t nname; 815 816 out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname); 817 ptree_name_iter(O_ALTFP|O_NONL, nid); 818 819 ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t)); 820 821 if ((gval = lut_lookup(fmep->globals, 822 (void *)nid->u.globid.s, NULL)) == NULL) { 823 out(O_ALTFP, " undefined"); 824 } else if (gval->t == UINT64) { 825 out(O_ALTFP, " %llu", gval->v); 826 (void) sprintf(suffixbuf, "%llu", gval->v); 827 suffix = suffixbuf; 828 } else { 829 out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v); 830 suffix = (char *)(uintptr_t)gval->v; 831 } 832 833 nname = strlen(serdname) + strlen(suffix) + 2; 834 nserdname = MALLOC(nname); 835 (void) snprintf(nserdname, nname, "%s:%s", serdname, suffix); 836 FREE(serdname); 837 serdname = nserdname; 838 } 839 840 if (!fmd_serd_exists(hdl, serdname)) { 841 struct node *nN, *nT; 842 843 /* no SERD engine yet, so create it */ 844 nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N, NULL); 845 nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T, NULL); 846 847 ASSERT(nN->t == T_NUM); 848 ASSERT(nT->t == T_TIMEVAL); 849 850 fmd_serd_create(hdl, serdname, (uint_t)nN->u.ull, 851 (hrtime_t)nT->u.ull); 852 } 853 854 855 /* 856 * increment SERD engine. if engine fires, reset serd 857 * engine and return trip_strcode 858 */ 859 if (fmd_serd_record(hdl, serdname, ffep)) { 860 struct node *tripinst = lut_lookup(serdinst->u.stmt.lutp, 861 (void *)L_trip, NULL); 862 863 ASSERT(tripinst != NULL); 864 865 *enamep = tripinst->u.event.ename->u.name.s; 866 *ippp = ipath(tripinst->u.event.epname); 867 868 fmd_case_add_serd(hdl, fmcase, serdname); 869 fmd_serd_reset(hdl, serdname); 870 out(O_ALTFP|O_NONL, "[engine fired: %s, sending: ", serdname); 871 ipath_print(O_ALTFP|O_NONL, *enamep, *ippp); 872 out(O_ALTFP, "]"); 873 874 FREE(serdname); 875 return (1); 876 } 877 878 FREE(serdname); 879 return (0); 880 } 881 882 /* 883 * search a suspect list for upsets. 
feed each upset to serd_eval() and 884 * build up tripped[], an array of ereports produced by the firing of 885 * any SERD engines. then feed each ereport back into 886 * fme_receive_report(). 887 * 888 * returns ntrip, the number of these ereports produced. 889 */ 890 static int 891 upsets_eval(struct fme *fmep, fmd_event_t *ffep) 892 { 893 /* we build an array of tripped ereports that we send ourselves */ 894 struct { 895 const char *ename; 896 const struct ipath *ipp; 897 } *tripped; 898 struct event *sp; 899 int ntrip, nupset, i; 900 901 /* 902 * count the number of upsets to determine the upper limit on 903 * expected trip ereport strings. remember that one upset can 904 * lead to at most one ereport. 905 */ 906 nupset = 0; 907 for (sp = fmep->suspects; sp; sp = sp->suspects) { 908 if (sp->t == N_UPSET) 909 nupset++; 910 } 911 912 if (nupset == 0) 913 return (0); 914 915 /* 916 * get to this point if we have upsets and expect some trip 917 * ereports 918 */ 919 tripped = alloca(sizeof (*tripped) * nupset); 920 bzero((void *)tripped, sizeof (*tripped) * nupset); 921 922 ntrip = 0; 923 for (sp = fmep->suspects; sp; sp = sp->suspects) 924 if (sp->t == N_UPSET && 925 serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp, 926 &tripped[ntrip].ename, &tripped[ntrip].ipp)) 927 ntrip++; 928 929 for (i = 0; i < ntrip; i++) 930 fme_receive_report(fmep->hdl, ffep, 931 tripped[i].ename, tripped[i].ipp, NULL); 932 933 return (ntrip); 934 } 935 936 /* 937 * fme_receive_external_report -- call when an external ereport comes in 938 * 939 * this routine just converts the relevant information from the ereport 940 * into a format used internally and passes it on to fme_receive_report(). 941 */ 942 void 943 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl, 944 const char *eventstring) 945 { 946 struct node *epnamenp = platform_getpath(nvl); 947 const struct ipath *ipp; 948 949 /* 950 * XFILE: If we ended up without a path, it's an X-file. 951 * For now, use our undiagnosable interface. 952 */ 953 if (epnamenp == NULL) { 954 out(O_ALTFP, "XFILE: Unable to get path from ereport"); 955 Undiag_reason = UD_NOPATH; 956 publish_undiagnosable(hdl, ffep); 957 return; 958 } 959 960 ipp = ipath(epnamenp); 961 tree_free(epnamenp); 962 fme_receive_report(hdl, ffep, stable(eventstring), ipp, nvl); 963 } 964 965 /*ARGSUSED*/ 966 void 967 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl, 968 const char *eventstring) 969 { 970 char *uuid; 971 nvlist_t **nva; 972 uint_t nvc; 973 const struct ipath *ipp; 974 975 if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 || 976 nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, 977 &nva, &nvc) != 0) { 978 out(O_ALTFP, "No uuid or fault list for list.repaired event"); 979 return; 980 } 981 982 out(O_ALTFP, "Processing list.repaired from case %s", uuid); 983 984 while (nvc-- != 0) { 985 /* 986 * Reset any istat associated with this path. 987 */ 988 char *path; 989 990 if ((ipp = platform_fault2ipath(*nva++)) == NULL) 991 continue; 992 993 path = ipath2str(NULL, ipp); 994 out(O_ALTFP, "fme_receive_repair_list: resetting state for %s", 995 path); 996 FREE(path); 997 998 lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp); 999 istat_save(); 1000 1001 /* 1002 * We do not have a list of stat engines in a form that 1003 * we can readily clear any associated serd engines. When we 1004 * do, this will be the place to clear them. 
1005 */ 1006 } 1007 } 1008 1009 static int mark_arrows(struct fme *fmep, struct event *ep, int mark, 1010 unsigned long long at_latest_by, unsigned long long *pdelay, int keep); 1011 1012 /* ARGSUSED */ 1013 static void 1014 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep) 1015 { 1016 struct bubble *bp; 1017 struct arrowlist *ap; 1018 1019 ep->cached_state = 0; 1020 ep->keep_in_tree = 0; 1021 for (bp = itree_next_bubble(ep, NULL); bp; 1022 bp = itree_next_bubble(ep, bp)) { 1023 if (bp->t != B_FROM) 1024 continue; 1025 bp->mark = 0; 1026 for (ap = itree_next_arrow(bp, NULL); ap; 1027 ap = itree_next_arrow(bp, ap)) 1028 ap->arrowp->mark = 0; 1029 } 1030 } 1031 1032 static void 1033 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep, 1034 const char *eventstring, const struct ipath *ipp, nvlist_t *nvl) 1035 { 1036 struct event *ep; 1037 struct fme *fmep = NULL; 1038 struct fme *ofmep = NULL; 1039 struct fme *cfmep, *svfmep; 1040 int matched = 0; 1041 nvlist_t *defect; 1042 1043 out(O_ALTFP|O_NONL, "fme_receive_report: "); 1044 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1045 out(O_ALTFP|O_STAMP, NULL); 1046 1047 /* decide which FME it goes to */ 1048 for (fmep = FMElist; fmep; fmep = fmep->next) { 1049 int prev_verbose; 1050 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 1051 enum fme_state state; 1052 nvlist_t *pre_peek_nvp = NULL; 1053 1054 if (fmep->overflow) { 1055 if (!(fmd_case_closed(fmep->hdl, fmep->fmcase))) 1056 ofmep = fmep; 1057 1058 continue; 1059 } 1060 1061 /* look up event in event tree for this FME */ 1062 if ((ep = itree_lookup(fmep->eventtree, 1063 eventstring, ipp)) == NULL) 1064 continue; 1065 1066 /* note observation */ 1067 fmep->ecurrent = ep; 1068 if (ep->count++ == 0) { 1069 /* link it into list of observations seen */ 1070 ep->observations = fmep->observations; 1071 fmep->observations = ep; 1072 ep->nvp = evnv_dupnvl(nvl); 1073 } else { 1074 /* use new payload values for peek */ 1075 pre_peek_nvp = ep->nvp; 1076 ep->nvp = evnv_dupnvl(nvl); 1077 } 1078 1079 /* tell hypothesise() not to mess with suspect list */ 1080 fmep->peek = 1; 1081 1082 /* don't want this to be verbose (unless Debug is set) */ 1083 prev_verbose = Verbose; 1084 if (Debug == 0) 1085 Verbose = 0; 1086 1087 lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep); 1088 state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay); 1089 1090 fmep->peek = 0; 1091 1092 /* put verbose flag back */ 1093 Verbose = prev_verbose; 1094 1095 if (state != FME_DISPROVED) { 1096 /* found an FME that explains the ereport */ 1097 matched++; 1098 out(O_ALTFP|O_NONL, "["); 1099 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1100 out(O_ALTFP, " explained by FME%d]", fmep->id); 1101 1102 if (pre_peek_nvp) 1103 nvlist_free(pre_peek_nvp); 1104 1105 if (ep->count == 1) 1106 serialize_observation(fmep, eventstring, ipp); 1107 1108 if (ffep) 1109 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1110 1111 stats_counter_bump(fmep->Rcount); 1112 1113 /* re-eval FME */ 1114 fme_eval(fmep, ffep); 1115 } else { 1116 1117 /* not a match, undo noting of observation */ 1118 fmep->ecurrent = NULL; 1119 if (--ep->count == 0) { 1120 /* unlink it from observations */ 1121 fmep->observations = ep->observations; 1122 ep->observations = NULL; 1123 nvlist_free(ep->nvp); 1124 ep->nvp = NULL; 1125 } else { 1126 nvlist_free(ep->nvp); 1127 ep->nvp = pre_peek_nvp; 1128 } 1129 } 1130 } 1131 1132 if (matched) 1133 return; /* explained by at least one existing FME */ 1134 1135 /* clean up closed fmes */ 1136 cfmep = 
ClosedFMEs; 1137 while (cfmep != NULL) { 1138 svfmep = cfmep->next; 1139 destroy_fme(cfmep); 1140 cfmep = svfmep; 1141 } 1142 ClosedFMEs = NULL; 1143 1144 if (ofmep) { 1145 out(O_ALTFP|O_NONL, "["); 1146 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1147 out(O_ALTFP, " ADDING TO OVERFLOW FME]"); 1148 if (ffep) 1149 fmd_case_add_ereport(hdl, ofmep->fmcase, ffep); 1150 1151 return; 1152 1153 } else if (Max_fme && (Open_fme_count >= Max_fme)) { 1154 out(O_ALTFP|O_NONL, "["); 1155 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1156 out(O_ALTFP, " MAX OPEN FME REACHED]"); 1157 /* Create overflow fme */ 1158 if ((fmep = newfme(eventstring, ipp)) == NULL) { 1159 out(O_ALTFP|O_NONL, "["); 1160 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1161 out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]"); 1162 publish_undiagnosable(hdl, ffep); 1163 return; 1164 } 1165 1166 Open_fme_count++; 1167 1168 fmep->fmcase = fmd_case_open(hdl, NULL); 1169 fmep->hdl = hdl; 1170 init_fme_bufs(fmep); 1171 fmep->overflow = B_TRUE; 1172 1173 if (ffep) 1174 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1175 1176 defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100, 1177 NULL, NULL, NULL); 1178 (void) nvlist_add_string(defect, UNDIAG_REASON, UD_MAXFME); 1179 fmd_case_add_suspect(hdl, fmep->fmcase, defect); 1180 fmd_case_solve(hdl, fmep->fmcase); 1181 return; 1182 } 1183 1184 /* start a new FME */ 1185 if ((fmep = newfme(eventstring, ipp)) == NULL) { 1186 out(O_ALTFP|O_NONL, "["); 1187 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1188 out(O_ALTFP, " CANNOT DIAGNOSE]"); 1189 publish_undiagnosable(hdl, ffep); 1190 return; 1191 } 1192 1193 Open_fme_count++; 1194 1195 /* open a case */ 1196 fmep->fmcase = fmd_case_open(hdl, NULL); 1197 fmep->hdl = hdl; 1198 init_fme_bufs(fmep); 1199 1200 out(O_ALTFP|O_NONL, "["); 1201 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1202 out(O_ALTFP, " created FME%d, case %s]", fmep->id, 1203 fmd_case_uuid(hdl, fmep->fmcase)); 1204 1205 ep = fmep->e0; 1206 ASSERT(ep != NULL); 1207 1208 /* note observation */ 1209 fmep->ecurrent = ep; 1210 if (ep->count++ == 0) { 1211 /* link it into list of observations seen */ 1212 ep->observations = fmep->observations; 1213 fmep->observations = ep; 1214 ep->nvp = evnv_dupnvl(nvl); 1215 serialize_observation(fmep, eventstring, ipp); 1216 } else { 1217 /* new payload overrides any previous */ 1218 nvlist_free(ep->nvp); 1219 ep->nvp = evnv_dupnvl(nvl); 1220 } 1221 1222 stats_counter_bump(fmep->Rcount); 1223 1224 if (ffep) { 1225 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1226 fmd_case_setprincipal(hdl, fmep->fmcase, ffep); 1227 fmep->e0r = ffep; 1228 } 1229 1230 /* give the diagnosis algorithm a shot at the new FME state */ 1231 fme_eval(fmep, ffep); 1232 } 1233 1234 void 1235 fme_status(int flags) 1236 { 1237 struct fme *fmep; 1238 1239 if (FMElist == NULL) { 1240 out(flags, "No fault management exercises underway."); 1241 return; 1242 } 1243 1244 for (fmep = FMElist; fmep; fmep = fmep->next) 1245 fme_print(flags, fmep); 1246 } 1247 1248 /* 1249 * "indent" routines used mostly for nicely formatted debug output, but also 1250 * for sanity checking for infinite recursion bugs. 
1251 */ 1252 1253 #define MAX_INDENT 1024 1254 static const char *indent_s[MAX_INDENT]; 1255 static int current_indent; 1256 1257 static void 1258 indent_push(const char *s) 1259 { 1260 if (current_indent < MAX_INDENT) 1261 indent_s[current_indent++] = s; 1262 else 1263 out(O_DIE, "unexpected recursion depth (%d)", current_indent); 1264 } 1265 1266 static void 1267 indent_set(const char *s) 1268 { 1269 current_indent = 0; 1270 indent_push(s); 1271 } 1272 1273 static void 1274 indent_pop(void) 1275 { 1276 if (current_indent > 0) 1277 current_indent--; 1278 else 1279 out(O_DIE, "recursion underflow"); 1280 } 1281 1282 static void 1283 indent(void) 1284 { 1285 int i; 1286 if (!Verbose) 1287 return; 1288 for (i = 0; i < current_indent; i++) 1289 out(O_ALTFP|O_VERB|O_NONL, indent_s[i]); 1290 } 1291 1292 static int 1293 suspects_changed(struct fme *fmep) 1294 { 1295 struct event *suspects = fmep->suspects; 1296 struct event *psuspects = fmep->psuspects; 1297 1298 while (suspects != NULL && psuspects != NULL) { 1299 if (suspects != psuspects) 1300 return (1); 1301 suspects = suspects->suspects; 1302 psuspects = psuspects->psuspects; 1303 } 1304 1305 return (suspects != psuspects); 1306 } 1307 1308 #define SLNEW 1 1309 #define SLCHANGED 2 1310 #define SLWAIT 3 1311 #define SLDISPROVED 4 1312 1313 static void 1314 print_suspects(int circumstance, struct fme *fmep) 1315 { 1316 struct event *ep; 1317 1318 out(O_ALTFP|O_NONL, "["); 1319 if (circumstance == SLCHANGED) { 1320 out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, " 1321 "suspect list:", fmep->id, fme_state2str(fmep->state)); 1322 } else if (circumstance == SLWAIT) { 1323 out(O_ALTFP|O_NONL, "FME%d set wait timer ", fmep->id); 1324 ptree_timeval(O_ALTFP|O_NONL, &fmep->wull); 1325 } else if (circumstance == SLDISPROVED) { 1326 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id); 1327 } else { 1328 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id); 1329 } 1330 1331 if (circumstance == SLWAIT || circumstance == SLDISPROVED) { 1332 out(O_ALTFP, "]"); 1333 return; 1334 } 1335 1336 for (ep = fmep->suspects; ep; ep = ep->suspects) { 1337 out(O_ALTFP|O_NONL, " "); 1338 itree_pevent_brief(O_ALTFP|O_NONL, ep); 1339 } 1340 out(O_ALTFP, "]"); 1341 } 1342 1343 static struct node * 1344 eventprop_lookup(struct event *ep, const char *propname) 1345 { 1346 return (lut_lookup(ep->props, (void *)propname, NULL)); 1347 } 1348 1349 #define MAXDIGITIDX 23 1350 static char numbuf[MAXDIGITIDX + 1]; 1351 1352 static int 1353 node2uint(struct node *n, uint_t *valp) 1354 { 1355 struct evalue value; 1356 struct lut *globals = NULL; 1357 1358 if (n == NULL) 1359 return (1); 1360 1361 /* 1362 * check value.v since we are being asked to convert an unsigned 1363 * long long int to an unsigned int 1364 */ 1365 if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) || 1366 value.t != UINT64 || value.v > (1ULL << 32)) 1367 return (1); 1368 1369 *valp = (uint_t)value.v; 1370 1371 return (0); 1372 } 1373 1374 static nvlist_t * 1375 node2fmri(struct node *n) 1376 { 1377 nvlist_t **pa, *f, *p; 1378 struct node *nc; 1379 uint_t depth = 0; 1380 char *numstr, *nullbyte; 1381 char *failure; 1382 int err, i; 1383 1384 /* XXX do we need to be able to handle a non-T_NAME node? 
*/ 1385 if (n == NULL || n->t != T_NAME) 1386 return (NULL); 1387 1388 for (nc = n; nc != NULL; nc = nc->u.name.next) { 1389 if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM) 1390 break; 1391 depth++; 1392 } 1393 1394 if (nc != NULL) { 1395 /* We bailed early, something went wrong */ 1396 return (NULL); 1397 } 1398 1399 if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0) 1400 out(O_DIE|O_SYS, "alloc of fmri nvl failed"); 1401 pa = alloca(depth * sizeof (nvlist_t *)); 1402 for (i = 0; i < depth; i++) 1403 pa[i] = NULL; 1404 1405 err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC); 1406 err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION); 1407 err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, ""); 1408 err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth); 1409 if (err != 0) { 1410 failure = "basic construction of FMRI failed"; 1411 goto boom; 1412 } 1413 1414 numbuf[MAXDIGITIDX] = '\0'; 1415 nullbyte = &numbuf[MAXDIGITIDX]; 1416 i = 0; 1417 1418 for (nc = n; nc != NULL; nc = nc->u.name.next) { 1419 err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl); 1420 if (err != 0) { 1421 failure = "alloc of an hc-pair failed"; 1422 goto boom; 1423 } 1424 err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s); 1425 numstr = ulltostr(nc->u.name.child->u.ull, nullbyte); 1426 err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr); 1427 if (err != 0) { 1428 failure = "construction of an hc-pair failed"; 1429 goto boom; 1430 } 1431 pa[i++] = p; 1432 } 1433 1434 err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth); 1435 if (err == 0) { 1436 for (i = 0; i < depth; i++) 1437 if (pa[i] != NULL) 1438 nvlist_free(pa[i]); 1439 return (f); 1440 } 1441 failure = "addition of hc-pair array to FMRI failed"; 1442 1443 boom: 1444 for (i = 0; i < depth; i++) 1445 if (pa[i] != NULL) 1446 nvlist_free(pa[i]); 1447 nvlist_free(f); 1448 out(O_DIE, "%s", failure); 1449 /*NOTREACHED*/ 1450 return (NULL); 1451 } 1452 1453 static uint_t 1454 avg(uint_t sum, uint_t cnt) 1455 { 1456 unsigned long long s = sum * 10; 1457 1458 return ((s / cnt / 10) + (((s / cnt % 10) >= 5) ? 1 : 0)); 1459 } 1460 1461 static uint8_t 1462 percentof(uint_t part, uint_t whole) 1463 { 1464 unsigned long long p = part * 1000; 1465 1466 return ((p / whole / 10) + (((p / whole % 10) >= 5) ? 1 : 0)); 1467 } 1468 1469 struct rsl { 1470 struct event *suspect; 1471 nvlist_t *asru; 1472 nvlist_t *fru; 1473 nvlist_t *rsrc; 1474 }; 1475 1476 /* 1477 * rslfree -- free internal members of struct rsl not expected to be 1478 * freed elsewhere. 1479 */ 1480 static void 1481 rslfree(struct rsl *freeme) 1482 { 1483 if (freeme->asru != NULL) 1484 nvlist_free(freeme->asru); 1485 if (freeme->fru != NULL) 1486 nvlist_free(freeme->fru); 1487 if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru) 1488 nvlist_free(freeme->rsrc); 1489 } 1490 1491 /* 1492 * rslcmp -- compare two rsl structures. Use the following 1493 * comparisons to establish cardinality: 1494 * 1495 * 1. Name of the suspect's class. (simple strcmp) 1496 * 2. Name of the suspect's ASRU. 
(trickier, since nvlist) 1497 * 1498 */ 1499 static int 1500 rslcmp(const void *a, const void *b) 1501 { 1502 struct rsl *r1 = (struct rsl *)a; 1503 struct rsl *r2 = (struct rsl *)b; 1504 int rv; 1505 1506 rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s, 1507 r2->suspect->enode->u.event.ename->u.name.s); 1508 if (rv != 0) 1509 return (rv); 1510 1511 if (r1->asru == NULL && r2->asru == NULL) 1512 return (0); 1513 if (r1->asru == NULL) 1514 return (-1); 1515 if (r2->asru == NULL) 1516 return (1); 1517 return (evnv_cmpnvl(r1->asru, r2->asru, 0)); 1518 } 1519 1520 /* 1521 * rsluniq -- given an array of rsl structures, seek out and "remove" 1522 * any duplicates. Dups are "remove"d by NULLing the suspect pointer 1523 * of the array element. Removal also means updating the number of 1524 * problems and the number of problems which are not faults. User 1525 * provides the first and last element pointers. 1526 */ 1527 static void 1528 rsluniq(struct rsl *first, struct rsl *last, int *nprobs, int *nnonf) 1529 { 1530 struct rsl *cr; 1531 1532 if (*nprobs == 1) 1533 return; 1534 1535 /* 1536 * At this point, we only expect duplicate defects. 1537 * Eversholt's diagnosis algorithm prevents duplicate 1538 * suspects, but we rewrite defects in the platform code after 1539 * the diagnosis is made, and that can introduce new 1540 * duplicates. 1541 */ 1542 while (first <= last) { 1543 if (first->suspect == NULL || !is_defect(first->suspect->t)) { 1544 first++; 1545 continue; 1546 } 1547 cr = first + 1; 1548 while (cr <= last) { 1549 if (is_defect(first->suspect->t)) { 1550 if (rslcmp(first, cr) == 0) { 1551 cr->suspect = NULL; 1552 rslfree(cr); 1553 (*nprobs)--; 1554 (*nnonf)--; 1555 } 1556 } 1557 /* 1558 * assume all defects are in order after our 1559 * sort and short circuit here with "else break" ? 1560 */ 1561 cr++; 1562 } 1563 first++; 1564 } 1565 } 1566 1567 /* 1568 * get_resources -- for a given suspect, determine what ASRU, FRU and 1569 * RSRC nvlists should be advertised in the final suspect list. 1570 */ 1571 void 1572 get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot) 1573 { 1574 struct node *asrudef, *frudef; 1575 nvlist_t *asru, *fru; 1576 nvlist_t *rsrc = NULL; 1577 char *pathstr; 1578 1579 /* 1580 * First find any ASRU and/or FRU defined in the 1581 * initial fault tree. 1582 */ 1583 asrudef = eventprop_lookup(sp, L_ASRU); 1584 frudef = eventprop_lookup(sp, L_FRU); 1585 1586 /* 1587 * Create FMRIs based on those definitions 1588 */ 1589 asru = node2fmri(asrudef); 1590 fru = node2fmri(frudef); 1591 pathstr = ipath2str(NULL, sp->ipp); 1592 1593 /* 1594 * Allow for platform translations of the FMRIs 1595 */ 1596 platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc, 1597 pathstr); 1598 1599 FREE(pathstr); 1600 rsrcs->suspect = sp; 1601 rsrcs->asru = asru; 1602 rsrcs->fru = fru; 1603 rsrcs->rsrc = rsrc; 1604 } 1605 1606 /* 1607 * trim_suspects -- prior to publishing, we may need to remove some 1608 * suspects from the list. If we're auto-closing upsets, we don't 1609 * want any of those in the published list. If the ASRUs for multiple 1610 * defects resolve to the same ASRU (driver) we only want to publish 1611 * that as a single suspect. 1612 */ 1613 static void 1614 trim_suspects(struct fme *fmep, boolean_t no_upsets, struct rsl **begin, 1615 struct rsl **end) 1616 { 1617 struct event *ep; 1618 struct rsl *rp; 1619 int rpcnt; 1620 1621 /* 1622 * First save the suspects in the psuspects, then copy back 1623 * only the ones we wish to retain. 
This resets nsuspects to 1624 * zero. 1625 */ 1626 rpcnt = fmep->nsuspects; 1627 save_suspects(fmep); 1628 1629 /* 1630 * allocate an array of resource pointers for the suspects. 1631 * We may end up using less than the full allocation, but this 1632 * is a very short-lived array. publish_suspects() will free 1633 * this array when it's done using it. 1634 */ 1635 rp = *begin = MALLOC(rpcnt * sizeof (struct rsl)); 1636 bzero(rp, rpcnt * sizeof (struct rsl)); 1637 1638 /* first pass, remove any unwanted upsets and populate our array */ 1639 for (ep = fmep->psuspects; ep; ep = ep->psuspects) { 1640 if (no_upsets && is_upset(ep->t)) 1641 continue; 1642 get_resources(ep, rp, fmep->cfgdata->cooked); 1643 rp++; 1644 fmep->nsuspects++; 1645 if (!is_fault(ep->t)) 1646 fmep->nonfault++; 1647 } 1648 1649 /* if all we had was unwanted upsets, we're done */ 1650 if (fmep->nsuspects == 0) 1651 return; 1652 1653 *end = rp - 1; 1654 1655 /* sort the array */ 1656 qsort(*begin, fmep->nsuspects, sizeof (struct rsl), rslcmp); 1657 rsluniq(*begin, *end, &fmep->nsuspects, &fmep->nonfault); 1658 } 1659 1660 /* 1661 * addpayloadprop -- add a payload prop to a problem 1662 */ 1663 static void 1664 addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault) 1665 { 1666 ASSERT(fault != NULL); 1667 ASSERT(lhs != NULL); 1668 ASSERT(rhs != NULL); 1669 1670 if (rhs->t == UINT64) { 1671 out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v); 1672 1673 if (nvlist_add_uint64(fault, lhs, rhs->v) != 0) 1674 out(O_DIE, 1675 "cannot add payloadprop \"%s\" to fault", lhs); 1676 } else { 1677 out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"", 1678 lhs, (char *)(uintptr_t)rhs->v); 1679 1680 if (nvlist_add_string(fault, lhs, (char *)(uintptr_t)rhs->v) != 1681 0) 1682 out(O_DIE, 1683 "cannot add payloadprop \"%s\" to fault", lhs); 1684 } 1685 } 1686 1687 static char *Istatbuf; 1688 static char *Istatbufptr; 1689 static int Istatsz; 1690 1691 /* 1692 * istataddsize -- calculate size of istat and add it to Istatsz 1693 */ 1694 /*ARGSUSED2*/ 1695 static void 1696 istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg) 1697 { 1698 int val; 1699 1700 ASSERT(lhs != NULL); 1701 ASSERT(rhs != NULL); 1702 1703 if ((val = stats_counter_value(rhs)) == 0) 1704 return; /* skip zero-valued stats */ 1705 1706 /* count up the size of the stat name */ 1707 Istatsz += ipath2strlen(lhs->ename, lhs->ipath); 1708 Istatsz++; /* for the trailing NULL byte */ 1709 1710 /* count up the size of the stat value */ 1711 Istatsz += snprintf(NULL, 0, "%d", val); 1712 Istatsz++; /* for the trailing NULL byte */ 1713 } 1714 1715 /* 1716 * istat2str -- serialize an istat, writing result to *Istatbufptr 1717 */ 1718 /*ARGSUSED2*/ 1719 static void 1720 istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg) 1721 { 1722 char *str; 1723 int len; 1724 int val; 1725 1726 ASSERT(lhs != NULL); 1727 ASSERT(rhs != NULL); 1728 1729 if ((val = stats_counter_value(rhs)) == 0) 1730 return; /* skip zero-valued stats */ 1731 1732 /* serialize the stat name */ 1733 str = ipath2str(lhs->ename, lhs->ipath); 1734 len = strlen(str); 1735 1736 ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]); 1737 (void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr); 1738 Istatbufptr += len; 1739 FREE(str); 1740 *Istatbufptr++ = '\0'; 1741 1742 /* serialize the stat value */ 1743 Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr, 1744 "%d", val); 1745 *Istatbufptr++ = '\0'; 1746 1747 ASSERT(Istatbufptr <= 
&Istatbuf[Istatsz]); 1748 } 1749 1750 void 1751 istat_save() 1752 { 1753 if (Istat_need_save == 0) 1754 return; 1755 1756 /* figure out how big the serialzed info is */ 1757 Istatsz = 0; 1758 lut_walk(Istats, (lut_cb)istataddsize, NULL); 1759 1760 if (Istatsz == 0) { 1761 /* no stats to save */ 1762 fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS); 1763 return; 1764 } 1765 1766 /* create the serialized buffer */ 1767 Istatbufptr = Istatbuf = MALLOC(Istatsz); 1768 lut_walk(Istats, (lut_cb)istat2str, NULL); 1769 1770 /* clear out current saved stats */ 1771 fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS); 1772 1773 /* write out the new version */ 1774 fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz); 1775 FREE(Istatbuf); 1776 1777 Istat_need_save = 0; 1778 } 1779 1780 int 1781 istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2) 1782 { 1783 if (ent1->ename != ent2->ename) 1784 return (ent2->ename - ent1->ename); 1785 if (ent1->ipath != ent2->ipath) 1786 return ((char *)ent2->ipath - (char *)ent1->ipath); 1787 1788 return (0); 1789 } 1790 1791 /* 1792 * istat-verify -- verify the component associated with a stat still exists 1793 * 1794 * if the component no longer exists, this routine resets the stat and 1795 * returns 0. if the component still exists, it returns 1. 1796 */ 1797 static int 1798 istat_verify(struct node *snp, struct istat_entry *entp) 1799 { 1800 struct stats *statp; 1801 nvlist_t *fmri; 1802 1803 fmri = node2fmri(snp->u.event.epname); 1804 if (platform_path_exists(fmri)) { 1805 nvlist_free(fmri); 1806 return (1); 1807 } 1808 nvlist_free(fmri); 1809 1810 /* component no longer in system. zero out the associated stats */ 1811 if ((statp = (struct stats *) 1812 lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL || 1813 stats_counter_value(statp) == 0) 1814 return (0); /* stat is already reset */ 1815 1816 Istat_need_save = 1; 1817 stats_counter_reset(statp); 1818 return (0); 1819 } 1820 1821 static void 1822 istat_bump(struct node *snp, int n) 1823 { 1824 struct stats *statp; 1825 struct istat_entry ent; 1826 1827 ASSERT(snp != NULL); 1828 ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t)); 1829 ASSERT(snp->u.event.epname != NULL); 1830 1831 /* class name should be hoisted into a single stable entry */ 1832 ASSERT(snp->u.event.ename->u.name.next == NULL); 1833 ent.ename = snp->u.event.ename->u.name.s; 1834 ent.ipath = ipath(snp->u.event.epname); 1835 1836 if (!istat_verify(snp, &ent)) { 1837 /* component no longer exists in system, nothing to do */ 1838 return; 1839 } 1840 1841 if ((statp = (struct stats *) 1842 lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) { 1843 /* need to create the counter */ 1844 int cnt = 0; 1845 struct node *np; 1846 char *sname; 1847 char *snamep; 1848 struct istat_entry *newentp; 1849 1850 /* count up the size of the stat name */ 1851 np = snp->u.event.ename; 1852 while (np != NULL) { 1853 cnt += strlen(np->u.name.s); 1854 cnt++; /* for the '.' 
or '@' */ 1855 np = np->u.name.next; 1856 } 1857 np = snp->u.event.epname; 1858 while (np != NULL) { 1859 cnt += snprintf(NULL, 0, "%s%llu", 1860 np->u.name.s, np->u.name.child->u.ull); 1861 cnt++; /* for the '/' or trailing NULL byte */ 1862 np = np->u.name.next; 1863 } 1864 1865 /* build the stat name */ 1866 snamep = sname = alloca(cnt); 1867 np = snp->u.event.ename; 1868 while (np != NULL) { 1869 snamep += snprintf(snamep, &sname[cnt] - snamep, 1870 "%s", np->u.name.s); 1871 np = np->u.name.next; 1872 if (np) 1873 *snamep++ = '.'; 1874 } 1875 *snamep++ = '@'; 1876 np = snp->u.event.epname; 1877 while (np != NULL) { 1878 snamep += snprintf(snamep, &sname[cnt] - snamep, 1879 "%s%llu", np->u.name.s, np->u.name.child->u.ull); 1880 np = np->u.name.next; 1881 if (np) 1882 *snamep++ = '/'; 1883 } 1884 *snamep++ = '\0'; 1885 1886 /* create the new stat & add it to our list */ 1887 newentp = MALLOC(sizeof (*newentp)); 1888 *newentp = ent; 1889 statp = stats_new_counter(NULL, sname, 0); 1890 Istats = lut_add(Istats, (void *)newentp, (void *)statp, 1891 (lut_cmp)istat_cmp); 1892 } 1893 1894 /* if n is non-zero, set that value instead of bumping */ 1895 if (n) { 1896 stats_counter_reset(statp); 1897 stats_counter_add(statp, n); 1898 } else 1899 stats_counter_bump(statp); 1900 Istat_need_save = 1; 1901 } 1902 1903 /*ARGSUSED*/ 1904 static void 1905 istat_destructor(void *left, void *right, void *arg) 1906 { 1907 struct istat_entry *entp = (struct istat_entry *)left; 1908 struct stats *statp = (struct stats *)right; 1909 FREE(entp); 1910 stats_delete(statp); 1911 } 1912 1913 /* 1914 * Callback used in a walk of the Istats to reset matching stat counters. 1915 */ 1916 static void 1917 istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp, 1918 const struct ipath *ipp) 1919 { 1920 char *path; 1921 1922 if (entp->ipath == ipp) { 1923 path = ipath2str(entp->ename, ipp); 1924 out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path); 1925 FREE(path); 1926 stats_counter_reset(statp); 1927 Istat_need_save = 1; 1928 } 1929 } 1930 1931 void 1932 istat_fini(void) 1933 { 1934 lut_free(Istats, istat_destructor, NULL); 1935 } 1936 1937 static void 1938 publish_suspects(struct fme *fmep) 1939 { 1940 struct event *ep; 1941 struct rsl *srl = NULL; 1942 struct rsl *erl; 1943 struct rsl *rp; 1944 nvlist_t *fault; 1945 uint8_t cert; 1946 uint_t *frs; 1947 uint_t fravg, frsum, fr; 1948 uint_t messval; 1949 struct node *snp; 1950 int frcnt, fridx; 1951 boolean_t no_upsets = B_FALSE; 1952 boolean_t allfaulty = B_TRUE; 1953 1954 stats_counter_bump(fmep->diags); 1955 1956 /* 1957 * The current fmd interfaces don't allow us to solve a case 1958 * that's already solved. If we make a new case, what of the 1959 * ereports? We don't appear to have an interface that allows 1960 * us to access the ereports attached to a case (if we wanted 1961 * to copy the original case's ereport attachments to the new 1962 * case) and it's also a bit unclear if there would be any 1963 * problems with having ereports attached to multiple cases 1964 * and/or attaching DIAGNOSED ereports to a case. For now, 1965 * we'll just output a message. 
1966 */ 1967 if (fmep->posted_suspects || 1968 fmd_case_solved(fmep->hdl, fmep->fmcase)) { 1969 out(O_ALTFP|O_NONL, "Revised diagnosis for case %s: ", 1970 fmd_case_uuid(fmep->hdl, fmep->fmcase)); 1971 for (ep = fmep->suspects; ep; ep = ep->suspects) { 1972 out(O_ALTFP|O_NONL, " "); 1973 itree_pevent_brief(O_ALTFP|O_NONL, ep); 1974 } 1975 out(O_ALTFP, NULL); 1976 return; 1977 } 1978 1979 /* 1980 * If we're auto-closing upsets, we don't want to include them 1981 * in any produced suspect lists or certainty accounting. 1982 */ 1983 if (Autoclose != NULL) 1984 if (strcmp(Autoclose, "true") == 0 || 1985 strcmp(Autoclose, "all") == 0 || 1986 strcmp(Autoclose, "upsets") == 0) 1987 no_upsets = B_TRUE; 1988 1989 trim_suspects(fmep, no_upsets, &srl, &erl); 1990 1991 /* 1992 * If the resulting suspect list has no members, we're 1993 * done. Returning here will simply close the case. 1994 */ 1995 if (fmep->nsuspects == 0) { 1996 out(O_ALTFP, 1997 "[FME%d, case %s (all suspects are upsets)]", 1998 fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase)); 1999 FREE(srl); 2000 restore_suspects(fmep); 2001 return; 2002 } 2003 2004 /* 2005 * If the suspect list is all faults, then for a given fault, 2006 * say X of N, X's certainty is computed via: 2007 * 2008 * fitrate(X) / (fitrate(1) + ... + fitrate(N)) * 100 2009 * 2010 * If none of the suspects are faults, and there are N suspects, 2011 * the certainty of a given suspect is 100/N. 2012 * 2013 * If there are are a mixture of faults and other problems in 2014 * the suspect list, we take an average of the faults' 2015 * FITrates and treat this average as the FITrate for any 2016 * non-faults. The fitrate of any given suspect is then 2017 * computed per the first formula above. 2018 */ 2019 if (fmep->nonfault == fmep->nsuspects) { 2020 /* NO faults in the suspect list */ 2021 cert = percentof(1, fmep->nsuspects); 2022 } else { 2023 /* sum the fitrates */ 2024 frs = alloca(fmep->nsuspects * sizeof (uint_t)); 2025 fridx = frcnt = frsum = 0; 2026 2027 for (rp = srl; rp <= erl; rp++) { 2028 struct node *n; 2029 2030 if (rp->suspect == NULL) 2031 continue; 2032 if (!is_fault(rp->suspect->t)) { 2033 frs[fridx++] = 0; 2034 continue; 2035 } 2036 n = eventprop_lookup(rp->suspect, L_FITrate); 2037 if (node2uint(n, &fr) != 0) { 2038 out(O_DEBUG|O_NONL, "event "); 2039 ipath_print(O_DEBUG|O_NONL, 2040 ep->enode->u.event.ename->u.name.s, 2041 ep->ipp); 2042 out(O_DEBUG, " has no FITrate (using 1)"); 2043 fr = 1; 2044 } else if (fr == 0) { 2045 out(O_DEBUG|O_NONL, "event "); 2046 ipath_print(O_DEBUG|O_NONL, 2047 ep->enode->u.event.ename->u.name.s, 2048 ep->ipp); 2049 out(O_DEBUG, " has zero FITrate (using 1)"); 2050 fr = 1; 2051 } 2052 2053 frs[fridx++] = fr; 2054 frsum += fr; 2055 frcnt++; 2056 } 2057 fravg = avg(frsum, frcnt); 2058 for (fridx = 0; fridx < fmep->nsuspects; fridx++) 2059 if (frs[fridx] == 0) { 2060 frs[fridx] = fravg; 2061 frsum += fravg; 2062 } 2063 } 2064 2065 /* Add them in reverse order of our sort, as fmd reverses order */ 2066 for (rp = erl; rp >= srl; rp--) { 2067 if (rp->suspect == NULL) 2068 continue; 2069 if (!is_fault(rp->suspect->t)) 2070 allfaulty = B_FALSE; 2071 if (fmep->nonfault != fmep->nsuspects) 2072 cert = percentof(frs[--fridx], frsum); 2073 fault = fmd_nvl_create_fault(fmep->hdl, 2074 rp->suspect->enode->u.event.ename->u.name.s, 2075 cert, 2076 rp->asru, 2077 rp->fru, 2078 rp->rsrc); 2079 if (fault == NULL) 2080 out(O_DIE, "fault creation failed"); 2081 /* if "message" property exists, add it to the fault */ 2082 if 
(node2uint(eventprop_lookup(rp->suspect, L_message), 2083 &messval) == 0) { 2084 2085 out(O_ALTFP, 2086 "[FME%d, %s adds message=%d to suspect list]", 2087 fmep->id, 2088 rp->suspect->enode->u.event.ename->u.name.s, 2089 messval); 2090 if (nvlist_add_boolean_value(fault, 2091 FM_SUSPECT_MESSAGE, 2092 (messval) ? B_TRUE : B_FALSE) != 0) { 2093 out(O_DIE, "cannot add no-message to fault"); 2094 } 2095 } 2096 /* add any payload properties */ 2097 lut_walk(rp->suspect->payloadprops, 2098 (lut_cb)addpayloadprop, (void *)fault); 2099 fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault); 2100 rp->suspect->fault = fault; 2101 rslfree(rp); 2102 2103 /* 2104 * If "action" property exists, evaluate it; this must be done 2105 * before the dupclose check below since some actions may 2106 * modify the asru to be used in fmd_nvl_fmri_faulty. This 2107 * needs to be restructured if any new actions are introduced 2108 * that have effects that we do not want to be visible if 2109 * we decide not to publish in the dupclose check below. 2110 */ 2111 if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) { 2112 struct evalue evalue; 2113 2114 out(O_ALTFP|O_NONL, 2115 "[FME%d, %s action ", fmep->id, 2116 rp->suspect->enode->u.event.ename->u.name.s); 2117 ptree_name_iter(O_ALTFP|O_NONL, snp); 2118 out(O_ALTFP, "]"); 2119 Action_nvl = fault; 2120 (void) eval_expr(snp, NULL, NULL, NULL, NULL, 2121 NULL, 0, &evalue); 2122 } 2123 2124 /* 2125 * if "dupclose" tunable is set, check if the asru is 2126 * already marked as "faulty". 2127 */ 2128 if (Dupclose && allfaulty) { 2129 nvlist_t *asru; 2130 2131 out(O_ALTFP|O_VERB, "FME%d dupclose check ", fmep->id); 2132 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect); 2133 out(O_ALTFP|O_VERB|O_NONL, " "); 2134 if (nvlist_lookup_nvlist(fault, 2135 FM_FAULT_ASRU, &asru) != 0) { 2136 out(O_ALTFP|O_VERB, "NULL asru"); 2137 allfaulty = B_FALSE; 2138 } else if (fmd_nvl_fmri_faulty(fmep->hdl, asru)) { 2139 out(O_ALTFP|O_VERB, "faulty"); 2140 } else { 2141 out(O_ALTFP|O_VERB, "not faulty"); 2142 allfaulty = B_FALSE; 2143 } 2144 } 2145 2146 } 2147 2148 /* 2149 * Close the case if all asrus are already known to be faulty and if 2150 * Dupclose is enabled. Otherwise we are going to publish so take 2151 * any pre-publication actions.
2152 */ 2153 if (Dupclose && allfaulty) { 2154 out(O_ALTFP, "[dupclose FME%d, case %s]", fmep->id, 2155 fmd_case_uuid(fmep->hdl, fmep->fmcase)); 2156 fmd_case_close(fmep->hdl, fmep->fmcase); 2157 } else { 2158 for (rp = erl; rp >= srl; rp--) { 2159 struct event *suspect = rp->suspect; 2160 2161 if (suspect == NULL) 2162 continue; 2163 2164 fault = suspect->fault; 2165 2166 /* if "count" exists, increment the appropriate stat */ 2167 if ((snp = eventprop_lookup(suspect, 2168 L_count)) != NULL) { 2169 out(O_ALTFP|O_NONL, 2170 "[FME%d, %s count ", fmep->id, 2171 suspect->enode->u.event.ename->u.name.s); 2172 ptree_name_iter(O_ALTFP|O_NONL, snp); 2173 out(O_ALTFP, "]"); 2174 istat_bump(snp, 0); 2175 2176 } 2177 } 2178 istat_save(); /* write out any istat changes */ 2179 2180 out(O_ALTFP, "[solving FME%d, case %s]", fmep->id, 2181 fmd_case_uuid(fmep->hdl, fmep->fmcase)); 2182 fmd_case_solve(fmep->hdl, fmep->fmcase); 2183 } 2184 2185 /* 2186 * revert to the original suspect list 2187 */ 2188 FREE(srl); 2189 restore_suspects(fmep); 2190 } 2191 2192 static void 2193 publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep) 2194 { 2195 struct case_list *newcase; 2196 nvlist_t *defect; 2197 2198 out(O_ALTFP, 2199 "[undiagnosable ereport received, " 2200 "creating and closing a new case (%s)]", 2201 Undiag_reason ? Undiag_reason : "reason not provided"); 2202 2203 newcase = MALLOC(sizeof (struct case_list)); 2204 newcase->next = NULL; 2205 2206 newcase->fmcase = fmd_case_open(hdl, NULL); 2207 if (Undiagablecaselist != NULL) 2208 newcase->next = Undiagablecaselist; 2209 Undiagablecaselist = newcase; 2210 2211 if (ffep != NULL) 2212 fmd_case_add_ereport(hdl, newcase->fmcase, ffep); 2213 2214 defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100, 2215 NULL, NULL, NULL); 2216 if (Undiag_reason != NULL) 2217 (void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason); 2218 fmd_case_add_suspect(hdl, newcase->fmcase, defect); 2219 2220 fmd_case_solve(hdl, newcase->fmcase); 2221 fmd_case_close(hdl, newcase->fmcase); 2222 } 2223 2224 static void 2225 fme_undiagnosable(struct fme *f) 2226 { 2227 nvlist_t *defect; 2228 2229 out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]", 2230 f->id, fmd_case_uuid(f->hdl, f->fmcase), 2231 Undiag_reason ? Undiag_reason : "undiagnosable"); 2232 2233 defect = fmd_nvl_create_fault(f->hdl, UNDIAGNOSABLE_DEFECT, 100, 2234 NULL, NULL, NULL); 2235 if (Undiag_reason != NULL) 2236 (void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason); 2237 fmd_case_add_suspect(f->hdl, f->fmcase, defect); 2238 fmd_case_solve(f->hdl, f->fmcase); 2239 destroy_fme_bufs(f); 2240 fmd_case_close(f->hdl, f->fmcase); 2241 } 2242 2243 /* 2244 * fme_close_case 2245 * 2246 * Find the requested case amongst our fmes and close it. Free up 2247 * the related fme. 
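* The case may be one of the undiagnosable cases on Undiagablecaselist (we simply drop our record of it) or belong to an FME on FMElist; in the latter case the fme is unlinked, any pending timer is removed, the fme is moved to ClosedFMEs, and, if the count of open FMEs has dropped to Max_fme or below, a still-open overflow FME (if any) is closed as well.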
2248 */ 2249 void 2250 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase) 2251 { 2252 struct case_list *ucasep, *prevcasep = NULL; 2253 struct fme *prev = NULL; 2254 struct fme *fmep; 2255 2256 for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) { 2257 if (fmcase != ucasep->fmcase) { 2258 prevcasep = ucasep; 2259 continue; 2260 } 2261 2262 if (prevcasep == NULL) 2263 Undiagablecaselist = Undiagablecaselist->next; 2264 else 2265 prevcasep->next = ucasep->next; 2266 2267 FREE(ucasep); 2268 return; 2269 } 2270 2271 for (fmep = FMElist; fmep; fmep = fmep->next) { 2272 if (fmep->hdl == hdl && fmep->fmcase == fmcase) 2273 break; 2274 prev = fmep; 2275 } 2276 2277 if (fmep == NULL) { 2278 out(O_WARN, "Eft asked to close unrecognized case [%s].", 2279 fmd_case_uuid(hdl, fmcase)); 2280 return; 2281 } 2282 2283 if (EFMElist == fmep) 2284 EFMElist = prev; 2285 2286 if (prev == NULL) 2287 FMElist = FMElist->next; 2288 else 2289 prev->next = fmep->next; 2290 2291 fmep->next = NULL; 2292 2293 /* Get rid of any timer this fme has set */ 2294 if (fmep->wull != 0) 2295 fmd_timer_remove(fmep->hdl, fmep->timer); 2296 2297 if (ClosedFMEs == NULL) { 2298 ClosedFMEs = fmep; 2299 } else { 2300 fmep->next = ClosedFMEs; 2301 ClosedFMEs = fmep; 2302 } 2303 2304 Open_fme_count--; 2305 2306 /* See if we can close the overflow FME */ 2307 if (Open_fme_count <= Max_fme) { 2308 for (fmep = FMElist; fmep; fmep = fmep->next) { 2309 if (fmep->overflow && !(fmd_case_closed(fmep->hdl, 2310 fmep->fmcase))) 2311 break; 2312 } 2313 2314 if (fmep != NULL) 2315 fmd_case_close(fmep->hdl, fmep->fmcase); 2316 } 2317 } 2318 2319 /* 2320 * fme_set_timer() 2321 * If the time we need to wait for the given FME is less than the 2322 * current timer, kick that old timer out and establish a new one. 
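* Returns 1 if a new timer was installed, 0 if no timer change was needed (either we have already waited at least wull, or the existing timer expires no later than the requested time).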
2323 */ 2324 static int 2325 fme_set_timer(struct fme *fmep, unsigned long long wull) 2326 { 2327 out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait "); 2328 ptree_timeval(O_ALTFP|O_VERB, &wull); 2329 2330 if (wull <= fmep->pull) { 2331 out(O_ALTFP|O_VERB|O_NONL, "already have waited at least "); 2332 ptree_timeval(O_ALTFP|O_VERB, &fmep->pull); 2333 out(O_ALTFP|O_VERB, NULL); 2334 /* we've waited at least wull already, don't need timer */ 2335 return (0); 2336 } 2337 2338 out(O_ALTFP|O_VERB|O_NONL, " currently "); 2339 if (fmep->wull != 0) { 2340 out(O_ALTFP|O_VERB|O_NONL, "waiting "); 2341 ptree_timeval(O_ALTFP|O_VERB, &fmep->wull); 2342 out(O_ALTFP|O_VERB, NULL); 2343 } else { 2344 out(O_ALTFP|O_VERB|O_NONL, "not waiting"); 2345 out(O_ALTFP|O_VERB, NULL); 2346 } 2347 2348 if (fmep->wull != 0) 2349 if (wull >= fmep->wull) 2350 /* New timer would fire later than established timer */ 2351 return (0); 2352 2353 if (fmep->wull != 0) { 2354 fmd_timer_remove(fmep->hdl, fmep->timer); 2355 if (fmep->timer == fmep->htid) { 2356 out(O_ALTFP, 2357 "[stopped hesitating FME%d, case %s]", 2358 fmep->id, 2359 fmd_case_uuid(fmep->hdl, 2360 fmep->fmcase)); 2361 fmep->htid = 0; 2362 } 2363 } 2364 2365 fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep, 2366 fmep->e0r, wull); 2367 out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer); 2368 fmep->wull = wull; 2369 return (1); 2370 } 2371 2372 void 2373 fme_timer_fired(struct fme *fmep, id_t tid) 2374 { 2375 struct fme *ffmep = NULL; 2376 2377 for (ffmep = FMElist; ffmep; ffmep = ffmep->next) 2378 if (ffmep == fmep) 2379 break; 2380 2381 if (ffmep == NULL) { 2382 out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.", 2383 (void *)fmep); 2384 return; 2385 } 2386 2387 out(O_ALTFP, "Timer fired %lx %lx", tid, fmep->htid); 2388 if (tid != fmep->htid) { 2389 /* 2390 * normal timer (not the hesitation timer) 2391 */ 2392 fmep->pull = fmep->wull; 2393 fmep->wull = 0; 2394 fmd_buf_write(fmep->hdl, fmep->fmcase, 2395 WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull)); 2396 /* 2397 * no point in hesitating if we've already waited. 2398 */ 2399 fmep->hesitated = 1; 2400 } else { 2401 fmep->hesitated = 1; 2402 } 2403 fme_eval(fmep, fmep->e0r); 2404 } 2405 2406 /* 2407 * Preserve the fme's suspect list in its psuspects list, NULLing the 2408 * suspects list in the meantime. 2409 */ 2410 static void 2411 save_suspects(struct fme *fmep) 2412 { 2413 struct event *ep; 2414 struct event *nextep; 2415 2416 /* zero out the previous suspect list */ 2417 for (ep = fmep->psuspects; ep; ep = nextep) { 2418 nextep = ep->psuspects; 2419 ep->psuspects = NULL; 2420 } 2421 fmep->psuspects = NULL; 2422 2423 /* zero out the suspect list, copying it to previous suspect list */ 2424 fmep->psuspects = fmep->suspects; 2425 for (ep = fmep->suspects; ep; ep = nextep) { 2426 nextep = ep->suspects; 2427 ep->psuspects = ep->suspects; 2428 ep->suspects = NULL; 2429 ep->is_suspect = 0; 2430 } 2431 fmep->suspects = NULL; 2432 fmep->nsuspects = 0; 2433 fmep->nonfault = 0; 2434 } 2435 2436 /* 2437 * Retrieve the fme's suspect list from its psuspects list.
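* The walk also recomputes nsuspects and the nonfault count from the restored list.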
2438 */ 2439 static void 2440 restore_suspects(struct fme *fmep) 2441 { 2442 struct event *ep; 2443 struct event *nextep; 2444 2445 fmep->nsuspects = fmep->nonfault = 0; 2446 fmep->suspects = fmep->psuspects; 2447 for (ep = fmep->psuspects; ep; ep = nextep) { 2448 fmep->nsuspects++; 2449 if (!is_fault(ep->t)) 2450 fmep->nonfault++; 2451 nextep = ep->psuspects; 2452 ep->suspects = ep->psuspects; 2453 } 2454 } 2455 2456 /* 2457 * this is what we use to call the Emrys prototype code instead of main() 2458 */ 2459 static void 2460 fme_eval(struct fme *fmep, fmd_event_t *ffep) 2461 { 2462 struct event *ep; 2463 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 2464 2465 save_suspects(fmep); 2466 2467 out(O_ALTFP|O_VERB, "Evaluate FME %d", fmep->id); 2468 indent_set(" "); 2469 2470 lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep); 2471 fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay); 2472 2473 out(O_ALTFP|O_VERB|O_NONL, "FME%d state: %s, suspect list:", fmep->id, 2474 fme_state2str(fmep->state)); 2475 for (ep = fmep->suspects; ep; ep = ep->suspects) { 2476 out(O_ALTFP|O_VERB|O_NONL, " "); 2477 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2478 } 2479 out(O_ALTFP|O_VERB, NULL); 2480 2481 if (fmep->posted_suspects) { 2482 /* 2483 * this FME has already posted a diagnosis, so see if 2484 * the event changed the diagnosis and print a warning 2485 * if it did. 2486 * 2487 */ 2488 if (suspects_changed(fmep)) { 2489 print_suspects(SLCHANGED, fmep); 2490 publish_suspects(fmep); 2491 } 2492 } else { 2493 switch (fmep->state) { 2494 case FME_CREDIBLE: 2495 /* 2496 * if the suspect list contains any upsets, we 2497 * turn off the hesitation logic (by setting 2498 * the hesitate flag which normally indicates 2499 * we've already done the hesitate logic). 2500 * this is done because hesitating with upsets 2501 * causes us to explain away additional soft errors 2502 * while the upset FME stays open. 2503 */ 2504 if (fmep->hesitated == 0) { 2505 struct event *s; 2506 2507 for (s = fmep->suspects; s; s = s->suspects) { 2508 if (s->t == N_UPSET) { 2509 fmep->hesitated = 1; 2510 break; 2511 } 2512 } 2513 } 2514 2515 if (Hesitate && 2516 fmep->suspects != NULL && 2517 fmep->suspects->suspects != NULL && 2518 fmep->hesitated == 0) { 2519 /* 2520 * about to publish multi-entry suspect list, 2521 * set the hesitation timer if not already set. 
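* We remember the hesitation timer's id in htid so that fme_timer_fired() can tell it apart from the ordinary wait timer.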
2522 */ 2523 if (fmep->htid == 0) { 2524 out(O_ALTFP|O_NONL, 2525 "[hesitate FME%d, case %s ", 2526 fmep->id, 2527 fmd_case_uuid(fmep->hdl, 2528 fmep->fmcase)); 2529 ptree_timeval(O_ALTFP|O_NONL, 2530 (unsigned long long *)&Hesitate); 2531 out(O_ALTFP, "]"); 2532 if (fme_set_timer(fmep, Hesitate)) 2533 fmep->htid = fmep->timer; 2534 } else { 2535 out(O_ALTFP, 2536 "[still hesitating FME%d, case %s]", 2537 fmep->id, 2538 fmd_case_uuid(fmep->hdl, 2539 fmep->fmcase)); 2540 } 2541 } else { 2542 print_suspects(SLNEW, fmep); 2543 (void) upsets_eval(fmep, ffep); 2544 publish_suspects(fmep); 2545 fmep->posted_suspects = 1; 2546 fmd_buf_write(fmep->hdl, fmep->fmcase, 2547 WOBUF_POSTD, 2548 (void *)&fmep->posted_suspects, 2549 sizeof (fmep->posted_suspects)); 2550 } 2551 break; 2552 2553 case FME_WAIT: 2554 /* 2555 * singleton suspect list implies 2556 * no point in waiting 2557 */ 2558 if (fmep->suspects && 2559 fmep->suspects->suspects == NULL) { 2560 print_suspects(SLNEW, fmep); 2561 (void) upsets_eval(fmep, ffep); 2562 publish_suspects(fmep); 2563 fmep->posted_suspects = 1; 2564 fmd_buf_write(fmep->hdl, fmep->fmcase, 2565 WOBUF_POSTD, 2566 (void *)&fmep->posted_suspects, 2567 sizeof (fmep->posted_suspects)); 2568 fmep->state = FME_CREDIBLE; 2569 } else { 2570 ASSERT(my_delay > fmep->ull); 2571 (void) fme_set_timer(fmep, my_delay); 2572 print_suspects(SLWAIT, fmep); 2573 } 2574 break; 2575 2576 case FME_DISPROVED: 2577 print_suspects(SLDISPROVED, fmep); 2578 Undiag_reason = UD_UNSOLVD; 2579 fme_undiagnosable(fmep); 2580 break; 2581 } 2582 } 2583 2584 if (fmep->posted_suspects == 1 && Autoclose != NULL) { 2585 int doclose = 0; 2586 2587 if (strcmp(Autoclose, "true") == 0 || 2588 strcmp(Autoclose, "all") == 0) 2589 doclose = 1; 2590 2591 if (strcmp(Autoclose, "upsets") == 0) { 2592 doclose = 1; 2593 for (ep = fmep->suspects; ep; ep = ep->suspects) { 2594 if (ep->t != N_UPSET) { 2595 doclose = 0; 2596 break; 2597 } 2598 } 2599 } 2600 2601 if (doclose) { 2602 out(O_ALTFP, "[closing FME%d, case %s (autoclose)]", 2603 fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase)); 2604 2605 destroy_fme_bufs(fmep); 2606 fmd_case_close(fmep->hdl, fmep->fmcase); 2607 } 2608 } 2609 itree_prune(fmep->eventtree); 2610 } 2611 2612 static void indent(void); 2613 static int triggered(struct fme *fmep, struct event *ep, int mark); 2614 static enum fme_state effects_test(struct fme *fmep, 2615 struct event *fault_event, unsigned long long at_latest_by, 2616 unsigned long long *pdelay); 2617 static enum fme_state requirements_test(struct fme *fmep, struct event *ep, 2618 unsigned long long at_latest_by, unsigned long long *pdelay); 2619 static enum fme_state causes_test(struct fme *fmep, struct event *ep, 2620 unsigned long long at_latest_by, unsigned long long *pdelay); 2621 2622 static int 2623 checkconstraints(struct fme *fmep, struct arrow *arrowp) 2624 { 2625 struct constraintlist *ctp; 2626 struct evalue value; 2627 2628 if (arrowp->forever_false) { 2629 char *sep = ""; 2630 indent(); 2631 out(O_ALTFP|O_VERB|O_NONL, " Forever false constraint: "); 2632 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) { 2633 out(O_ALTFP|O_VERB|O_NONL, sep); 2634 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0); 2635 sep = ", "; 2636 } 2637 out(O_ALTFP|O_VERB, NULL); 2638 return (0); 2639 } 2640 2641 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) { 2642 if (eval_expr(ctp->cnode, NULL, NULL, 2643 &fmep->globals, fmep->cfgdata->cooked, 2644 arrowp, 0, &value)) { 2645 /* evaluation successful */ 2646 if (value.t == 
UNDEFINED || value.v == 0) { 2647 /* known false */ 2648 arrowp->forever_false = 1; 2649 indent(); 2650 out(O_ALTFP|O_VERB|O_NONL, 2651 " False constraint: "); 2652 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0); 2653 out(O_ALTFP|O_VERB, NULL); 2654 return (0); 2655 } 2656 } else { 2657 /* evaluation unsuccessful -- unknown value */ 2658 indent(); 2659 out(O_ALTFP|O_VERB|O_NONL, 2660 " Deferred constraint: "); 2661 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0); 2662 out(O_ALTFP|O_VERB, NULL); 2663 return (2); 2664 } 2665 } 2666 /* known true */ 2667 return (1); 2668 } 2669 2670 static int 2671 triggered(struct fme *fmep, struct event *ep, int mark) 2672 { 2673 struct bubble *bp; 2674 struct arrowlist *ap; 2675 int count = 0; 2676 2677 stats_counter_bump(fmep->Tcallcount); 2678 for (bp = itree_next_bubble(ep, NULL); bp; 2679 bp = itree_next_bubble(ep, bp)) { 2680 if (bp->t != B_TO) 2681 continue; 2682 for (ap = itree_next_arrow(bp, NULL); ap; 2683 ap = itree_next_arrow(bp, ap)) { 2684 /* check count of marks against K in the bubble */ 2685 if ((ap->arrowp->mark & mark) && 2686 ++count >= bp->nork) 2687 return (1); 2688 } 2689 } 2690 return (0); 2691 } 2692 2693 static int 2694 mark_arrows(struct fme *fmep, struct event *ep, int mark, 2695 unsigned long long at_latest_by, unsigned long long *pdelay, int keep) 2696 { 2697 struct bubble *bp; 2698 struct arrowlist *ap; 2699 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 2700 unsigned long long my_delay; 2701 enum fme_state result; 2702 int retval = 0; 2703 2704 for (bp = itree_next_bubble(ep, NULL); bp; 2705 bp = itree_next_bubble(ep, bp)) { 2706 if (bp->t != B_FROM) 2707 continue; 2708 stats_counter_bump(fmep->Marrowcount); 2709 for (ap = itree_next_arrow(bp, NULL); ap; 2710 ap = itree_next_arrow(bp, ap)) { 2711 struct event *ep2 = ap->arrowp->head->myevent; 2712 /* 2713 * if we're clearing marks, we can avoid doing 2714 * all that work evaluating constraints. 
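* When keep is nonzero we first flag (via keep_in_tree) any event whose effect/wait state is about to be cleared, so the event survives the later itree_prune().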
2715 */ 2716 if (mark == 0) { 2717 ap->arrowp->mark &= ~EFFECTS_COUNTER; 2718 if (keep && (ep2->cached_state & 2719 (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT))) 2720 ep2->keep_in_tree = 1; 2721 ep2->cached_state &= 2722 ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT); 2723 (void) mark_arrows(fmep, ep2, mark, 0, NULL, 2724 keep); 2725 continue; 2726 } 2727 if (ep2->cached_state & REQMNTS_DISPROVED) { 2728 indent(); 2729 out(O_ALTFP|O_VERB|O_NONL, 2730 " ALREADY DISPROVED "); 2731 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2732 out(O_ALTFP|O_VERB, NULL); 2733 continue; 2734 } 2735 if (ep2->cached_state & WAIT_EFFECT) { 2736 indent(); 2737 out(O_ALTFP|O_VERB|O_NONL, 2738 " ALREADY EFFECTS WAIT "); 2739 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2740 out(O_ALTFP|O_VERB, NULL); 2741 continue; 2742 } 2743 if (ep2->cached_state & CREDIBLE_EFFECT) { 2744 indent(); 2745 out(O_ALTFP|O_VERB|O_NONL, 2746 " ALREADY EFFECTS CREDIBLE "); 2747 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2748 out(O_ALTFP|O_VERB, NULL); 2749 continue; 2750 } 2751 if ((ep2->cached_state & PARENT_WAIT) && 2752 (mark & PARENT_WAIT)) { 2753 indent(); 2754 out(O_ALTFP|O_VERB|O_NONL, 2755 " ALREADY PARENT EFFECTS WAIT "); 2756 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2757 out(O_ALTFP|O_VERB, NULL); 2758 continue; 2759 } 2760 platform_set_payloadnvp(ep2->nvp); 2761 if (checkconstraints(fmep, ap->arrowp) == 0) { 2762 platform_set_payloadnvp(NULL); 2763 indent(); 2764 out(O_ALTFP|O_VERB|O_NONL, 2765 " CONSTRAINTS FAIL "); 2766 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2767 out(O_ALTFP|O_VERB, NULL); 2768 continue; 2769 } 2770 platform_set_payloadnvp(NULL); 2771 ap->arrowp->mark |= EFFECTS_COUNTER; 2772 if (!triggered(fmep, ep2, EFFECTS_COUNTER)) { 2773 indent(); 2774 out(O_ALTFP|O_VERB|O_NONL, 2775 " K-COUNT NOT YET MET "); 2776 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2777 out(O_ALTFP|O_VERB, NULL); 2778 continue; 2779 } 2780 ep2->cached_state &= ~PARENT_WAIT; 2781 result = requirements_test(fmep, ep2, at_latest_by + 2782 ap->arrowp->maxdelay, 2783 &my_delay); 2784 if (result == FME_WAIT) { 2785 retval = WAIT_EFFECT; 2786 if (overall_delay > my_delay) 2787 overall_delay = my_delay; 2788 ep2->cached_state |= WAIT_EFFECT; 2789 indent(); 2790 out(O_ALTFP|O_VERB|O_NONL, " EFFECTS WAIT "); 2791 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2792 out(O_ALTFP|O_VERB, NULL); 2793 indent_push(" E"); 2794 if (mark_arrows(fmep, ep2, PARENT_WAIT, 2795 at_latest_by, &my_delay, 0) == 2796 WAIT_EFFECT) { 2797 retval = WAIT_EFFECT; 2798 if (overall_delay > my_delay) 2799 overall_delay = my_delay; 2800 } 2801 indent_pop(); 2802 } else if (result == FME_DISPROVED) { 2803 indent(); 2804 out(O_ALTFP|O_VERB|O_NONL, 2805 " EFFECTS DISPROVED "); 2806 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2807 out(O_ALTFP|O_VERB, NULL); 2808 } else { 2809 ep2->cached_state |= mark; 2810 indent(); 2811 if (mark == CREDIBLE_EFFECT) 2812 out(O_ALTFP|O_VERB|O_NONL, 2813 " EFFECTS CREDIBLE "); 2814 else 2815 out(O_ALTFP|O_VERB|O_NONL, 2816 " PARENT EFFECTS WAIT "); 2817 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2818 out(O_ALTFP|O_VERB, NULL); 2819 indent_push(" E"); 2820 if (mark_arrows(fmep, ep2, mark, at_latest_by, 2821 &my_delay, 0) == WAIT_EFFECT) { 2822 retval = WAIT_EFFECT; 2823 if (overall_delay > my_delay) 2824 overall_delay = my_delay; 2825 } 2826 indent_pop(); 2827 } 2828 } 2829 } 2830 if (retval == WAIT_EFFECT) 2831 *pdelay = overall_delay; 2832 return (retval); 2833 } 2834 2835 static enum fme_state 2836 
effects_test(struct fme *fmep, struct event *fault_event, 2837 unsigned long long at_latest_by, unsigned long long *pdelay) 2838 { 2839 struct event *error_event; 2840 enum fme_state return_value = FME_CREDIBLE; 2841 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 2842 unsigned long long my_delay; 2843 2844 stats_counter_bump(fmep->Ecallcount); 2845 indent_push(" E"); 2846 indent(); 2847 out(O_ALTFP|O_VERB|O_NONL, "->"); 2848 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event); 2849 out(O_ALTFP|O_VERB, NULL); 2850 2851 (void) mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by, 2852 &my_delay, 0); 2853 for (error_event = fmep->observations; 2854 error_event; error_event = error_event->observations) { 2855 indent(); 2856 out(O_ALTFP|O_VERB|O_NONL, " "); 2857 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event); 2858 if (!(error_event->cached_state & CREDIBLE_EFFECT)) { 2859 if (error_event->cached_state & 2860 (PARENT_WAIT|WAIT_EFFECT)) { 2861 return_value = FME_WAIT; 2862 if (overall_delay > my_delay) 2863 overall_delay = my_delay; 2864 out(O_ALTFP|O_VERB, " NOT YET triggered"); 2865 continue; 2866 } 2867 return_value = FME_DISPROVED; 2868 out(O_ALTFP|O_VERB, " NOT triggered"); 2869 break; 2870 } else { 2871 out(O_ALTFP|O_VERB, " triggered"); 2872 } 2873 } 2874 if (return_value == FME_DISPROVED) { 2875 (void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0); 2876 } else { 2877 fault_event->keep_in_tree = 1; 2878 (void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1); 2879 } 2880 2881 indent(); 2882 out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ", 2883 fme_state2str(return_value)); 2884 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event); 2885 out(O_ALTFP|O_VERB, NULL); 2886 indent_pop(); 2887 if (return_value == FME_WAIT) 2888 *pdelay = overall_delay; 2889 return (return_value); 2890 } 2891 2892 static enum fme_state 2893 requirements_test(struct fme *fmep, struct event *ep, 2894 unsigned long long at_latest_by, unsigned long long *pdelay) 2895 { 2896 int waiting_events; 2897 int credible_events; 2898 int deferred_events; 2899 enum fme_state return_value = FME_CREDIBLE; 2900 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 2901 unsigned long long arrow_delay; 2902 unsigned long long my_delay; 2903 struct event *ep2; 2904 struct bubble *bp; 2905 struct arrowlist *ap; 2906 2907 if (ep->cached_state & REQMNTS_CREDIBLE) { 2908 indent(); 2909 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY CREDIBLE "); 2910 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2911 out(O_ALTFP|O_VERB, NULL); 2912 return (FME_CREDIBLE); 2913 } 2914 if (ep->cached_state & REQMNTS_DISPROVED) { 2915 indent(); 2916 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY DISPROVED "); 2917 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2918 out(O_ALTFP|O_VERB, NULL); 2919 return (FME_DISPROVED); 2920 } 2921 if (ep->cached_state & REQMNTS_WAIT) { 2922 indent(); 2923 *pdelay = ep->cached_delay; 2924 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY WAIT "); 2925 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2926 out(O_ALTFP|O_VERB|O_NONL, ", wait for: "); 2927 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 2928 out(O_ALTFP|O_VERB, NULL); 2929 return (FME_WAIT); 2930 } 2931 stats_counter_bump(fmep->Rcallcount); 2932 indent_push(" R"); 2933 indent(); 2934 out(O_ALTFP|O_VERB|O_NONL, "->"); 2935 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2936 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: "); 2937 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 2938 out(O_ALTFP|O_VERB, NULL); 2939 2940 if (ep->t == N_EREPORT) { 2941 
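/* An ereport is a leaf of the propagation tree: if it has been observed (count != 0) this requirement is credible as-is; otherwise we either wait for it until at_latest_by or, if we have already waited past that deadline, declare it disproved. */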
if (ep->count == 0) { 2942 if (fmep->pull >= at_latest_by) { 2943 return_value = FME_DISPROVED; 2944 } else { 2945 ep->cached_delay = *pdelay = at_latest_by; 2946 return_value = FME_WAIT; 2947 } 2948 } 2949 2950 indent(); 2951 switch (return_value) { 2952 case FME_CREDIBLE: 2953 ep->cached_state |= REQMNTS_CREDIBLE; 2954 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE "); 2955 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2956 break; 2957 case FME_DISPROVED: 2958 ep->cached_state |= REQMNTS_DISPROVED; 2959 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED "); 2960 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2961 break; 2962 case FME_WAIT: 2963 ep->cached_state |= REQMNTS_WAIT; 2964 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT "); 2965 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2966 out(O_ALTFP|O_VERB|O_NONL, " to "); 2967 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 2968 break; 2969 default: 2970 out(O_DIE, "requirements_test: unexpected fme_state"); 2971 break; 2972 } 2973 out(O_ALTFP|O_VERB, NULL); 2974 indent_pop(); 2975 2976 return (return_value); 2977 } 2978 2979 /* this event is not a report, descend the tree */ 2980 for (bp = itree_next_bubble(ep, NULL); bp; 2981 bp = itree_next_bubble(ep, bp)) { 2982 int n; 2983 2984 if (bp->t != B_FROM) 2985 continue; 2986 2987 n = bp->nork; 2988 2989 credible_events = 0; 2990 waiting_events = 0; 2991 deferred_events = 0; 2992 arrow_delay = TIMEVAL_EVENTUALLY; 2993 /* 2994 * n is -1 for 'A' so adjust it. 2995 * XXX just count up the arrows for now. 2996 */ 2997 if (n < 0) { 2998 n = 0; 2999 for (ap = itree_next_arrow(bp, NULL); ap; 3000 ap = itree_next_arrow(bp, ap)) 3001 n++; 3002 indent(); 3003 out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n); 3004 } else { 3005 indent(); 3006 out(O_ALTFP|O_VERB, " Bubble N=%d", n); 3007 } 3008 3009 if (n == 0) 3010 continue; 3011 if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) { 3012 for (ap = itree_next_arrow(bp, NULL); ap; 3013 ap = itree_next_arrow(bp, ap)) { 3014 ep2 = ap->arrowp->head->myevent; 3015 platform_set_payloadnvp(ep2->nvp); 3016 if (checkconstraints(fmep, ap->arrowp) == 0) { 3017 /* 3018 * if any arrow is invalidated by the 3019 * constraints, then we should elide the 3020 * whole bubble to be consistent with 3021 * the tree creation time behaviour 3022 */ 3023 bp->mark |= BUBBLE_ELIDED; 3024 platform_set_payloadnvp(NULL); 3025 break; 3026 } 3027 platform_set_payloadnvp(NULL); 3028 } 3029 } 3030 if (bp->mark & BUBBLE_ELIDED) 3031 continue; 3032 bp->mark |= BUBBLE_OK; 3033 for (ap = itree_next_arrow(bp, NULL); ap; 3034 ap = itree_next_arrow(bp, ap)) { 3035 ep2 = ap->arrowp->head->myevent; 3036 if (n <= credible_events) 3037 break; 3038 3039 ap->arrowp->mark |= REQMNTS_COUNTER; 3040 if (triggered(fmep, ep2, REQMNTS_COUNTER)) 3041 /* XXX adding max timevals!
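(at_latest_by grows by each arrow's maxdelay as we recurse, so it tracks the worst-case propagation time along this path)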
*/ 3042 switch (requirements_test(fmep, ep2, 3043 at_latest_by + ap->arrowp->maxdelay, 3044 &my_delay)) { 3045 case FME_DEFERRED: 3046 deferred_events++; 3047 break; 3048 case FME_CREDIBLE: 3049 credible_events++; 3050 break; 3051 case FME_DISPROVED: 3052 break; 3053 case FME_WAIT: 3054 if (my_delay < arrow_delay) 3055 arrow_delay = my_delay; 3056 waiting_events++; 3057 break; 3058 default: 3059 out(O_DIE, 3060 "Bug in requirements_test."); 3061 } 3062 else 3063 deferred_events++; 3064 } 3065 indent(); 3066 out(O_ALTFP|O_VERB, " Credible: %d Waiting %d", 3067 credible_events + deferred_events, waiting_events); 3068 if (credible_events + deferred_events + waiting_events < n) { 3069 /* Can never meet requirements */ 3070 ep->cached_state |= REQMNTS_DISPROVED; 3071 indent(); 3072 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED "); 3073 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3074 out(O_ALTFP|O_VERB, NULL); 3075 indent_pop(); 3076 return (FME_DISPROVED); 3077 } 3078 if (credible_events + deferred_events < n) { 3079 /* will have to wait */ 3080 /* wait time is shortest known */ 3081 if (arrow_delay < overall_delay) 3082 overall_delay = arrow_delay; 3083 return_value = FME_WAIT; 3084 } else if (credible_events < n) { 3085 if (return_value != FME_WAIT) 3086 return_value = FME_DEFERRED; 3087 } 3088 } 3089 3090 /* 3091 * don't mark as FME_DEFERRED. If this event isn't reached by another 3092 * path, then this will be considered FME_CREDIBLE. But if it is 3093 * reached by a different path so that the K-count is met, then it might 3094 * get overridden by FME_WAIT or FME_DISPROVED. 3095 */ 3096 if (return_value == FME_WAIT) { 3097 ep->cached_state |= REQMNTS_WAIT; 3098 ep->cached_delay = *pdelay = overall_delay; 3099 } else if (return_value == FME_CREDIBLE) { 3100 ep->cached_state |= REQMNTS_CREDIBLE; 3101 } 3102 indent(); 3103 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ", 3104 fme_state2str(return_value)); 3105 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3106 out(O_ALTFP|O_VERB, NULL); 3107 indent_pop(); 3108 return (return_value); 3109 } 3110 3111 static enum fme_state 3112 causes_test(struct fme *fmep, struct event *ep, 3113 unsigned long long at_latest_by, unsigned long long *pdelay) 3114 { 3115 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 3116 unsigned long long my_delay; 3117 int credible_results = 0; 3118 int waiting_results = 0; 3119 enum fme_state fstate; 3120 struct event *tail_event; 3121 struct bubble *bp; 3122 struct arrowlist *ap; 3123 int k = 1; 3124 3125 stats_counter_bump(fmep->Ccallcount); 3126 indent_push(" C"); 3127 indent(); 3128 out(O_ALTFP|O_VERB|O_NONL, "->"); 3129 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3130 out(O_ALTFP|O_VERB, NULL); 3131 3132 for (bp = itree_next_bubble(ep, NULL); bp; 3133 bp = itree_next_bubble(ep, bp)) { 3134 if (bp->t != B_TO) 3135 continue; 3136 k = bp->nork; /* remember the K value */ 3137 for (ap = itree_next_arrow(bp, NULL); ap; 3138 ap = itree_next_arrow(bp, ap)) { 3139 int do_not_follow = 0; 3140 3141 /* 3142 * if we get to the same event multiple times 3143 * only worry about the first one.
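* (the CAUSES_TESTED bit in cached_state marks tail events we have already hypothesised about)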
3144 */ 3145 if (ap->arrowp->tail->myevent->cached_state & 3146 CAUSES_TESTED) { 3147 indent(); 3148 out(O_ALTFP|O_VERB|O_NONL, 3149 " causes test already run for "); 3150 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, 3151 ap->arrowp->tail->myevent); 3152 out(O_ALTFP|O_VERB, NULL); 3153 continue; 3154 } 3155 3156 /* 3157 * see if false constraint prevents us 3158 * from traversing this arrow 3159 */ 3160 platform_set_payloadnvp(ep->nvp); 3161 if (checkconstraints(fmep, ap->arrowp) == 0) 3162 do_not_follow = 1; 3163 platform_set_payloadnvp(NULL); 3164 if (do_not_follow) { 3165 indent(); 3166 out(O_ALTFP|O_VERB|O_NONL, 3167 " False arrow from "); 3168 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, 3169 ap->arrowp->tail->myevent); 3170 out(O_ALTFP|O_VERB, NULL); 3171 continue; 3172 } 3173 3174 ap->arrowp->tail->myevent->cached_state |= 3175 CAUSES_TESTED; 3176 tail_event = ap->arrowp->tail->myevent; 3177 fstate = hypothesise(fmep, tail_event, at_latest_by, 3178 &my_delay); 3179 3180 switch (fstate) { 3181 case FME_WAIT: 3182 if (my_delay < overall_delay) 3183 overall_delay = my_delay; 3184 waiting_results++; 3185 break; 3186 case FME_CREDIBLE: 3187 credible_results++; 3188 break; 3189 case FME_DISPROVED: 3190 break; 3191 default: 3192 out(O_DIE, "Bug in causes_test"); 3193 } 3194 } 3195 } 3196 /* compare against K */ 3197 if (credible_results + waiting_results < k) { 3198 indent(); 3199 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED "); 3200 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3201 out(O_ALTFP|O_VERB, NULL); 3202 indent_pop(); 3203 return (FME_DISPROVED); 3204 } 3205 if (waiting_results != 0) { 3206 *pdelay = overall_delay; 3207 indent(); 3208 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT "); 3209 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3210 out(O_ALTFP|O_VERB|O_NONL, " to "); 3211 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3212 out(O_ALTFP|O_VERB, NULL); 3213 indent_pop(); 3214 return (FME_WAIT); 3215 } 3216 indent(); 3217 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES CREDIBLE "); 3218 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3219 out(O_ALTFP|O_VERB, NULL); 3220 indent_pop(); 3221 return (FME_CREDIBLE); 3222 } 3223 3224 static enum fme_state 3225 hypothesise(struct fme *fmep, struct event *ep, 3226 unsigned long long at_latest_by, unsigned long long *pdelay) 3227 { 3228 enum fme_state rtr, otr; 3229 unsigned long long my_delay; 3230 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 3231 3232 stats_counter_bump(fmep->Hcallcount); 3233 indent_push(" H"); 3234 indent(); 3235 out(O_ALTFP|O_VERB|O_NONL, "->"); 3236 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3237 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: "); 3238 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3239 out(O_ALTFP|O_VERB, NULL); 3240 3241 rtr = requirements_test(fmep, ep, at_latest_by, &my_delay); 3242 if ((rtr == FME_WAIT) && (my_delay < overall_delay)) 3243 overall_delay = my_delay; 3244 if (rtr != FME_DISPROVED) { 3245 if (is_problem(ep->t)) { 3246 otr = effects_test(fmep, ep, at_latest_by, &my_delay); 3247 if (otr != FME_DISPROVED) { 3248 if (fmep->peek == 0 && ep->is_suspect++ == 0) { 3249 ep->suspects = fmep->suspects; 3250 fmep->suspects = ep; 3251 fmep->nsuspects++; 3252 if (!is_fault(ep->t)) 3253 fmep->nonfault++; 3254 } 3255 } 3256 } else 3257 otr = causes_test(fmep, ep, at_latest_by, &my_delay); 3258 if ((otr == FME_WAIT) && (my_delay < overall_delay)) 3259 overall_delay = my_delay; 3260 if ((otr != FME_DISPROVED) && 3261 ((rtr == FME_WAIT) || (otr == FME_WAIT))) 3262 *pdelay = overall_delay; 
3263 } 3264 if (rtr == FME_DISPROVED) { 3265 indent(); 3266 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 3267 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3268 out(O_ALTFP|O_VERB, " (doesn't meet requirements)"); 3269 indent_pop(); 3270 return (FME_DISPROVED); 3271 } 3272 if ((otr == FME_DISPROVED) && is_problem(ep->t)) { 3273 indent(); 3274 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 3275 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3276 out(O_ALTFP|O_VERB, " (doesn't explain all reports)"); 3277 indent_pop(); 3278 return (FME_DISPROVED); 3279 } 3280 if (otr == FME_DISPROVED) { 3281 indent(); 3282 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 3283 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3284 out(O_ALTFP|O_VERB, " (causes are not credible)"); 3285 indent_pop(); 3286 return (FME_DISPROVED); 3287 } 3288 if ((rtr == FME_WAIT) || (otr == FME_WAIT)) { 3289 indent(); 3290 out(O_ALTFP|O_VERB|O_NONL, "<-WAIT "); 3291 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3292 out(O_ALTFP|O_VERB|O_NONL, " to "); 3293 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay); 3294 out(O_ALTFP|O_VERB, NULL); 3295 indent_pop(); 3296 return (FME_WAIT); 3297 } 3298 indent(); 3299 out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE "); 3300 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3301 out(O_ALTFP|O_VERB, NULL); 3302 indent_pop(); 3303 return (FME_CREDIBLE); 3304 } 3305 3306 /* 3307 * fme_istat_load -- reconstitute any persistent istats 3308 */ 3309 void 3310 fme_istat_load(fmd_hdl_t *hdl) 3311 { 3312 int sz; 3313 char *sbuf; 3314 char *ptr; 3315 3316 if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) { 3317 out(O_ALTFP, "fme_istat_load: No stats"); 3318 return; 3319 } 3320 3321 sbuf = alloca(sz); 3322 3323 fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz); 3324 3325 /* 3326 * pick apart the serialized stats 3327 * 3328 * format is: 3329 * <class-name>, '@', <path>, '\0', <value>, '\0' 3330 * for example: 3331 * "stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0" 3332 * 3333 * since this is parsing our own serialized data, any parsing issues 3334 * are fatal, so we check for them all with ASSERT() below. 3335 */ 3336 ptr = sbuf; 3337 while (ptr < &sbuf[sz]) { 3338 char *sepptr; 3339 struct node *np; 3340 int val; 3341 3342 sepptr = strchr(ptr, '@'); 3343 ASSERT(sepptr != NULL); 3344 *sepptr = '\0'; 3345 3346 /* construct the event */ 3347 np = newnode(T_EVENT, NULL, 0); 3348 np->u.event.ename = newnode(T_NAME, NULL, 0); 3349 np->u.event.ename->u.name.t = N_STAT; 3350 np->u.event.ename->u.name.s = stable(ptr); 3351 np->u.event.ename->u.name.it = IT_ENAME; 3352 np->u.event.ename->u.name.last = np->u.event.ename; 3353 3354 ptr = sepptr + 1; 3355 ASSERT(ptr < &sbuf[sz]); 3356 ptr += strlen(ptr); 3357 ptr++; /* move past the '\0' separating path from value */ 3358 ASSERT(ptr < &sbuf[sz]); 3359 ASSERT(isdigit(*ptr)); 3360 val = atoi(ptr); 3361 ASSERT(val > 0); 3362 ptr += strlen(ptr); 3363 ptr++; /* move past the final '\0' for this entry */ 3364 3365 np->u.event.epname = pathstring2epnamenp(sepptr + 1); 3366 ASSERT(np->u.event.epname != NULL); 3367 3368 istat_bump(np, val); 3369 tree_free(np); 3370 } 3371 3372 istat_save(); 3373 } 3374