1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * fme.c -- fault management exercise module 27 * 28 * this module provides the simulated fault management exercise. 29 */ 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <string.h> 36 #include <strings.h> 37 #include <ctype.h> 38 #include <alloca.h> 39 #include <libnvpair.h> 40 #include <sys/fm/protocol.h> 41 #include <fm/fmd_api.h> 42 #include "alloc.h" 43 #include "out.h" 44 #include "stats.h" 45 #include "stable.h" 46 #include "literals.h" 47 #include "lut.h" 48 #include "tree.h" 49 #include "ptree.h" 50 #include "itree.h" 51 #include "ipath.h" 52 #include "fme.h" 53 #include "evnv.h" 54 #include "eval.h" 55 #include "config.h" 56 #include "platform.h" 57 58 /* imported from eft.c... 
 */
extern char *Autoclose;
extern int Dupclose;
extern hrtime_t Hesitate;
extern nv_alloc_t Eft_nv_hdl;
extern int Max_fme;
extern fmd_hdl_t *Hdl;

/* nonzero when in-memory istats differ from their serialized copies */
static int Istat_need_save;
void istat_save(void);

/* fme under construction is global so we can free it on module abort */
static struct fme *Nfmep;

/* reason the current case cannot be diagnosed (UD_* literal) */
static const char *Undiag_reason;

/* id to assign to the next FME created; see also fme_restart() */
static int Nextid = 0;

static int Open_fme_count = 0;	/* Count of open FMEs */

/* list of fault management exercises underway */
static struct fme {
	struct fme *next;		/* next exercise */
	unsigned long long ull;		/* time when fme was created */
	int id;				/* FME id */
	struct cfgdata *cfgdata;	/* full configuration data */
	struct lut *eventtree;		/* propagation tree for this FME */
	/*
	 * The initial error report that created this FME is kept in
	 * two forms.  e0 points to the instance tree node and is used
	 * by fme_eval() as the starting point for the inference
	 * algorithm.  e0r is the event handle FMD passed to us when
	 * the ereport first arrived and is used when setting timers,
	 * which are always relative to the time of this initial
	 * report.
	 */
	struct event *e0;
	fmd_event_t *e0r;

	id_t timer;			/* for setting an fmd time-out */

	struct event *ecurrent;		/* ereport under consideration */
	struct event *suspects;		/* current suspect list */
	struct event *psuspects;	/* previous suspect list */
	int nsuspects;			/* count of suspects */
	int nonfault;			/* zero if all suspects T_FAULT */
	int posted_suspects;		/* true if we've posted a diagnosis */
	int uniqobs;			/* number of unique events observed */
	int peek;			/* just peeking, don't track suspects */
	int overflow;			/* true if overflow FME */
	enum fme_state {
		FME_NOTHING = 5000,	/* not evaluated yet */
		FME_WAIT,		/* need to wait for more info */
		FME_CREDIBLE,		/* suspect list is credible */
		FME_DISPROVED,		/* no valid suspects found */
		FME_DEFERRED		/* don't know yet (k-count not met) */
	} state;

	unsigned long long pull;	/* time passed since created */
	unsigned long long wull;	/* wait until this time for re-eval */
	struct event *observations;	/* observation list */
	struct lut *globals;		/* values of global variables */
	/* fmd interfacing */
	fmd_hdl_t *hdl;			/* handle for talking with fmd */
	fmd_case_t *fmcase;		/* what fmd 'case' we associate with */
	/* stats */
	struct stats *Rcount;
	struct stats *Hcallcount;
	struct stats *Rcallcount;
	struct stats *Ccallcount;
	struct stats *Ecallcount;
	struct stats *Tcallcount;
	struct stats *Marrowcount;
	struct stats *diags;
} *FMElist, *EFMElist, *ClosedFMEs;

/* cases we could not restart; they get solved/closed as undiagnosable */
static struct case_list {
	fmd_case_t *fmcase;
	struct case_list *next;
} *Undiagablecaselist;

static void fme_eval(struct fme *fmep, fmd_event_t *ffep);
static enum fme_state hypothesise(struct fme *fmep, struct event *ep,
    unsigned long long at_latest_by, unsigned long long *pdelay);
static struct node *eventprop_lookup(struct event *ep, const char *propname);
static struct node
*pathstring2epnamenp(char *path); 144 static void publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep); 145 static void restore_suspects(struct fme *fmep); 146 static void save_suspects(struct fme *fmep); 147 static void destroy_fme(struct fme *f); 148 static void fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep, 149 const char *eventstring, const struct ipath *ipp, nvlist_t *nvl); 150 static void istat_counter_reset_cb(struct istat_entry *entp, 151 struct stats *statp, const struct ipath *ipp); 152 153 static struct fme * 154 alloc_fme(void) 155 { 156 struct fme *fmep; 157 158 fmep = MALLOC(sizeof (*fmep)); 159 bzero(fmep, sizeof (*fmep)); 160 return (fmep); 161 } 162 163 /* 164 * fme_ready -- called when all initialization of the FME (except for 165 * stats) has completed successfully. Adds the fme to global lists 166 * and establishes its stats. 167 */ 168 static struct fme * 169 fme_ready(struct fme *fmep) 170 { 171 char nbuf[100]; 172 173 Nfmep = NULL; /* don't need to free this on module abort now */ 174 175 if (EFMElist) { 176 EFMElist->next = fmep; 177 EFMElist = fmep; 178 } else 179 FMElist = EFMElist = fmep; 180 181 (void) sprintf(nbuf, "fme%d.Rcount", fmep->id); 182 fmep->Rcount = stats_new_counter(nbuf, "ereports received", 0); 183 (void) sprintf(nbuf, "fme%d.Hcall", fmep->id); 184 fmep->Hcallcount = stats_new_counter(nbuf, "calls to hypothesise()", 1); 185 (void) sprintf(nbuf, "fme%d.Rcall", fmep->id); 186 fmep->Rcallcount = stats_new_counter(nbuf, 187 "calls to requirements_test()", 1); 188 (void) sprintf(nbuf, "fme%d.Ccall", fmep->id); 189 fmep->Ccallcount = stats_new_counter(nbuf, "calls to causes_test()", 1); 190 (void) sprintf(nbuf, "fme%d.Ecall", fmep->id); 191 fmep->Ecallcount = 192 stats_new_counter(nbuf, "calls to effects_test()", 1); 193 (void) sprintf(nbuf, "fme%d.Tcall", fmep->id); 194 fmep->Tcallcount = stats_new_counter(nbuf, "calls to triggered()", 1); 195 (void) sprintf(nbuf, "fme%d.Marrow", fmep->id); 196 fmep->Marrowcount = 
stats_new_counter(nbuf, 197 "arrows marked by mark_arrows()", 1); 198 (void) sprintf(nbuf, "fme%d.diags", fmep->id); 199 fmep->diags = stats_new_counter(nbuf, "suspect lists diagnosed", 0); 200 201 out(O_ALTFP|O_VERB2, "newfme: config snapshot contains..."); 202 config_print(O_ALTFP|O_VERB2, fmep->cfgdata->cooked); 203 204 return (fmep); 205 } 206 207 static struct fme * 208 newfme(const char *e0class, const struct ipath *e0ipp) 209 { 210 struct cfgdata *cfgdata; 211 212 if ((cfgdata = config_snapshot()) == NULL) { 213 out(O_ALTFP, "newfme: NULL configuration"); 214 Undiag_reason = UD_NOCONF; 215 return (NULL); 216 } 217 218 Nfmep = alloc_fme(); 219 220 Nfmep->id = Nextid++; 221 Nfmep->cfgdata = cfgdata; 222 Nfmep->posted_suspects = 0; 223 Nfmep->uniqobs = 0; 224 Nfmep->state = FME_NOTHING; 225 Nfmep->pull = 0ULL; 226 Nfmep->overflow = 0; 227 228 Nfmep->fmcase = NULL; 229 Nfmep->hdl = NULL; 230 231 if ((Nfmep->eventtree = itree_create(cfgdata->cooked)) == NULL) { 232 out(O_ALTFP, "newfme: NULL instance tree"); 233 Undiag_reason = UD_INSTFAIL; 234 config_free(cfgdata); 235 FREE(Nfmep); 236 Nfmep = NULL; 237 return (NULL); 238 } 239 240 itree_ptree(O_ALTFP|O_VERB2, Nfmep->eventtree); 241 242 if ((Nfmep->e0 = 243 itree_lookup(Nfmep->eventtree, e0class, e0ipp)) == NULL) { 244 out(O_ALTFP, "newfme: e0 not in instance tree"); 245 Undiag_reason = UD_BADEVENTI; 246 itree_free(Nfmep->eventtree); 247 config_free(cfgdata); 248 FREE(Nfmep); 249 Nfmep = NULL; 250 return (NULL); 251 } 252 253 return (fme_ready(Nfmep)); 254 } 255 256 void 257 fme_fini(void) 258 { 259 struct fme *sfp, *fp; 260 struct case_list *ucasep, *nextcasep; 261 262 ucasep = Undiagablecaselist; 263 while (ucasep != NULL) { 264 nextcasep = ucasep->next; 265 FREE(ucasep); 266 ucasep = nextcasep; 267 } 268 Undiagablecaselist = NULL; 269 270 /* clean up closed fmes */ 271 fp = ClosedFMEs; 272 while (fp != NULL) { 273 sfp = fp->next; 274 destroy_fme(fp); 275 fp = sfp; 276 } 277 ClosedFMEs = NULL; 278 279 fp = 
FMElist; 280 while (fp != NULL) { 281 sfp = fp->next; 282 destroy_fme(fp); 283 fp = sfp; 284 } 285 FMElist = EFMElist = NULL; 286 287 /* if we were in the middle of creating an fme, free it now */ 288 if (Nfmep) { 289 destroy_fme(Nfmep); 290 Nfmep = NULL; 291 } 292 } 293 294 /* 295 * Allocated space for a buffer name. 20 bytes allows for 296 * a ridiculous 9,999,999 unique observations. 297 */ 298 #define OBBUFNMSZ 20 299 300 /* 301 * serialize_observation 302 * 303 * Create a recoverable version of the current observation 304 * (f->ecurrent). We keep a serialized version of each unique 305 * observation in order that we may resume correctly the fme in the 306 * correct state if eft or fmd crashes and we're restarted. 307 */ 308 static void 309 serialize_observation(struct fme *fp, const char *cls, const struct ipath *ipp) 310 { 311 size_t pkdlen; 312 char tmpbuf[OBBUFNMSZ]; 313 char *pkd = NULL; 314 char *estr; 315 316 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", fp->uniqobs); 317 estr = ipath2str(cls, ipp); 318 fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, strlen(estr) + 1); 319 fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)estr, 320 strlen(estr) + 1); 321 FREE(estr); 322 323 if (fp->ecurrent != NULL && fp->ecurrent->nvp != NULL) { 324 (void) snprintf(tmpbuf, 325 OBBUFNMSZ, "observed%d.nvp", fp->uniqobs); 326 if (nvlist_xpack(fp->ecurrent->nvp, 327 &pkd, &pkdlen, NV_ENCODE_XDR, &Eft_nv_hdl) != 0) 328 out(O_DIE|O_SYS, "pack of observed nvl failed"); 329 fmd_buf_create(fp->hdl, fp->fmcase, tmpbuf, pkdlen); 330 fmd_buf_write(fp->hdl, fp->fmcase, tmpbuf, (void *)pkd, pkdlen); 331 FREE(pkd); 332 } 333 334 fp->uniqobs++; 335 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs, 336 sizeof (fp->uniqobs)); 337 } 338 339 /* 340 * init_fme_bufs -- We keep several bits of state about an fme for 341 * use if eft or fmd crashes and we're restarted. 
342 */ 343 static void 344 init_fme_bufs(struct fme *fp) 345 { 346 size_t cfglen = fp->cfgdata->nextfree - fp->cfgdata->begin; 347 348 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_CFGLEN, sizeof (cfglen)); 349 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_CFGLEN, (void *)&cfglen, 350 sizeof (cfglen)); 351 if (cfglen != 0) { 352 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_CFG, cfglen); 353 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_CFG, 354 fp->cfgdata->begin, cfglen); 355 } 356 357 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_PULL, sizeof (fp->pull)); 358 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_PULL, (void *)&fp->pull, 359 sizeof (fp->pull)); 360 361 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_ID, sizeof (fp->id)); 362 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_ID, (void *)&fp->id, 363 sizeof (fp->id)); 364 365 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_NOBS, sizeof (fp->uniqobs)); 366 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_NOBS, (void *)&fp->uniqobs, 367 sizeof (fp->uniqobs)); 368 369 fmd_buf_create(fp->hdl, fp->fmcase, WOBUF_POSTD, 370 sizeof (fp->posted_suspects)); 371 fmd_buf_write(fp->hdl, fp->fmcase, WOBUF_POSTD, 372 (void *)&fp->posted_suspects, sizeof (fp->posted_suspects)); 373 } 374 375 static void 376 destroy_fme_bufs(struct fme *fp) 377 { 378 char tmpbuf[OBBUFNMSZ]; 379 int o; 380 381 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFGLEN); 382 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_CFG); 383 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_PULL); 384 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_ID); 385 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_POSTD); 386 fmd_buf_destroy(fp->hdl, fp->fmcase, WOBUF_NOBS); 387 388 for (o = 0; o < fp->uniqobs; o++) { 389 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", o); 390 fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf); 391 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", o); 392 fmd_buf_destroy(fp->hdl, fp->fmcase, tmpbuf); 393 } 394 } 395 396 /* 397 * reconstitute_observations -- convert a case's serialized observations 398 * back into 
struct events. Returns zero if all observations are 399 * successfully reconstituted. 400 */ 401 static int 402 reconstitute_observations(struct fme *fmep) 403 { 404 struct event *ep; 405 struct node *epnamenp = NULL; 406 size_t pkdlen; 407 char *pkd = NULL; 408 char *tmpbuf = alloca(OBBUFNMSZ); 409 char *sepptr; 410 char *estr; 411 int ocnt; 412 int elen; 413 414 for (ocnt = 0; ocnt < fmep->uniqobs; ocnt++) { 415 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d", ocnt); 416 elen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf); 417 if (elen == 0) { 418 out(O_ALTFP, 419 "reconstitute_observation: no %s buffer found.", 420 tmpbuf); 421 Undiag_reason = UD_MISSINGOBS; 422 break; 423 } 424 425 estr = MALLOC(elen); 426 fmd_buf_read(fmep->hdl, fmep->fmcase, tmpbuf, estr, elen); 427 sepptr = strchr(estr, '@'); 428 if (sepptr == NULL) { 429 out(O_ALTFP, 430 "reconstitute_observation: %s: " 431 "missing @ separator in %s.", 432 tmpbuf, estr); 433 Undiag_reason = UD_MISSINGPATH; 434 FREE(estr); 435 break; 436 } 437 438 *sepptr = '\0'; 439 if ((epnamenp = pathstring2epnamenp(sepptr + 1)) == NULL) { 440 out(O_ALTFP, 441 "reconstitute_observation: %s: " 442 "trouble converting path string \"%s\" " 443 "to internal representation.", 444 tmpbuf, sepptr + 1); 445 Undiag_reason = UD_MISSINGPATH; 446 FREE(estr); 447 break; 448 } 449 450 /* construct the event */ 451 ep = itree_lookup(fmep->eventtree, 452 stable(estr), ipath(epnamenp)); 453 if (ep == NULL) { 454 out(O_ALTFP, 455 "reconstitute_observation: %s: " 456 "lookup of \"%s\" in itree failed.", 457 tmpbuf, ipath2str(estr, ipath(epnamenp))); 458 Undiag_reason = UD_BADOBS; 459 tree_free(epnamenp); 460 FREE(estr); 461 break; 462 } 463 tree_free(epnamenp); 464 465 /* 466 * We may or may not have a saved nvlist for the observation 467 */ 468 (void) snprintf(tmpbuf, OBBUFNMSZ, "observed%d.nvp", ocnt); 469 pkdlen = fmd_buf_size(fmep->hdl, fmep->fmcase, tmpbuf); 470 if (pkdlen != 0) { 471 pkd = MALLOC(pkdlen); 472 fmd_buf_read(fmep->hdl, 
473 fmep->fmcase, tmpbuf, pkd, pkdlen); 474 ASSERT(ep->nvp == NULL); 475 if (nvlist_xunpack(pkd, 476 pkdlen, &ep->nvp, &Eft_nv_hdl) != 0) 477 out(O_DIE|O_SYS, "pack of observed nvl failed"); 478 FREE(pkd); 479 } 480 481 if (ocnt == 0) 482 fmep->e0 = ep; 483 484 FREE(estr); 485 fmep->ecurrent = ep; 486 ep->count++; 487 488 /* link it into list of observations seen */ 489 ep->observations = fmep->observations; 490 fmep->observations = ep; 491 } 492 493 if (ocnt == fmep->uniqobs) { 494 (void) fme_ready(fmep); 495 return (0); 496 } 497 498 return (1); 499 } 500 501 /* 502 * restart_fme -- called during eft initialization. Reconstitutes 503 * an in-progress fme. 504 */ 505 void 506 fme_restart(fmd_hdl_t *hdl, fmd_case_t *inprogress) 507 { 508 nvlist_t *defect; 509 struct case_list *bad; 510 struct fme *fmep; 511 struct cfgdata *cfgdata = NULL; 512 size_t rawsz; 513 514 fmep = alloc_fme(); 515 fmep->fmcase = inprogress; 516 fmep->hdl = hdl; 517 518 if (fmd_buf_size(hdl, inprogress, WOBUF_CFGLEN) != sizeof (size_t)) { 519 out(O_ALTFP, "restart_fme: No config data"); 520 Undiag_reason = UD_MISSINGINFO; 521 goto badcase; 522 } 523 fmd_buf_read(hdl, inprogress, WOBUF_CFGLEN, (void *)&rawsz, 524 sizeof (size_t)); 525 526 if ((fmep->e0r = fmd_case_getprincipal(hdl, inprogress)) == NULL) { 527 out(O_ALTFP, "restart_fme: No event zero"); 528 Undiag_reason = UD_MISSINGZERO; 529 goto badcase; 530 } 531 532 cfgdata = MALLOC(sizeof (struct cfgdata)); 533 cfgdata->cooked = NULL; 534 cfgdata->devcache = NULL; 535 cfgdata->cpucache = NULL; 536 cfgdata->refcnt = 1; 537 538 if (rawsz > 0) { 539 if (fmd_buf_size(hdl, inprogress, WOBUF_CFG) != rawsz) { 540 out(O_ALTFP, "restart_fme: Config data size mismatch"); 541 Undiag_reason = UD_CFGMISMATCH; 542 goto badcase; 543 } 544 cfgdata->begin = MALLOC(rawsz); 545 cfgdata->end = cfgdata->nextfree = cfgdata->begin + rawsz; 546 fmd_buf_read(hdl, 547 inprogress, WOBUF_CFG, cfgdata->begin, rawsz); 548 } else { 549 cfgdata->begin = cfgdata->end = 
		    cfgdata->nextfree = NULL;
	}
	fmep->cfgdata = cfgdata;

	config_cook(cfgdata);
	if ((fmep->eventtree = itree_create(cfgdata->cooked)) == NULL) {
		/* case not properly saved or irretrievable */
		out(O_ALTFP, "restart_fme: NULL instance tree");
		Undiag_reason = UD_INSTFAIL;
		goto badcase;
	}

	itree_ptree(O_ALTFP|O_VERB2, fmep->eventtree);

	if (fmd_buf_size(hdl, inprogress, WOBUF_PULL) == 0) {
		out(O_ALTFP, "restart_fme: no saved wait time");
		Undiag_reason = UD_MISSINGINFO;
		goto badcase;
	} else {
		fmd_buf_read(hdl, inprogress, WOBUF_PULL, (void *)&fmep->pull,
		    sizeof (fmep->pull));
	}

	if (fmd_buf_size(hdl, inprogress, WOBUF_POSTD) == 0) {
		out(O_ALTFP, "restart_fme: no saved posted status");
		Undiag_reason = UD_MISSINGINFO;
		goto badcase;
	} else {
		fmd_buf_read(hdl, inprogress, WOBUF_POSTD,
		    (void *)&fmep->posted_suspects,
		    sizeof (fmep->posted_suspects));
	}

	if (fmd_buf_size(hdl, inprogress, WOBUF_ID) == 0) {
		out(O_ALTFP, "restart_fme: no saved id");
		Undiag_reason = UD_MISSINGINFO;
		goto badcase;
	} else {
		fmd_buf_read(hdl, inprogress, WOBUF_ID, (void *)&fmep->id,
		    sizeof (fmep->id));
	}
	/* keep Nextid ahead of every restored id so new FMEs don't collide */
	if (Nextid <= fmep->id)
		Nextid = fmep->id + 1;

	if (fmd_buf_size(hdl, inprogress, WOBUF_NOBS) == 0) {
		out(O_ALTFP, "restart_fme: no count of observations");
		Undiag_reason = UD_MISSINGINFO;
		goto badcase;
	} else {
		fmd_buf_read(hdl, inprogress, WOBUF_NOBS,
		    (void *)&fmep->uniqobs, sizeof (fmep->uniqobs));
	}

	if (reconstitute_observations(fmep) != 0)
		goto badcase;

	Open_fme_count++;

	/*
	 * ignore solved or closed cases
	 */
	if (fmep->posted_suspects ||
	    fmd_case_solved(fmep->hdl, fmep->fmcase) ||
	    fmd_case_closed(fmep->hdl, fmep->fmcase))
		return;

	/* give the diagnosis algorithm a shot at the new FME state */
	fme_eval(fmep, NULL);
	return;

badcase:
	if (fmep->eventtree != NULL)
		itree_free(fmep->eventtree);
	config_free(cfgdata);
	destroy_fme_bufs(fmep);
	FREE(fmep);

	/*
	 * Since we're unable to restart the case, add it to the undiagable
	 * list and solve and close it as appropriate.
	 */
	bad = MALLOC(sizeof (struct case_list));
	bad->next = NULL;

	if (Undiagablecaselist != NULL)
		bad->next = Undiagablecaselist;
	Undiagablecaselist = bad;
	bad->fmcase = inprogress;

	out(O_ALTFP, "[case %s (unable to restart), ",
	    fmd_case_uuid(hdl, bad->fmcase));

	if (fmd_case_solved(hdl, bad->fmcase)) {
		out(O_ALTFP, "already solved, ");
	} else {
		out(O_ALTFP, "solving, ");
		defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
		    NULL, NULL, NULL);
		if (Undiag_reason != NULL)
			(void) nvlist_add_string(defect,
			    UNDIAG_REASON, Undiag_reason);
		fmd_case_add_suspect(hdl, bad->fmcase, defect);
		fmd_case_solve(hdl, bad->fmcase);
	}

	if (fmd_case_closed(hdl, bad->fmcase)) {
		out(O_ALTFP, "already closed ]");
	} else {
		out(O_ALTFP, "closing ]");
		fmd_case_close(hdl, bad->fmcase);
	}
}

/*
 * globals_destructor -- lut_free() callback for an FME's globals table;
 *	frees the evalue and any node tree it points at.
 */
/*ARGSUSED*/
static void
globals_destructor(void *left, void *right, void *arg)
{
	struct evalue *evp = (struct evalue *)right;
	if (evp->t == NODEPTR)
		tree_free((struct node *)(uintptr_t)evp->v);
	evp->v = NULL;
	FREE(evp);
}

/*
 * destroy_fme -- release all storage associated with an FME: its stats,
 *	instance tree, config snapshot, globals, and the fme itself.
 */
void
destroy_fme(struct fme *f)
{
	stats_delete(f->Rcount);
	stats_delete(f->Hcallcount);
	stats_delete(f->Rcallcount);
	stats_delete(f->Ccallcount);
	stats_delete(f->Ecallcount);
	stats_delete(f->Tcallcount);
	stats_delete(f->Marrowcount);
	stats_delete(f->diags);

	itree_free(f->eventtree);
	config_free(f->cfgdata);
	lut_free(f->globals, globals_destructor, NULL);
	FREE(f);
}

/* printable name for an fme_state, for debug output */
static const char *
fme_state2str(enum fme_state s)
{
	switch (s) {
	case FME_NOTHING: return ("NOTHING");
	case FME_WAIT: return ("WAIT");
	case FME_CREDIBLE: return ("CREDIBLE");
	case FME_DISPROVED: return ("DISPROVED");
	case FME_DEFERRED: return ("DEFERRED");
	default: return ("UNKNOWN");
	}
}

/* predicates over the event nametype */
static int
is_problem(enum nametype t)
{
	return (t == N_FAULT || t == N_DEFECT || t == N_UPSET);
}

static int
is_fault(enum nametype t)
{
	return (t == N_FAULT);
}

static int
is_defect(enum nametype t)
{
	return (t == N_DEFECT);
}

static int
is_upset(enum nametype t)
{
	return (t == N_UPSET);
}

/*
 * fme_print -- dump an FME's state, observations, suspects, and
 *	(at high verbosity) its whole instance tree.
 */
static void
fme_print(int flags, struct fme *fmep)
{
	struct event *ep;

	out(flags, "Fault Management Exercise %d", fmep->id);
	out(flags, "\t       State: %s", fme_state2str(fmep->state));
	out(flags|O_NONL, "\t  Start time: ");
	ptree_timeval(flags|O_NONL, &fmep->ull);
	out(flags, NULL);
	if (fmep->wull) {
		out(flags|O_NONL, "\t   Wait time: ");
		ptree_timeval(flags|O_NONL, &fmep->wull);
		out(flags, NULL);
	}
	out(flags|O_NONL, "\t          E0: ");
	if (fmep->e0)
		itree_pevent_brief(flags|O_NONL, fmep->e0);
	else
		out(flags|O_NONL, "NULL");
	out(flags, NULL);
	out(flags|O_NONL, "\tObservations:");
	for (ep = fmep->observations; ep; ep = ep->observations) {
		out(flags|O_NONL, " ");
		itree_pevent_brief(flags|O_NONL, ep);
	}
	out(flags, NULL);
	out(flags|O_NONL, "\tSuspect list:");
	for (ep = fmep->suspects; ep; ep = ep->suspects) {
		out(flags|O_NONL, " ");
		itree_pevent_brief(flags|O_NONL, ep);
	}
	out(flags, NULL);
	out(flags|O_VERB2, "\t        Tree:");
	itree_ptree(flags|O_VERB2, fmep->eventtree);
}

/*
 * pathstring2epnamenp -- convert a slash-separated path string into a
 *	tree_name chain.  NOTE: uses strtok(), so it mutates path and is
 *	not reentrant.  Dies on an empty path.
 */
static struct node *
pathstring2epnamenp(char *path)
{
	char *sep = "/";
	struct node *ret;
	char *ptr;

	if ((ptr = strtok(path, sep)) == NULL)
		out(O_DIE, "pathstring2epnamenp: invalid empty class");

	ret =
tree_iname(stable(ptr), NULL, 0); 776 777 while ((ptr = strtok(NULL, sep)) != NULL) 778 ret = tree_name_append(ret, 779 tree_iname(stable(ptr), NULL, 0)); 780 781 return (ret); 782 } 783 784 /* 785 * for a given upset sp, increment the corresponding SERD engine. if the 786 * SERD engine trips, return the ename and ipp of the resulting ereport. 787 * returns true if engine tripped and *enamep and *ippp were filled in. 788 */ 789 static int 790 serd_eval(struct fme *fmep, fmd_hdl_t *hdl, fmd_event_t *ffep, 791 fmd_case_t *fmcase, struct event *sp, const char **enamep, 792 const struct ipath **ippp) 793 { 794 struct node *serdinst; 795 char *serdname; 796 struct node *nid; 797 798 ASSERT(sp->t == N_UPSET); 799 ASSERT(ffep != NULL); 800 801 /* 802 * obtain instanced SERD engine from the upset sp. from this 803 * derive serdname, the string used to identify the SERD engine. 804 */ 805 serdinst = eventprop_lookup(sp, L_engine); 806 807 if (serdinst == NULL) 808 return (NULL); 809 810 serdname = ipath2str(serdinst->u.stmt.np->u.event.ename->u.name.s, 811 ipath(serdinst->u.stmt.np->u.event.epname)); 812 813 /* handle serd engine "id" property, if there is one */ 814 if ((nid = 815 lut_lookup(serdinst->u.stmt.lutp, (void *)L_id, NULL)) != NULL) { 816 struct evalue *gval; 817 char suffixbuf[200]; 818 char *suffix; 819 char *nserdname; 820 size_t nname; 821 822 out(O_ALTFP|O_NONL, "serd \"%s\" id: ", serdname); 823 ptree_name_iter(O_ALTFP|O_NONL, nid); 824 825 ASSERTinfo(nid->t == T_GLOBID, ptree_nodetype2str(nid->t)); 826 827 if ((gval = lut_lookup(fmep->globals, 828 (void *)nid->u.globid.s, NULL)) == NULL) { 829 out(O_ALTFP, " undefined"); 830 } else if (gval->t == UINT64) { 831 out(O_ALTFP, " %llu", gval->v); 832 (void) sprintf(suffixbuf, "%llu", gval->v); 833 suffix = suffixbuf; 834 } else { 835 out(O_ALTFP, " \"%s\"", (char *)(uintptr_t)gval->v); 836 suffix = (char *)(uintptr_t)gval->v; 837 } 838 839 nname = strlen(serdname) + strlen(suffix) + 2; 840 nserdname = 
MALLOC(nname); 841 (void) snprintf(nserdname, nname, "%s:%s", serdname, suffix); 842 FREE(serdname); 843 serdname = nserdname; 844 } 845 846 if (!fmd_serd_exists(hdl, serdname)) { 847 struct node *nN, *nT; 848 849 /* no SERD engine yet, so create it */ 850 nN = lut_lookup(serdinst->u.stmt.lutp, (void *)L_N, NULL); 851 nT = lut_lookup(serdinst->u.stmt.lutp, (void *)L_T, NULL); 852 853 ASSERT(nN->t == T_NUM); 854 ASSERT(nT->t == T_TIMEVAL); 855 856 fmd_serd_create(hdl, serdname, (uint_t)nN->u.ull, 857 (hrtime_t)nT->u.ull); 858 } 859 860 861 /* 862 * increment SERD engine. if engine fires, reset serd 863 * engine and return trip_strcode 864 */ 865 if (fmd_serd_record(hdl, serdname, ffep)) { 866 struct node *tripinst = lut_lookup(serdinst->u.stmt.lutp, 867 (void *)L_trip, NULL); 868 869 ASSERT(tripinst != NULL); 870 871 *enamep = tripinst->u.event.ename->u.name.s; 872 *ippp = ipath(tripinst->u.event.epname); 873 874 fmd_case_add_serd(hdl, fmcase, serdname); 875 fmd_serd_reset(hdl, serdname); 876 out(O_ALTFP|O_NONL, "[engine fired: %s, sending: ", serdname); 877 ipath_print(O_ALTFP|O_NONL, *enamep, *ippp); 878 out(O_ALTFP, "]"); 879 880 FREE(serdname); 881 return (1); 882 } 883 884 FREE(serdname); 885 return (0); 886 } 887 888 /* 889 * search a suspect list for upsets. feed each upset to serd_eval() and 890 * build up tripped[], an array of ereports produced by the firing of 891 * any SERD engines. then feed each ereport back into 892 * fme_receive_report(). 893 * 894 * returns ntrip, the number of these ereports produced. 895 */ 896 static int 897 upsets_eval(struct fme *fmep, fmd_event_t *ffep) 898 { 899 /* we build an array of tripped ereports that we send ourselves */ 900 struct { 901 const char *ename; 902 const struct ipath *ipp; 903 } *tripped; 904 struct event *sp; 905 int ntrip, nupset, i; 906 907 /* 908 * count the number of upsets to determine the upper limit on 909 * expected trip ereport strings. 
remember that one upset can 910 * lead to at most one ereport. 911 */ 912 nupset = 0; 913 for (sp = fmep->suspects; sp; sp = sp->suspects) { 914 if (sp->t == N_UPSET) 915 nupset++; 916 } 917 918 if (nupset == 0) 919 return (0); 920 921 /* 922 * get to this point if we have upsets and expect some trip 923 * ereports 924 */ 925 tripped = alloca(sizeof (*tripped) * nupset); 926 bzero((void *)tripped, sizeof (*tripped) * nupset); 927 928 ntrip = 0; 929 for (sp = fmep->suspects; sp; sp = sp->suspects) 930 if (sp->t == N_UPSET && 931 serd_eval(fmep, fmep->hdl, ffep, fmep->fmcase, sp, 932 &tripped[ntrip].ename, &tripped[ntrip].ipp)) 933 ntrip++; 934 935 for (i = 0; i < ntrip; i++) 936 fme_receive_report(fmep->hdl, ffep, 937 tripped[i].ename, tripped[i].ipp, NULL); 938 939 return (ntrip); 940 } 941 942 /* 943 * fme_receive_external_report -- call when an external ereport comes in 944 * 945 * this routine just converts the relevant information from the ereport 946 * into a format used internally and passes it on to fme_receive_report(). 947 */ 948 void 949 fme_receive_external_report(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl, 950 const char *eventstring) 951 { 952 struct node *epnamenp = platform_getpath(nvl); 953 const struct ipath *ipp; 954 955 /* 956 * XFILE: If we ended up without a path, it's an X-file. 957 * For now, use our undiagnosable interface. 
958 */ 959 if (epnamenp == NULL) { 960 out(O_ALTFP, "XFILE: Unable to get path from ereport"); 961 Undiag_reason = UD_NOPATH; 962 publish_undiagnosable(hdl, ffep); 963 return; 964 } 965 966 ipp = ipath(epnamenp); 967 tree_free(epnamenp); 968 fme_receive_report(hdl, ffep, stable(eventstring), ipp, nvl); 969 } 970 971 /*ARGSUSED*/ 972 void 973 fme_receive_repair_list(fmd_hdl_t *hdl, fmd_event_t *ffep, nvlist_t *nvl, 974 const char *eventstring) 975 { 976 char *uuid; 977 nvlist_t **nva; 978 uint_t nvc; 979 const struct ipath *ipp; 980 981 if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0 || 982 nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, 983 &nva, &nvc) != 0) { 984 out(O_ALTFP, "No uuid or fault list for list.repaired event"); 985 return; 986 } 987 988 out(O_ALTFP, "Processing list.repaired from case %s", uuid); 989 990 while (nvc-- != 0) { 991 /* 992 * Reset any istat associated with this path. 993 */ 994 char *path; 995 996 if ((ipp = platform_fault2ipath(*nva++)) == NULL) 997 continue; 998 999 path = ipath2str(NULL, ipp); 1000 out(O_ALTFP, "fme_receive_repair_list: resetting state for %s", 1001 path); 1002 FREE(path); 1003 1004 lut_walk(Istats, (lut_cb)istat_counter_reset_cb, (void *)ipp); 1005 istat_save(); 1006 1007 /* 1008 * We do not have a list of stat engines in a form that 1009 * we can readily clear any associated serd engines. When we 1010 * do, this will be the place to clear them. 
1011 */ 1012 } 1013 } 1014 1015 static int mark_arrows(struct fme *fmep, struct event *ep, int mark, 1016 unsigned long long at_latest_by, unsigned long long *pdelay, int keep); 1017 1018 /* ARGSUSED */ 1019 static void 1020 clear_arrows(struct event *ep, struct event *ep2, struct fme *fmep) 1021 { 1022 struct bubble *bp; 1023 struct arrowlist *ap; 1024 1025 ep->cached_state = 0; 1026 ep->keep_in_tree = 0; 1027 for (bp = itree_next_bubble(ep, NULL); bp; 1028 bp = itree_next_bubble(ep, bp)) { 1029 if (bp->t != B_FROM) 1030 continue; 1031 bp->mark = 0; 1032 for (ap = itree_next_arrow(bp, NULL); ap; 1033 ap = itree_next_arrow(bp, ap)) 1034 ap->arrowp->mark = 0; 1035 } 1036 } 1037 1038 static void 1039 fme_receive_report(fmd_hdl_t *hdl, fmd_event_t *ffep, 1040 const char *eventstring, const struct ipath *ipp, nvlist_t *nvl) 1041 { 1042 struct event *ep; 1043 struct fme *fmep = NULL; 1044 struct fme *ofmep = NULL; 1045 struct fme *cfmep, *svfmep; 1046 int matched = 0; 1047 nvlist_t *defect; 1048 1049 out(O_ALTFP|O_NONL, "fme_receive_report: "); 1050 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1051 out(O_ALTFP|O_STAMP, NULL); 1052 1053 /* decide which FME it goes to */ 1054 for (fmep = FMElist; fmep; fmep = fmep->next) { 1055 int prev_verbose; 1056 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 1057 enum fme_state state; 1058 nvlist_t *pre_peek_nvp = NULL; 1059 1060 if (fmep->overflow) { 1061 if (!(fmd_case_closed(fmep->hdl, fmep->fmcase))) 1062 ofmep = fmep; 1063 1064 continue; 1065 } 1066 1067 /* 1068 * ignore solved or closed cases 1069 */ 1070 if (fmep->posted_suspects || 1071 fmd_case_solved(fmep->hdl, fmep->fmcase) || 1072 fmd_case_closed(fmep->hdl, fmep->fmcase)) 1073 continue; 1074 1075 /* look up event in event tree for this FME */ 1076 if ((ep = itree_lookup(fmep->eventtree, 1077 eventstring, ipp)) == NULL) 1078 continue; 1079 1080 /* note observation */ 1081 fmep->ecurrent = ep; 1082 if (ep->count++ == 0) { 1083 /* link it into list of observations seen 
*/ 1084 ep->observations = fmep->observations; 1085 fmep->observations = ep; 1086 ep->nvp = evnv_dupnvl(nvl); 1087 } else { 1088 /* use new payload values for peek */ 1089 pre_peek_nvp = ep->nvp; 1090 ep->nvp = evnv_dupnvl(nvl); 1091 } 1092 1093 /* tell hypothesise() not to mess with suspect list */ 1094 fmep->peek = 1; 1095 1096 /* don't want this to be verbose (unless Debug is set) */ 1097 prev_verbose = Verbose; 1098 if (Debug == 0) 1099 Verbose = 0; 1100 1101 lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep); 1102 state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay); 1103 1104 fmep->peek = 0; 1105 1106 /* put verbose flag back */ 1107 Verbose = prev_verbose; 1108 1109 if (state != FME_DISPROVED) { 1110 /* found an FME that explains the ereport */ 1111 matched++; 1112 out(O_ALTFP|O_NONL, "["); 1113 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1114 out(O_ALTFP, " explained by FME%d]", fmep->id); 1115 1116 if (pre_peek_nvp) 1117 nvlist_free(pre_peek_nvp); 1118 1119 if (ep->count == 1) 1120 serialize_observation(fmep, eventstring, ipp); 1121 1122 if (ffep) 1123 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1124 1125 stats_counter_bump(fmep->Rcount); 1126 1127 /* re-eval FME */ 1128 fme_eval(fmep, ffep); 1129 } else { 1130 1131 /* not a match, undo noting of observation */ 1132 fmep->ecurrent = NULL; 1133 if (--ep->count == 0) { 1134 /* unlink it from observations */ 1135 fmep->observations = ep->observations; 1136 ep->observations = NULL; 1137 nvlist_free(ep->nvp); 1138 ep->nvp = NULL; 1139 } else { 1140 nvlist_free(ep->nvp); 1141 ep->nvp = pre_peek_nvp; 1142 } 1143 } 1144 } 1145 1146 if (matched) 1147 return; /* explained by at least one existing FME */ 1148 1149 /* clean up closed fmes */ 1150 cfmep = ClosedFMEs; 1151 while (cfmep != NULL) { 1152 svfmep = cfmep->next; 1153 destroy_fme(cfmep); 1154 cfmep = svfmep; 1155 } 1156 ClosedFMEs = NULL; 1157 1158 if (ofmep) { 1159 out(O_ALTFP|O_NONL, "["); 1160 ipath_print(O_ALTFP|O_NONL, eventstring, 
ipp); 1161 out(O_ALTFP, " ADDING TO OVERFLOW FME]"); 1162 if (ffep) 1163 fmd_case_add_ereport(hdl, ofmep->fmcase, ffep); 1164 1165 return; 1166 1167 } else if (Max_fme && (Open_fme_count >= Max_fme)) { 1168 out(O_ALTFP|O_NONL, "["); 1169 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1170 out(O_ALTFP, " MAX OPEN FME REACHED]"); 1171 /* Create overflow fme */ 1172 if ((fmep = newfme(eventstring, ipp)) == NULL) { 1173 out(O_ALTFP|O_NONL, "["); 1174 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1175 out(O_ALTFP, " CANNOT OPEN OVERFLOW FME]"); 1176 publish_undiagnosable(hdl, ffep); 1177 return; 1178 } 1179 1180 Open_fme_count++; 1181 1182 fmep->fmcase = fmd_case_open(hdl, NULL); 1183 fmep->hdl = hdl; 1184 init_fme_bufs(fmep); 1185 fmep->overflow = B_TRUE; 1186 1187 if (ffep) 1188 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1189 1190 defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100, 1191 NULL, NULL, NULL); 1192 (void) nvlist_add_string(defect, UNDIAG_REASON, UD_MAXFME); 1193 fmd_case_add_suspect(hdl, fmep->fmcase, defect); 1194 fmd_case_solve(hdl, fmep->fmcase); 1195 return; 1196 } 1197 1198 /* start a new FME */ 1199 if ((fmep = newfme(eventstring, ipp)) == NULL) { 1200 out(O_ALTFP|O_NONL, "["); 1201 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1202 out(O_ALTFP, " CANNOT DIAGNOSE]"); 1203 publish_undiagnosable(hdl, ffep); 1204 return; 1205 } 1206 1207 Open_fme_count++; 1208 1209 /* open a case */ 1210 fmep->fmcase = fmd_case_open(hdl, NULL); 1211 fmep->hdl = hdl; 1212 init_fme_bufs(fmep); 1213 1214 out(O_ALTFP|O_NONL, "["); 1215 ipath_print(O_ALTFP|O_NONL, eventstring, ipp); 1216 out(O_ALTFP, " created FME%d, case %s]", fmep->id, 1217 fmd_case_uuid(hdl, fmep->fmcase)); 1218 1219 ep = fmep->e0; 1220 ASSERT(ep != NULL); 1221 1222 /* note observation */ 1223 fmep->ecurrent = ep; 1224 if (ep->count++ == 0) { 1225 /* link it into list of observations seen */ 1226 ep->observations = fmep->observations; 1227 fmep->observations = ep; 1228 ep->nvp = 
evnv_dupnvl(nvl); 1229 serialize_observation(fmep, eventstring, ipp); 1230 } else { 1231 /* new payload overrides any previous */ 1232 nvlist_free(ep->nvp); 1233 ep->nvp = evnv_dupnvl(nvl); 1234 } 1235 1236 stats_counter_bump(fmep->Rcount); 1237 1238 if (ffep) { 1239 fmd_case_add_ereport(hdl, fmep->fmcase, ffep); 1240 fmd_case_setprincipal(hdl, fmep->fmcase, ffep); 1241 fmep->e0r = ffep; 1242 } 1243 1244 /* give the diagnosis algorithm a shot at the new FME state */ 1245 fme_eval(fmep, ffep); 1246 } 1247 1248 void 1249 fme_status(int flags) 1250 { 1251 struct fme *fmep; 1252 1253 if (FMElist == NULL) { 1254 out(flags, "No fault management exercises underway."); 1255 return; 1256 } 1257 1258 for (fmep = FMElist; fmep; fmep = fmep->next) 1259 fme_print(flags, fmep); 1260 } 1261 1262 /* 1263 * "indent" routines used mostly for nicely formatted debug output, but also 1264 * for sanity checking for infinite recursion bugs. 1265 */ 1266 1267 #define MAX_INDENT 1024 1268 static const char *indent_s[MAX_INDENT]; 1269 static int current_indent; 1270 1271 static void 1272 indent_push(const char *s) 1273 { 1274 if (current_indent < MAX_INDENT) 1275 indent_s[current_indent++] = s; 1276 else 1277 out(O_DIE, "unexpected recursion depth (%d)", current_indent); 1278 } 1279 1280 static void 1281 indent_set(const char *s) 1282 { 1283 current_indent = 0; 1284 indent_push(s); 1285 } 1286 1287 static void 1288 indent_pop(void) 1289 { 1290 if (current_indent > 0) 1291 current_indent--; 1292 else 1293 out(O_DIE, "recursion underflow"); 1294 } 1295 1296 static void 1297 indent(void) 1298 { 1299 int i; 1300 if (!Verbose) 1301 return; 1302 for (i = 0; i < current_indent; i++) 1303 out(O_ALTFP|O_VERB|O_NONL, indent_s[i]); 1304 } 1305 1306 #define SLNEW 1 1307 #define SLCHANGED 2 1308 #define SLWAIT 3 1309 #define SLDISPROVED 4 1310 1311 static void 1312 print_suspects(int circumstance, struct fme *fmep) 1313 { 1314 struct event *ep; 1315 1316 out(O_ALTFP|O_NONL, "["); 1317 if (circumstance 
== SLCHANGED) { 1318 out(O_ALTFP|O_NONL, "FME%d diagnosis changed. state: %s, " 1319 "suspect list:", fmep->id, fme_state2str(fmep->state)); 1320 } else if (circumstance == SLWAIT) { 1321 out(O_ALTFP|O_NONL, "FME%d set wait timer ", fmep->id); 1322 ptree_timeval(O_ALTFP|O_NONL, &fmep->wull); 1323 } else if (circumstance == SLDISPROVED) { 1324 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS UNKNOWN", fmep->id); 1325 } else { 1326 out(O_ALTFP|O_NONL, "FME%d DIAGNOSIS PRODUCED:", fmep->id); 1327 } 1328 1329 if (circumstance == SLWAIT || circumstance == SLDISPROVED) { 1330 out(O_ALTFP, "]"); 1331 return; 1332 } 1333 1334 for (ep = fmep->suspects; ep; ep = ep->suspects) { 1335 out(O_ALTFP|O_NONL, " "); 1336 itree_pevent_brief(O_ALTFP|O_NONL, ep); 1337 } 1338 out(O_ALTFP, "]"); 1339 } 1340 1341 static struct node * 1342 eventprop_lookup(struct event *ep, const char *propname) 1343 { 1344 return (lut_lookup(ep->props, (void *)propname, NULL)); 1345 } 1346 1347 #define MAXDIGITIDX 23 1348 static char numbuf[MAXDIGITIDX + 1]; 1349 1350 static int 1351 node2uint(struct node *n, uint_t *valp) 1352 { 1353 struct evalue value; 1354 struct lut *globals = NULL; 1355 1356 if (n == NULL) 1357 return (1); 1358 1359 /* 1360 * check value.v since we are being asked to convert an unsigned 1361 * long long int to an unsigned int 1362 */ 1363 if (! eval_expr(n, NULL, NULL, &globals, NULL, NULL, 0, &value) || 1364 value.t != UINT64 || value.v > (1ULL << 32)) 1365 return (1); 1366 1367 *valp = (uint_t)value.v; 1368 1369 return (0); 1370 } 1371 1372 static nvlist_t * 1373 node2fmri(struct node *n) 1374 { 1375 nvlist_t **pa, *f, *p; 1376 struct node *nc; 1377 uint_t depth = 0; 1378 char *numstr, *nullbyte; 1379 char *failure; 1380 int err, i; 1381 1382 /* XXX do we need to be able to handle a non-T_NAME node? 
*/ 1383 if (n == NULL || n->t != T_NAME) 1384 return (NULL); 1385 1386 for (nc = n; nc != NULL; nc = nc->u.name.next) { 1387 if (nc->u.name.child == NULL || nc->u.name.child->t != T_NUM) 1388 break; 1389 depth++; 1390 } 1391 1392 if (nc != NULL) { 1393 /* We bailed early, something went wrong */ 1394 return (NULL); 1395 } 1396 1397 if ((err = nvlist_xalloc(&f, NV_UNIQUE_NAME, &Eft_nv_hdl)) != 0) 1398 out(O_DIE|O_SYS, "alloc of fmri nvl failed"); 1399 pa = alloca(depth * sizeof (nvlist_t *)); 1400 for (i = 0; i < depth; i++) 1401 pa[i] = NULL; 1402 1403 err = nvlist_add_string(f, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC); 1404 err |= nvlist_add_uint8(f, FM_VERSION, FM_HC_SCHEME_VERSION); 1405 err |= nvlist_add_string(f, FM_FMRI_HC_ROOT, ""); 1406 err |= nvlist_add_uint32(f, FM_FMRI_HC_LIST_SZ, depth); 1407 if (err != 0) { 1408 failure = "basic construction of FMRI failed"; 1409 goto boom; 1410 } 1411 1412 numbuf[MAXDIGITIDX] = '\0'; 1413 nullbyte = &numbuf[MAXDIGITIDX]; 1414 i = 0; 1415 1416 for (nc = n; nc != NULL; nc = nc->u.name.next) { 1417 err = nvlist_xalloc(&p, NV_UNIQUE_NAME, &Eft_nv_hdl); 1418 if (err != 0) { 1419 failure = "alloc of an hc-pair failed"; 1420 goto boom; 1421 } 1422 err = nvlist_add_string(p, FM_FMRI_HC_NAME, nc->u.name.s); 1423 numstr = ulltostr(nc->u.name.child->u.ull, nullbyte); 1424 err |= nvlist_add_string(p, FM_FMRI_HC_ID, numstr); 1425 if (err != 0) { 1426 failure = "construction of an hc-pair failed"; 1427 goto boom; 1428 } 1429 pa[i++] = p; 1430 } 1431 1432 err = nvlist_add_nvlist_array(f, FM_FMRI_HC_LIST, pa, depth); 1433 if (err == 0) { 1434 for (i = 0; i < depth; i++) 1435 if (pa[i] != NULL) 1436 nvlist_free(pa[i]); 1437 return (f); 1438 } 1439 failure = "addition of hc-pair array to FMRI failed"; 1440 1441 boom: 1442 for (i = 0; i < depth; i++) 1443 if (pa[i] != NULL) 1444 nvlist_free(pa[i]); 1445 nvlist_free(f); 1446 out(O_DIE, "%s", failure); 1447 /*NOTREACHED*/ 1448 return (NULL); 1449 } 1450 1451 static uint_t 1452 avg(uint_t sum, 
uint_t cnt) 1453 { 1454 unsigned long long s = sum * 10; 1455 1456 return ((s / cnt / 10) + (((s / cnt % 10) >= 5) ? 1 : 0)); 1457 } 1458 1459 static uint8_t 1460 percentof(uint_t part, uint_t whole) 1461 { 1462 unsigned long long p = part * 1000; 1463 1464 return ((p / whole / 10) + (((p / whole % 10) >= 5) ? 1 : 0)); 1465 } 1466 1467 struct rsl { 1468 struct event *suspect; 1469 nvlist_t *asru; 1470 nvlist_t *fru; 1471 nvlist_t *rsrc; 1472 }; 1473 1474 /* 1475 * rslfree -- free internal members of struct rsl not expected to be 1476 * freed elsewhere. 1477 */ 1478 static void 1479 rslfree(struct rsl *freeme) 1480 { 1481 if (freeme->asru != NULL) 1482 nvlist_free(freeme->asru); 1483 if (freeme->fru != NULL) 1484 nvlist_free(freeme->fru); 1485 if (freeme->rsrc != NULL && freeme->rsrc != freeme->asru) 1486 nvlist_free(freeme->rsrc); 1487 } 1488 1489 /* 1490 * rslcmp -- compare two rsl structures. Use the following 1491 * comparisons to establish cardinality: 1492 * 1493 * 1. Name of the suspect's class. (simple strcmp) 1494 * 2. Name of the suspect's ASRU. (trickier, since nvlist) 1495 * 1496 */ 1497 static int 1498 rslcmp(const void *a, const void *b) 1499 { 1500 struct rsl *r1 = (struct rsl *)a; 1501 struct rsl *r2 = (struct rsl *)b; 1502 int rv; 1503 1504 rv = strcmp(r1->suspect->enode->u.event.ename->u.name.s, 1505 r2->suspect->enode->u.event.ename->u.name.s); 1506 if (rv != 0) 1507 return (rv); 1508 1509 if (r1->asru == NULL && r2->asru == NULL) 1510 return (0); 1511 if (r1->asru == NULL) 1512 return (-1); 1513 if (r2->asru == NULL) 1514 return (1); 1515 return (evnv_cmpnvl(r1->asru, r2->asru, 0)); 1516 } 1517 1518 /* 1519 * rsluniq -- given an array of rsl structures, seek out and "remove" 1520 * any duplicates. Dups are "remove"d by NULLing the suspect pointer 1521 * of the array element. Removal also means updating the number of 1522 * problems and the number of problems which are not faults. User 1523 * provides the first and last element pointers. 
 */
static void
rsluniq(struct rsl *first, struct rsl *last, int *nprobs, int *nnonf)
{
	struct rsl *cr;

	/* a single problem cannot contain duplicates */
	if (*nprobs == 1)
		return;

	/*
	 * At this point, we only expect duplicate defects.
	 * Eversholt's diagnosis algorithm prevents duplicate
	 * suspects, but we rewrite defects in the platform code after
	 * the diagnosis is made, and that can introduce new
	 * duplicates.
	 */
	while (first <= last) {
		if (first->suspect == NULL || !is_defect(first->suspect->t)) {
			first++;
			continue;
		}
		cr = first + 1;
		while (cr <= last) {
			if (is_defect(first->suspect->t)) {
				if (rslcmp(first, cr) == 0) {
					/* dup: NULL it out and fix counts */
					cr->suspect = NULL;
					rslfree(cr);
					(*nprobs)--;
					(*nnonf)--;
				}
			}
			/*
			 * assume all defects are in order after our
			 * sort and short circuit here with "else break" ?
			 */
			cr++;
		}
		first++;
	}
}

/*
 * get_resources -- for a given suspect, determine what ASRU, FRU and
 * RSRC nvlists should be advertised in the final suspect list.
 */
void
get_resources(struct event *sp, struct rsl *rsrcs, struct config *croot)
{
	struct node *asrudef, *frudef;
	nvlist_t *asru, *fru;
	nvlist_t *rsrc = NULL;
	char *pathstr;

	/*
	 * First find any ASRU and/or FRU defined in the
	 * initial fault tree.
	 */
	asrudef = eventprop_lookup(sp, L_ASRU);
	frudef = eventprop_lookup(sp, L_FRU);

	/*
	 * Create FMRIs based on those definitions
	 */
	asru = node2fmri(asrudef);
	fru = node2fmri(frudef);
	pathstr = ipath2str(NULL, sp->ipp);

	/*
	 * Allow for platform translations of the FMRIs
	 */
	platform_units_translate(is_defect(sp->t), croot, &asru, &fru, &rsrc,
	    pathstr);

	FREE(pathstr);
	rsrcs->suspect = sp;
	rsrcs->asru = asru;
	rsrcs->fru = fru;
	rsrcs->rsrc = rsrc;
}

/*
 * trim_suspects -- prior to publishing, we may need to remove some
 * suspects from the list.  If we're auto-closing upsets, we don't
 * want any of those in the published list.  If the ASRUs for multiple
 * defects resolve to the same ASRU (driver) we only want to publish
 * that as a single suspect.
 */
static void
trim_suspects(struct fme *fmep, boolean_t no_upsets, struct rsl **begin,
    struct rsl **end)
{
	struct event *ep;
	struct rsl *rp;
	int rpcnt;

	/*
	 * First save the suspects in the psuspects, then copy back
	 * only the ones we wish to retain.  This resets nsuspects to
	 * zero.
	 */
	rpcnt = fmep->nsuspects;
	save_suspects(fmep);

	/*
	 * allocate an array of resource pointers for the suspects.
	 * We may end up using less than the full allocation, but this
	 * is a very short-lived array.  publish_suspects() will free
	 * this array when it's done using it.
	 */
	rp = *begin = MALLOC(rpcnt * sizeof (struct rsl));
	bzero(rp, rpcnt * sizeof (struct rsl));

	/* first pass, remove any unwanted upsets and populate our array */
	for (ep = fmep->psuspects; ep; ep = ep->psuspects) {
		if (no_upsets && is_upset(ep->t))
			continue;
		get_resources(ep, rp, fmep->cfgdata->cooked);
		rp++;
		fmep->nsuspects++;
		if (!is_fault(ep->t))
			fmep->nonfault++;
	}

	/* if all we had was unwanted upsets, we're done */
	if (fmep->nsuspects == 0)
		return;

	/* NOTE: *end is left unset when nsuspects == 0 (early return above) */
	*end = rp - 1;

	/* sort the array */
	qsort(*begin, fmep->nsuspects, sizeof (struct rsl), rslcmp);
	rsluniq(*begin, *end, &fmep->nsuspects, &fmep->nonfault);
}

/*
 * addpayloadprop -- add a payload prop to a problem
 */
static void
addpayloadprop(const char *lhs, struct evalue *rhs, nvlist_t *fault)
{
	ASSERT(fault != NULL);
	ASSERT(lhs != NULL);
	ASSERT(rhs != NULL);

	if (rhs->t == UINT64) {
		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=%llu", lhs, rhs->v);

		if (nvlist_add_uint64(fault, lhs, rhs->v) != 0)
			out(O_DIE,
			    "cannot add payloadprop \"%s\" to fault", lhs);
	} else {
		/* non-UINT64 values carry a string pointer in rhs->v */
		out(O_ALTFP|O_VERB2, "addpayloadprop: %s=\"%s\"",
		    lhs, (char *)(uintptr_t)rhs->v);

		if (nvlist_add_string(fault, lhs, (char *)(uintptr_t)rhs->v) !=
		    0)
			out(O_DIE,
			    "cannot add payloadprop \"%s\" to fault", lhs);
	}
}

/* scratch buffer state shared by istataddsize()/istat2str()/istat_save() */
static char *Istatbuf;
static char *Istatbufptr;
static int Istatsz;

/*
 * istataddsize -- calculate size of istat and add it to Istatsz
 */
/*ARGSUSED2*/
static void
istataddsize(const struct istat_entry *lhs, struct stats *rhs, void *arg)
{
	int val;

	ASSERT(lhs != NULL);
	ASSERT(rhs != NULL);

	if ((val = stats_counter_value(rhs)) == 0)
		return;	/* skip zero-valued stats */

	/* count up the size of the stat name */
	Istatsz += ipath2strlen(lhs->ename, lhs->ipath);
	Istatsz++;	/* for the trailing NULL byte */

	/* count up the size of the stat value */
	Istatsz += snprintf(NULL, 0, "%d", val);
	Istatsz++;	/* for the trailing NULL byte */
}

/*
 * istat2str -- serialize an istat, writing result to *Istatbufptr
 */
/*ARGSUSED2*/
static void
istat2str(const struct istat_entry *lhs, struct stats *rhs, void *arg)
{
	char *str;
	int len;
	int val;

	ASSERT(lhs != NULL);
	ASSERT(rhs != NULL);

	if ((val = stats_counter_value(rhs)) == 0)
		return;	/* skip zero-valued stats */

	/* serialize the stat name */
	str = ipath2str(lhs->ename, lhs->ipath);
	len = strlen(str);

	ASSERT(Istatbufptr + len + 1 < &Istatbuf[Istatsz]);
	(void) strlcpy(Istatbufptr, str, &Istatbuf[Istatsz] - Istatbufptr);
	Istatbufptr += len;
	FREE(str);
	*Istatbufptr++ = '\0';

	/* serialize the stat value */
	Istatbufptr += snprintf(Istatbufptr, &Istatbuf[Istatsz] - Istatbufptr,
	    "%d", val);
	*Istatbufptr++ = '\0';

	ASSERT(Istatbufptr <= &Istatbuf[Istatsz]);
}

/*
 * istat_save -- serialize all non-zero istats into a module-wide fmd
 * buffer (WOBUF_ISTATS).  No-op unless Istat_need_save is set.
 */
void
istat_save()
{
	if (Istat_need_save == 0)
		return;

	/* figure out how big the serialized info is */
	Istatsz = 0;
	lut_walk(Istats, (lut_cb)istataddsize, NULL);

	if (Istatsz == 0) {
		/* no stats to save */
		fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);
		return;
	}

	/* create the serialized buffer */
	Istatbufptr = Istatbuf = MALLOC(Istatsz);
	lut_walk(Istats, (lut_cb)istat2str, NULL);

	/* clear out current saved stats */
	fmd_buf_destroy(Hdl, NULL, WOBUF_ISTATS);

	/* write out the new version */
	fmd_buf_write(Hdl, NULL, WOBUF_ISTATS, Istatbuf, Istatsz);
	FREE(Istatbuf);

	Istat_need_save = 0;
}

/*
 * istat_cmp -- lut comparison for istat entries; relies on ename and
 * ipath both being interned pointers, so pointer comparison suffices.
 */
int
istat_cmp(struct istat_entry *ent1, struct istat_entry *ent2)
{
	if (ent1->ename != ent2->ename)
		return (ent2->ename - ent1->ename);
	if (ent1->ipath != ent2->ipath)
		return ((char *)ent2->ipath - (char *)ent1->ipath);

	return (0);
}

/*
 * istat-verify -- verify the component associated with a stat still exists
 *
 * if the component no longer exists, this routine resets the stat and
 * returns 0.  if the component still exists, it returns 1.
 */
static int
istat_verify(struct node *snp, struct istat_entry *entp)
{
	struct stats *statp;
	nvlist_t *fmri;

	fmri = node2fmri(snp->u.event.epname);
	if (platform_path_exists(fmri)) {
		nvlist_free(fmri);
		return (1);
	}
	nvlist_free(fmri);

	/* component no longer in system.  zero out the associated stats */
	if ((statp = (struct stats *)
	    lut_lookup(Istats, entp, (lut_cmp)istat_cmp)) == NULL ||
	    stats_counter_value(statp) == 0)
		return (0);	/* stat is already reset */

	Istat_need_save = 1;
	stats_counter_reset(statp);
	return (0);
}

/*
 * istat_bump -- increment (or, if n is non-zero, set) the istat counter
 * named by the event node snp, creating the counter on first use.
 */
static void
istat_bump(struct node *snp, int n)
{
	struct stats *statp;
	struct istat_entry ent;

	ASSERT(snp != NULL);
	ASSERTinfo(snp->t == T_EVENT, ptree_nodetype2str(snp->t));
	ASSERT(snp->u.event.epname != NULL);

	/* class name should be hoisted into a single stable entry */
	ASSERT(snp->u.event.ename->u.name.next == NULL);
	ent.ename = snp->u.event.ename->u.name.s;
	ent.ipath = ipath(snp->u.event.epname);

	if (!istat_verify(snp, &ent)) {
		/* component no longer exists in system, nothing to do */
		return;
	}

	if ((statp = (struct stats *)
	    lut_lookup(Istats, &ent, (lut_cmp)istat_cmp)) == NULL) {
		/* need to create the counter */
		int cnt = 0;
		struct node *np;
		char *sname;
		char *snamep;
		struct istat_entry *newentp;

		/* count up the size of the stat name */
		np = snp->u.event.ename;
		while (np != NULL) {
			cnt += strlen(np->u.name.s);
			cnt++;	/* for the '.' or '@' */
			np = np->u.name.next;
		}
		np = snp->u.event.epname;
		while (np != NULL) {
			cnt += snprintf(NULL, 0, "%s%llu",
			    np->u.name.s, np->u.name.child->u.ull);
			cnt++;	/* for the '/' or trailing NULL byte */
			np = np->u.name.next;
		}

		/* build the stat name: "ename@comp0/comp1..." */
		snamep = sname = alloca(cnt);
		np = snp->u.event.ename;
		while (np != NULL) {
			snamep += snprintf(snamep, &sname[cnt] - snamep,
			    "%s", np->u.name.s);
			np = np->u.name.next;
			if (np)
				*snamep++ = '.';
		}
		*snamep++ = '@';
		np = snp->u.event.epname;
		while (np != NULL) {
			snamep += snprintf(snamep, &sname[cnt] - snamep,
			    "%s%llu", np->u.name.s, np->u.name.child->u.ull);
			np = np->u.name.next;
			if (np)
				*snamep++ = '/';
		}
		*snamep++ = '\0';

		/* create the new stat & add it to our list */
		newentp = MALLOC(sizeof (*newentp));
		*newentp = ent;
		statp = stats_new_counter(NULL, sname, 0);
		Istats = lut_add(Istats, (void *)newentp, (void *)statp,
		    (lut_cmp)istat_cmp);
	}

	/* if n is non-zero, set that value instead of bumping */
	if (n) {
		stats_counter_reset(statp);
		stats_counter_add(statp, n);
	} else
		stats_counter_bump(statp);
	Istat_need_save = 1;

	ipath_print(O_ALTFP|O_VERB2, ent.ename, ent.ipath);
	out(O_ALTFP|O_VERB2, " %s to value %d", n ? "set" : "incremented",
	    stats_counter_value(statp));
}

/*
 * istat_destructor -- lut_free callback; frees the key and deletes the
 * counter it maps to.
 */
/*ARGSUSED*/
static void
istat_destructor(void *left, void *right, void *arg)
{
	struct istat_entry *entp = (struct istat_entry *)left;
	struct stats *statp = (struct stats *)right;
	FREE(entp);
	stats_delete(statp);
}

/*
 * Callback used in a walk of the Istats to reset matching stat counters.
 */
static void
istat_counter_reset_cb(struct istat_entry *entp, struct stats *statp,
    const struct ipath *ipp)
{
	char *path;

	/* ipaths are interned, so pointer equality identifies the component */
	if (entp->ipath == ipp) {
		path = ipath2str(entp->ename, ipp);
		out(O_ALTFP, "istat_counter_reset_cb: resetting %s", path);
		FREE(path);
		stats_counter_reset(statp);
		Istat_need_save = 1;
	}
}

/*
 * istat_fini -- release all istat counters and their lut entries.
 */
void
istat_fini(void)
{
	lut_free(Istats, istat_destructor, NULL);
}

/*
 * publish_suspects -- trim, sort and de-dup the FME's suspect list,
 * compute per-suspect certainty from FITrates, create the fault
 * nvlists and add them to the fmd case, then either solve the case or
 * (with Dupclose set and every asru already faulty) close it.
 */
static void
publish_suspects(struct fme *fmep)
{
	struct rsl *srl = NULL;
	struct rsl *erl;
	struct rsl *rp;
	nvlist_t *fault;
	uint8_t cert;
	uint_t *frs;
	uint_t fravg, frsum, fr;
	uint_t messval;
	struct node *snp;
	int frcnt, fridx;
	boolean_t no_upsets = B_FALSE;
	boolean_t allfaulty = B_TRUE;

	stats_counter_bump(fmep->diags);

	/*
	 * If we're auto-closing upsets, we don't want to include them
	 * in any produced suspect lists or certainty accounting.
	 */
	if (Autoclose != NULL)
		if (strcmp(Autoclose, "true") == 0 ||
		    strcmp(Autoclose, "all") == 0 ||
		    strcmp(Autoclose, "upsets") == 0)
			no_upsets = B_TRUE;

	trim_suspects(fmep, no_upsets, &srl, &erl);

	/*
	 * If the resulting suspect list has no members, we're
	 * done.  Returning here will simply close the case.
	 */
	if (fmep->nsuspects == 0) {
		out(O_ALTFP,
		    "[FME%d, case %s (all suspects are upsets)]",
		    fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase));
		FREE(srl);
		restore_suspects(fmep);
		return;
	}

	/*
	 * If the suspect list is all faults, then for a given fault,
	 * say X of N, X's certainty is computed via:
	 *
	 *	fitrate(X) / (fitrate(1) + ... + fitrate(N)) * 100
	 *
	 * If none of the suspects are faults, and there are N suspects,
	 * the certainty of a given suspect is 100/N.
	 *
	 * If there are are a mixture of faults and other problems in
	 * the suspect list, we take an average of the faults'
	 * FITrates and treat this average as the FITrate for any
	 * non-faults.  The fitrate of any given suspect is then
	 * computed per the first formula above.
	 */
	if (fmep->nonfault == fmep->nsuspects) {
		/* NO faults in the suspect list */
		cert = percentof(1, fmep->nsuspects);
	} else {
		/* sum the fitrates */
		frs = alloca(fmep->nsuspects * sizeof (uint_t));
		fridx = frcnt = frsum = 0;

		for (rp = srl; rp <= erl; rp++) {
			struct node *n;

			if (rp->suspect == NULL)
				continue;
			if (!is_fault(rp->suspect->t)) {
				/* 0 is a placeholder; filled with fravg below */
				frs[fridx++] = 0;
				continue;
			}
			n = eventprop_lookup(rp->suspect, L_FITrate);
			if (node2uint(n, &fr) != 0) {
				out(O_DEBUG|O_NONL, "event ");
				ipath_print(O_DEBUG|O_NONL,
				    rp->suspect->enode->u.event.ename->u.name.s,
				    rp->suspect->ipp);
				out(O_DEBUG, " has no FITrate (using 1)");
				fr = 1;
			} else if (fr == 0) {
				out(O_DEBUG|O_NONL, "event ");
				ipath_print(O_DEBUG|O_NONL,
				    rp->suspect->enode->u.event.ename->u.name.s,
				    rp->suspect->ipp);
				out(O_DEBUG, " has zero FITrate (using 1)");
				fr = 1;
			}

			frs[fridx++] = fr;
			frsum += fr;
			frcnt++;
		}
		/* substitute the faults' average FITrate for non-faults */
		fravg = avg(frsum, frcnt);
		for (fridx = 0; fridx < fmep->nsuspects; fridx++)
			if (frs[fridx] == 0) {
				frs[fridx] = fravg;
				frsum += fravg;
			}
	}

	/* Add them in reverse order of our sort, as fmd reverses order */
	for (rp = erl; rp >= srl; rp--) {
		if (rp->suspect == NULL)
			continue;
		if (!is_fault(rp->suspect->t))
			allfaulty = B_FALSE;
		/* fridx counts back down the frs[] array built above */
		if (fmep->nonfault != fmep->nsuspects)
			cert = percentof(frs[--fridx], frsum);
		fault = fmd_nvl_create_fault(fmep->hdl,
		    rp->suspect->enode->u.event.ename->u.name.s,
		    cert,
		    rp->asru,
		    rp->fru,
		    rp->rsrc);
		if (fault == NULL)
			out(O_DIE, "fault creation failed");
		/* if "message" property exists, add it to the fault */
		if (node2uint(eventprop_lookup(rp->suspect, L_message),
		    &messval) == 0) {

			out(O_ALTFP,
			    "[FME%d, %s adds message=%d to suspect list]",
			    fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s,
			    messval);
			if (nvlist_add_boolean_value(fault,
			    FM_SUSPECT_MESSAGE,
			    (messval) ? B_TRUE : B_FALSE) != 0) {
				out(O_DIE, "cannot add no-message to fault");
			}
		}
		/* add any payload properties */
		lut_walk(rp->suspect->payloadprops,
		    (lut_cb)addpayloadprop, (void *)fault);
		fmd_case_add_suspect(fmep->hdl, fmep->fmcase, fault);
		rp->suspect->fault = fault;
		rslfree(rp);

		/*
		 * If "action" property exists, evaluate it; this must be done
		 * before the dupclose check below since some actions may
		 * modify the asru to be used in fmd_nvl_fmri_faulty.  This
		 * needs to be restructured if any new actions are introduced
		 * that have effects that we do not want to be visible if
		 * we decide not to publish in the dupclose check below.
		 */
		if ((snp = eventprop_lookup(rp->suspect, L_action)) != NULL) {
			struct evalue evalue;

			out(O_ALTFP|O_NONL,
			    "[FME%d, %s action ", fmep->id,
			    rp->suspect->enode->u.event.ename->u.name.s);
			ptree_name_iter(O_ALTFP|O_NONL, snp);
			out(O_ALTFP, "]");
			Action_nvl = fault;
			(void) eval_expr(snp, NULL, NULL, NULL, NULL,
			    NULL, 0, &evalue);
		}

		/*
		 * if "dupclose" tunable is set, check if the asru is
		 * already marked as "faulty".
		 */
		if (Dupclose && allfaulty) {
			nvlist_t *asru;

			/* NOTE(review): "FMD%d" looks like a typo for "FME%d"
			 * -- confirm before changing the log string */
			out(O_ALTFP|O_VERB, "FMD%d dupclose check ", fmep->id);
			itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, rp->suspect);
			out(O_ALTFP|O_VERB|O_NONL, " ");
			if (nvlist_lookup_nvlist(fault,
			    FM_FAULT_ASRU, &asru) != 0) {
				out(O_ALTFP|O_VERB, "NULL asru");
				allfaulty = B_FALSE;
			} else if (fmd_nvl_fmri_faulty(fmep->hdl, asru)) {
				out(O_ALTFP|O_VERB, "faulty");
			} else {
				out(O_ALTFP|O_VERB, "not faulty");
				allfaulty = B_FALSE;
			}
		}

	}

	/*
	 * Close the case if all asrus are already known to be faulty and if
	 * Dupclose is enabled.  Otherwise we are going to publish so take
	 * any pre-publication actions.
	 */
	if (Dupclose && allfaulty) {
		out(O_ALTFP, "[dupclose FME%d, case %s]", fmep->id,
		    fmd_case_uuid(fmep->hdl, fmep->fmcase));
		fmd_case_close(fmep->hdl, fmep->fmcase);
	} else {
		for (rp = erl; rp >= srl; rp--) {
			struct event *suspect = rp->suspect;

			if (suspect == NULL)
				continue;

			fault = suspect->fault;

			/* if "count" exists, increment the appropriate stat */
			if ((snp = eventprop_lookup(suspect,
			    L_count)) != NULL) {
				out(O_ALTFP|O_NONL,
				    "[FME%d, %s count ", fmep->id,
				    suspect->enode->u.event.ename->u.name.s);
				ptree_name_iter(O_ALTFP|O_NONL, snp);
				out(O_ALTFP, "]");
				istat_bump(snp, 0);

			}
		}
		istat_save();	/* write out any istat changes */

		out(O_ALTFP, "[solving FME%d, case %s]", fmep->id,
		    fmd_case_uuid(fmep->hdl, fmep->fmcase));
		fmd_case_solve(fmep->hdl, fmep->fmcase);
	}

	/*
	 * revert to the original suspect list
	 */
	FREE(srl);
	restore_suspects(fmep);
}

/*
 * publish_undiagnosable -- open a fresh case for an ereport we cannot
 * diagnose, attach an UNDIAGNOSABLE_DEFECT suspect (with Undiag_reason
 * if set), then immediately solve and close the case.  The case is
 * remembered on Undiagablecaselist so fme_close_case() recognizes it.
 */
static void
publish_undiagnosable(fmd_hdl_t *hdl, fmd_event_t *ffep)
{
	struct case_list *newcase;
	nvlist_t *defect;

	out(O_ALTFP,
	    "[undiagnosable ereport received, "
	    "creating and closing a new case (%s)]",
	    Undiag_reason ? Undiag_reason : "reason not provided");

	newcase = MALLOC(sizeof (struct case_list));
	newcase->next = NULL;

	newcase->fmcase = fmd_case_open(hdl, NULL);
	if (Undiagablecaselist != NULL)
		newcase->next = Undiagablecaselist;
	Undiagablecaselist = newcase;

	if (ffep != NULL)
		fmd_case_add_ereport(hdl, newcase->fmcase, ffep);

	defect = fmd_nvl_create_fault(hdl, UNDIAGNOSABLE_DEFECT, 100,
	    NULL, NULL, NULL);
	if (Undiag_reason != NULL)
		(void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason);
	fmd_case_add_suspect(hdl, newcase->fmcase, defect);

	fmd_case_solve(hdl, newcase->fmcase);
	fmd_case_close(hdl, newcase->fmcase);
}

/*
 * fme_undiagnosable -- solve and close an existing FME's case as an
 * UNDIAGNOSABLE_DEFECT, destroying its persistence buffers first.
 */
static void
fme_undiagnosable(struct fme *f)
{
	nvlist_t *defect;

	out(O_ALTFP, "[solving/closing FME%d, case %s (%s)]",
	    f->id, fmd_case_uuid(f->hdl, f->fmcase),
	    Undiag_reason ? Undiag_reason : "undiagnosable");

	defect = fmd_nvl_create_fault(f->hdl, UNDIAGNOSABLE_DEFECT, 100,
	    NULL, NULL, NULL);
	if (Undiag_reason != NULL)
		(void) nvlist_add_string(defect, UNDIAG_REASON, Undiag_reason);
	fmd_case_add_suspect(f->hdl, f->fmcase, defect);
	fmd_case_solve(f->hdl, f->fmcase);
	destroy_fme_bufs(f);
	fmd_case_close(f->hdl, f->fmcase);
}

/*
 * fme_close_case
 *
 *	Find the requested case amongst our fmes and close it.  Free up
 *	the related fme.
2226 */ 2227 void 2228 fme_close_case(fmd_hdl_t *hdl, fmd_case_t *fmcase) 2229 { 2230 struct case_list *ucasep, *prevcasep = NULL; 2231 struct fme *prev = NULL; 2232 struct fme *fmep; 2233 2234 for (ucasep = Undiagablecaselist; ucasep; ucasep = ucasep->next) { 2235 if (fmcase != ucasep->fmcase) { 2236 prevcasep = ucasep; 2237 continue; 2238 } 2239 2240 if (prevcasep == NULL) 2241 Undiagablecaselist = Undiagablecaselist->next; 2242 else 2243 prevcasep->next = ucasep->next; 2244 2245 FREE(ucasep); 2246 return; 2247 } 2248 2249 for (fmep = FMElist; fmep; fmep = fmep->next) { 2250 if (fmep->hdl == hdl && fmep->fmcase == fmcase) 2251 break; 2252 prev = fmep; 2253 } 2254 2255 if (fmep == NULL) { 2256 out(O_WARN, "Eft asked to close unrecognized case [%s].", 2257 fmd_case_uuid(hdl, fmcase)); 2258 return; 2259 } 2260 2261 if (EFMElist == fmep) 2262 EFMElist = prev; 2263 2264 if (prev == NULL) 2265 FMElist = FMElist->next; 2266 else 2267 prev->next = fmep->next; 2268 2269 fmep->next = NULL; 2270 2271 /* Get rid of any timer this fme has set */ 2272 if (fmep->wull != 0) 2273 fmd_timer_remove(fmep->hdl, fmep->timer); 2274 2275 if (ClosedFMEs == NULL) { 2276 ClosedFMEs = fmep; 2277 } else { 2278 fmep->next = ClosedFMEs; 2279 ClosedFMEs = fmep; 2280 } 2281 2282 Open_fme_count--; 2283 2284 /* See if we can close the overflow FME */ 2285 if (Open_fme_count <= Max_fme) { 2286 for (fmep = FMElist; fmep; fmep = fmep->next) { 2287 if (fmep->overflow && !(fmd_case_closed(fmep->hdl, 2288 fmep->fmcase))) 2289 break; 2290 } 2291 2292 if (fmep != NULL) 2293 fmd_case_close(fmep->hdl, fmep->fmcase); 2294 } 2295 } 2296 2297 /* 2298 * fme_set_timer() 2299 * If the time we need to wait for the given FME is less than the 2300 * current timer, kick that old timer out and establish a new one. 
2301 */ 2302 static int 2303 fme_set_timer(struct fme *fmep, unsigned long long wull) 2304 { 2305 out(O_ALTFP|O_VERB|O_NONL, " fme_set_timer: request to wait "); 2306 ptree_timeval(O_ALTFP|O_VERB, &wull); 2307 2308 if (wull <= fmep->pull) { 2309 out(O_ALTFP|O_VERB|O_NONL, "already have waited at least "); 2310 ptree_timeval(O_ALTFP|O_VERB, &fmep->pull); 2311 out(O_ALTFP|O_VERB, NULL); 2312 /* we've waited at least wull already, don't need timer */ 2313 return (0); 2314 } 2315 2316 out(O_ALTFP|O_VERB|O_NONL, " currently "); 2317 if (fmep->wull != 0) { 2318 out(O_ALTFP|O_VERB|O_NONL, "waiting "); 2319 ptree_timeval(O_ALTFP|O_VERB, &fmep->wull); 2320 out(O_ALTFP|O_VERB, NULL); 2321 } else { 2322 out(O_ALTFP|O_VERB|O_NONL, "not waiting"); 2323 out(O_ALTFP|O_VERB, NULL); 2324 } 2325 2326 if (fmep->wull != 0) 2327 if (wull >= fmep->wull) 2328 /* New timer would fire later than established timer */ 2329 return (0); 2330 2331 if (fmep->wull != 0) { 2332 fmd_timer_remove(fmep->hdl, fmep->timer); 2333 } 2334 2335 fmep->timer = fmd_timer_install(fmep->hdl, (void *)fmep, 2336 fmep->e0r, wull); 2337 out(O_ALTFP|O_VERB, "timer set, id is %ld", fmep->timer); 2338 fmep->wull = wull; 2339 return (1); 2340 } 2341 2342 void 2343 fme_timer_fired(struct fme *fmep, id_t tid) 2344 { 2345 struct fme *ffmep = NULL; 2346 2347 for (ffmep = FMElist; ffmep; ffmep = ffmep->next) 2348 if (ffmep == fmep) 2349 break; 2350 2351 if (ffmep == NULL) { 2352 out(O_WARN, "Timer fired for an FME (%p) not in FMEs list.", 2353 (void *)fmep); 2354 return; 2355 } 2356 2357 out(O_ALTFP, "Timer fired %lx", tid); 2358 fmep->pull = fmep->wull; 2359 fmep->wull = 0; 2360 fmd_buf_write(fmep->hdl, fmep->fmcase, 2361 WOBUF_PULL, (void *)&fmep->pull, sizeof (fmep->pull)); 2362 fme_eval(fmep, fmep->e0r); 2363 } 2364 2365 /* 2366 * Preserve the fme's suspect list in its psuspects list, NULLing the 2367 * suspects list in the meantime. 
2368 */ 2369 static void 2370 save_suspects(struct fme *fmep) 2371 { 2372 struct event *ep; 2373 struct event *nextep; 2374 2375 /* zero out the previous suspect list */ 2376 for (ep = fmep->psuspects; ep; ep = nextep) { 2377 nextep = ep->psuspects; 2378 ep->psuspects = NULL; 2379 } 2380 fmep->psuspects = NULL; 2381 2382 /* zero out the suspect list, copying it to previous suspect list */ 2383 fmep->psuspects = fmep->suspects; 2384 for (ep = fmep->suspects; ep; ep = nextep) { 2385 nextep = ep->suspects; 2386 ep->psuspects = ep->suspects; 2387 ep->suspects = NULL; 2388 ep->is_suspect = 0; 2389 } 2390 fmep->suspects = NULL; 2391 fmep->nsuspects = 0; 2392 fmep->nonfault = 0; 2393 } 2394 2395 /* 2396 * Retrieve the fme's suspect list from its psuspects list. 2397 */ 2398 static void 2399 restore_suspects(struct fme *fmep) 2400 { 2401 struct event *ep; 2402 struct event *nextep; 2403 2404 fmep->nsuspects = fmep->nonfault = 0; 2405 fmep->suspects = fmep->psuspects; 2406 for (ep = fmep->psuspects; ep; ep = nextep) { 2407 fmep->nsuspects++; 2408 if (!is_fault(ep->t)) 2409 fmep->nonfault++; 2410 nextep = ep->psuspects; 2411 ep->suspects = ep->psuspects; 2412 } 2413 } 2414 2415 /* 2416 * this is what we use to call the Emrys prototype code instead of main() 2417 */ 2418 static void 2419 fme_eval(struct fme *fmep, fmd_event_t *ffep) 2420 { 2421 struct event *ep; 2422 unsigned long long my_delay = TIMEVAL_EVENTUALLY; 2423 2424 save_suspects(fmep); 2425 2426 out(O_ALTFP|O_VERB, "Evaluate FME %d", fmep->id); 2427 indent_set(" "); 2428 2429 lut_walk(fmep->eventtree, (lut_cb)clear_arrows, (void *)fmep); 2430 fmep->state = hypothesise(fmep, fmep->e0, fmep->ull, &my_delay); 2431 2432 out(O_ALTFP|O_VERB|O_NONL, "FME%d state: %s, suspect list:", fmep->id, 2433 fme_state2str(fmep->state)); 2434 for (ep = fmep->suspects; ep; ep = ep->suspects) { 2435 out(O_ALTFP|O_VERB|O_NONL, " "); 2436 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2437 } 2438 out(O_ALTFP|O_VERB, NULL); 2439 2440 
switch (fmep->state) { 2441 case FME_CREDIBLE: 2442 print_suspects(SLNEW, fmep); 2443 (void) upsets_eval(fmep, ffep); 2444 2445 /* 2446 * we may have already posted suspects in upsets_eval() which 2447 * can recurse into fme_eval() again. If so then just return. 2448 */ 2449 if (fmep->posted_suspects) 2450 return; 2451 2452 publish_suspects(fmep); 2453 fmep->posted_suspects = 1; 2454 fmd_buf_write(fmep->hdl, fmep->fmcase, 2455 WOBUF_POSTD, 2456 (void *)&fmep->posted_suspects, 2457 sizeof (fmep->posted_suspects)); 2458 2459 /* 2460 * Now the suspects have been posted, we can clear up 2461 * the instance tree as we won't be looking at it again. 2462 * Also cancel the timer as the case is now solved. 2463 */ 2464 if (fmep->wull != 0) { 2465 fmd_timer_remove(fmep->hdl, fmep->timer); 2466 fmep->wull = 0; 2467 } 2468 lut_walk(fmep->eventtree, (lut_cb)clear_arrows, 2469 (void *)fmep); 2470 break; 2471 2472 case FME_WAIT: 2473 ASSERT(my_delay > fmep->ull); 2474 (void) fme_set_timer(fmep, my_delay); 2475 print_suspects(SLWAIT, fmep); 2476 break; 2477 2478 case FME_DISPROVED: 2479 print_suspects(SLDISPROVED, fmep); 2480 Undiag_reason = UD_UNSOLVD; 2481 fme_undiagnosable(fmep); 2482 break; 2483 } 2484 2485 if (fmep->posted_suspects == 1 && Autoclose != NULL) { 2486 int doclose = 0; 2487 2488 if (strcmp(Autoclose, "true") == 0 || 2489 strcmp(Autoclose, "all") == 0) 2490 doclose = 1; 2491 2492 if (strcmp(Autoclose, "upsets") == 0) { 2493 doclose = 1; 2494 for (ep = fmep->suspects; ep; ep = ep->suspects) { 2495 if (ep->t != N_UPSET) { 2496 doclose = 0; 2497 break; 2498 } 2499 } 2500 } 2501 2502 if (doclose) { 2503 out(O_ALTFP, "[closing FME%d, case %s (autoclose)]", 2504 fmep->id, fmd_case_uuid(fmep->hdl, fmep->fmcase)); 2505 2506 destroy_fme_bufs(fmep); 2507 fmd_case_close(fmep->hdl, fmep->fmcase); 2508 } 2509 } 2510 itree_prune(fmep->eventtree); 2511 } 2512 2513 static void indent(void); 2514 static int triggered(struct fme *fmep, struct event *ep, int mark); 2515 static enum 
fme_state effects_test(struct fme *fmep, 2516 struct event *fault_event, unsigned long long at_latest_by, 2517 unsigned long long *pdelay); 2518 static enum fme_state requirements_test(struct fme *fmep, struct event *ep, 2519 unsigned long long at_latest_by, unsigned long long *pdelay); 2520 static enum fme_state causes_test(struct fme *fmep, struct event *ep, 2521 unsigned long long at_latest_by, unsigned long long *pdelay); 2522 2523 static int 2524 checkconstraints(struct fme *fmep, struct arrow *arrowp) 2525 { 2526 struct constraintlist *ctp; 2527 struct evalue value; 2528 2529 if (arrowp->forever_false) { 2530 char *sep = ""; 2531 indent(); 2532 out(O_ALTFP|O_VERB|O_NONL, " Forever false constraint: "); 2533 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) { 2534 out(O_ALTFP|O_VERB|O_NONL, sep); 2535 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0); 2536 sep = ", "; 2537 } 2538 out(O_ALTFP|O_VERB, NULL); 2539 return (0); 2540 } 2541 2542 for (ctp = arrowp->constraints; ctp != NULL; ctp = ctp->next) { 2543 if (eval_expr(ctp->cnode, NULL, NULL, 2544 &fmep->globals, fmep->cfgdata->cooked, 2545 arrowp, 0, &value)) { 2546 /* evaluation successful */ 2547 if (value.t == UNDEFINED || value.v == 0) { 2548 /* known false */ 2549 arrowp->forever_false = 1; 2550 indent(); 2551 out(O_ALTFP|O_VERB|O_NONL, 2552 " False constraint: "); 2553 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0); 2554 out(O_ALTFP|O_VERB, NULL); 2555 return (0); 2556 } 2557 } else { 2558 /* evaluation unsuccessful -- unknown value */ 2559 indent(); 2560 out(O_ALTFP|O_VERB|O_NONL, 2561 " Deferred constraint: "); 2562 ptree(O_ALTFP|O_VERB|O_NONL, ctp->cnode, 1, 0); 2563 out(O_ALTFP|O_VERB, NULL); 2564 return (2); 2565 } 2566 } 2567 /* known true */ 2568 return (1); 2569 } 2570 2571 static int 2572 triggered(struct fme *fmep, struct event *ep, int mark) 2573 { 2574 struct bubble *bp; 2575 struct arrowlist *ap; 2576 int count = 0; 2577 2578 stats_counter_bump(fmep->Tcallcount); 2579 for (bp = 
itree_next_bubble(ep, NULL); bp; 2580 bp = itree_next_bubble(ep, bp)) { 2581 if (bp->t != B_TO) 2582 continue; 2583 for (ap = itree_next_arrow(bp, NULL); ap; 2584 ap = itree_next_arrow(bp, ap)) { 2585 /* check count of marks against K in the bubble */ 2586 if ((ap->arrowp->mark & mark) && 2587 ++count >= bp->nork) 2588 return (1); 2589 } 2590 } 2591 return (0); 2592 } 2593 2594 static int 2595 mark_arrows(struct fme *fmep, struct event *ep, int mark, 2596 unsigned long long at_latest_by, unsigned long long *pdelay, int keep) 2597 { 2598 struct bubble *bp; 2599 struct arrowlist *ap; 2600 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 2601 unsigned long long my_delay; 2602 enum fme_state result; 2603 int retval = 0; 2604 2605 for (bp = itree_next_bubble(ep, NULL); bp; 2606 bp = itree_next_bubble(ep, bp)) { 2607 if (bp->t != B_FROM) 2608 continue; 2609 stats_counter_bump(fmep->Marrowcount); 2610 for (ap = itree_next_arrow(bp, NULL); ap; 2611 ap = itree_next_arrow(bp, ap)) { 2612 struct event *ep2 = ap->arrowp->head->myevent; 2613 /* 2614 * if we're clearing marks, we can avoid doing 2615 * all that work evaluating constraints. 
2616 */ 2617 if (mark == 0) { 2618 ap->arrowp->mark &= ~EFFECTS_COUNTER; 2619 if (keep && (ep2->cached_state & 2620 (WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT))) 2621 ep2->keep_in_tree = 1; 2622 ep2->cached_state &= 2623 ~(WAIT_EFFECT|CREDIBLE_EFFECT|PARENT_WAIT); 2624 (void) mark_arrows(fmep, ep2, mark, 0, NULL, 2625 keep); 2626 continue; 2627 } 2628 if (ep2->cached_state & REQMNTS_DISPROVED) { 2629 indent(); 2630 out(O_ALTFP|O_VERB|O_NONL, 2631 " ALREADY DISPROVED "); 2632 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2633 out(O_ALTFP|O_VERB, NULL); 2634 continue; 2635 } 2636 if (ep2->cached_state & WAIT_EFFECT) { 2637 indent(); 2638 out(O_ALTFP|O_VERB|O_NONL, 2639 " ALREADY EFFECTS WAIT "); 2640 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2641 out(O_ALTFP|O_VERB, NULL); 2642 continue; 2643 } 2644 if (ep2->cached_state & CREDIBLE_EFFECT) { 2645 indent(); 2646 out(O_ALTFP|O_VERB|O_NONL, 2647 " ALREADY EFFECTS CREDIBLE "); 2648 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2649 out(O_ALTFP|O_VERB, NULL); 2650 continue; 2651 } 2652 if ((ep2->cached_state & PARENT_WAIT) && 2653 (mark & PARENT_WAIT)) { 2654 indent(); 2655 out(O_ALTFP|O_VERB|O_NONL, 2656 " ALREADY PARENT EFFECTS WAIT "); 2657 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2658 out(O_ALTFP|O_VERB, NULL); 2659 continue; 2660 } 2661 platform_set_payloadnvp(ep2->nvp); 2662 if (checkconstraints(fmep, ap->arrowp) == 0) { 2663 platform_set_payloadnvp(NULL); 2664 indent(); 2665 out(O_ALTFP|O_VERB|O_NONL, 2666 " CONSTRAINTS FAIL "); 2667 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2668 out(O_ALTFP|O_VERB, NULL); 2669 continue; 2670 } 2671 platform_set_payloadnvp(NULL); 2672 ap->arrowp->mark |= EFFECTS_COUNTER; 2673 if (!triggered(fmep, ep2, EFFECTS_COUNTER)) { 2674 indent(); 2675 out(O_ALTFP|O_VERB|O_NONL, 2676 " K-COUNT NOT YET MET "); 2677 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2678 out(O_ALTFP|O_VERB, NULL); 2679 continue; 2680 } 2681 ep2->cached_state &= ~PARENT_WAIT; 2682 /* 2683 * if 
we've reached an ereport and no propagation time 2684 * is specified, use the Hesitate value 2685 */ 2686 if (ep2->t == N_EREPORT && at_latest_by == 0ULL && 2687 ap->arrowp->maxdelay == 0ULL) { 2688 result = requirements_test(fmep, ep2, Hesitate, 2689 &my_delay); 2690 out(O_ALTFP|O_VERB|O_NONL, " default wait "); 2691 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2692 out(O_ALTFP|O_VERB, NULL); 2693 } else { 2694 result = requirements_test(fmep, ep2, 2695 at_latest_by + ap->arrowp->maxdelay, 2696 &my_delay); 2697 } 2698 if (result == FME_WAIT) { 2699 retval = WAIT_EFFECT; 2700 if (overall_delay > my_delay) 2701 overall_delay = my_delay; 2702 ep2->cached_state |= WAIT_EFFECT; 2703 indent(); 2704 out(O_ALTFP|O_VERB|O_NONL, " EFFECTS WAIT "); 2705 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2706 out(O_ALTFP|O_VERB, NULL); 2707 indent_push(" E"); 2708 if (mark_arrows(fmep, ep2, PARENT_WAIT, 2709 at_latest_by, &my_delay, 0) == 2710 WAIT_EFFECT) { 2711 retval = WAIT_EFFECT; 2712 if (overall_delay > my_delay) 2713 overall_delay = my_delay; 2714 } 2715 indent_pop(); 2716 } else if (result == FME_DISPROVED) { 2717 indent(); 2718 out(O_ALTFP|O_VERB|O_NONL, 2719 " EFFECTS DISPROVED "); 2720 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2721 out(O_ALTFP|O_VERB, NULL); 2722 } else { 2723 ep2->cached_state |= mark; 2724 indent(); 2725 if (mark == CREDIBLE_EFFECT) 2726 out(O_ALTFP|O_VERB|O_NONL, 2727 " EFFECTS CREDIBLE "); 2728 else 2729 out(O_ALTFP|O_VERB|O_NONL, 2730 " PARENT EFFECTS WAIT "); 2731 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep2); 2732 out(O_ALTFP|O_VERB, NULL); 2733 indent_push(" E"); 2734 if (mark_arrows(fmep, ep2, mark, at_latest_by, 2735 &my_delay, 0) == WAIT_EFFECT) { 2736 retval = WAIT_EFFECT; 2737 if (overall_delay > my_delay) 2738 overall_delay = my_delay; 2739 } 2740 indent_pop(); 2741 } 2742 } 2743 } 2744 if (retval == WAIT_EFFECT) 2745 *pdelay = overall_delay; 2746 return (retval); 2747 } 2748 2749 static enum fme_state 2750 effects_test(struct 
fme *fmep, struct event *fault_event, 2751 unsigned long long at_latest_by, unsigned long long *pdelay) 2752 { 2753 struct event *error_event; 2754 enum fme_state return_value = FME_CREDIBLE; 2755 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 2756 unsigned long long my_delay; 2757 2758 stats_counter_bump(fmep->Ecallcount); 2759 indent_push(" E"); 2760 indent(); 2761 out(O_ALTFP|O_VERB|O_NONL, "->"); 2762 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event); 2763 out(O_ALTFP|O_VERB, NULL); 2764 2765 if (mark_arrows(fmep, fault_event, CREDIBLE_EFFECT, at_latest_by, 2766 &my_delay, 0) == WAIT_EFFECT) { 2767 return_value = FME_WAIT; 2768 if (overall_delay > my_delay) 2769 overall_delay = my_delay; 2770 } 2771 for (error_event = fmep->observations; 2772 error_event; error_event = error_event->observations) { 2773 indent(); 2774 out(O_ALTFP|O_VERB|O_NONL, " "); 2775 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, error_event); 2776 if (!(error_event->cached_state & CREDIBLE_EFFECT)) { 2777 if (error_event->cached_state & 2778 (PARENT_WAIT|WAIT_EFFECT)) { 2779 out(O_ALTFP|O_VERB, " NOT YET triggered"); 2780 continue; 2781 } 2782 return_value = FME_DISPROVED; 2783 out(O_ALTFP|O_VERB, " NOT triggered"); 2784 break; 2785 } else { 2786 out(O_ALTFP|O_VERB, " triggered"); 2787 } 2788 } 2789 if (return_value == FME_DISPROVED) { 2790 (void) mark_arrows(fmep, fault_event, 0, 0, NULL, 0); 2791 } else { 2792 fault_event->keep_in_tree = 1; 2793 (void) mark_arrows(fmep, fault_event, 0, 0, NULL, 1); 2794 } 2795 2796 indent(); 2797 out(O_ALTFP|O_VERB|O_NONL, "<-EFFECTS %s ", 2798 fme_state2str(return_value)); 2799 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, fault_event); 2800 out(O_ALTFP|O_VERB, NULL); 2801 indent_pop(); 2802 if (return_value == FME_WAIT) 2803 *pdelay = overall_delay; 2804 return (return_value); 2805 } 2806 2807 static enum fme_state 2808 requirements_test(struct fme *fmep, struct event *ep, 2809 unsigned long long at_latest_by, unsigned long long *pdelay) 2810 { 
2811 int waiting_events; 2812 int credible_events; 2813 int deferred_events; 2814 enum fme_state return_value = FME_CREDIBLE; 2815 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 2816 unsigned long long arrow_delay; 2817 unsigned long long my_delay; 2818 struct event *ep2; 2819 struct bubble *bp; 2820 struct arrowlist *ap; 2821 2822 if (ep->cached_state & REQMNTS_CREDIBLE) { 2823 indent(); 2824 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY CREDIBLE "); 2825 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2826 out(O_ALTFP|O_VERB, NULL); 2827 return (FME_CREDIBLE); 2828 } 2829 if (ep->cached_state & REQMNTS_DISPROVED) { 2830 indent(); 2831 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY DISPROVED "); 2832 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2833 out(O_ALTFP|O_VERB, NULL); 2834 return (FME_DISPROVED); 2835 } 2836 if (ep->cached_state & REQMNTS_WAIT) { 2837 indent(); 2838 *pdelay = ep->cached_delay; 2839 out(O_ALTFP|O_VERB|O_NONL, " REQMNTS ALREADY WAIT "); 2840 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2841 out(O_ALTFP|O_VERB|O_NONL, ", wait for: "); 2842 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 2843 out(O_ALTFP|O_VERB, NULL); 2844 return (FME_WAIT); 2845 } 2846 stats_counter_bump(fmep->Rcallcount); 2847 indent_push(" R"); 2848 indent(); 2849 out(O_ALTFP|O_VERB|O_NONL, "->"); 2850 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2851 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: "); 2852 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 2853 out(O_ALTFP|O_VERB, NULL); 2854 2855 if (ep->t == N_EREPORT) { 2856 if (ep->count == 0) { 2857 if (fmep->pull >= at_latest_by) { 2858 return_value = FME_DISPROVED; 2859 } else { 2860 ep->cached_delay = *pdelay = at_latest_by; 2861 return_value = FME_WAIT; 2862 } 2863 } 2864 2865 indent(); 2866 switch (return_value) { 2867 case FME_CREDIBLE: 2868 ep->cached_state |= REQMNTS_CREDIBLE; 2869 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS CREDIBLE "); 2870 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2871 break; 
2872 case FME_DISPROVED: 2873 ep->cached_state |= REQMNTS_DISPROVED; 2874 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED "); 2875 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2876 break; 2877 case FME_WAIT: 2878 ep->cached_state |= REQMNTS_WAIT; 2879 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS WAIT "); 2880 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2881 out(O_ALTFP|O_VERB|O_NONL, " to "); 2882 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 2883 break; 2884 default: 2885 out(O_DIE, "requirements_test: unexpected fme_state"); 2886 break; 2887 } 2888 out(O_ALTFP|O_VERB, NULL); 2889 indent_pop(); 2890 2891 return (return_value); 2892 } 2893 2894 /* this event is not a report, descend the tree */ 2895 for (bp = itree_next_bubble(ep, NULL); bp; 2896 bp = itree_next_bubble(ep, bp)) { 2897 int n; 2898 2899 if (bp->t != B_FROM) 2900 continue; 2901 2902 n = bp->nork; 2903 2904 credible_events = 0; 2905 waiting_events = 0; 2906 deferred_events = 0; 2907 arrow_delay = TIMEVAL_EVENTUALLY; 2908 /* 2909 * n is -1 for 'A' so adjust it. 2910 * XXX just count up the arrows for now. 
2911 */ 2912 if (n < 0) { 2913 n = 0; 2914 for (ap = itree_next_arrow(bp, NULL); ap; 2915 ap = itree_next_arrow(bp, ap)) 2916 n++; 2917 indent(); 2918 out(O_ALTFP|O_VERB, " Bubble Counted N=%d", n); 2919 } else { 2920 indent(); 2921 out(O_ALTFP|O_VERB, " Bubble N=%d", n); 2922 } 2923 2924 if (n == 0) 2925 continue; 2926 if (!(bp->mark & (BUBBLE_ELIDED|BUBBLE_OK))) { 2927 for (ap = itree_next_arrow(bp, NULL); ap; 2928 ap = itree_next_arrow(bp, ap)) { 2929 ep2 = ap->arrowp->head->myevent; 2930 platform_set_payloadnvp(ep2->nvp); 2931 if (checkconstraints(fmep, ap->arrowp) == 0) { 2932 /* 2933 * if any arrow is invalidated by the 2934 * constraints, then we should elide the 2935 * whole bubble to be consistant with 2936 * the tree creation time behaviour 2937 */ 2938 bp->mark |= BUBBLE_ELIDED; 2939 platform_set_payloadnvp(NULL); 2940 break; 2941 } 2942 platform_set_payloadnvp(NULL); 2943 } 2944 } 2945 if (bp->mark & BUBBLE_ELIDED) 2946 continue; 2947 bp->mark |= BUBBLE_OK; 2948 for (ap = itree_next_arrow(bp, NULL); ap; 2949 ap = itree_next_arrow(bp, ap)) { 2950 ep2 = ap->arrowp->head->myevent; 2951 if (n <= credible_events) 2952 break; 2953 2954 ap->arrowp->mark |= REQMNTS_COUNTER; 2955 if (triggered(fmep, ep2, REQMNTS_COUNTER)) 2956 /* XXX adding max timevals! 
*/ 2957 switch (requirements_test(fmep, ep2, 2958 at_latest_by + ap->arrowp->maxdelay, 2959 &my_delay)) { 2960 case FME_DEFERRED: 2961 deferred_events++; 2962 break; 2963 case FME_CREDIBLE: 2964 credible_events++; 2965 break; 2966 case FME_DISPROVED: 2967 break; 2968 case FME_WAIT: 2969 if (my_delay < arrow_delay) 2970 arrow_delay = my_delay; 2971 waiting_events++; 2972 break; 2973 default: 2974 out(O_DIE, 2975 "Bug in requirements_test."); 2976 } 2977 else 2978 deferred_events++; 2979 } 2980 indent(); 2981 out(O_ALTFP|O_VERB, " Credible: %d Waiting %d", 2982 credible_events + deferred_events, waiting_events); 2983 if (credible_events + deferred_events + waiting_events < n) { 2984 /* Can never meet requirements */ 2985 ep->cached_state |= REQMNTS_DISPROVED; 2986 indent(); 2987 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS DISPROVED "); 2988 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 2989 out(O_ALTFP|O_VERB, NULL); 2990 indent_pop(); 2991 return (FME_DISPROVED); 2992 } 2993 if (credible_events + deferred_events < n) { 2994 /* will have to wait */ 2995 /* wait time is shortest known */ 2996 if (arrow_delay < overall_delay) 2997 overall_delay = arrow_delay; 2998 return_value = FME_WAIT; 2999 } else if (credible_events < n) { 3000 if (return_value != FME_WAIT) 3001 return_value = FME_DEFERRED; 3002 } 3003 } 3004 3005 /* 3006 * don't mark as FME_DEFERRED. If this event isn't reached by another 3007 * path, then this will be considered FME_CREDIBLE. But if it is 3008 * reached by a different path so the K-count is met, then might 3009 * get overridden by FME_WAIT or FME_DISPROVED. 
3010 */ 3011 if (return_value == FME_WAIT) { 3012 ep->cached_state |= REQMNTS_WAIT; 3013 ep->cached_delay = *pdelay = overall_delay; 3014 } else if (return_value == FME_CREDIBLE) { 3015 ep->cached_state |= REQMNTS_CREDIBLE; 3016 } 3017 indent(); 3018 out(O_ALTFP|O_VERB|O_NONL, "<-REQMNTS %s ", 3019 fme_state2str(return_value)); 3020 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3021 out(O_ALTFP|O_VERB, NULL); 3022 indent_pop(); 3023 return (return_value); 3024 } 3025 3026 static enum fme_state 3027 causes_test(struct fme *fmep, struct event *ep, 3028 unsigned long long at_latest_by, unsigned long long *pdelay) 3029 { 3030 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 3031 unsigned long long my_delay; 3032 int credible_results = 0; 3033 int waiting_results = 0; 3034 enum fme_state fstate; 3035 struct event *tail_event; 3036 struct bubble *bp; 3037 struct arrowlist *ap; 3038 int k = 1; 3039 3040 stats_counter_bump(fmep->Ccallcount); 3041 indent_push(" C"); 3042 indent(); 3043 out(O_ALTFP|O_VERB|O_NONL, "->"); 3044 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3045 out(O_ALTFP|O_VERB, NULL); 3046 3047 for (bp = itree_next_bubble(ep, NULL); bp; 3048 bp = itree_next_bubble(ep, bp)) { 3049 if (bp->t != B_TO) 3050 continue; 3051 k = bp->nork; /* remember the K value */ 3052 for (ap = itree_next_arrow(bp, NULL); ap; 3053 ap = itree_next_arrow(bp, ap)) { 3054 int do_not_follow = 0; 3055 3056 /* 3057 * if we get to the same event multiple times 3058 * only worry about the first one. 
3059 */ 3060 if (ap->arrowp->tail->myevent->cached_state & 3061 CAUSES_TESTED) { 3062 indent(); 3063 out(O_ALTFP|O_VERB|O_NONL, 3064 " causes test already run for "); 3065 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, 3066 ap->arrowp->tail->myevent); 3067 out(O_ALTFP|O_VERB, NULL); 3068 continue; 3069 } 3070 3071 /* 3072 * see if false constraint prevents us 3073 * from traversing this arrow 3074 */ 3075 platform_set_payloadnvp(ep->nvp); 3076 if (checkconstraints(fmep, ap->arrowp) == 0) 3077 do_not_follow = 1; 3078 platform_set_payloadnvp(NULL); 3079 if (do_not_follow) { 3080 indent(); 3081 out(O_ALTFP|O_VERB|O_NONL, 3082 " False arrow from "); 3083 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, 3084 ap->arrowp->tail->myevent); 3085 out(O_ALTFP|O_VERB, NULL); 3086 continue; 3087 } 3088 3089 ap->arrowp->tail->myevent->cached_state |= 3090 CAUSES_TESTED; 3091 tail_event = ap->arrowp->tail->myevent; 3092 fstate = hypothesise(fmep, tail_event, at_latest_by, 3093 &my_delay); 3094 3095 switch (fstate) { 3096 case FME_WAIT: 3097 if (my_delay < overall_delay) 3098 overall_delay = my_delay; 3099 waiting_results++; 3100 break; 3101 case FME_CREDIBLE: 3102 credible_results++; 3103 break; 3104 case FME_DISPROVED: 3105 break; 3106 default: 3107 out(O_DIE, "Bug in causes_test"); 3108 } 3109 } 3110 } 3111 /* compare against K */ 3112 if (credible_results + waiting_results < k) { 3113 indent(); 3114 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES DISPROVED "); 3115 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3116 out(O_ALTFP|O_VERB, NULL); 3117 indent_pop(); 3118 return (FME_DISPROVED); 3119 } 3120 if (waiting_results != 0) { 3121 *pdelay = overall_delay; 3122 indent(); 3123 out(O_ALTFP|O_VERB|O_NONL, "<-CAUSES WAIT "); 3124 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3125 out(O_ALTFP|O_VERB|O_NONL, " to "); 3126 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3127 out(O_ALTFP|O_VERB, NULL); 3128 indent_pop(); 3129 return (FME_WAIT); 3130 } 3131 indent(); 3132 out(O_ALTFP|O_VERB|O_NONL, 
"<-CAUSES CREDIBLE "); 3133 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3134 out(O_ALTFP|O_VERB, NULL); 3135 indent_pop(); 3136 return (FME_CREDIBLE); 3137 } 3138 3139 static enum fme_state 3140 hypothesise(struct fme *fmep, struct event *ep, 3141 unsigned long long at_latest_by, unsigned long long *pdelay) 3142 { 3143 enum fme_state rtr, otr; 3144 unsigned long long my_delay; 3145 unsigned long long overall_delay = TIMEVAL_EVENTUALLY; 3146 3147 stats_counter_bump(fmep->Hcallcount); 3148 indent_push(" H"); 3149 indent(); 3150 out(O_ALTFP|O_VERB|O_NONL, "->"); 3151 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3152 out(O_ALTFP|O_VERB|O_NONL, ", at latest by: "); 3153 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &at_latest_by); 3154 out(O_ALTFP|O_VERB, NULL); 3155 3156 rtr = requirements_test(fmep, ep, at_latest_by, &my_delay); 3157 if ((rtr == FME_WAIT) && (my_delay < overall_delay)) 3158 overall_delay = my_delay; 3159 if (rtr != FME_DISPROVED) { 3160 if (is_problem(ep->t)) { 3161 otr = effects_test(fmep, ep, at_latest_by, &my_delay); 3162 if (otr != FME_DISPROVED) { 3163 if (fmep->peek == 0 && ep->is_suspect++ == 0) { 3164 ep->suspects = fmep->suspects; 3165 fmep->suspects = ep; 3166 fmep->nsuspects++; 3167 if (!is_fault(ep->t)) 3168 fmep->nonfault++; 3169 } 3170 } 3171 } else 3172 otr = causes_test(fmep, ep, at_latest_by, &my_delay); 3173 if ((otr == FME_WAIT) && (my_delay < overall_delay)) 3174 overall_delay = my_delay; 3175 if ((otr != FME_DISPROVED) && 3176 ((rtr == FME_WAIT) || (otr == FME_WAIT))) 3177 *pdelay = overall_delay; 3178 } 3179 if (rtr == FME_DISPROVED) { 3180 indent(); 3181 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 3182 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3183 out(O_ALTFP|O_VERB, " (doesn't meet requirements)"); 3184 indent_pop(); 3185 return (FME_DISPROVED); 3186 } 3187 if ((otr == FME_DISPROVED) && is_problem(ep->t)) { 3188 indent(); 3189 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 3190 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3191 
out(O_ALTFP|O_VERB, " (doesn't explain all reports)"); 3192 indent_pop(); 3193 return (FME_DISPROVED); 3194 } 3195 if (otr == FME_DISPROVED) { 3196 indent(); 3197 out(O_ALTFP|O_VERB|O_NONL, "<-DISPROVED "); 3198 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3199 out(O_ALTFP|O_VERB, " (causes are not credible)"); 3200 indent_pop(); 3201 return (FME_DISPROVED); 3202 } 3203 if ((rtr == FME_WAIT) || (otr == FME_WAIT)) { 3204 indent(); 3205 out(O_ALTFP|O_VERB|O_NONL, "<-WAIT "); 3206 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3207 out(O_ALTFP|O_VERB|O_NONL, " to "); 3208 ptree_timeval(O_ALTFP|O_VERB|O_NONL, &overall_delay); 3209 out(O_ALTFP|O_VERB, NULL); 3210 indent_pop(); 3211 return (FME_WAIT); 3212 } 3213 indent(); 3214 out(O_ALTFP|O_VERB|O_NONL, "<-CREDIBLE "); 3215 itree_pevent_brief(O_ALTFP|O_VERB|O_NONL, ep); 3216 out(O_ALTFP|O_VERB, NULL); 3217 indent_pop(); 3218 return (FME_CREDIBLE); 3219 } 3220 3221 /* 3222 * fme_istat_load -- reconstitute any persistent istats 3223 */ 3224 void 3225 fme_istat_load(fmd_hdl_t *hdl) 3226 { 3227 int sz; 3228 char *sbuf; 3229 char *ptr; 3230 3231 if ((sz = fmd_buf_size(hdl, NULL, WOBUF_ISTATS)) == 0) { 3232 out(O_ALTFP, "fme_istat_load: No stats"); 3233 return; 3234 } 3235 3236 sbuf = alloca(sz); 3237 3238 fmd_buf_read(hdl, NULL, WOBUF_ISTATS, sbuf, sz); 3239 3240 /* 3241 * pick apart the serialized stats 3242 * 3243 * format is: 3244 * <class-name>, '@', <path>, '\0', <value>, '\0' 3245 * for example: 3246 * "stat.first@stat0/path0\02\0stat.second@stat0/path1\023\0" 3247 * 3248 * since this is parsing our own serialized data, any parsing issues 3249 * are fatal, so we check for them all with ASSERT() below. 
3250 */ 3251 ptr = sbuf; 3252 while (ptr < &sbuf[sz]) { 3253 char *sepptr; 3254 struct node *np; 3255 int val; 3256 3257 sepptr = strchr(ptr, '@'); 3258 ASSERT(sepptr != NULL); 3259 *sepptr = '\0'; 3260 3261 /* construct the event */ 3262 np = newnode(T_EVENT, NULL, 0); 3263 np->u.event.ename = newnode(T_NAME, NULL, 0); 3264 np->u.event.ename->u.name.t = N_STAT; 3265 np->u.event.ename->u.name.s = stable(ptr); 3266 np->u.event.ename->u.name.it = IT_ENAME; 3267 np->u.event.ename->u.name.last = np->u.event.ename; 3268 3269 ptr = sepptr + 1; 3270 ASSERT(ptr < &sbuf[sz]); 3271 ptr += strlen(ptr); 3272 ptr++; /* move past the '\0' separating path from value */ 3273 ASSERT(ptr < &sbuf[sz]); 3274 ASSERT(isdigit(*ptr)); 3275 val = atoi(ptr); 3276 ASSERT(val > 0); 3277 ptr += strlen(ptr); 3278 ptr++; /* move past the final '\0' for this entry */ 3279 3280 np->u.event.epname = pathstring2epnamenp(sepptr + 1); 3281 ASSERT(np->u.event.epname != NULL); 3282 3283 istat_bump(np, val); 3284 tree_free(np); 3285 } 3286 3287 istat_save(); 3288 } 3289